diff --git a/douzero/dmc/arguments.py b/douzero/dmc/arguments.py index 7f4b9f2..06190b5 100644 --- a/douzero/dmc/arguments.py +++ b/douzero/dmc/arguments.py @@ -13,19 +13,17 @@ parser.add_argument('--objective', default='adp', type=str, choices=['adp', 'wp' # Training settings parser.add_argument('--onnx_sync_interval', default=120, type=int, help='Time interval (in seconds) at which to sync the onnx model') -parser.add_argument('--actor_device_cpu', action='store_true', - help='Use CPU as actor device') parser.add_argument('--gpu_devices', default='0', type=str, help='Which GPUs to be used for training') parser.add_argument('--infer_devices', default='0', type=str, help='Which device to be used for infer') -parser.add_argument('--num_infer', default=3, type=int, +parser.add_argument('--num_infer', default=2, type=int, help='The number of process used for infer') parser.add_argument('--num_actor_devices', default=1, type=int, help='The number of devices used for simulation') -parser.add_argument('--num_actors', default=4, type=int, +parser.add_argument('--num_actors', default=3, type=int, help='The number of actors for each simulation device') -parser.add_argument('--num_actors_thread', default=6, type=int, +parser.add_argument('--num_actors_thread', default=4, type=int, help='The number of actors for each simulation device') parser.add_argument('--training_device', default='0', type=str, help='The index of the GPU used for training models. `cpu` means using cpu') diff --git a/douzero/dmc/dmc.py b/douzero/dmc/dmc.py index cf8ed83..20febab 100644 --- a/douzero/dmc/dmc.py +++ b/douzero/dmc/dmc.py @@ -70,9 +70,9 @@ def train(flags): Then it will start subprocesses as actors. Then, it will call learning function with multiple threads. """ - if not flags.actor_device_cpu or flags.training_device != 'cpu': + if flags.training_device != 'cpu' or flags.infer_devices != 'cpu': if not torch.cuda.is_available(): - raise AssertionError("CUDA not available. If you have GPUs, please specify the ID after `--gpu_devices`. Otherwise, please train with CPU with `python3 train.py --actor_device_cpu --training_device cpu`") + raise AssertionError("CUDA not available. If you have GPUs, please specify the ID after `--gpu_devices`. Otherwise, please train with CPU with `python3 train.py --infer_devices cpu --training_device cpu`") plogger = FileWriter( xpid=flags.xpid, xp_args=flags.__dict__, diff --git a/douzero/dmc/models.py b/douzero/dmc/models.py index 89a2793..3fce8ed 100644 --- a/douzero/dmc/models.py +++ b/douzero/dmc/models.py @@ -493,8 +493,11 @@ model_dict_new_lite['landlord_up'] = GeneralModelLite model_dict_new_lite['landlord_front'] = GeneralModelLite model_dict_new_lite['landlord_down'] = GeneralModelLite -def forward_logic(self_model, position, z, x, return_value=False, flags=None): +def forward_logic(self_model, position, z, x, device='cpu', return_value=False, flags=None): legal_count = len(z) + if not flags.enable_onnx: + z = torch.tensor(z, device=device) + x = torch.tensor(x, device=device) if legal_count >= 80: partition_count = int(legal_count / 40) sub_z = np.array_split(z, partition_count) @@ -577,8 +580,8 @@ class OldModel: def get_onnx_params(self, position): self.models[position].get_onnx_params(self.device) - def forward(self, position, z, x, return_value=False, flags=None): - return forward_logic(self, position, z, x, return_value, flags) + def forward(self, position, z, x, device='cpu', return_value=False, flags=None): + return forward_logic(self, position, z, x, device, return_value, flags) def share_memory(self): if self.models['landlord'] is not None: @@ -646,8 +649,8 @@ class Model: def get_onnx_params(self, position): self.models[position].get_onnx_params(self.device) - def forward(self, position, z, x, return_value=False, flags=None, debug=False): - return forward_logic(self, position, z, x, return_value, flags) + def forward(self, position, z, x, device='cpu', return_value=False, flags=None): + return forward_logic(self, position, z, x, device, return_value, flags) def share_memory(self): if self.models['landlord'] is not None: diff --git a/douzero/dmc/utils.py b/douzero/dmc/utils.py index bb8b53f..646cbc8 100644 --- a/douzero/dmc/utils.py +++ b/douzero/dmc/utils.py @@ -131,7 +131,7 @@ def infer_logic(i, device, infer_queues, model, flags, onnx_frame): try: task = infer_queue['input'].get_nowait() with torch.no_grad(): - result = model.forward(task['position'], task['z_batch'], task['x_batch'], return_value=True, flags=flags) + result = model.forward(task['position'], task['z_batch'], task['x_batch'], device=device, return_value=True, flags=flags) infer_queue['output'].put({ 'values': result['values'] })