infer逻辑调整 (Adjust inference logic)

This commit is contained in:
zhiyang7 2022-01-04 11:12:36 +08:00
parent 4571bb3dfc
commit 2b8586303b
4 changed files with 14 additions and 13 deletions

View File

@@ -13,19 +13,17 @@ parser.add_argument('--objective', default='adp', type=str, choices=['adp', 'wp'
# Training settings
parser.add_argument('--onnx_sync_interval', default=120, type=int,
help='Time interval (in seconds) at which to sync the onnx model')
parser.add_argument('--actor_device_cpu', action='store_true',
help='Use CPU as actor device')
parser.add_argument('--gpu_devices', default='0', type=str,
help='Which GPUs to be used for training')
parser.add_argument('--infer_devices', default='0', type=str,
help='Which device to be used for infer')
parser.add_argument('--num_infer', default=3, type=int,
parser.add_argument('--num_infer', default=2, type=int,
help='The number of process used for infer')
parser.add_argument('--num_actor_devices', default=1, type=int,
help='The number of devices used for simulation')
parser.add_argument('--num_actors', default=4, type=int,
parser.add_argument('--num_actors', default=3, type=int,
help='The number of actors for each simulation device')
parser.add_argument('--num_actors_thread', default=6, type=int,
parser.add_argument('--num_actors_thread', default=4, type=int,
help='The number of actors for each simulation device')
parser.add_argument('--training_device', default='0', type=str,
help='The index of the GPU used for training models. `cpu` means using cpu')

View File

@@ -70,9 +70,9 @@ def train(flags):
Then it will start subprocesses as actors. Then, it will call
learning function with multiple threads.
"""
if not flags.actor_device_cpu or flags.training_device != 'cpu':
if flags.training_device != 'cpu' or flags.infer_devices != 'cpu':
if not torch.cuda.is_available():
raise AssertionError("CUDA not available. If you have GPUs, please specify the ID after `--gpu_devices`. Otherwise, please train with CPU with `python3 train.py --actor_device_cpu --training_device cpu`")
raise AssertionError("CUDA not available. If you have GPUs, please specify the ID after `--gpu_devices`. Otherwise, please train with CPU with `python3 train.py --infer_devices cpu --training_device cpu`")
plogger = FileWriter(
xpid=flags.xpid,
xp_args=flags.__dict__,

View File

@@ -493,8 +493,11 @@ model_dict_new_lite['landlord_up'] = GeneralModelLite
model_dict_new_lite['landlord_front'] = GeneralModelLite
model_dict_new_lite['landlord_down'] = GeneralModelLite
def forward_logic(self_model, position, z, x, return_value=False, flags=None):
def forward_logic(self_model, position, z, x, device='cpu', return_value=False, flags=None):
legal_count = len(z)
if not flags.enable_onnx:
z = torch.tensor(z, device=device)
x = torch.tensor(x, device=device)
if legal_count >= 80:
partition_count = int(legal_count / 40)
sub_z = np.array_split(z, partition_count)
@@ -577,8 +580,8 @@ class OldModel:
def get_onnx_params(self, position):
self.models[position].get_onnx_params(self.device)
def forward(self, position, z, x, return_value=False, flags=None):
return forward_logic(self, position, z, x, return_value, flags)
def forward(self, position, z, x, device='cpu', return_value=False, flags=None):
return forward_logic(self, position, z, x, device, return_value, flags)
def share_memory(self):
if self.models['landlord'] is not None:
@@ -646,8 +649,8 @@ class Model:
def get_onnx_params(self, position):
self.models[position].get_onnx_params(self.device)
def forward(self, position, z, x, return_value=False, flags=None, debug=False):
return forward_logic(self, position, z, x, return_value, flags)
def forward(self, position, z, x, device='cpu', return_value=False, flags=None):
return forward_logic(self, position, z, x, device, return_value, flags)
def share_memory(self):
if self.models['landlord'] is not None:

View File

@@ -131,7 +131,7 @@ def infer_logic(i, device, infer_queues, model, flags, onnx_frame):
try:
task = infer_queue['input'].get_nowait()
with torch.no_grad():
result = model.forward(task['position'], task['z_batch'], task['x_batch'], return_value=True, flags=flags)
result = model.forward(task['position'], task['z_batch'], task['x_batch'], device=device, return_value=True, flags=flags)
infer_queue['output'].put({
'values': result['values']
})