From 6fa590a697aa3710a5c8007eb8e5233613e3d77d Mon Sep 17 00:00:00 2001 From: zhiyang7 Date: Wed, 15 Dec 2021 10:03:26 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8DBUG?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- douzero/dmc/dmc.py | 6 ++---- douzero/dmc/models.py | 5 ++--- douzero/dmc/utils.py | 4 ++-- douzero/evaluation/deep_agent.py | 2 +- 4 files changed, 7 insertions(+), 10 deletions(-) diff --git a/douzero/dmc/dmc.py b/douzero/dmc/dmc.py index 9ecdb5d..9e0444f 100644 --- a/douzero/dmc/dmc.py +++ b/douzero/dmc/dmc.py @@ -19,7 +19,6 @@ import psutil import shutil mean_episode_return_buf = {p:deque(maxlen=100) for p in ['landlord', 'landlord_up', 'landlord_front', 'landlord_down', 'bidding']} -onnx_frame = mp.Value('d', -1) def compute_loss(logits, targets): loss = ((logits.squeeze(-1) - targets)**2).mean() @@ -80,7 +79,6 @@ def train(flags): Then it will start subprocesses as actors. Then, it will call learning function with multiple threads. """ - global onnx_frame if not flags.actor_device_cpu or flags.training_device != 'cpu': if not torch.cuda.is_available(): raise AssertionError("CUDA not available. If you have GPUs, please specify the ID after `--gpu_devices`. Otherwise, please train with CPU with `python3 train.py --actor_device_cpu --training_device cpu`") @@ -116,6 +114,7 @@ def train(flags): actor_processes = [] ctx = mp.get_context('spawn') batch_queues = {"landlord": ctx.SimpleQueue(), "landlord_up": ctx.SimpleQueue(), 'landlord_front': ctx.SimpleQueue(), "landlord_down": ctx.SimpleQueue(), "bidding": ctx.SimpleQueue()} + onnx_frame = ctx.Value('d', -1) # Learner model for training if flags.old_model: @@ -216,7 +215,6 @@ def train(flags): threads.append(thread) def checkpoint(frames): - global onnx_frame if flags.disable_checkpoint: return log.info('Saving checkpoint to %s', checkpointpath) @@ -256,7 +254,7 @@ def train(flags): } } ) - onnx_frame = frames + onnx_frame.value = frames shutil.move(checkpointpath + '.new', checkpointpath) diff --git a/douzero/dmc/models.py b/douzero/dmc/models.py index 42aa19d..d943958 100644 --- a/douzero/dmc/models.py +++ b/douzero/dmc/models.py @@ -495,7 +495,7 @@ class Model: self.models['bidding'] = BidModel().to(torch.device(device)) def set_onnx_model(self, position, model_path): - self.onnx_models[position] = get_example(model_path) + self.onnx_models[position] = onnxruntime.InferenceSession(get_example(model_path)) def forward(self, position, z, x, return_value=False, flags=None, debug=False): model = self.onnx_models[position] @@ -503,8 +503,7 @@ class Model: model = self.models[position] values = model.forward(z, x)['values'] else: - sess = onnxruntime.InferenceSession(model) - onnx_out = sess.run(None, {'z_batch': to_numpy(z), 'x_batch': to_numpy(x)}) + onnx_out = model.run(None, {'z_batch': to_numpy(z), 'x_batch': to_numpy(x)}) values = torch.tensor(onnx_out[0]) if return_value: return dict(values=values) diff --git a/douzero/dmc/utils.py b/douzero/dmc/utils.py index 7e84fbb..94a67fe 100644 --- a/douzero/dmc/utils.py +++ b/douzero/dmc/utils.py @@ -113,8 +113,8 @@ def act(i, device, batch_queues, model, flags, onnx_frame): last_onnx_frame = -1 while True: # print("posi", position) - if onnx_frame != last_onnx_frame: - last_onnx_frame = onnx_frame + if onnx_frame.value != last_onnx_frame: + last_onnx_frame = onnx_frame.value for p in positions: if p != 'bidding': model_path = '%s/%s/model_%s.onnx' % (flags.savedir, flags.xpid, p) diff --git a/douzero/evaluation/deep_agent.py b/douzero/evaluation/deep_agent.py index 4c99765..d907651 100644 --- a/douzero/evaluation/deep_agent.py +++ b/douzero/evaluation/deep_agent.py @@ -49,7 +49,7 @@ class DeepAgent: x_batch = torch.from_numpy(obs['x_batch']).float() if torch.cuda.is_available(): z_batch, x_batch = z_batch.cuda(), x_batch.cuda() - y_pred = self.model.forward(z_batch, x_batch, return_value=True)['values'] + y_pred = self.model.forward(z_batch, x_batch)['values'] y_pred = y_pred.detach().cpu().numpy() best_action_index = np.argmax(y_pred, axis=0)[0]