From 6fa590a697aa3710a5c8007eb8e5233613e3d77d Mon Sep 17 00:00:00 2001
From: zhiyang7 <zhiyang7@iflytek.com>
Date: Wed, 15 Dec 2021 10:03:26 +0800
Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8DBUG?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

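Summary of the changes in this patch:

* douzero/dmc/dmc.py: drop the module-level onnx_frame and the
  `global onnx_frame` statements; create onnx_frame inside train() with
  ctx.Value('d', -1) from the 'spawn' context, and have checkpoint()
  assign onnx_frame.value = frames instead of rebinding the name.
* douzero/dmc/models.py: build the onnxruntime.InferenceSession once in
  set_onnx_model() and reuse it in forward(), instead of constructing a
  new session on every forward() call.
* douzero/dmc/utils.py: compare and store onnx_frame.value in act()
  rather than the Value object itself.
* douzero/evaluation/deep_agent.py: call self.model.forward() without
  the return_value=True argument.
---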
 douzero/dmc/dmc.py               | 6 ++----
 douzero/dmc/models.py            | 5 ++---
 douzero/dmc/utils.py             | 4 ++--
 douzero/evaluation/deep_agent.py | 2 +-
 4 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/douzero/dmc/dmc.py b/douzero/dmc/dmc.py
index 9ecdb5d..9e0444f 100644
--- a/douzero/dmc/dmc.py
+++ b/douzero/dmc/dmc.py
@@ -19,7 +19,6 @@ import psutil
 import shutil
 
 mean_episode_return_buf = {p:deque(maxlen=100) for p in ['landlord', 'landlord_up', 'landlord_front', 'landlord_down', 'bidding']}
-onnx_frame = mp.Value('d', -1)
 
 def compute_loss(logits, targets):
     loss = ((logits.squeeze(-1) - targets)**2).mean()
@@ -80,7 +79,6 @@ def train(flags):
     Then it will start subprocesses as actors. Then, it will call
     learning function with  multiple threads.
     """
-    global onnx_frame
     if not flags.actor_device_cpu or flags.training_device != 'cpu':
         if not torch.cuda.is_available():
             raise AssertionError("CUDA not available. If you have GPUs, please specify the ID after `--gpu_devices`. Otherwise, please train with CPU with `python3 train.py --actor_device_cpu --training_device cpu`")
@@ -116,6 +114,7 @@ def train(flags):
     actor_processes = []
     ctx = mp.get_context('spawn')
     batch_queues = {"landlord": ctx.SimpleQueue(), "landlord_up": ctx.SimpleQueue(), 'landlord_front': ctx.SimpleQueue(), "landlord_down": ctx.SimpleQueue(), "bidding": ctx.SimpleQueue()}
+    onnx_frame = ctx.Value('d', -1)
 
     # Learner model for training
     if flags.old_model:
@@ -216,7 +215,6 @@ def train(flags):
                 threads.append(thread)
 
     def checkpoint(frames):
-        global onnx_frame
         if flags.disable_checkpoint:
             return
         log.info('Saving checkpoint to %s', checkpointpath)
@@ -256,7 +254,7 @@ def train(flags):
                         }
                     }
                 )
-        onnx_frame = frames
+        onnx_frame.value = frames
         shutil.move(checkpointpath + '.new', checkpointpath)
 
 
diff --git a/douzero/dmc/models.py b/douzero/dmc/models.py
index 42aa19d..d943958 100644
--- a/douzero/dmc/models.py
+++ b/douzero/dmc/models.py
@@ -495,7 +495,7 @@ class Model:
         self.models['bidding'] = BidModel().to(torch.device(device))
 
     def set_onnx_model(self, position, model_path):
-        self.onnx_models[position] = get_example(model_path)
+        self.onnx_models[position] = onnxruntime.InferenceSession(get_example(model_path))
 
     def forward(self, position, z, x, return_value=False, flags=None, debug=False):
         model = self.onnx_models[position]
@@ -503,8 +503,7 @@ class Model:
             model = self.models[position]
             values = model.forward(z, x)['values']
         else:
-            sess = onnxruntime.InferenceSession(model)
-            onnx_out = sess.run(None, {'z_batch': to_numpy(z), 'x_batch': to_numpy(x)})
+            onnx_out = model.run(None, {'z_batch': to_numpy(z), 'x_batch': to_numpy(x)})
             values = torch.tensor(onnx_out[0])
         if return_value:
             return dict(values=values)
diff --git a/douzero/dmc/utils.py b/douzero/dmc/utils.py
index 7e84fbb..94a67fe 100644
--- a/douzero/dmc/utils.py
+++ b/douzero/dmc/utils.py
@@ -113,8 +113,8 @@ def act(i, device, batch_queues, model, flags, onnx_frame):
         last_onnx_frame = -1
         while True:
             # print("posi", position)
-            if onnx_frame != last_onnx_frame:
-                last_onnx_frame = onnx_frame
+            if onnx_frame.value != last_onnx_frame:
+                last_onnx_frame = onnx_frame.value
                 for p in positions:
                     if p != 'bidding':
                         model_path = '%s/%s/model_%s.onnx' % (flags.savedir, flags.xpid, p)
diff --git a/douzero/evaluation/deep_agent.py b/douzero/evaluation/deep_agent.py
index 4c99765..d907651 100644
--- a/douzero/evaluation/deep_agent.py
+++ b/douzero/evaluation/deep_agent.py
@@ -49,7 +49,7 @@ class DeepAgent:
         x_batch = torch.from_numpy(obs['x_batch']).float()
         if torch.cuda.is_available():
             z_batch, x_batch = z_batch.cuda(), x_batch.cuda()
-        y_pred = self.model.forward(z_batch, x_batch, return_value=True)['values']
+        y_pred = self.model.forward(z_batch, x_batch)['values']
         y_pred = y_pred.detach().cpu().numpy()
 
         best_action_index = np.argmax(y_pred, axis=0)[0]