修复BUG

2021-12-15 10:03:26 +08:00 · 2021-12-15 10:03:26 +08:00 · 6fa590a697
parent 9f2e8f74f3
commit 6fa590a697
4 changed files with 7 additions and 10 deletions
--- a/douzero/dmc/dmc.py
+++ b/douzero/dmc/dmc.py
@@ -19,7 +19,6 @@ import psutil
 import shutil

 mean_episode_return_buf = {p:deque(maxlen=100) for p in ['landlord', 'landlord_up', 'landlord_front', 'landlord_down', 'bidding']}
-onnx_frame = mp.Value('d', -1)

 def compute_loss(logits, targets):
    loss = ((logits.squeeze(-1) - targets)**2).mean()
@@ -80,7 +79,6 @@ def train(flags):
    Then it will start subprocesses as actors. Then, it will call
    learning function with  multiple threads.
    """
-    global onnx_frame
    if not flags.actor_device_cpu or flags.training_device != 'cpu':
        if not torch.cuda.is_available():
            raise AssertionError("CUDA not available. If you have GPUs, please specify the ID after `--gpu_devices`. Otherwise, please train with CPU with `python3 train.py --actor_device_cpu --training_device cpu`")
@@ -116,6 +114,7 @@ def train(flags):
    actor_processes = []
    ctx = mp.get_context('spawn')
    batch_queues = {"landlord": ctx.SimpleQueue(), "landlord_up": ctx.SimpleQueue(), 'landlord_front': ctx.SimpleQueue(), "landlord_down": ctx.SimpleQueue(), "bidding": ctx.SimpleQueue()}
+    onnx_frame = ctx.Value('d', -1)

    # Learner model for training
    if flags.old_model:
@@ -216,7 +215,6 @@ def train(flags):
                threads.append(thread)

    def checkpoint(frames):
-        global onnx_frame
        if flags.disable_checkpoint:
            return
        log.info('Saving checkpoint to %s', checkpointpath)
@@ -256,7 +254,7 @@ def train(flags):
                        }
                    }
                )
-        onnx_frame = frames
+        onnx_frame.value = frames
        shutil.move(checkpointpath + '.new', checkpointpath)


--- a/douzero/dmc/models.py
+++ b/douzero/dmc/models.py
@@ -495,7 +495,7 @@ class Model:
        self.models['bidding'] = BidModel().to(torch.device(device))

    def set_onnx_model(self, position, model_path):
-        self.onnx_models[position] = get_example(model_path)
+        self.onnx_models[position] = onnxruntime.InferenceSession(get_example(model_path))

    def forward(self, position, z, x, return_value=False, flags=None, debug=False):
        model = self.onnx_models[position]
@@ -503,8 +503,7 @@ class Model:
            model = self.models[position]
            values = model.forward(z, x)['values']
        else:
-            sess = onnxruntime.InferenceSession(model)
-            onnx_out = sess.run(None, {'z_batch': to_numpy(z), 'x_batch': to_numpy(x)})
+            onnx_out = model.run(None, {'z_batch': to_numpy(z), 'x_batch': to_numpy(x)})
            values = torch.tensor(onnx_out[0])
        if return_value:
            return dict(values=values)
--- a/douzero/dmc/utils.py
+++ b/douzero/dmc/utils.py
@@ -113,8 +113,8 @@ def act(i, device, batch_queues, model, flags, onnx_frame):
        last_onnx_frame = -1
        while True:
            # print("posi", position)
-            if onnx_frame != last_onnx_frame:
-                last_onnx_frame = onnx_frame
+            if onnx_frame.value != last_onnx_frame:
+                last_onnx_frame = onnx_frame.value
                for p in positions:
                    if p != 'bidding':
                        model_path = '%s/%s/model_%s.onnx' % (flags.savedir, flags.xpid, p)
--- a/douzero/evaluation/deep_agent.py
+++ b/douzero/evaluation/deep_agent.py
@@ -49,7 +49,7 @@ class DeepAgent:
        x_batch = torch.from_numpy(obs['x_batch']).float()
        if torch.cuda.is_available():
            z_batch, x_batch = z_batch.cuda(), x_batch.cuda()
-        y_pred = self.model.forward(z_batch, x_batch, return_value=True)['values']
+        y_pred = self.model.forward(z_batch, x_batch)['values']
        y_pred = y_pred.detach().cpu().numpy()

        best_action_index = np.argmax(y_pred, axis=0)[0]