调整reward

2021-12-20 16:04:33 +08:00 · 2021-12-20 16:04:33 +08:00 · 7e190c2353
parent dba179db0e
commit 7e190c2353
1 changed files with 7 additions and 7 deletions
--- a/douzero/env/env.py
+++ b/douzero/env/env.py
@@ -46,6 +46,7 @@ class Env:
        will perform the actual action in the game engine.
        """
        self.objective = objective
+        self.use_legacy = legacy_model
        self.use_general = not old_model

        # Initialize players
@@ -93,7 +94,6 @@ class Env:
                              }
            for key in card_play_data:
                card_play_data[key].sort()
-            card_play_data["landlord"].sort()
            player_ids = {
                'landlord': 0,
                'landlord_down': 1,
@@ -150,18 +150,18 @@ class Env:
        self_bomb_num = self._env.pos_bomb_num[pos]
        if winner == 'landlord':
            if self.objective == 'adp':
-                return (1.1 - self._env.step_count * 0.00125) * (1.3 ** bomb_num[0]) * (1.95 ** bomb_num[1]) / 8
+                return (1.1 - self._env.step_count * 0.0005) * (1.3 ** bomb_num[0]) * (1.95 ** bomb_num[1]) / 8
            elif self.objective == 'logadp':
-                return (1.0 - self._env.step_count * 0.00125) * 1.3**self_bomb_num / 4
+                return (1.0 - self._env.step_count * 0.0005) * 1.3**self_bomb_num / 4
            else:
-                return 1.0 - self._env.step_count * 0.00125
+                return 1.0 - self._env.step_count * 0.0005
        else:
            if self.objective == 'adp':
-                return (-1.1 + self._env.step_count * 0.00125) * (1.3 ** bomb_num[0]) * (1.95 ** bomb_num[1]) / 8
+                return (-1.1 + self._env.step_count * 0.0005) * (1.3 ** bomb_num[0]) * (1.95 ** bomb_num[1]) / 8
            elif self.objective == 'logadp':
-                return (-1.0 + self._env.step_count * 0.00125) * 1.3**(self_bomb_num) / 4
+                return (-1.0 + self._env.step_count * 0.0005) * 1.3**(self_bomb_num) / 4
            else:
-                return -1.0 + self._env.step_count * 0.00125
+                return -1.0 + self._env.step_count * 0.0005

    @property
    def _game_infoset(self):