diff --git a/douzero/env/env.py b/douzero/env/env.py index 75b2bd2..822ee32 100644 --- a/douzero/env/env.py +++ b/douzero/env/env.py @@ -46,6 +46,7 @@ class Env: will perform the actual action in the game engine. """ self.objective = objective + self.use_legacy = legacy_model self.use_general = not old_model # Initialize players @@ -93,7 +94,6 @@ class Env: } for key in card_play_data: card_play_data[key].sort() - card_play_data["landlord"].sort() player_ids = { 'landlord': 0, 'landlord_down': 1, @@ -150,18 +150,18 @@ class Env: self_bomb_num = self._env.pos_bomb_num[pos] if winner == 'landlord': if self.objective == 'adp': - return (1.1 - self._env.step_count * 0.00125) * (1.3 ** bomb_num[0]) * (1.95 ** bomb_num[1]) / 8 + return (1.1 - self._env.step_count * 0.0005) * (1.3 ** bomb_num[0]) * (1.95 ** bomb_num[1]) / 8 elif self.objective == 'logadp': - return (1.0 - self._env.step_count * 0.00125) * 1.3**self_bomb_num / 4 + return (1.0 - self._env.step_count * 0.0005) * 1.3**self_bomb_num / 4 else: - return 1.0 - self._env.step_count * 0.00125 + return 1.0 - self._env.step_count * 0.0005 else: if self.objective == 'adp': - return (-1.1 + self._env.step_count * 0.00125) * (1.3 ** bomb_num[0]) * (1.95 ** bomb_num[1]) / 8 + return (-1.1 + self._env.step_count * 0.0005) * (1.3 ** bomb_num[0]) * (1.95 ** bomb_num[1]) / 8 elif self.objective == 'logadp': - return (-1.0 + self._env.step_count * 0.00125) * 1.3**(self_bomb_num) / 4 + return (-1.0 + self._env.step_count * 0.0005) * 1.3**(self_bomb_num) / 4 else: - return -1.0 + self._env.step_count * 0.00125 + return -1.0 + self._env.step_count * 0.0005 @property def _game_infoset(self):