diff --git a/douzero/env/env.py b/douzero/env/env.py index 91947e2..550e8b2 100644 --- a/douzero/env/env.py +++ b/douzero/env/env.py @@ -283,17 +283,14 @@ class Env: self_bomb_num = self._env.pos_bomb_num[pos] if winner == 'landlord': if self.objective == 'adp': - return (1.1 - self._env.step_count * 0.0033) * 1.3 ** (bomb_num[0] + bomb_num[1] + self._env.multiply_count[pos]) /8 - return (2.0 ** bomb_num[0]) * (3.0 ** bomb_num[1]) + return (1.1 - self._env.step_count * 0.0033) * (1.3 ** bomb_num[0]) * (1.95 ** bomb_num[1]) / 8 elif self.objective == 'logadp': return (1.0 - self._env.step_count * 0.0033) * 1.3**self_bomb_num * 2**self._env.multiply_count[pos] / 4 - return bomb_num[0] + bomb_num[1] + 1.0 else: return 1.0 - self._env.step_count * 0.0033 - return 1.0 else: if self.objective == 'adp': - return (-1.1 - self._env.step_count * 0.0033) * 1.3 ** (bomb_num[0] + bomb_num[1] +self._env.multiply_count[pos]) /8 + return (-1.1 + self._env.step_count * 0.0033) * (1.3 ** bomb_num[0]) * (1.95 ** bomb_num[1]) / 8 elif self.objective == 'logadp': return (-1.0 + self._env.step_count * 0.0033) * 1.3**(self_bomb_num) * 2**self._env.multiply_count[pos] / 4 else: diff --git a/douzero/env/game.py b/douzero/env/game.py index 99c00d5..4afdc3f 100644 --- a/douzero/env/game.py +++ b/douzero/env/game.py @@ -139,8 +139,7 @@ class GameEnv(object): def step(self): action = self.players[self.acting_player_position].act( self.game_infoset) - assert action in self.game_infoset.legal_actions - + self.step_count += 1 if len(action) > 0: self.last_pid = self.acting_player_position