调整激励算法

This commit is contained in:
zhiyang7 2021-12-07 10:33:18 +08:00
parent cfa9da6b2c
commit c7f105d20d
2 changed files with 3 additions and 7 deletions

7
douzero/env/env.py vendored
View File

@@ -283,17 +283,14 @@ class Env:
self_bomb_num = self._env.pos_bomb_num[pos]
if winner == 'landlord':
if self.objective == 'adp':
return (1.1 - self._env.step_count * 0.0033) * 1.3 ** (bomb_num[0] + bomb_num[1] + self._env.multiply_count[pos]) /8
return (2.0 ** bomb_num[0]) * (3.0 ** bomb_num[1])
return (1.1 - self._env.step_count * 0.0033) * (1.3 ** bomb_num[0]) * (1.95 ** bomb_num[1]) / 8
elif self.objective == 'logadp':
return (1.0 - self._env.step_count * 0.0033) * 1.3**self_bomb_num * 2**self._env.multiply_count[pos] / 4
return bomb_num[0] + bomb_num[1] + 1.0
else:
return 1.0 - self._env.step_count * 0.0033
return 1.0
else:
if self.objective == 'adp':
return (-1.1 - self._env.step_count * 0.0033) * 1.3 ** (bomb_num[0] + bomb_num[1] +self._env.multiply_count[pos]) /8
return (-1.1 + self._env.step_count * 0.0033) * (1.3 ** bomb_num[0]) * (1.95 ** bomb_num[1]) / 8
elif self.objective == 'logadp':
return (-1.0 + self._env.step_count * 0.0033) * 1.3**(self_bomb_num) * 2**self._env.multiply_count[pos] / 4
else:

3
douzero/env/game.py vendored
View File

@@ -139,8 +139,7 @@ class GameEnv(object):
def step(self):
action = self.players[self.acting_player_position].act(
self.game_infoset)
assert action in self.game_infoset.legal_actions
self.step_count += 1
if len(action) > 0:
self.last_pid = self.acting_player_position