调整reward

This commit is contained in:
zhiyang7 2021-12-20 16:04:33 +08:00
parent dba179db0e
commit 7e190c2353
1 changed file with 7 additions and 7 deletions

14
douzero/env/env.py vendored
View File

@@ -46,6 +46,7 @@ class Env:
will perform the actual action in the game engine.
"""
self.objective = objective
self.use_legacy = legacy_model
self.use_general = not old_model
# Initialize players
@@ -93,7 +94,6 @@ class Env:
}
for key in card_play_data:
card_play_data[key].sort()
card_play_data["landlord"].sort()
player_ids = {
'landlord': 0,
'landlord_down': 1,
@@ -150,18 +150,18 @@ class Env:
self_bomb_num = self._env.pos_bomb_num[pos]
if winner == 'landlord':
if self.objective == 'adp':
return (1.1 - self._env.step_count * 0.00125) * (1.3 ** bomb_num[0]) * (1.95 ** bomb_num[1]) / 8
return (1.1 - self._env.step_count * 0.0005) * (1.3 ** bomb_num[0]) * (1.95 ** bomb_num[1]) / 8
elif self.objective == 'logadp':
return (1.0 - self._env.step_count * 0.00125) * 1.3**self_bomb_num / 4
return (1.0 - self._env.step_count * 0.0005) * 1.3**self_bomb_num / 4
else:
return 1.0 - self._env.step_count * 0.00125
return 1.0 - self._env.step_count * 0.0005
else:
if self.objective == 'adp':
return (-1.1 + self._env.step_count * 0.00125) * (1.3 ** bomb_num[0]) * (1.95 ** bomb_num[1]) / 8
return (-1.1 + self._env.step_count * 0.0005) * (1.3 ** bomb_num[0]) * (1.95 ** bomb_num[1]) / 8
elif self.objective == 'logadp':
return (-1.0 + self._env.step_count * 0.00125) * 1.3**(self_bomb_num) / 4
return (-1.0 + self._env.step_count * 0.0005) * 1.3**(self_bomb_num) / 4
else:
return -1.0 + self._env.step_count * 0.00125
return -1.0 + self._env.step_count * 0.0005
@property
def _game_infoset(self):