调整reward
This commit is contained in:
parent
dba179db0e
commit
7e190c2353
|
@@ -46,6 +46,7 @@ class Env:
|
|||
will perform the actual action in the game engine.
|
||||
"""
|
||||
self.objective = objective
|
||||
self.use_legacy = legacy_model
|
||||
self.use_general = not old_model
|
||||
|
||||
# Initialize players
|
||||
|
@@ -93,7 +94,6 @@ class Env:
|
|||
}
|
||||
for key in card_play_data:
|
||||
card_play_data[key].sort()
|
||||
card_play_data["landlord"].sort()
|
||||
player_ids = {
|
||||
'landlord': 0,
|
||||
'landlord_down': 1,
|
||||
|
@@ -150,18 +150,18 @@ class Env:
|
|||
self_bomb_num = self._env.pos_bomb_num[pos]
|
||||
if winner == 'landlord':
|
||||
if self.objective == 'adp':
|
||||
return (1.1 - self._env.step_count * 0.00125) * (1.3 ** bomb_num[0]) * (1.95 ** bomb_num[1]) / 8
|
||||
return (1.1 - self._env.step_count * 0.0005) * (1.3 ** bomb_num[0]) * (1.95 ** bomb_num[1]) / 8
|
||||
elif self.objective == 'logadp':
|
||||
return (1.0 - self._env.step_count * 0.00125) * 1.3**self_bomb_num / 4
|
||||
return (1.0 - self._env.step_count * 0.0005) * 1.3**self_bomb_num / 4
|
||||
else:
|
||||
return 1.0 - self._env.step_count * 0.00125
|
||||
return 1.0 - self._env.step_count * 0.0005
|
||||
else:
|
||||
if self.objective == 'adp':
|
||||
return (-1.1 + self._env.step_count * 0.00125) * (1.3 ** bomb_num[0]) * (1.95 ** bomb_num[1]) / 8
|
||||
return (-1.1 + self._env.step_count * 0.0005) * (1.3 ** bomb_num[0]) * (1.95 ** bomb_num[1]) / 8
|
||||
elif self.objective == 'logadp':
|
||||
return (-1.0 + self._env.step_count * 0.00125) * 1.3**(self_bomb_num) / 4
|
||||
return (-1.0 + self._env.step_count * 0.0005) * 1.3**(self_bomb_num) / 4
|
||||
else:
|
||||
return -1.0 + self._env.step_count * 0.00125
|
||||
return -1.0 + self._env.step_count * 0.0005
|
||||
|
||||
@property
|
||||
def _game_infoset(self):
|
||||
|
|
Loading…
Reference in New Issue