调整激励算法
This commit is contained in:
parent
cfa9da6b2c
commit
c7f105d20d
|
@@ -283,17 +283,14 @@ class Env:
|
|||
self_bomb_num = self._env.pos_bomb_num[pos]
|
||||
if winner == 'landlord':
|
||||
if self.objective == 'adp':
|
||||
return (1.1 - self._env.step_count * 0.0033) * 1.3 ** (bomb_num[0] + bomb_num[1] + self._env.multiply_count[pos]) /8
|
||||
return (2.0 ** bomb_num[0]) * (3.0 ** bomb_num[1])
|
||||
return (1.1 - self._env.step_count * 0.0033) * (1.3 ** bomb_num[0]) * (1.95 ** bomb_num[1]) / 8
|
||||
elif self.objective == 'logadp':
|
||||
return (1.0 - self._env.step_count * 0.0033) * 1.3**self_bomb_num * 2**self._env.multiply_count[pos] / 4
|
||||
return bomb_num[0] + bomb_num[1] + 1.0
|
||||
else:
|
||||
return 1.0 - self._env.step_count * 0.0033
|
||||
return 1.0
|
||||
else:
|
||||
if self.objective == 'adp':
|
||||
return (-1.1 - self._env.step_count * 0.0033) * 1.3 ** (bomb_num[0] + bomb_num[1] +self._env.multiply_count[pos]) /8
|
||||
return (-1.1 + self._env.step_count * 0.0033) * (1.3 ** bomb_num[0]) * (1.95 ** bomb_num[1]) / 8
|
||||
elif self.objective == 'logadp':
|
||||
return (-1.0 + self._env.step_count * 0.0033) * 1.3**(self_bomb_num) * 2**self._env.multiply_count[pos] / 4
|
||||
else:
|
||||
|
|
|
@@ -139,8 +139,7 @@ class GameEnv(object):
|
|||
def step(self):
|
||||
action = self.players[self.acting_player_position].act(
|
||||
self.game_infoset)
|
||||
assert action in self.game_infoset.legal_actions
|
||||
|
||||
self.step_count += 1
|
||||
if len(action) > 0:
|
||||
self.last_pid = self.acting_player_position
|
||||
|
||||
|
|
Loading…
Reference in New Issue