调整激励算法
This commit is contained in:
parent
cfa9da6b2c
commit
c7f105d20d
|
@@ -283,17 +283,14 @@ class Env:
|
||||||
self_bomb_num = self._env.pos_bomb_num[pos]
|
self_bomb_num = self._env.pos_bomb_num[pos]
|
||||||
if winner == 'landlord':
|
if winner == 'landlord':
|
||||||
if self.objective == 'adp':
|
if self.objective == 'adp':
|
||||||
return (1.1 - self._env.step_count * 0.0033) * 1.3 ** (bomb_num[0] + bomb_num[1] + self._env.multiply_count[pos]) /8
|
return (1.1 - self._env.step_count * 0.0033) * (1.3 ** bomb_num[0]) * (1.95 ** bomb_num[1]) / 8
|
||||||
return (2.0 ** bomb_num[0]) * (3.0 ** bomb_num[1])
|
|
||||||
elif self.objective == 'logadp':
|
elif self.objective == 'logadp':
|
||||||
return (1.0 - self._env.step_count * 0.0033) * 1.3**self_bomb_num * 2**self._env.multiply_count[pos] / 4
|
return (1.0 - self._env.step_count * 0.0033) * 1.3**self_bomb_num * 2**self._env.multiply_count[pos] / 4
|
||||||
return bomb_num[0] + bomb_num[1] + 1.0
|
|
||||||
else:
|
else:
|
||||||
return 1.0 - self._env.step_count * 0.0033
|
return 1.0 - self._env.step_count * 0.0033
|
||||||
return 1.0
|
|
||||||
else:
|
else:
|
||||||
if self.objective == 'adp':
|
if self.objective == 'adp':
|
||||||
return (-1.1 - self._env.step_count * 0.0033) * 1.3 ** (bomb_num[0] + bomb_num[1] +self._env.multiply_count[pos]) /8
|
return (-1.1 + self._env.step_count * 0.0033) * (1.3 ** bomb_num[0]) * (1.95 ** bomb_num[1]) / 8
|
||||||
elif self.objective == 'logadp':
|
elif self.objective == 'logadp':
|
||||||
return (-1.0 + self._env.step_count * 0.0033) * 1.3**(self_bomb_num) * 2**self._env.multiply_count[pos] / 4
|
return (-1.0 + self._env.step_count * 0.0033) * 1.3**(self_bomb_num) * 2**self._env.multiply_count[pos] / 4
|
||||||
else:
|
else:
|
||||||
|
|
|
@@ -139,8 +139,7 @@ class GameEnv(object):
|
||||||
def step(self):
|
def step(self):
|
||||||
action = self.players[self.acting_player_position].act(
|
action = self.players[self.acting_player_position].act(
|
||||||
self.game_infoset)
|
self.game_infoset)
|
||||||
assert action in self.game_infoset.legal_actions
|
self.step_count += 1
|
||||||
|
|
||||||
if len(action) > 0:
|
if len(action) > 0:
|
||||||
self.last_pid = self.acting_player_position
|
self.last_pid = self.acting_player_position
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue