随机切换明牌模式进行训练
This commit is contained in:
parent
16198fbb65
commit
9b9b9219e0
|
@ -130,6 +130,7 @@ class Env:
|
||||||
# Initialize the internal environment
|
# Initialize the internal environment
|
||||||
self._env = GameEnv(self.players)
|
self._env = GameEnv(self.players)
|
||||||
self.total_round = 0
|
self.total_round = 0
|
||||||
|
self.face_up_level = 0
|
||||||
self.infoset = None
|
self.infoset = None
|
||||||
|
|
||||||
def reset(self, flags=None):
|
def reset(self, flags=None):
|
||||||
|
@ -153,6 +154,13 @@ class Env:
|
||||||
}
|
}
|
||||||
for key in card_play_data:
|
for key in card_play_data:
|
||||||
card_play_data[key].sort()
|
card_play_data[key].sort()
|
||||||
|
rint = np.random.randint(0, 100)
|
||||||
|
if rint < 45:
|
||||||
|
face_up_level = 0
|
||||||
|
elif rint < 90:
|
||||||
|
face_up_level = 0x01
|
||||||
|
else:
|
||||||
|
face_up_level = 0x02
|
||||||
player_ids = {
|
player_ids = {
|
||||||
'landlord': 0,
|
'landlord': 0,
|
||||||
'landlord_down': 1,
|
'landlord_down': 1,
|
||||||
|
@ -165,6 +173,7 @@ class Env:
|
||||||
for pos in ["landlord", "landlord_up", "landlord_front", "landlord_down"]:
|
for pos in ["landlord", "landlord_up", "landlord_front", "landlord_down"]:
|
||||||
pid = player_ids[pos]
|
pid = player_ids[pos]
|
||||||
self._env.info_sets[pos].player_id = pid
|
self._env.info_sets[pos].player_id = pid
|
||||||
|
self._env.info_sets[pos].face_up_level = face_up_level
|
||||||
self.infoset = self._game_infoset
|
self.infoset = self._game_infoset
|
||||||
|
|
||||||
return get_obs(self.infoset, self.use_general, self.use_legacy, self.lite_model, self.use_unified)
|
return get_obs(self.infoset, self.use_general, self.use_legacy, self.lite_model, self.use_unified)
|
||||||
|
@ -1065,10 +1074,7 @@ def _get_obs_general(infoset, position, compressed_form = False):
|
||||||
}
|
}
|
||||||
return obs
|
return obs
|
||||||
|
|
||||||
'''
|
def _get_obs_unified(infoset, position, compressed_form = True):
|
||||||
face_up_level 0x01: three_landlord_cards, 0x02: landlord, 0x04: landlord_up, 0x08: landlord_front, 0x10: landlord_down
|
|
||||||
'''
|
|
||||||
def _get_obs_unified(infoset, position, compressed_form = True, face_up_level = 0):
|
|
||||||
num_legal_actions = len(infoset.legal_actions)
|
num_legal_actions = len(infoset.legal_actions)
|
||||||
my_handcards = _cards2array(infoset.player_hand_cards, compressed_form)
|
my_handcards = _cards2array(infoset.player_hand_cards, compressed_form)
|
||||||
my_handcards_batch = np.repeat(my_handcards[np.newaxis, :],
|
my_handcards_batch = np.repeat(my_handcards[np.newaxis, :],
|
||||||
|
@ -1104,7 +1110,7 @@ def _get_obs_unified(infoset, position, compressed_form = True, face_up_level =
|
||||||
landlord_down_played_cards = _cards2array(
|
landlord_down_played_cards = _cards2array(
|
||||||
infoset.played_cards['landlord_down'], compressed_form)
|
infoset.played_cards['landlord_down'], compressed_form)
|
||||||
|
|
||||||
if (face_up_level & 0x01) > 0:
|
if (infoset.face_up_level & 0x01) > 0:
|
||||||
three_landlord_cards = _cards2array(
|
three_landlord_cards = _cards2array(
|
||||||
infoset.three_landlord_cards, compressed_form)
|
infoset.three_landlord_cards, compressed_form)
|
||||||
|
|
||||||
|
@ -1117,28 +1123,28 @@ def _get_obs_unified(infoset, position, compressed_form = True, face_up_level =
|
||||||
three_landlord_cards_all = _cards2noise(
|
three_landlord_cards_all = _cards2noise(
|
||||||
infoset.three_landlord_cards_all, compressed_form)
|
infoset.three_landlord_cards_all, compressed_form)
|
||||||
|
|
||||||
if (face_up_level & 0x02) > 0:
|
if (infoset.face_up_level & 0x02) > 0:
|
||||||
landlord_cards = _cards2array(
|
landlord_cards = _cards2array(
|
||||||
infoset.all_handcards['landlord'], compressed_form)
|
infoset.all_handcards['landlord'], compressed_form)
|
||||||
else:
|
else:
|
||||||
landlord_cards = _cards2noise(
|
landlord_cards = _cards2noise(
|
||||||
infoset.all_handcards['landlord'], compressed_form)
|
infoset.all_handcards['landlord'], compressed_form)
|
||||||
|
|
||||||
if (face_up_level & 0x04) > 0:
|
if (infoset.face_up_level & 0x04) > 0:
|
||||||
landlord_up_cards = _cards2array(
|
landlord_up_cards = _cards2array(
|
||||||
infoset.all_handcards['landlord_up'], compressed_form)
|
infoset.all_handcards['landlord_up'], compressed_form)
|
||||||
else:
|
else:
|
||||||
landlord_up_cards = _cards2noise(
|
landlord_up_cards = _cards2noise(
|
||||||
infoset.all_handcards['landlord_up'], compressed_form)
|
infoset.all_handcards['landlord_up'], compressed_form)
|
||||||
|
|
||||||
if (face_up_level & 0x08) > 0:
|
if (infoset.face_up_level & 0x08) > 0:
|
||||||
landlord_front_cards = _cards2array(
|
landlord_front_cards = _cards2array(
|
||||||
infoset.all_handcards['landlord_front'], compressed_form)
|
infoset.all_handcards['landlord_front'], compressed_form)
|
||||||
else:
|
else:
|
||||||
landlord_front_cards = _cards2noise(
|
landlord_front_cards = _cards2noise(
|
||||||
infoset.all_handcards['landlord_front'], compressed_form)
|
infoset.all_handcards['landlord_front'], compressed_form)
|
||||||
|
|
||||||
if (face_up_level & 0x10) > 0:
|
if (infoset.face_up_level & 0x10) > 0:
|
||||||
landlord_down_cards = _cards2array(
|
landlord_down_cards = _cards2array(
|
||||||
infoset.all_handcards['landlord_down'], compressed_form)
|
infoset.all_handcards['landlord_down'], compressed_form)
|
||||||
else:
|
else:
|
||||||
|
@ -1149,7 +1155,7 @@ def _get_obs_unified(infoset, position, compressed_form = True, face_up_level =
|
||||||
infoset.bomb_num, compressed_form=compressed_form) # 56/95
|
infoset.bomb_num, compressed_form=compressed_form) # 56/95
|
||||||
base_info = np.hstack((
|
base_info = np.hstack((
|
||||||
PositionInfoArray[position], # 4
|
PositionInfoArray[position], # 4
|
||||||
FaceUpLevelArray[face_up_level], # 9
|
FaceUpLevelArray[infoset.face_up_level], # 9
|
||||||
bomb_num, #56
|
bomb_num, #56
|
||||||
))
|
))
|
||||||
num_cards_left = np.hstack((
|
num_cards_left = np.hstack((
|
||||||
|
|
|
@ -168,6 +168,7 @@ class GameEnv(object):
|
||||||
"landlord_down": 0
|
"landlord_down": 0
|
||||||
}
|
}
|
||||||
self.last_pid = 'landlord'
|
self.last_pid = 'landlord'
|
||||||
|
self.face_up_level = 0
|
||||||
self.step_count = 0
|
self.step_count = 0
|
||||||
|
|
||||||
|
|
||||||
|
@ -192,6 +193,16 @@ class GameEnv(object):
|
||||||
else:
|
else:
|
||||||
self.three_landlord_cards = card_play_data['three_landlord_cards'][:]
|
self.three_landlord_cards = card_play_data['three_landlord_cards'][:]
|
||||||
self.three_landlord_cards_all = card_play_data['three_landlord_cards'][:]
|
self.three_landlord_cards_all = card_play_data['three_landlord_cards'][:]
|
||||||
|
if 'face_up_level' in card_play_data.keys():
|
||||||
|
self.info_sets['landlord'].face_up_level = card_play_data['face_up_level']
|
||||||
|
self.info_sets['landlord_up'].face_up_level = card_play_data['face_up_level']
|
||||||
|
self.info_sets['landlord_front'].face_up_level = card_play_data['face_up_level']
|
||||||
|
self.info_sets['landlord_down'].face_up_level = card_play_data['face_up_level']
|
||||||
|
else:
|
||||||
|
self.info_sets['landlord'].face_up_level = 0
|
||||||
|
self.info_sets['landlord_up'].face_up_level = 0
|
||||||
|
self.info_sets['landlord_front'].face_up_level = 0
|
||||||
|
self.info_sets['landlord_down'].face_up_level = 0
|
||||||
self.get_acting_player_position()
|
self.get_acting_player_position()
|
||||||
self.game_infoset = self.get_infoset()
|
self.game_infoset = self.get_infoset()
|
||||||
|
|
||||||
|
@ -364,6 +375,11 @@ class GameEnv(object):
|
||||||
'landlord_front': InfoSet('landlord_front'),
|
'landlord_front': InfoSet('landlord_front'),
|
||||||
'landlord_down': InfoSet('landlord_down')}
|
'landlord_down': InfoSet('landlord_down')}
|
||||||
|
|
||||||
|
self.info_sets['landlord'].face_up_level = self.face_up_level
|
||||||
|
self.info_sets['landlord_up'].face_up_level = self.face_up_level
|
||||||
|
self.info_sets['landlord_front'].face_up_level = self.face_up_level
|
||||||
|
self.info_sets['landlord_down'].face_up_level = self.face_up_level
|
||||||
|
|
||||||
self.bomb_num = [0, 0, 0]
|
self.bomb_num = [0, 0, 0]
|
||||||
self.pos_bomb_num = {
|
self.pos_bomb_num = {
|
||||||
"landlord": 0,
|
"landlord": 0,
|
||||||
|
@ -456,3 +472,5 @@ class InfoSet(object):
|
||||||
# The number of bombs played so far
|
# The number of bombs played so far
|
||||||
self.bomb_num = None
|
self.bomb_num = None
|
||||||
self.player_id = None
|
self.player_id = None
|
||||||
|
# face_up_level 0x01: three_landlord_cards, 0x02: landlord, 0x04: landlord_up, 0x08: landlord_front, 0x10: landlord_down
|
||||||
|
self.face_up_level = 0
|
||||||
|
|
Loading…
Reference in New Issue