diff --git a/douzero/env/env.py b/douzero/env/env.py index 85c9d4b..89fbd86 100644 --- a/douzero/env/env.py +++ b/douzero/env/env.py @@ -130,6 +130,7 @@ class Env: # Initialize the internal environment self._env = GameEnv(self.players) self.total_round = 0 + self.face_up_level = 0 self.infoset = None def reset(self, flags=None): @@ -153,6 +154,13 @@ class Env: } for key in card_play_data: card_play_data[key].sort() + rint = np.random.randint(0, 100) + if rint < 45: + face_up_level = 0 + elif rint < 90: + face_up_level = 0x01 + else: + face_up_level = 0x02 player_ids = { 'landlord': 0, 'landlord_down': 1, @@ -165,6 +173,7 @@ class Env: for pos in ["landlord", "landlord_up", "landlord_front", "landlord_down"]: pid = player_ids[pos] self._env.info_sets[pos].player_id = pid + self._env.info_sets[pos].face_up_level = face_up_level self.infoset = self._game_infoset return get_obs(self.infoset, self.use_general, self.use_legacy, self.lite_model, self.use_unified) @@ -1065,10 +1074,7 @@ def _get_obs_general(infoset, position, compressed_form = False): } return obs -''' - face_up_level 0x01: three_landlord_cards, 0x02: landlord, 0x04: landlord_up, 0x08: landlord_front, 0x10: landlord_down -''' -def _get_obs_unified(infoset, position, compressed_form = True, face_up_level = 0): +def _get_obs_unified(infoset, position, compressed_form = True): num_legal_actions = len(infoset.legal_actions) my_handcards = _cards2array(infoset.player_hand_cards, compressed_form) my_handcards_batch = np.repeat(my_handcards[np.newaxis, :], @@ -1104,7 +1110,7 @@ def _get_obs_unified(infoset, position, compressed_form = True, face_up_level = landlord_down_played_cards = _cards2array( infoset.played_cards['landlord_down'], compressed_form) - if (face_up_level & 0x01) > 0: + if (infoset.face_up_level & 0x01) > 0: three_landlord_cards = _cards2array( infoset.three_landlord_cards, compressed_form) @@ -1117,28 +1123,28 @@ def _get_obs_unified(infoset, position, compressed_form = True, face_up_level = three_landlord_cards_all = _cards2noise( infoset.three_landlord_cards_all, compressed_form) - if (face_up_level & 0x02) > 0: + if (infoset.face_up_level & 0x02) > 0: landlord_cards = _cards2array( infoset.all_handcards['landlord'], compressed_form) else: landlord_cards = _cards2noise( infoset.all_handcards['landlord'], compressed_form) - if (face_up_level & 0x04) > 0: + if (infoset.face_up_level & 0x04) > 0: landlord_up_cards = _cards2array( infoset.all_handcards['landlord_up'], compressed_form) else: landlord_up_cards = _cards2noise( infoset.all_handcards['landlord_up'], compressed_form) - if (face_up_level & 0x08) > 0: + if (infoset.face_up_level & 0x08) > 0: landlord_front_cards = _cards2array( infoset.all_handcards['landlord_front'], compressed_form) else: landlord_front_cards = _cards2noise( infoset.all_handcards['landlord_front'], compressed_form) - if (face_up_level & 0x10) > 0: + if (infoset.face_up_level & 0x10) > 0: landlord_down_cards = _cards2array( infoset.all_handcards['landlord_down'], compressed_form) else: @@ -1149,7 +1155,7 @@ def _get_obs_unified(infoset, position, compressed_form = True, face_up_level = infoset.bomb_num, compressed_form=compressed_form) # 56/95 base_info = np.hstack(( PositionInfoArray[position], # 4 - FaceUpLevelArray[face_up_level], # 9 + FaceUpLevelArray[infoset.face_up_level], # 9 bomb_num, #56 )) num_cards_left = np.hstack(( diff --git a/douzero/env/game.py b/douzero/env/game.py index df77d1a..4032bf2 100644 --- a/douzero/env/game.py +++ b/douzero/env/game.py @@ -168,6 +168,7 @@ class GameEnv(object): "landlord_down": 0 } self.last_pid = 'landlord' + self.face_up_level = 0 self.step_count = 0 @@ -186,12 +187,22 @@ class GameEnv(object): card_play_data['landlord_front'] self.info_sets['landlord_down'].player_hand_cards = \ card_play_data['landlord_down'] - if 'three_landlord_cards' not in card_play_data.keys(): - self.three_landlord_cards = card_play_data['landlord'][25:33] - self.three_landlord_cards_all = card_play_data['landlord'][25:33] - else: - self.three_landlord_cards = card_play_data['three_landlord_cards'][:] - self.three_landlord_cards_all = card_play_data['three_landlord_cards'][:] + if 'three_landlord_cards' not in card_play_data.keys(): + self.three_landlord_cards = card_play_data['landlord'][25:33] + self.three_landlord_cards_all = card_play_data['landlord'][25:33] + else: + self.three_landlord_cards = card_play_data['three_landlord_cards'][:] + self.three_landlord_cards_all = card_play_data['three_landlord_cards'][:] + if 'face_up_level' in card_play_data.keys(): + self.info_sets['landlord'].face_up_level = card_play_data['face_up_level'] + self.info_sets['landlord_up'].face_up_level = card_play_data['face_up_level'] + self.info_sets['landlord_front'].face_up_level = card_play_data['face_up_level'] + self.info_sets['landlord_down'].face_up_level = card_play_data['face_up_level'] + else: + self.info_sets['landlord'].face_up_level = 0 + self.info_sets['landlord_up'].face_up_level = 0 + self.info_sets['landlord_front'].face_up_level = 0 + self.info_sets['landlord_down'].face_up_level = 0 self.get_acting_player_position() self.game_infoset = self.get_infoset() @@ -364,6 +375,11 @@ class GameEnv(object): 'landlord_front': InfoSet('landlord_front'), 'landlord_down': InfoSet('landlord_down')} + self.info_sets['landlord'].face_up_level = self.face_up_level + self.info_sets['landlord_up'].face_up_level = self.face_up_level + self.info_sets['landlord_front'].face_up_level = self.face_up_level + self.info_sets['landlord_down'].face_up_level = self.face_up_level + self.bomb_num = [0, 0, 0] self.pos_bomb_num = { "landlord": 0, @@ -456,3 +472,5 @@ class InfoSet(object): # The number of bombs played so far self.bomb_num = None self.player_id = None + # face_up_level 0x01: three_landlord_cards, 0x02: landlord, 0x04: landlord_up, 0x08: landlord_front, 0x10: landlord_down + self.face_up_level = 0