"""Card-play environment for a four-seat landlord game.

The seats are ``landlord``, ``landlord_down``, ``landlord_front`` and
``landlord_up``.  ``GameEnv`` drives one game, ``InfoSet`` is the per-seat
observation handed to the players, and the module-level
``get_legal_card_play_actions`` enumerates the moves a hand may answer with.
"""
from . import move_detector as md, move_selector as ms
from .move_generator import MovesGener
import random
import pickle

EnvCard2RealCard = {3: '3', 4: '4', 5: '5', 6: '6', 7: '7',
                    8: '8', 9: '9', 10: '10', 11: 'J', 12: 'Q',
                    13: 'K', 14: 'A', 17: '2', 20: 'X', 30: 'D'}

RealCard2EnvCard = {'3': 3, '4': 4, '5': 5, '6': 6, '7': 7,
                    '8': 8, '9': 9, '10': 10, 'J': 11, 'Q': 12,
                    'K': 13, 'A': 14, '2': 17, 'X': 20, 'D': 30}

cards_idx = [x for x in range(3, 15)]
cards_idx.extend([17, 20, 30])

# Ranks that can form same-rank bombs: everything except the two joker
# values (20 and 30), which only appear in the joker bomb below.
_bomb_ranks = cards_idx[:-2]

# Bomb lookup table, grouped the way GameEnv scores them:
#   bombs[0] -- six- and seven-of-a-kind   (score multiplier 2 per bomb)
#   bombs[1] -- eight-of-a-kind and the four-joker bomb
#               (score multiplier 3 per bomb)
#   bombs[2] -- four- and five-of-a-kind   (counted, no score multiplier)
bombs = [[[x] * 6 for x in _bomb_ranks],
         [[x] * 8 for x in _bomb_ranks],
         [[x] * 4 for x in _bomb_ranks]]
bombs[0].extend([[x] * 7 for x in _bomb_ranks])
bombs[1].extend([[20, 20, 30, 30]])
bombs[2].extend([[x] * 5 for x in _bomb_ranks])

# Seat names, in the order used for every per-seat dictionary in this module.
_POSITIONS = ('landlord', 'landlord_up', 'landlord_front', 'landlord_down')

# Turn order: landlord -> landlord_down -> landlord_front -> landlord_up.
# Any seat missing from this map (including landlord_up and the initial
# ``None``) is followed by the landlord.
_NEXT_POSITION = {'landlord': 'landlord_down',
                  'landlord_down': 'landlord_front',
                  'landlord_front': 'landlord_up'}


def _bombs_from(mg, min_size):
    """Return all same-rank bombs of ``min_size``..8 cards plus the king bomb.

    Generators are called in ascending size order, king bomb last, so the
    resulting move order matches a hand-written concatenation.
    """
    found = []
    for size in range(min_size, 9):
        found += mg.gen_type_4_bomb(size)
    return found + mg.gen_type_5_king_bomb()


def _beat_bomb(mg, rival_move, size):
    """Return the moves that answer a ``size``-card same-rank bomb.

    These are the same-size bombs kept by ``ms.filter_type_4_bomb`` followed
    by every strictly larger bomb and the king bomb.
    """
    moves = ms.filter_type_4_bomb(mg.gen_type_4_bomb(size), rival_move)
    moves += _bombs_from(mg, size + 1)
    return moves


def get_legal_card_play_actions(player_hand_cards, rival_move):
    """Enumerate the moves ``player_hand_cards`` may play against ``rival_move``.

    Args:
        player_hand_cards: the acting player's cards, handed to ``MovesGener``.
        rival_move: the move to answer; an empty list means there is nothing
            to beat and every move produced by ``gen_moves`` is allowed.

    Returns:
        A list of moves, each a list of card values sorted in place.  When
        ``rival_move`` is non-empty the pass move ``[]`` is appended.
    """
    mg = MovesGener(player_hand_cards)

    rival_type = md.get_move_type(rival_move)
    rival_move_type = rival_type['type']
    rival_move_len = rival_type.get('len', 1)
    moves = list()

    if rival_move_type == md.TYPE_0_PASS:
        moves = mg.gen_moves()

    elif rival_move_type == md.TYPE_1_SINGLE:
        all_moves = mg.gen_type_1_single()
        moves = ms.filter_type_1_single(all_moves, rival_move)

    elif rival_move_type == md.TYPE_2_PAIR:
        all_moves = mg.gen_type_2_pair()
        moves = ms.filter_type_2_pair(all_moves, rival_move)

    elif rival_move_type == md.TYPE_3_TRIPLE:
        all_moves = mg.gen_type_3_triple()
        moves = ms.filter_type_3_triple(all_moves, rival_move)

    elif rival_move_type == md.TYPE_4_BOMB:
        moves = _beat_bomb(mg, rival_move, 4)

    elif rival_move_type == md.TYPE_4_BOMB5:
        moves = _beat_bomb(mg, rival_move, 5)

    elif rival_move_type == md.TYPE_4_BOMB6:
        moves = _beat_bomb(mg, rival_move, 6)

    elif rival_move_type == md.TYPE_4_BOMB7:
        moves = _beat_bomb(mg, rival_move, 7)

    elif rival_move_type == md.TYPE_4_BOMB8:
        moves = _beat_bomb(mg, rival_move, 8)

    elif rival_move_type == md.TYPE_5_KING_BOMB:
        moves = []

    elif rival_move_type == md.TYPE_7_3_2:
        all_moves = mg.gen_type_7_3_2()
        moves = ms.filter_type_7_3_2(all_moves, rival_move)

    elif rival_move_type == md.TYPE_8_SERIAL_SINGLE:
        all_moves = mg.gen_type_8_serial_single(repeat_num=rival_move_len)
        moves = ms.filter_type_8_serial_single(all_moves, rival_move)

    elif rival_move_type == md.TYPE_9_SERIAL_PAIR:
        all_moves = mg.gen_type_9_serial_pair(repeat_num=rival_move_len)
        moves = ms.filter_type_9_serial_pair(all_moves, rival_move)

    elif rival_move_type == md.TYPE_10_SERIAL_TRIPLE:
        all_moves = mg.gen_type_10_serial_triple(repeat_num=rival_move_len)
        moves = ms.filter_type_10_serial_triple(all_moves, rival_move)

    elif rival_move_type == md.TYPE_12_SERIAL_3_2:
        all_moves = mg.gen_type_12_serial_3_2(repeat_num=rival_move_len)
        moves = ms.filter_type_12_serial_3_2(all_moves, rival_move)

    # TYPE_6_3_1, TYPE_11_SERIAL_3_1, TYPE_13_4_2 and TYPE_14_4_22 have no
    # branch (their handlers were disabled in this file), so for those rival
    # moves ``moves`` is still empty at this point.

    # NOTE(review): this comparison relies on the numeric ordering of the
    # ``md`` type constants -- confirm that every non-bomb type that bombs
    # are meant to beat really compares below md.TYPE_4_BOMB.
    if rival_move_type != md.TYPE_0_PASS and rival_move_type < md.TYPE_4_BOMB:
        moves = moves + _bombs_from(mg, 4)

    if len(rival_move) != 0:  # rival_move is not 'pass'
        moves = moves + [[]]

    for m in moves:
        m.sort()

    return moves


class GameEnv(object):
    """Runs one four-seat game and builds the observation for each turn.

    ``players`` maps each seat name to an object exposing ``act(infoset)``,
    which must return the chosen move as a list of card values (``[]`` for a
    pass).
    """

    def __init__(self, players):
        # History of (position, action) tuples, passes included.
        self.card_play_action_seq = []

        self.three_landlord_cards = None
        self.three_landlord_cards_all = None
        self.game_over = False

        self.acting_player_position = None
        self.player_utility_dict = None

        self.players = players

        self.last_move_dict = {pos: [] for pos in _POSITIONS}
        self.played_cards = {pos: [] for pos in _POSITIONS}

        self.last_move = []
        self.last_two_moves = []

        # Cumulative over games; ``reset`` leaves these untouched.
        self.num_wins = {'landlord': 0,
                         'farmer': 0}
        self.num_scores = {'landlord': 0,
                           'farmer': 0}

        self.info_sets = {pos: InfoSet(pos) for pos in _POSITIONS}

        # One counter per group of the module-level ``bombs`` table.
        self.bomb_num = [0, 0, 0]
        self.pos_bomb_num = {pos: 0 for pos in _POSITIONS}
        self.last_pid = 'landlord'
        self.face_up_level = 0
        self.step_count = 0

        # Side that won the most recently finished game; defined up front so
        # ``get_winner`` returns None instead of raising before any game ends.
        self.winner = None

    def card_play_init(self, card_play_data):
        """Load the dealt hands and prepare the first observation.

        ``card_play_data`` holds one hand per seat, either at the top level
        or nested under the ``'play'`` key.  Optional keys are
        ``'three_landlord_cards'`` and ``'face_up_level'``.
        """
        if 'play' in card_play_data:
            hands = card_play_data['play']
        else:
            hands = card_play_data
        for pos in _POSITIONS:
            self.info_sets[pos].player_hand_cards = hands[pos]

        if 'three_landlord_cards' in card_play_data:
            self.three_landlord_cards = card_play_data['three_landlord_cards'][:]
            self.three_landlord_cards_all = card_play_data['three_landlord_cards'][:]
        else:
            # Fall back to a slice of the landlord hand.  With the nested
            # 'play' layout there may be no top-level 'landlord' key, so use
            # the hand that was just loaded instead of raising KeyError.
            if 'landlord' in card_play_data:
                landlord_cards = card_play_data['landlord']
            else:
                landlord_cards = hands['landlord']
            self.three_landlord_cards = landlord_cards[25:33]
            self.three_landlord_cards_all = landlord_cards[25:33]

        if 'face_up_level' in card_play_data:
            face_up_level = card_play_data['face_up_level']
        else:
            face_up_level = 0
        for pos in _POSITIONS:
            self.info_sets[pos].face_up_level = face_up_level

        self.get_acting_player_position()
        self.game_infoset = self.get_infoset()

    def game_done(self):
        """Finish the game as soon as any seat has an empty hand."""
        if any(len(self.info_sets[pos].player_hand_cards) == 0
               for pos in _POSITIONS):
            # One of the four players has played all of their cards,
            # so the game is over.
            self.compute_player_utility()
            self.update_num_wins_scores()

            self.game_over = True

    def compute_player_utility(self):
        """Set the utilities: the landlord wins only by emptying its own hand."""
        if len(self.info_sets['landlord'].player_hand_cards) == 0:
            self.player_utility_dict = {'landlord': 3,
                                        'farmer': -1}
        else:
            self.player_utility_dict = {'landlord': -3,
                                        'farmer': 1}

    def update_num_wins_scores(self):
        """Record the winner and add the bomb-scaled score to the running totals."""
        for pos, utility in self.player_utility_dict.items():
            base_score = 3 if pos == 'landlord' else 1
            # Each bombs[0] bomb doubles and each bombs[1] bomb triples the stake.
            stake = base_score * (2 ** self.bomb_num[0]) * (3 ** self.bomb_num[1])
            if utility > 0:
                self.num_wins[pos] += 1
                self.winner = pos
                self.num_scores[pos] += stake
            else:
                self.num_scores[pos] -= stake

    def get_winner(self):
        """Return 'landlord' or 'farmer' for the last finished game, else None."""
        return self.winner

    def get_bomb_num(self):
        """Return the three-element bomb counter list."""
        return self.bomb_num

    def step(self):
        """Ask the acting player for a move, apply it and advance the turn.

        Returns:
            The action chosen by the acting player.
        """
        action = self.players[self.acting_player_position].act(
            self.game_infoset)
        self.step_count += 1

        if len(action) > 0:
            self.last_pid = self.acting_player_position

            # Only a non-empty action can match an entry of the bomb table.
            if action in bombs[0]:
                self.bomb_num[0] += 1
                self.pos_bomb_num[self.acting_player_position] += 1

            if action in bombs[1]:
                self.bomb_num[1] += 1
                self.pos_bomb_num[self.acting_player_position] += 1

            if action in bombs[2]:
                self.bomb_num[2] += 1

        self.last_move_dict[
            self.acting_player_position] = action.copy()

        self.card_play_action_seq.append((self.acting_player_position, action))
        self.update_acting_player_hand_cards(action)

        self.played_cards[self.acting_player_position] += action

        # Track which of the landlord's extra cards are still unplayed.
        if self.acting_player_position == 'landlord' and \
                len(action) > 0 and \
                len(self.three_landlord_cards) > 0:
            for card in action:
                if len(self.three_landlord_cards) > 0:
                    if card in self.three_landlord_cards:
                        self.three_landlord_cards.remove(card)
                else:
                    break

        self.game_done()
        if not self.game_over:
            self.get_acting_player_position()
            self.game_infoset = self.get_infoset()
        return action

    def _latest_effective_move(self):
        """Return the newest non-pass action among the last three history entries.

        History entries are ``(position, action)`` tuples.  An empty list is
        returned when the history is empty or when all inspected entries are
        passes, which means the acting player has nothing to beat.
        """
        for _, action in reversed(self.card_play_action_seq[-3:]):
            if len(action) != 0:
                return action
        return []

    def get_last_move(self):
        """Return the most recent non-pass move (``[]`` if there is none)."""
        return self._latest_effective_move()

    def get_last_two_moves(self):
        """Return the two newest history entries, newest first.

        NOTE(review): the entries are the raw ``(position, action)`` tuples
        from the history, while missing slots are filled with ``[]`` --
        confirm that consumers expect this mixed shape.
        """
        last_two_moves = [[], []]
        for card in self.card_play_action_seq[-2:]:
            last_two_moves.insert(0, card)
            last_two_moves = last_two_moves[:2]
        return last_two_moves

    def get_acting_player_position(self):
        """Advance to the next seat in turn order and return it.

        The first call (position still ``None``) selects the landlord.
        """
        self.acting_player_position = _NEXT_POSITION.get(
            self.acting_player_position, 'landlord')
        return self.acting_player_position

    def update_acting_player_hand_cards(self, action):
        """Remove the played cards from the acting player's hand and re-sort it."""
        if action != []:
            hand = self.info_sets[self.acting_player_position].player_hand_cards
            for card in action:
                hand.remove(card)
            hand.sort()

    def get_legal_card_play_actions(self):
        """Return the legal moves for the acting player against the move to beat."""
        rival_move = self._latest_effective_move()
        return get_legal_card_play_actions(
            self.info_sets[self.acting_player_position].player_hand_cards,
            rival_move)

    def reset(self):
        """Clear per-game state; the win/score totals and players are kept."""
        self.card_play_action_seq = []

        self.three_landlord_cards = None
        self.three_landlord_cards_all = None
        self.game_over = False

        self.acting_player_position = None
        self.player_utility_dict = None

        self.last_move_dict = {pos: [] for pos in _POSITIONS}
        self.played_cards = {pos: [] for pos in _POSITIONS}

        self.last_move = []
        self.last_two_moves = []

        self.info_sets = {pos: InfoSet(pos) for pos in _POSITIONS}
        for pos in _POSITIONS:
            self.info_sets[pos].face_up_level = self.face_up_level

        self.bomb_num = [0, 0, 0]
        self.pos_bomb_num = {pos: 0 for pos in _POSITIONS}
        # Same starting value as in __init__, so no state leaks between games.
        self.last_pid = 'landlord'
        self.step_count = 0

    def get_infoset(self):
        """Refresh the acting player's InfoSet and return a deep copy of it.

        The copy (a pickle round trip) keeps players from mutating the
        environment's own lists and dictionaries.
        """
        acting = self.acting_player_position
        infoset = self.info_sets[acting]

        infoset.legal_actions = self.get_legal_card_play_actions()
        infoset.bomb_num = self.bomb_num
        infoset.last_move = self.get_last_move()
        infoset.last_two_moves = self.get_last_two_moves()
        infoset.last_move_dict = self.last_move_dict

        infoset.num_cards_left_dict = {
            pos: len(self.info_sets[pos].player_hand_cards)
            for pos in _POSITIONS}

        # Concatenation of every other seat's hand.
        infoset.other_hand_cards = []
        for pos in _POSITIONS:
            if pos != acting:
                infoset.other_hand_cards += \
                    self.info_sets[pos].player_hand_cards

        infoset.played_cards = self.played_cards
        infoset.three_landlord_cards = self.three_landlord_cards
        infoset.three_landlord_cards_all = self.three_landlord_cards_all
        infoset.card_play_action_seq = self.card_play_action_seq

        infoset.all_handcards = {
            pos: self.info_sets[pos].player_hand_cards
            for pos in _POSITIONS}

        return pickle.loads(pickle.dumps(infoset))


class InfoSet(object):
    """
    The game state is described as infoset, which
    includes all the information in the current situation,
    such as the hand cards of all the players, the
    historical moves, etc.
    """

    def __init__(self, player_position):
        # The player position, i.e., landlord, landlord_down,
        # landlord_front, or landlord_up
        self.player_position = player_position
        # The hand cards of the current player. A list.
        self.player_hand_cards = None
        # The number of cards left for each player. It is a dict with str-->int
        self.num_cards_left_dict = None
        # The landlord cards that have not been played yet. A list.
        self.three_landlord_cards = None
        # The full set of landlord cards. A list.
        self.three_landlord_cards_all = None
        # The historical moves. It is a list of (position, action) tuples
        self.card_play_action_seq = None
        # The union of the hand cards of all the other players for the current player
        self.other_hand_cards = None
        # The legal actions for the current move. It is a list of list
        self.legal_actions = None
        # The most recent valid move
        self.last_move = None
        # The most recent two moves
        self.last_two_moves = None
        # The last moves for all the positions
        self.last_move_dict = None
        # The played cards so far. GameEnv stores a dict of position --> list.
        self.played_cards = None
        # The hand cards of all the players. It is a dict.
        self.all_handcards = None
        # Last player position that plays a valid move, i.e., not `pass`.
        # GameEnv.get_infoset does not fill this field in.
        self.last_pid = None
        # The number of bombs played so far. GameEnv stores a three-element list.
        self.bomb_num = None
        self.player_id = None
        # face_up_level 0x01: three_landlord_cards, 0x02: landlord, 0x04: landlord_up, 0x08: landlord_front, 0x10: landlord_down
        self.face_up_level = 0