# Douzero_Resnet/douzero/env/game.py

from . import move_detector as md, move_selector as ms
from .move_generator import MovesGener
import random
import pickle

# Internal integer card code -> printable card symbol.
# Codes 3..14 are the ranks 3..A, 17 is '2'; 20 and 30 map to 'X' and 'D'
# (presumably the two jokers -- confirm against the deck definition).
EnvCard2RealCard = {
    3: '3', 4: '4', 5: '5', 6: '6', 7: '7', 8: '8', 9: '9', 10: '10',
    11: 'J', 12: 'Q', 13: 'K', 14: 'A',
    17: '2',
    20: 'X', 30: 'D',
}

# Exact inverse of the table above: printable symbol -> internal code.
RealCard2EnvCard = {symbol: code for code, symbol in EnvCard2RealCard.items()}

# Every card code in ascending order.
cards_idx = list(range(3, 15)) + [17, 20, 30]

# Bomb plays grouped into three tiers; GameEnv.step() checks an action
# against each tier and bumps the matching counter in GameEnv.bomb_num.
# Only cards_idx[:-2] (everything except codes 20 and 30) can form
# N-of-a-kind bombs.
#   bombs[0]: six of a kind, followed by seven of a kind
#   bombs[1]: eight of a kind, followed by the [20, 20, 30, 30] combination
#   bombs[2]: four of a kind, followed by five of a kind
bombs = [
    [[rank] * size for size in (6, 7) for rank in cards_idx[:-2]],
    [[rank] * 8 for rank in cards_idx[:-2]] + [[20, 20, 30, 30]],
    [[rank] * size for size in (4, 5) for rank in cards_idx[:-2]],
]


def get_legal_card_play_actions(player_hand_cards, rival_move):
    """Enumerate the moves a hand may play in answer to ``rival_move``.

    Args:
        player_hand_cards: cards held by the acting player; passed to
            ``MovesGener`` unchanged.
        rival_move: the move that has to be answered. It is classified
            with ``md.get_move_type``; a move classified as
            ``md.TYPE_0_PASS`` means there is nothing to beat.

    Returns:
        A list of moves (each a list of card codes, sorted ascending in
        place). When ``rival_move`` is non-empty the pass move ``[]`` is
        appended as the last entry.
    """
    generator = MovesGener(player_hand_cards)

    detected = md.get_move_type(rival_move)
    kind = detected['type']
    chain_len = detected.get('len', 1)

    def bombs_from(smallest):
        """Bombs of ``smallest``..8 cards in ascending size, then the king bomb."""
        pool = []
        for size in range(smallest, 9):
            pool.extend(generator.gen_type_4_bomb(size))
        pool.extend(generator.gen_type_5_king_bomb())
        return pool

    def outbid_bomb(size):
        """Same-size bombs accepted by the filter, plus every larger bomb."""
        answers = ms.filter_type_4_bomb(
            generator.gen_type_4_bomb(size), rival_move)
        answers += bombs_from(size + 1)
        return answers

    # Ordered (move type, responder) pairs; the first matching type wins,
    # mirroring an if/elif chain. Responders are only called on a match.
    # TYPE_6_3_1, TYPE_11_SERIAL_3_1, TYPE_13_4_2 and TYPE_14_4_22 have no
    # entry: no same-type answer is generated for them.
    responders = (
        (md.TYPE_0_PASS, generator.gen_moves),
        (md.TYPE_1_SINGLE, lambda: ms.filter_type_1_single(
            generator.gen_type_1_single(), rival_move)),
        (md.TYPE_2_PAIR, lambda: ms.filter_type_2_pair(
            generator.gen_type_2_pair(), rival_move)),
        (md.TYPE_3_TRIPLE, lambda: ms.filter_type_3_triple(
            generator.gen_type_3_triple(), rival_move)),
        (md.TYPE_4_BOMB, lambda: outbid_bomb(4)),
        (md.TYPE_4_BOMB5, lambda: outbid_bomb(5)),
        (md.TYPE_4_BOMB6, lambda: outbid_bomb(6)),
        (md.TYPE_4_BOMB7, lambda: outbid_bomb(7)),
        (md.TYPE_4_BOMB8, lambda: outbid_bomb(8)),
        # Nothing is generated in answer to the king bomb.
        (md.TYPE_5_KING_BOMB, list),
        (md.TYPE_7_3_2, lambda: ms.filter_type_7_3_2(
            generator.gen_type_7_3_2(), rival_move)),
        (md.TYPE_8_SERIAL_SINGLE, lambda: ms.filter_type_8_serial_single(
            generator.gen_type_8_serial_single(repeat_num=chain_len),
            rival_move)),
        (md.TYPE_9_SERIAL_PAIR, lambda: ms.filter_type_9_serial_pair(
            generator.gen_type_9_serial_pair(repeat_num=chain_len),
            rival_move)),
        (md.TYPE_10_SERIAL_TRIPLE, lambda: ms.filter_type_10_serial_triple(
            generator.gen_type_10_serial_triple(repeat_num=chain_len),
            rival_move)),
        (md.TYPE_12_SERIAL_3_2, lambda: ms.filter_type_12_serial_3_2(
            generator.gen_type_12_serial_3_2(repeat_num=chain_len),
            rival_move)),
    )

    moves = []
    for move_type, respond in responders:
        if kind == move_type:
            moves = respond()
            break

    # NOTE(review): this guard relies on the numeric ordering of the md.TYPE_*
    # constants; which move types sort below TYPE_4_BOMB is decided in
    # move_detector -- confirm it covers every non-bomb type intended.
    if kind != md.TYPE_0_PASS and kind < md.TYPE_4_BOMB:
        moves = moves + bombs_from(4)

    # Passing is only offered when there is an actual move on the table.
    if len(rival_move) != 0:
        moves = moves + [[]]

    for move in moves:
        move.sort()

    return moves

class GameEnv(object):
    """Turn-by-turn card-play environment for four seats.

    The seats are ``'landlord'``, ``'landlord_down'``, ``'landlord_front'``
    and ``'landlord_up'`` and they act in that order. ``players`` must be
    indexable by seat name and each entry must provide ``act(infoset)``
    returning the chosen action (a list of card codes, ``[]`` for a pass).

    Typical use: ``card_play_init(deal)``, then call ``step()`` until
    ``game_over`` is true, then ``reset()`` before the next deal.
    ``num_wins`` and ``num_scores`` accumulate across games and are not
    cleared by ``reset()``.
    """

    # Seat names in the order used for every per-seat dict built here.
    _POSITIONS = ('landlord', 'landlord_up', 'landlord_front', 'landlord_down')

    # Seat that acts after each seat. Anything not listed (None before the
    # first turn, or 'landlord_up') hands the turn to the landlord.
    _NEXT_POSITION = {
        'landlord': 'landlord_down',
        'landlord_down': 'landlord_front',
        'landlord_front': 'landlord_up',
    }

    # Number of trailing actions inspected when looking for the move to
    # beat: with four seats, three passes in a row return the lead.
    _LOOKBACK = 3

    def __init__(self, players):
        """Create an environment driven by ``players`` (seat name -> agent)."""
        self.players = players

        # Running totals over all games played with this instance.
        self.num_wins = {'landlord': 0,
                         'farmer': 0}
        self.num_scores = {'landlord': 0,
                           'farmer': 0}

        # Side that won the most recently finished game ('landlord' or
        # 'farmer'); None until a game has finished.
        self.winner = None

        self._reset_round_state()

    def _reset_round_state(self):
        """(Re)create every attribute that describes a single game."""
        # Chronological list of (position, action) tuples.
        self.card_play_action_seq = []

        self.game_over = False

        self.acting_player_position = None
        self.player_utility_dict = None

        self.last_move_dict = {pos: [] for pos in self._POSITIONS}
        self.played_cards = {pos: [] for pos in self._POSITIONS}

        self.last_move = []
        self.last_two_moves = []

        self.info_sets = {pos: InfoSet(pos) for pos in self._POSITIONS}

        # One counter per tier of the module-level ``bombs`` table.
        self.bomb_num = [0, 0, 0]
        self.pos_bomb_num = {pos: 0 for pos in self._POSITIONS}
        # Seat that most recently played a non-pass action.
        self.last_pid = 'landlord'
        self.step_count = 0

    def card_play_init(self, card_play_data):
        """Deal the hands and prepare the first infoset.

        ``card_play_data`` maps each seat name to its hand, either directly
        or nested under a ``'play'`` key.
        """
        if 'play' in card_play_data:
            hands = card_play_data['play']
        else:
            hands = card_play_data
        for pos in self._POSITIONS:
            self.info_sets[pos].player_hand_cards = hands[pos]

        self.get_acting_player_position()
        self.game_infoset = self.get_infoset()

    def game_done(self):
        """Finish the game as soon as any of the four hands is empty."""
        if any(len(self.info_sets[pos].player_hand_cards) == 0
               for pos in self._POSITIONS):
            self.compute_player_utility()
            self.update_num_wins_scores()

            self.game_over = True

    def compute_player_utility(self):
        """Set ``player_utility_dict``: the landlord side wins only when the
        landlord's own hand is empty; otherwise the farmer side wins."""
        if len(self.info_sets['landlord'].player_hand_cards) == 0:
            self.player_utility_dict = {'landlord': 3,
                                        'farmer': -1}
        else:
            self.player_utility_dict = {'landlord': -3,
                                        'farmer': 1}

    def update_num_wins_scores(self):
        """Fold the finished game into ``num_wins``/``num_scores`` and record
        the winning side in ``winner``."""
        # Each bombs[0] play doubles and each bombs[1] play triples the
        # stake; bombs[2] plays are counted but do not affect the score.
        multiplier = (2 ** self.bomb_num[0]) * (3 ** self.bomb_num[1])
        for pos, utility in self.player_utility_dict.items():
            base_score = 3 if pos == 'landlord' else 1
            if utility > 0:
                self.num_wins[pos] += 1
                self.winner = pos
                self.num_scores[pos] += base_score * multiplier
            else:
                self.num_scores[pos] -= base_score * multiplier

    def get_winner(self):
        """Return the side that won the last finished game, or None."""
        return self.winner

    def get_bomb_num(self):
        """Return the three per-tier bomb counters (the live list)."""
        return self.bomb_num

    def step(self):
        """Let the acting player choose an action and apply it.

        Returns the chosen action. If the game is not over afterwards, the
        turn moves to the next seat and ``game_infoset`` is rebuilt for it.
        """
        position = self.acting_player_position
        action = self.players[position].act(self.game_infoset)
        self.step_count += 1
        if len(action) > 0:
            self.last_pid = position

        if action in bombs[0]:
            self.bomb_num[0] += 1
            self.pos_bomb_num[position] += 1

        if action in bombs[1]:
            self.bomb_num[1] += 1
            self.pos_bomb_num[position] += 1

        # NOTE(review): tier-2 bombs are not added to pos_bomb_num; kept
        # exactly as before -- confirm this is intended.
        if action in bombs[2]:
            self.bomb_num[2] += 1

        self.last_move_dict[position] = action.copy()

        self.card_play_action_seq.append((position, action))
        self.update_acting_player_hand_cards(action)

        self.played_cards[position] += action

        self.game_done()
        if not self.game_over:
            self.get_acting_player_position()
            self.game_infoset = self.get_infoset()
        return action

    def _latest_non_pass_action(self):
        """Return the newest non-empty action among the last three entries
        of ``card_play_action_seq``, or ``[]`` when there is none.

        Entries are ``(position, action)`` tuples, so emptiness has to be
        tested on the action, not on the tuple. Short sequences are handled
        without indexing errors.
        """
        recent = self.card_play_action_seq[-self._LOOKBACK:]
        for _, action in reversed(recent):
            if len(action) > 0:
                return action
        return []

    def get_last_move(self):
        """Return the move the acting player has to respond to.

        This is the most recent non-pass action within the last three
        turns, or ``[]`` if there is none. It is the same move that
        ``get_legal_card_play_actions`` uses as the rival move. Earlier
        revisions returned ``[]`` whenever the previous seat had passed;
        consumers tuned against that value may need re-validation.
        """
        return self._latest_non_pass_action()

    def get_last_two_moves(self):
        """Return the two newest entries of the action sequence, newest
        first, padded with ``[]`` when fewer than two exist.

        Real entries are ``(position, action)`` tuples while the padding is
        a bare empty list.
        """
        newest_first = self.card_play_action_seq[-2:][::-1]
        return (newest_first + [[], []])[:2]

    def get_acting_player_position(self):
        """Advance ``acting_player_position`` to the next seat and return it.

        The first call of a game (position is None) selects the landlord.
        """
        self.acting_player_position = self._NEXT_POSITION.get(
            self.acting_player_position, 'landlord')
        return self.acting_player_position

    def update_acting_player_hand_cards(self, action):
        """Remove the played cards from the acting player's hand and keep
        the hand sorted. A pass leaves the hand untouched."""
        if action != []:
            hand = self.info_sets[self.acting_player_position].player_hand_cards
            for card in action:
                hand.remove(card)
            hand.sort()

    def get_legal_card_play_actions(self):
        """Return the legal actions for the acting player, given the most
        recent non-pass move of the last three turns as the rival move."""
        hand = self.info_sets[self.acting_player_position].player_hand_cards
        return get_legal_card_play_actions(hand, self._latest_non_pass_action())

    def reset(self):
        """Clear all per-game state so a new deal can be started.

        ``num_wins``, ``num_scores`` and ``winner`` are kept.
        """
        self._reset_round_state()

    def get_infoset(self):
        """Refresh the acting player's infoset and return a deep copy of it.

        The copy is produced by a pickle round trip, so whatever the caller
        does to the returned object cannot affect the environment's state.
        """
        position = self.acting_player_position
        infoset = self.info_sets[position]

        infoset.legal_actions = self.get_legal_card_play_actions()
        infoset.bomb_num = self.bomb_num
        infoset.last_move = self.get_last_move()
        infoset.last_two_moves = self.get_last_two_moves()
        infoset.last_move_dict = self.last_move_dict

        infoset.num_cards_left_dict = {
            pos: len(self.info_sets[pos].player_hand_cards)
            for pos in self._POSITIONS}

        # Concatenation of every other seat's hand.
        infoset.other_hand_cards = []
        for pos in self._POSITIONS:
            if pos != position:
                infoset.other_hand_cards += self.info_sets[pos].player_hand_cards

        infoset.played_cards = self.played_cards
        infoset.card_play_action_seq = self.card_play_action_seq

        infoset.all_handcards = {
            pos: self.info_sets[pos].player_hand_cards
            for pos in self._POSITIONS}

        return pickle.loads(pickle.dumps(infoset))

class InfoSet(object):
    """Snapshot of the game as handed to one player.

    Only ``player_position`` is set at construction; every other attribute
    starts as ``None``. The notes below describe what
    ``GameEnv.get_infoset`` stores in each attribute.

    Attributes:
        player_position: seat name this infoset belongs to.
        player_hand_cards: the hand of this seat (a list of card codes).
        num_cards_left_dict: seat name -> number of cards left in that hand.
        card_play_action_seq: the moves played so far, oldest first.
        other_hand_cards: the hands of all other seats concatenated.
        legal_actions: the moves this seat may play now (a list of lists).
        last_move: the move to respond to, as returned by
            ``GameEnv.get_last_move``.
        last_two_moves: the two most recent entries of the move history.
        last_move_dict: seat name -> that seat's latest action.
        played_cards: seat name -> cards that seat has played so far.
        all_handcards: seat name -> that seat's current hand.
        last_pid: last seat that played a non-pass move (not assigned by
            ``GameEnv.get_infoset``).
        bomb_num: the environment's bomb counters.
        player_id: not assigned anywhere in this module.
    """

    # Attributes created as None, in creation order.
    _UNSET_FIELDS = (
        'player_hand_cards',
        'num_cards_left_dict',
        'card_play_action_seq',
        'other_hand_cards',
        'legal_actions',
        'last_move',
        'last_two_moves',
        'last_move_dict',
        'played_cards',
        'all_handcards',
        'last_pid',
        'bomb_num',
        'player_id',
    )

    def __init__(self, player_position):
        self.player_position = player_position
        for field_name in self._UNSET_FIELDS:
            setattr(self, field_name, None)