# Source: Douzero_Resnet/douzero/env/game.py (483 lines, 18 KiB, Python)
from copy import deepcopy
from . import move_detector as md, move_selector as ms
from .move_generator import MovesGener
import random
# (stray page-extraction timestamp: 2021-12-14 14:16:30 +08:00)
import pickle
# (stray page-extraction timestamp: 2021-09-07 17:19:25 +08:00)
# Integer card codes used by the environment, mapped to printable ranks.
EnvCard2RealCard = {
    3: '3', 4: '4', 5: '5', 6: '6', 7: '7',
    8: '8', 9: '9', 10: '10', 11: 'J', 12: 'Q',
    13: 'K', 14: 'A', 17: '2', 20: 'X', 30: 'D',
}

# Reverse lookup: printable rank -> environment card code.
RealCard2EnvCard = {rank: code for code, rank in EnvCard2RealCard.items()}

# Every card code, lowest first; the last two entries (20, 30) are the
# codes printed as 'X' and 'D'.
cards_idx = list(range(3, 15)) + [17, 20, 30]

# Bomb lookup tables, grouped into three tiers.  Same-rank bombs are built
# for every code except the last two (20 and 30).
#   bombs[0]: six-of-a-kind followed by seven-of-a-kind
#   bombs[1]: eight-of-a-kind followed by the [20, 20, 30, 30] combination
#   bombs[2]: four-of-a-kind followed by five-of-a-kind
bombs = [
    [[rank] * size for size in (6, 7) for rank in cards_idx[:-2]],
    [[rank] * 8 for rank in cards_idx[:-2]] + [[20, 20, 30, 30]],
    [[rank] * size for size in (4, 5) for rank in cards_idx[:-2]],
]
# (stray page-extraction timestamp: 2021-09-07 17:19:25 +08:00)
class GameEnv(object):
    """Card-play environment for a four-seat landlord-versus-farmers game.

    The environment keeps one ``InfoSet`` per seat, asks the acting seat's
    agent for a move on every ``step()``, records the public move history
    and settles wins and scores as soon as any hand becomes empty.

    ``num_wins`` and ``num_scores`` accumulate across games; all other
    state is per-game and is rebuilt by ``reset()``.
    """

    # Seat names, in the order used whenever a per-seat dict is built.
    _POSITIONS = ('landlord', 'landlord_up', 'landlord_front', 'landlord_down')

    # Turn order: landlord -> landlord_down -> landlord_front -> landlord_up
    # -> landlord.  Any seat not listed here hands the turn to the landlord.
    _NEXT_POSITION = {
        'landlord': 'landlord_down',
        'landlord_down': 'landlord_front',
        'landlord_front': 'landlord_up',
    }

    def __init__(self, players):
        """Create an environment.

        Args:
            players: mapping from seat name to an agent object exposing
                ``act(infoset)``; ``step()`` looks the acting seat up here.
        """
        self.players = players
        # Totals that survive ``reset()``.
        self.num_wins = {'landlord': 0,
                         'farmer': 0}
        self.num_scores = {'landlord': 0,
                           'farmer': 0}
        # Side that won the most recently finished game; None until the
        # first game has ended (so ``get_winner()`` never raises).
        self.winner = None
        self._init_round_state()

    def _init_round_state(self):
        """(Re)create every attribute that describes a single game."""
        # (position, action) tuples in play order; passes are stored as [].
        self.card_play_action_seq = []
        self.game_over = False
        self.acting_player_position = None
        self.player_utility_dict = None
        self.last_move_dict = {pos: [] for pos in self._POSITIONS}
        self.played_cards = {pos: [] for pos in self._POSITIONS}
        self.last_move = []
        self.last_two_moves = []
        self.info_sets = {pos: InfoSet(pos) for pos in self._POSITIONS}
        # One counter per tier of the module-level ``bombs`` table.
        self.bomb_num = [0, 0, 0]
        self.pos_bomb_num = {pos: 0 for pos in self._POSITIONS}
        self.last_pid = 'landlord'
        self.bid_info = [[-1, -1, -1, -1] for _ in range(5)]
        self.bid_count = 0
        self.multiply_count = {pos: 0 for pos in self._POSITIONS}
        self.step_count = 0

    def card_play_init(self, card_play_data):
        """Deal the hands and prepare the first infoset.

        Args:
            card_play_data: either a dict mapping each seat name to its
                hand, or a dict with a ``'play'`` entry (seat -> hand) and
                a ``'bid'`` entry (seat -> bid info).  The hand lists are
                stored by reference, not copied.
        """
        if 'bid' in card_play_data:
            bids = card_play_data['bid']
            hands = card_play_data['play']
            for pos in self._POSITIONS:
                self.info_sets[pos].bid_info = bids[pos]
        else:
            hands = card_play_data
        for pos in self._POSITIONS:
            self.info_sets[pos].player_hand_cards = hands[pos]
        self.get_acting_player_position()
        self.game_infoset = self.get_infoset()

    def game_done(self):
        """Settle the game if any of the four seats has emptied its hand."""
        if any(len(self.info_sets[pos].player_hand_cards) == 0
               for pos in self._POSITIONS):
            self.compute_player_utility()
            self.update_num_wins_scores()
            self.game_over = True

    def compute_player_utility(self):
        """Set ``player_utility_dict`` from whether the landlord is out of cards."""
        if len(self.info_sets['landlord'].player_hand_cards) == 0:
            self.player_utility_dict = {'landlord': 3,
                                        'farmer': -1}
        else:
            self.player_utility_dict = {'landlord': -3,
                                        'farmer': 1}

    def update_num_wins_scores(self):
        """Add the finished game's result to ``num_wins``/``num_scores``.

        The stake is the side's base score (3 for the landlord, 1 for the
        farmers) doubled per ``bomb_num[0]`` and tripled per ``bomb_num[1]``;
        ``bomb_num[2]`` does not enter the score.
        """
        multiplier = (2 ** self.bomb_num[0]) * (3 ** self.bomb_num[1])
        for pos, utility in self.player_utility_dict.items():
            base_score = 3 if pos == 'landlord' else 1
            stake = base_score * multiplier
            if utility > 0:
                self.num_wins[pos] += 1
                self.winner = pos
                self.num_scores[pos] += stake
            else:
                self.num_scores[pos] -= stake

    def get_winner(self):
        """Return the winning side of the last finished game (None if none yet)."""
        return self.winner

    def get_bomb_num(self):
        """Return the live three-element bomb counter list."""
        return self.bomb_num

    def step(self):
        """Let the acting seat's agent move and advance the game.

        Returns:
            The action returned by the agent (``[]`` is a pass).
        """
        action = self.players[self.acting_player_position].act(
            self.game_infoset)
        position = self.acting_player_position
        self.step_count += 1

        if len(action) > 0:
            self.last_pid = position

        if action in bombs[0]:
            self.bomb_num[0] += 1
            self.pos_bomb_num[position] += 1
        if action in bombs[1]:
            self.bomb_num[1] += 1
            self.pos_bomb_num[position] += 1
        if action in bombs[2]:
            # NOTE(review): unlike the two tiers above, this tier does not
            # bump pos_bomb_num -- kept as found; confirm it is intended.
            self.bomb_num[2] += 1

        self.last_move_dict[position] = action.copy()
        self.card_play_action_seq.append((position, action))
        self.update_acting_player_hand_cards(action)
        self.played_cards[position] += action

        self.game_done()
        if not self.game_over:
            self.get_acting_player_position()
            self.game_infoset = self.get_infoset()
        return action

    def _latest_rival_move(self):
        """Return the newest non-pass move among the last three turns.

        With four seats, three consecutive passes mean every other seat has
        passed, so ``[]`` is returned.  Histories shorter than three turns
        are handled without indexing errors.
        """
        for _, move in reversed(self.card_play_action_seq[-3:]):
            if len(move) != 0:
                return move
        return []

    def get_last_move(self):
        """Return the most recent non-pass move, or ``[]`` if there is none.

        History entries are ``(position, action)`` tuples, so the action is
        what has to be inspected for a pass (the tuple itself is never
        empty).  The look-back window is the previous three turns, the same
        one ``get_legal_card_play_actions`` uses.
        """
        return self._latest_rival_move()

    def get_last_two_moves(self):
        """Return the two newest history entries, newest first.

        Missing entries are padded with ``[]``.

        NOTE(review): the items are the raw ``(position, action)`` tuples
        from ``card_play_action_seq`` rather than bare actions; kept as
        found because consumers are not visible here -- confirm.
        """
        last_two_moves = [[], []]
        for entry in self.card_play_action_seq[-2:]:
            last_two_moves.insert(0, entry)
            last_two_moves = last_two_moves[:2]
        return last_two_moves

    def get_acting_player_position(self):
        """Advance ``acting_player_position`` to the next seat and return it.

        The first call of a game (position still None) selects the landlord.
        """
        if self.acting_player_position is None:
            self.acting_player_position = 'landlord'
        else:
            self.acting_player_position = self._NEXT_POSITION.get(
                self.acting_player_position, 'landlord')
        return self.acting_player_position

    def update_acting_player_hand_cards(self, action):
        """Remove the played cards from the acting seat's hand and re-sort it."""
        if action != []:
            hand = self.info_sets[self.acting_player_position].player_hand_cards
            for card in action:
                hand.remove(card)
            hand.sort()

    def get_legal_card_play_actions(self):
        """Return every move the acting seat may play now.

        The move to beat is the newest non-pass move of the previous three
        turns.  When there is one, a pass (``[]``) is also legal.  Each
        returned move is sorted in place.
        """
        mg = MovesGener(
            self.info_sets[self.acting_player_position].player_hand_cards)

        rival_move = self._latest_rival_move()
        rival_type = md.get_move_type(rival_move)
        rival_move_type = rival_type['type']
        rival_move_len = rival_type.get('len', 1)

        # Same-rank bomb types ordered by size: index 0 <-> 4 cards, ...,
        # index 4 <-> 8 cards.
        bomb_types = (md.TYPE_4_BOMB, md.TYPE_4_BOMB5, md.TYPE_4_BOMB6,
                      md.TYPE_4_BOMB7, md.TYPE_4_BOMB8)

        moves = list()
        if rival_move_type == md.TYPE_0_PASS:
            moves = mg.gen_moves()
        elif rival_move_type == md.TYPE_1_SINGLE:
            all_moves = mg.gen_type_1_single()
            moves = ms.filter_type_1_single(all_moves, rival_move)
        elif rival_move_type == md.TYPE_2_PAIR:
            all_moves = mg.gen_type_2_pair()
            moves = ms.filter_type_2_pair(all_moves, rival_move)
        elif rival_move_type == md.TYPE_3_TRIPLE:
            all_moves = mg.gen_type_3_triple()
            moves = ms.filter_type_3_triple(all_moves, rival_move)
        elif rival_move_type in bomb_types:
            # Same-size bombs must pass the filter; every larger bomb size
            # and the king bomb are added unfiltered.
            size = bomb_types.index(rival_move_type) + 4
            all_moves = mg.gen_type_4_bomb(size)
            moves = ms.filter_type_4_bomb(all_moves, rival_move)
            for bigger in range(size + 1, 9):
                moves += mg.gen_type_4_bomb(bigger)
            moves += mg.gen_type_5_king_bomb()
        elif rival_move_type == md.TYPE_5_KING_BOMB:
            moves = []
        elif rival_move_type == md.TYPE_7_3_2:
            all_moves = mg.gen_type_7_3_2()
            moves = ms.filter_type_7_3_2(all_moves, rival_move)
        elif rival_move_type == md.TYPE_8_SERIAL_SINGLE:
            all_moves = mg.gen_type_8_serial_single(repeat_num=rival_move_len)
            moves = ms.filter_type_8_serial_single(all_moves, rival_move)
        elif rival_move_type == md.TYPE_9_SERIAL_PAIR:
            all_moves = mg.gen_type_9_serial_pair(repeat_num=rival_move_len)
            moves = ms.filter_type_9_serial_pair(all_moves, rival_move)
        elif rival_move_type == md.TYPE_10_SERIAL_TRIPLE:
            all_moves = mg.gen_type_10_serial_triple(repeat_num=rival_move_len)
            moves = ms.filter_type_10_serial_triple(all_moves, rival_move)
        elif rival_move_type == md.TYPE_12_SERIAL_3_2:
            all_moves = mg.gen_type_12_serial_3_2(repeat_num=rival_move_len)
            moves = ms.filter_type_12_serial_3_2(all_moves, rival_move)
        # Types 6 (3+1), 11 (serial 3+1), 13 (4+2) and 14 (4+2+2) have no
        # response branch: a rival move of those types yields no same-type
        # answers here.

        # NOTE(review): this relies on every non-bomb type constant in
        # move_detector comparing lower than TYPE_4_BOMB -- confirm.
        if rival_move_type != md.TYPE_0_PASS and rival_move_type < md.TYPE_4_BOMB:
            trumps = []
            for size in range(4, 9):
                trumps += mg.gen_type_4_bomb(size)
            trumps += mg.gen_type_5_king_bomb()
            moves = moves + trumps

        if len(rival_move) != 0:  # rival_move is not 'pass'
            moves = moves + [[]]

        for m in moves:
            m.sort()

        return moves

    def reset(self):
        """Discard all per-game state; win and score totals are kept."""
        self._init_round_state()

    def get_infoset(self):
        """Refresh the acting seat's infoset and return a detached copy.

        The stored infoset is filled with references to the environment's
        live structures; the returned object is a deep copy produced by a
        pickle round-trip of that infoset.
        """
        info = self.info_sets[self.acting_player_position]

        info.last_pid = self.last_pid
        info.legal_actions = self.get_legal_card_play_actions()
        info.bomb_num = self.bomb_num
        info.last_move = self.get_last_move()
        info.last_two_moves = self.get_last_two_moves()
        info.last_move_dict = self.last_move_dict
        info.num_cards_left_dict = {
            pos: len(self.info_sets[pos].player_hand_cards)
            for pos in self._POSITIONS}

        # Concatenate the hands of every seat except the acting one.
        info.other_hand_cards = []
        for pos in self._POSITIONS:
            if pos != self.acting_player_position:
                info.other_hand_cards += self.info_sets[pos].player_hand_cards

        info.played_cards = self.played_cards
        info.card_play_action_seq = self.card_play_action_seq
        info.all_handcards = {
            pos: self.info_sets[pos].player_hand_cards
            for pos in self._POSITIONS}

        # Serialising our own in-memory object; no external data is loaded.
        return pickle.loads(pickle.dumps(info))
# (stray page-extraction timestamp: 2021-09-07 17:19:25 +08:00)
class InfoSet(object):
    """
    The game state as seen from one seat.

    An infoset bundles everything describing the current situation for
    that seat: its own hand, the hands of the other seats, the move
    history, the legal actions, bomb counters and bidding data.  All
    fields except ``player_position``, ``bid_info`` and ``multiply_info``
    start as ``None``.
    """

    def __init__(self, player_position):
        # The seat this infoset belongs to, i.e. landlord, landlord_down,
        # landlord_front, or landlord_up.
        self.player_position = player_position
        # The hand cards of the current player. A list.
        self.player_hand_cards = None
        # The number of cards left for each player. A dict of str --> int.
        self.num_cards_left_dict = None
        # (The landlord-cards field is disabled in this variant.)
        # The historical moves. A list.
        self.card_play_action_seq = None
        # The combined hand cards of all the other players. A list.
        self.other_hand_cards = None
        # The legal actions for the current move. A list of lists.
        self.legal_actions = None
        # The most recent valid move.
        self.last_move = None
        # The most recent two moves.
        self.last_two_moves = None
        # The last move of every position. A dict.
        self.last_move_dict = None
        # The cards played so far.
        self.played_cards = None
        # The hand cards of all the players. A dict.
        self.all_handcards = None
        # Last player position that played a valid move, i.e., not `pass`.
        self.last_pid = None
        # The number of bombs played so far.
        self.bomb_num = None
        # Bidding record: five rows of four slots, -1 meaning "not set".
        self.bid_info = [[-1, -1, -1, -1] for _ in range(5)]
        self.multiply_info = [1, 0, 0, 0]
        self.player_id = None