477 lines
18 KiB
Python
477 lines
18 KiB
Python
from . import move_detector as md, move_selector as ms
|
|
from .move_generator import MovesGener
|
|
import random
|
|
import pickle
|
|
|
|
# Internal integer rank -> printable card symbol. '2' and the two jokers
# ('X', 'D') use the out-of-sequence ranks 17, 20 and 30.
EnvCard2RealCard = {
    3: '3', 4: '4', 5: '5', 6: '6', 7: '7',
    8: '8', 9: '9', 10: '10', 11: 'J', 12: 'Q',
    13: 'K', 14: 'A', 17: '2', 20: 'X', 30: 'D',
}

# Inverse lookup: printable card symbol -> internal integer rank.
RealCard2EnvCard = {symbol: rank for rank, symbol in EnvCard2RealCard.items()}

# Every rank that can appear in a hand, in ascending order.
cards_idx = list(range(3, 15)) + [17, 20, 30]

# Bomb plays grouped into three tiers; GameEnv.step counts how many plays of
# each tier occurred. The last two entries of cards_idx (20 and 30) are
# excluded from the same-rank bombs.
#   bombs[0]: six of a kind, followed by seven of a kind
#   bombs[1]: eight of a kind, followed by the king bomb [20, 20, 30, 30]
#   bombs[2]: four of a kind, followed by five of a kind
bombs = [
    [[rank] * size for size in (6, 7) for rank in cards_idx[:-2]],
    [[rank] * 8 for rank in cards_idx[:-2]] + [[20, 20, 30, 30]],
    [[rank] * size for size in (4, 5) for rank in cards_idx[:-2]],
]
|
|
|
|
|
|
def get_legal_card_play_actions(player_hand_cards, rival_move):
    """Enumerate the moves that may legally answer ``rival_move``.

    Args:
        player_hand_cards: The acting player's hand, handed to ``MovesGener``.
        rival_move: The move to beat; an empty list means there is nothing
            to beat.

    Returns:
        A list of moves, each sorted in place in ascending order. When
        ``rival_move`` is non-empty the empty move ``[]`` (pass) is appended
        as the last entry.
    """
    generator = MovesGener(player_hand_cards)

    detected = md.get_move_type(rival_move)
    kind = detected['type']
    serial_len = detected.get('len', 1)

    def bombs_from(smallest):
        """All same-rank bombs of ``smallest``..8 cards, then the king bomb."""
        found = []
        for size in range(smallest, 9):
            found = found + generator.gen_type_4_bomb(size)
        return found + generator.gen_type_5_king_bomb()

    def beat_bomb(size):
        """Bombs of ``size`` cards that beat the rival, plus every larger bomb."""
        beating = ms.filter_type_4_bomb(generator.gen_type_4_bomb(size), rival_move)
        beating += bombs_from(size + 1)
        return beating

    # Kinds without a branch below (the 3+1, serial 3+1, 4+2 and 4+2+2
    # branches are disabled in this variant) leave `moves` empty here.
    moves = []
    if kind == md.TYPE_0_PASS:
        moves = generator.gen_moves()
    elif kind == md.TYPE_1_SINGLE:
        moves = ms.filter_type_1_single(generator.gen_type_1_single(), rival_move)
    elif kind == md.TYPE_2_PAIR:
        moves = ms.filter_type_2_pair(generator.gen_type_2_pair(), rival_move)
    elif kind == md.TYPE_3_TRIPLE:
        moves = ms.filter_type_3_triple(generator.gen_type_3_triple(), rival_move)
    elif kind == md.TYPE_4_BOMB:
        moves = beat_bomb(4)
    elif kind == md.TYPE_4_BOMB5:
        moves = beat_bomb(5)
    elif kind == md.TYPE_4_BOMB6:
        moves = beat_bomb(6)
    elif kind == md.TYPE_4_BOMB7:
        moves = beat_bomb(7)
    elif kind == md.TYPE_4_BOMB8:
        moves = beat_bomb(8)
    elif kind == md.TYPE_5_KING_BOMB:
        # Nothing beats the king bomb.
        moves = []
    elif kind == md.TYPE_7_3_2:
        moves = ms.filter_type_7_3_2(generator.gen_type_7_3_2(), rival_move)
    elif kind == md.TYPE_8_SERIAL_SINGLE:
        moves = ms.filter_type_8_serial_single(
            generator.gen_type_8_serial_single(repeat_num=serial_len), rival_move)
    elif kind == md.TYPE_9_SERIAL_PAIR:
        moves = ms.filter_type_9_serial_pair(
            generator.gen_type_9_serial_pair(repeat_num=serial_len), rival_move)
    elif kind == md.TYPE_10_SERIAL_TRIPLE:
        moves = ms.filter_type_10_serial_triple(
            generator.gen_type_10_serial_triple(repeat_num=serial_len), rival_move)
    elif kind == md.TYPE_12_SERIAL_3_2:
        moves = ms.filter_type_12_serial_3_2(
            generator.gen_type_12_serial_3_2(repeat_num=serial_len), rival_move)

    # Any bomb may answer a kind that ranks below the bombs.
    # NOTE(review): this relies on the numeric ordering of the md.TYPE_*
    # constants; confirm every non-bomb kind compares below TYPE_4_BOMB.
    if kind != md.TYPE_0_PASS and kind < md.TYPE_4_BOMB:
        moves = moves + bombs_from(4)

    # Passing is only an option when there is a move to answer.
    if len(rival_move) != 0:
        moves = moves + [[]]

    for move in moves:
        move.sort()

    return moves
|
|
|
|
class GameEnv(object):
    """Card-play environment for four seats: the landlord and three farmers.

    The seats are ``'landlord'``, ``'landlord_down'``, ``'landlord_front'``
    and ``'landlord_up'`` and act in that order. Each seat owns an
    ``InfoSet``; before every turn the acting seat's info set is refreshed
    and a copy of it is handed to that seat's player.
    """

    # Seat names in the order used for per-seat dictionaries and for
    # concatenating the other players' hands. The turn order is different,
    # see _NEXT_SEAT.
    _SEATS = ('landlord', 'landlord_up', 'landlord_front', 'landlord_down')

    # Seat that acts after each seat; every seat not listed here hands the
    # turn back to the landlord.
    _NEXT_SEAT = {
        'landlord': 'landlord_down',
        'landlord_down': 'landlord_front',
        'landlord_front': 'landlord_up',
    }

    def __init__(self, players):
        """Create an environment.

        Args:
            players: Mapping from seat name to an agent object whose
                ``act(infoset)`` method returns the move to play.
        """
        self.players = players

        # Statistics accumulated over all games; reset() leaves them alone.
        self.num_wins = {'landlord': 0,
                         'farmer': 0}
        self.num_scores = {'landlord': 0,
                           'farmer': 0}

        self.face_up_level = 0

        # Side that won the most recently finished game; None until then.
        self.winner = None

        self._clear_game_state()

    def _clear_game_state(self):
        """Set every attribute that belongs to a single game to its start value."""
        self.card_play_action_seq = []

        self.three_landlord_cards = None
        self.three_landlord_cards_all = None
        self.game_over = False

        self.acting_player_position = None
        self.player_utility_dict = None

        self.last_move_dict = {pos: [] for pos in self._SEATS}
        self.played_cards = {pos: [] for pos in self._SEATS}

        self.last_move = []
        self.last_two_moves = []

        self.info_sets = {pos: InfoSet(pos) for pos in self._SEATS}
        for info in self.info_sets.values():
            info.face_up_level = self.face_up_level

        # One counter per group of the module-level `bombs` list.
        self.bomb_num = [0, 0, 0]
        self.pos_bomb_num = {pos: 0 for pos in self._SEATS}
        self.last_pid = 'landlord'
        self.step_count = 0

    def card_play_init(self, card_play_data):
        """Deal the cards described by ``card_play_data`` and prepare turn one.

        Args:
            card_play_data: Dict holding one hand per seat, either directly
                under the seat names or nested under the key ``'play'``.
                Optional keys: ``'three_landlord_cards'`` and
                ``'face_up_level'``.

        The hand lists are stored as given (not copied), so playing cards
        mutates the lists passed in.
        """
        if 'play' in card_play_data:
            hands = card_play_data['play']
        else:
            hands = card_play_data
        for pos in self._SEATS:
            self.info_sets[pos].player_hand_cards = hands[pos]

        if 'three_landlord_cards' not in card_play_data:
            # Fall back to a fixed slice of the landlord's dealt cards. Two
            # separate slices, because step() removes cards from the first.
            # NOTE(review): this reads card_play_data['landlord'] even when
            # the hands came from card_play_data['play'] -- confirm callers
            # always provide one of the two keys.
            self.three_landlord_cards = card_play_data['landlord'][25:33]
            self.three_landlord_cards_all = card_play_data['landlord'][25:33]
        else:
            self.three_landlord_cards = card_play_data['three_landlord_cards'][:]
            self.three_landlord_cards_all = card_play_data['three_landlord_cards'][:]

        face_up_level = card_play_data.get('face_up_level', 0)
        for pos in self._SEATS:
            self.info_sets[pos].face_up_level = face_up_level

        self.get_acting_player_position()
        self.game_infoset = self.get_infoset()

    def game_done(self):
        """End the game and settle the scores once any seat's hand is empty."""
        if any(len(self.info_sets[pos].player_hand_cards) == 0
               for pos in self._SEATS):
            # As soon as one of the four players has played all of their
            # cards, the game is over.
            self.compute_player_utility()
            self.update_num_wins_scores()

            self.game_over = True

    def compute_player_utility(self):
        """Record the outcome: the landlord wins iff the landlord's hand is empty."""
        if len(self.info_sets['landlord'].player_hand_cards) == 0:
            self.player_utility_dict = {'landlord': 3,
                                        'farmer': -1}
        else:
            self.player_utility_dict = {'landlord': -3,
                                        'farmer': 1}

    def update_num_wins_scores(self):
        """Fold the finished game's outcome into ``num_wins`` / ``num_scores``.

        The base score (3 for the landlord, 1 for the farmers) is doubled
        for every ``bombs[0]`` play and tripled for every ``bombs[1]`` play.
        """
        multiplier = (2 ** self.bomb_num[0]) * (3 ** self.bomb_num[1])
        for pos, utility in self.player_utility_dict.items():
            base_score = 3 if pos == 'landlord' else 1
            stake = base_score * multiplier
            if utility > 0:
                self.num_wins[pos] += 1
                self.winner = pos
                self.num_scores[pos] += stake
            else:
                self.num_scores[pos] -= stake

    def get_winner(self):
        """Return the winning side of the last finished game, or None."""
        return self.winner

    def get_bomb_num(self):
        """Return the per-group bomb counters (the live list, not a copy)."""
        return self.bomb_num

    def step(self):
        """Let the acting player make one move and advance the game.

        Returns:
            The move chosen by the player (an empty list for a pass).
        """
        seat = self.acting_player_position
        action = self.players[seat].act(self.game_infoset)
        self.step_count += 1

        if len(action) > 0:
            self.last_pid = seat

        # Bomb bookkeeping; a pass matches none of the groups. Only the
        # first two groups count towards the per-seat bomb total.
        if action in bombs[0]:
            self.bomb_num[0] += 1
            self.pos_bomb_num[seat] += 1

        if action in bombs[1]:
            self.bomb_num[1] += 1
            self.pos_bomb_num[seat] += 1

        if action in bombs[2]:
            self.bomb_num[2] += 1

        self.last_move_dict[seat] = action.copy()

        self.card_play_action_seq.append((seat, action))
        self.update_acting_player_hand_cards(action)

        self.played_cards[seat] += action

        # Track which of the landlord's extra cards are still unplayed.
        if seat == 'landlord' and \
                len(action) > 0 and \
                len(self.three_landlord_cards) > 0:
            for card in action:
                if len(self.three_landlord_cards) == 0:
                    break
                if card in self.three_landlord_cards:
                    self.three_landlord_cards.remove(card)

        self.game_done()
        if not self.game_over:
            self.get_acting_player_position()
            self.game_infoset = self.get_infoset()
        return action

    def _last_non_pass_move(self):
        """Return the newest non-empty move among the last three history entries.

        Returns an empty list when there is no history or when the last
        three entries are all passes (the next player then leads freely).
        """
        for entry in reversed(self.card_play_action_seq[-3:]):
            # History entries are (position, action) tuples.
            if len(entry[1]) > 0:
                return entry[1]
        return []

    def get_last_move(self):
        """Return the most recent move that was not a pass (``[]`` if none)."""
        return self._last_non_pass_move()

    def get_last_two_moves(self):
        """Return the two newest history entries, newest first.

        Missing entries are padded with empty lists. Existing entries are
        returned as stored in the history, i.e. as ``(position, action)``
        tuples.
        """
        newest_first = self.card_play_action_seq[-2:][::-1]
        return (newest_first + [[], []])[:2]

    def get_acting_player_position(self):
        """Advance the turn to the next seat and return that seat.

        The first call of a game selects the landlord.
        """
        if self.acting_player_position is None:
            self.acting_player_position = 'landlord'
        else:
            self.acting_player_position = self._NEXT_SEAT.get(
                self.acting_player_position, 'landlord')

        return self.acting_player_position

    def update_acting_player_hand_cards(self, action):
        """Remove the played cards from the acting player's hand and re-sort it."""
        if action != []:
            hand = self.info_sets[self.acting_player_position].player_hand_cards
            for card in action:
                hand.remove(card)
            hand.sort()

    def get_legal_card_play_actions(self):
        """Return the legal moves of the acting player for the current turn."""
        rival_move = self._last_non_pass_move()

        # The bare name resolves to the module-level function, not to this method.
        return get_legal_card_play_actions(
            self.info_sets[self.acting_player_position].player_hand_cards,
            rival_move)

    def reset(self):
        """Prepare for a new game.

        Players, the accumulated win/score statistics, ``face_up_level`` and
        the last winner are kept.
        """
        self._clear_game_state()

    def get_infoset(self):
        """Refresh the acting player's info set and return a copy of it.

        The stored info set shares several objects with the environment
        (bomb counters, played cards, history, hands). The returned object
        is produced by a pickle round trip, so the player cannot mutate the
        environment through it.
        """
        seat = self.acting_player_position
        info = self.info_sets[seat]

        info.legal_actions = self.get_legal_card_play_actions()
        info.bomb_num = self.bomb_num
        info.last_move = self.get_last_move()
        info.last_two_moves = self.get_last_two_moves()
        info.last_move_dict = self.last_move_dict

        info.num_cards_left_dict = {
            pos: len(self.info_sets[pos].player_hand_cards)
            for pos in self._SEATS}

        # Concatenate every other seat's hand.
        others = []
        for pos in self._SEATS:
            if pos != seat:
                others += self.info_sets[pos].player_hand_cards
        info.other_hand_cards = others

        info.played_cards = self.played_cards
        info.three_landlord_cards = self.three_landlord_cards
        info.three_landlord_cards_all = self.three_landlord_cards_all
        info.card_play_action_seq = self.card_play_action_seq

        info.all_handcards = {
            pos: self.info_sets[pos].player_hand_cards
            for pos in self._SEATS}

        return pickle.loads(pickle.dumps(info))
|
|
|
|
class InfoSet(object):
    """Snapshot of the game as handed to one player.

    An info set bundles everything describing the current situation: the
    hands, the history of moves, the legal moves, and so on. All fields
    except ``player_position`` and ``face_up_level`` start out as ``None``
    and are filled in by the game environment.
    """

    def __init__(self, player_position):
        # Seat this info set belongs to: 'landlord', 'landlord_down',
        # 'landlord_front' or 'landlord_up'.
        self.player_position = player_position

        # Hand of this player (a list of cards).
        self.player_hand_cards = None

        # Number of cards each seat still holds: dict of str -> int.
        self.num_cards_left_dict = None

        # The landlord's extra cards: the ones not yet played, and the
        # complete set.
        self.three_landlord_cards = None
        self.three_landlord_cards_all = None

        # History of all moves played so far, oldest first.
        self.card_play_action_seq = None

        # Hands of all the other players, concatenated into one list.
        self.other_hand_cards = None

        # Moves this player may choose from right now (a list of lists).
        self.legal_actions = None

        # The most recent move that was not a pass.
        self.last_move = None

        # The two most recent entries of the move history.
        self.last_two_moves = None

        # The latest move of every seat, keyed by position.
        self.last_move_dict = None

        # Cards played so far.
        self.played_cards = None

        # Hand of every seat: dict keyed by position.
        self.all_handcards = None

        # Last seat that played an actual move, i.e. did not pass.
        self.last_pid = None

        # Bombs played so far.
        self.bomb_num = None

        self.player_id = None

        # Bit flags selecting which cards are shown face up:
        # 0x01 three_landlord_cards, 0x02 landlord, 0x04 landlord_up,
        # 0x08 landlord_front, 0x10 landlord_down.
        self.face_up_level = 0
|