# Listing metadata: 456 lines, 17 KiB
from copy import deepcopy
from . import move_detector as md, move_selector as ms
from .move_generator import MovesGener
import random
# Card encodings shared by the environment.
#
# Internally every card is an integer code; for display and text input it is a
# single character.  Codes 3-14 and 17 occur four times in the deck, while 20
# and 30 ('X' and 'D') occur once each -- presumably the two jokers.
EnvCard2RealCard = {
    3: '3', 4: '4', 5: '5', 6: '6', 7: '7', 8: '8', 9: '9',
    10: 'T', 11: 'J', 12: 'Q', 13: 'K', 14: 'A',
    17: '2',
    20: 'X', 30: 'D',
}

# Reverse lookup: display character -> integer code.
RealCard2EnvCard = {symbol: code for code, symbol in EnvCard2RealCard.items()}

# The complete 54-card deck in ascending code order.
AllEnvCard = [code
              for code in EnvCard2RealCard if code < 20
              for _ in range(4)] + [20, 30]

# Every bomb: four of a kind for each four-copy code, plus the 20/30 pair.
bombs = [[code] * 4 for code in EnvCard2RealCard if code < 20] + [[20, 30]]
class GameEnv(object):
    """State machine for the card-play phase of a three-player landlord game.

    The environment records who acts next, what has been played, the cards
    each position still holds, and the running win/score totals.

    ``players`` is a two-item sequence: ``players[0]`` is the position name
    that is compared against the acting position when hand cards are
    updated, and ``players[1]`` is an agent whose ``act(infoset)`` returns
    ``(action, confidence)``.
    """

    # NOTE(review): the reviewed copy of this class had lost its indentation
    # and a number of lines (``else`` branches, continuation lines, a few
    # calls).  Everything tagged "restored" below was reconstructed from the
    # surrounding logic -- confirm against version control before merging.

    _POSITIONS = ('landlord', 'landlord_up', 'landlord_down')

    # Turn order: landlord -> landlord_down -> landlord_up -> landlord.
    # Any state missing from this table (including None) maps to 'landlord'.
    _NEXT_POSITION = {'landlord': 'landlord_down',
                      'landlord_down': 'landlord_up'}

    def __init__(self, players):
        """Create an environment with empty round state and zeroed totals."""
        self.card_play_action_seq = []
        self.three_landlord_cards = None
        self.game_over = False
        self.acting_player_position = None
        self.player_utility_dict = None
        self.players = players
        self.last_move_dict = {'landlord': [],
                               'landlord_up': [],
                               'landlord_down': []}
        self.played_cards = {'landlord': [],
                             'landlord_up': [],
                             'landlord_down': []}
        self.last_move = []
        self.last_two_moves = []
        self.num_wins = {'landlord': 0,
                         'farmer': 0}
        self.num_scores = {'landlord': 0,
                           'farmer': 0}
        # Set by update_num_wins_scores(); initialised here so get_winner()
        # returns None instead of raising before the first finished game.
        self.winner = None
        self.info_sets = {'landlord': InfoSet('landlord'),
                          'landlord_up': InfoSet('landlord_up'),
                          'landlord_down': InfoSet('landlord_down')}
        self.bomb_num = 0
        self.last_pid = 'landlord'
        self.bid_info = [[1, 1, 1],
                         [1, 1, 1],
                         [1, 1, 1],
                         [1, 1, 1]]
        self.bid_count = 0
        # NOTE(review): starts at 1 here but reset() uses 0 -- confirm which
        # value is intended; nothing in this class reads multiply_count.
        self.multiply_count = {'landlord': 1,
                               'landlord_up': 1,
                               'landlord_down': 1}
        self.step_count = 0

    def card_play_init(self, card_play_data):
        """Deal the hands from ``card_play_data`` and prepare the first turn.

        ``card_play_data`` must provide ``'three_landlord_cards'``.
        """
        # NOTE(review): restored -- the right-hand sides of the three hand
        # assignments were missing (which made them one chained assignment
        # of the three landlord cards).  Keys are assumed to match the
        # position names; verify against the caller.
        for pos in self._POSITIONS:
            self.info_sets[pos].player_hand_cards = card_play_data[pos]
        self.three_landlord_cards = card_play_data['three_landlord_cards']
        # NOTE(review): restored -- get_infoset() indexes info_sets by the
        # acting position, which is still None at this point otherwise.
        self.get_acting_player_position()
        self.game_infoset = self.get_infoset()

    def game_done(self):
        """Mark the game finished once any player has no cards left."""
        if any(len(self.info_sets[pos].player_hand_cards) == 0
               for pos in self._POSITIONS):
            # NOTE(review): restored -- nothing else in this class invokes
            # the two bookkeeping methods below; confirm no external caller
            # also calls them (scores would be counted twice).
            self.compute_player_utility()
            self.update_num_wins_scores()
            self.game_over = True

    def compute_player_utility(self):
        """Set the per-side utility: +2/-1 if the landlord won, else -2/+1."""
        if len(self.info_sets['landlord'].player_hand_cards) == 0:
            self.player_utility_dict = {'landlord': 2,
                                        'farmer': -1}
        else:  # restored: without it the landlord result was always -2
            self.player_utility_dict = {'landlord': -2,
                                        'farmer': 1}

    def update_num_wins_scores(self):
        """Fold the current utilities into ``num_wins`` and ``num_scores``.

        The stake is 2 for the landlord side and 1 for the farmer side,
        doubled once per bomb played.
        """
        for pos, utility in self.player_utility_dict.items():
            base_score = 2 if pos == 'landlord' else 1
            stake = base_score * (2 ** self.bomb_num)
            if utility > 0:
                self.num_wins[pos] += 1
                self.winner = pos
                self.num_scores[pos] += stake
            else:  # restored: without it the winner's gain was cancelled
                self.num_scores[pos] -= stake

    def get_winner(self):
        """Return the side recorded by the last scoring, or None if none."""
        return self.winner

    def get_bomb_num(self):
        """Return the number of bombs played so far."""
        return self.bomb_num

    def step(self, position, action=None):
        """Apply one move for the acting player and advance the turn.

        If the acting position equals ``position`` the move is requested
        from ``players[1].act`` and ``action`` is ignored; otherwise
        ``action`` (a list of card codes, empty for a pass) is applied.

        Returns a dict with ``"action"`` (the cards as display characters)
        and ``"win_rate"`` (the agent confidence times 100, as a string; it
        is ``"0.0"`` when the move did not come from the agent).
        """
        # A fresh list per call instead of a shared mutable default: the
        # action object is stored in card_play_action_seq below.
        if action is None:
            action = []

        # NOTE(review): the damaged copy also contained an interactive
        # branch (agent for players[0], keyboard input for the others) in
        # front of this logic.  It could not run together with the code
        # below -- it would call the agent twice -- and appears to have been
        # disabled, so it is not reproduced.  Confirm.
        win_rate = 0
        if self.acting_player_position == position:
            action, actions_confidence = self.players[1].act(self.game_infoset)
            # The confidence is reported as returned by the agent.
            win_rate = actions_confidence

        if len(action) > 0:
            self.last_pid = self.acting_player_position
        if action in bombs:
            self.bomb_num += 1

        # restored: the ``self.last_move_dict[`` prefix was missing
        self.last_move_dict[self.acting_player_position] = action.copy()
        self.card_play_action_seq.append((position, action))
        # NOTE(review): restored -- nothing else removes played cards from
        # the hands, and game_done() relies on the hands shrinking.
        self.update_acting_player_hand_cards(action)
        self.played_cards[self.acting_player_position] += action

        if self.acting_player_position == 'landlord' and \
                len(action) > 0 and \
                len(self.three_landlord_cards) > 0:
            for card in action:
                if len(self.three_landlord_cards) == 0:
                    break
                if card in self.three_landlord_cards:
                    # NOTE(review): restored -- the body of this check was
                    # missing; removing the revealed card is assumed.
                    self.three_landlord_cards.remove(card)

        # NOTE(review): restored -- game_over is only ever set by game_done()
        # and the acting position is only advanced by
        # get_acting_player_position().
        self.game_done()
        if not self.game_over:
            self.get_acting_player_position()
            self.game_infoset = self.get_infoset()

        # The return value is only consumed for the agent-controlled player.
        action_message = {"action": str(''.join([EnvCard2RealCard[c] for c in action])),
                          "win_rate": str(round(float(win_rate) * 100, 4))}
        return action_message

    def get_last_move(self):
        """Return the most recent move, skipping one trailing pass.

        Returns ``[]`` when no move has been recorded, or when the only
        recorded move is a pass.
        """
        history = self.card_play_action_seq
        if len(history) == 0:
            return []
        if len(history[-1][1]) != 0:
            return history[-1][1]
        # Last entry was a pass: fall back to the entry before it, guarding
        # the one-entry case that would otherwise raise IndexError.
        if len(history) >= 2:
            return history[-2][1]
        return []

    def get_last_two_moves(self):
        """Return the two latest moves, newest first, padded with ``[]``."""
        recent = [entry[1] for entry in reversed(self.card_play_action_seq[-2:])]
        return (recent + [[], []])[:2]

    def get_acting_player_position(self):
        """Advance ``acting_player_position`` to the next player and return it.

        The first call (position still None) selects the landlord.
        """
        self.acting_player_position = self._NEXT_POSITION.get(
            self.acting_player_position, 'landlord')
        return self.acting_player_position

    def update_acting_player_hand_cards(self, action):
        """Remove the cards of ``action`` from the acting player's hand."""
        if action == []:
            return
        hand = self.info_sets[self.acting_player_position].player_hand_cards
        if self.acting_player_position == self.players[0]:
            # players[0]'s hand is tracked card by card.
            # NOTE(review): restored loop body -- confirm.
            for card in action:
                hand.remove(card)
        else:
            # For the other two players only the number of cards is kept in
            # step: drop as many entries as were played.
            del hand[0:len(action)]

    def get_legal_card_play_actions(self):
        """Return every move the acting player may legally make now.

        Each move is a sorted list of card codes; ``[]`` (pass) is included
        whenever there is a rival move to respond to.
        """
        # NOTE(review): restored constructor argument -- the call was cut
        # off after ``MovesGener(``; the acting player's hand is assumed.
        mg = MovesGener(
            self.info_sets[self.acting_player_position].player_hand_cards)

        rival_move = self.get_last_move()
        rival_type = md.get_move_type(rival_move)
        rival_move_type = rival_type['type']
        rival_move_len = rival_type.get('len', 1)
        moves = list()

        if rival_move_type == md.TYPE_0_PASS:
            moves = mg.gen_moves()
        elif rival_move_type == md.TYPE_1_SINGLE:
            all_moves = mg.gen_type_1_single()
            moves = ms.filter_type_1_single(all_moves, rival_move)
        elif rival_move_type == md.TYPE_2_PAIR:
            all_moves = mg.gen_type_2_pair()
            moves = ms.filter_type_2_pair(all_moves, rival_move)
        elif rival_move_type == md.TYPE_3_TRIPLE:
            all_moves = mg.gen_type_3_triple()
            moves = ms.filter_type_3_triple(all_moves, rival_move)
        elif rival_move_type == md.TYPE_4_BOMB:
            all_moves = mg.gen_type_4_bomb() + mg.gen_type_5_king_bomb()
            moves = ms.filter_type_4_bomb(all_moves, rival_move)
        elif rival_move_type == md.TYPE_5_KING_BOMB:
            moves = []
        elif rival_move_type == md.TYPE_6_3_1:
            all_moves = mg.gen_type_6_3_1()
            moves = ms.filter_type_6_3_1(all_moves, rival_move)
        elif rival_move_type == md.TYPE_7_3_2:
            all_moves = mg.gen_type_7_3_2()
            moves = ms.filter_type_7_3_2(all_moves, rival_move)
        elif rival_move_type == md.TYPE_8_SERIAL_SINGLE:
            all_moves = mg.gen_type_8_serial_single(repeat_num=rival_move_len)
            moves = ms.filter_type_8_serial_single(all_moves, rival_move)
        elif rival_move_type == md.TYPE_9_SERIAL_PAIR:
            all_moves = mg.gen_type_9_serial_pair(repeat_num=rival_move_len)
            moves = ms.filter_type_9_serial_pair(all_moves, rival_move)
        elif rival_move_type == md.TYPE_10_SERIAL_TRIPLE:
            all_moves = mg.gen_type_10_serial_triple(repeat_num=rival_move_len)
            moves = ms.filter_type_10_serial_triple(all_moves, rival_move)
        elif rival_move_type == md.TYPE_11_SERIAL_3_1:
            all_moves = mg.gen_type_11_serial_3_1(repeat_num=rival_move_len)
            moves = ms.filter_type_11_serial_3_1(all_moves, rival_move)
        elif rival_move_type == md.TYPE_12_SERIAL_3_2:
            all_moves = mg.gen_type_12_serial_3_2(repeat_num=rival_move_len)
            moves = ms.filter_type_12_serial_3_2(all_moves, rival_move)
        elif rival_move_type == md.TYPE_13_4_2:
            all_moves = mg.gen_type_13_4_2()
            moves = ms.filter_type_13_4_2(all_moves, rival_move)
        elif rival_move_type == md.TYPE_14_4_22:
            all_moves = mg.gen_type_14_4_22()
            moves = ms.filter_type_14_4_22(all_moves, rival_move)

        # Bombs may answer any ordinary move.
        # NOTE(review): the exclusion list was cut off after TYPE_0_PASS;
        # the two bomb types are assumed because their branches above
        # already produce the bomb answers.
        if rival_move_type not in [md.TYPE_0_PASS,
                                   md.TYPE_4_BOMB, md.TYPE_5_KING_BOMB]:
            moves = moves + mg.gen_type_4_bomb() + mg.gen_type_5_king_bomb()

        if len(rival_move) != 0:  # rival_move is not 'pass'
            moves = moves + [[]]

        # NOTE(review): restored loop body -- sorting each move in place is
        # assumed; confirm.
        for m in moves:
            m.sort()
        return moves

    def reset(self):
        """Clear the per-round state; win and score totals are kept."""
        self.card_play_action_seq = []
        self.three_landlord_cards = None
        self.game_over = False
        self.acting_player_position = None
        self.player_utility_dict = None
        self.last_move_dict = {'landlord': [],
                               'landlord_up': [],
                               'landlord_down': []}
        self.played_cards = {'landlord': [],
                             'landlord_up': [],
                             'landlord_down': []}
        self.last_move = []
        self.last_two_moves = []
        self.info_sets = {'landlord': InfoSet('landlord'),
                          'landlord_up': InfoSet('landlord_up'),
                          'landlord_down': InfoSet('landlord_down')}
        self.bomb_num = 0
        self.last_pid = 'landlord'
        self.bid_info = [[1, 1, 1],
                         [1, 1, 1],
                         [1, 1, 1],
                         [1, 1, 1]]
        self.bid_count = 0
        # NOTE(review): __init__ starts these at 1 -- confirm which is right.
        self.multiply_count = {'landlord': 0,
                               'landlord_up': 0,
                               'landlord_down': 0}
        self.step_count = 0

    def get_infoset(self):
        """Refresh the acting player's InfoSet and return a deep copy of it."""
        infoset = self.info_sets[self.acting_player_position]

        infoset.last_pid = self.last_pid
        infoset.legal_actions = self.get_legal_card_play_actions()
        infoset.bomb_num = self.bomb_num
        infoset.last_move = self.get_last_move()
        infoset.last_two_moves = self.get_last_two_moves()
        infoset.last_move_dict = self.last_move_dict
        infoset.num_cards_left_dict = {
            pos: len(self.info_sets[pos].player_hand_cards)
            for pos in self._POSITIONS}

        # NOTE(review): the damaged copy also held a loop that summed the
        # other two players' hands into other_hand_cards.  Together with the
        # computation below it would count every card twice, so it appears
        # to have been disabled and is not reproduced.  Confirm.
        infoset.other_hand_cards = []

        # Flatten the three per-position played piles into one list.
        played_cards_tmp = []
        for pile in self.played_cards.values():
            # restored loop body: assumed to append each pile
            played_cards_tmp.extend(pile)
        # Cards that are accounted for: played ones plus the acting hand.
        played_and_hand_cards = played_cards_tmp + infoset.player_hand_cards
        # Whole deck minus the accounted-for cards = the other players' cards.
        for card in set(AllEnvCard):
            missing = AllEnvCard.count(card) - played_and_hand_cards.count(card)
            infoset.other_hand_cards.extend([card] * missing)

        # NOTE(review): the three right-hand sides below were missing; the
        # environment attribute of the same name is assumed for each.
        infoset.played_cards = self.played_cards
        infoset.three_landlord_cards = self.three_landlord_cards
        infoset.card_play_action_seq = self.card_play_action_seq

        infoset.all_handcards = {
            pos: self.info_sets[pos].player_hand_cards
            for pos in self._POSITIONS}

        # Hand out a copy so the caller cannot mutate the live game state.
        return deepcopy(infoset)
class InfoSet(object):
    """
    The game state is described as infoset, which
    includes all the information in the current situation,
    such as the hand cards of the three players, the
    historical moves, etc.
    """

    def __init__(self, player_position):
        """Create an empty infoset for ``player_position``.

        Every field except the bidding/multiplier defaults starts as None
        and is filled in by the code that owns the infoset.
        """
        # The player position, i.e., landlord, landlord_down, or landlord_up
        self.player_position = player_position
        # The hand cards of the current player. A list.
        self.player_hand_cards = None
        # The number of cards left for each player. It is a dict with str-->int
        self.num_cards_left_dict = None
        # The three landlord cards. A list.
        self.three_landlord_cards = None
        # The historical moves. It is a list of list
        self.card_play_action_seq = None
        # The union of the hand cards of the other two players for the current player
        self.other_hand_cards = None
        # The legal actions for the current move. It is a list of list
        self.legal_actions = None
        # The most recent valid move
        self.last_move = None
        # The most recent two moves
        self.last_two_moves = None
        # The last moves for all the positions
        self.last_move_dict = None
        # The played cards so far. It is a list.
        self.played_cards = None
        # The hand cards of all the players. It is a dict.
        self.all_handcards = None
        # Last player position that plays a valid move, i.e., not `pass`
        self.last_pid = None
        # The number of bombs played so far
        self.bomb_num = None
        # Bidding record, a 4x3 grid defaulting to all ones.
        # NOTE(review): row/column meaning is not visible here -- confirm.
        self.bid_info = [[1, 1, 1],
                         [1, 1, 1],
                         [1, 1, 1],
                         [1, 1, 1]]
        # One multiplier entry per player, defaulting to 1.
        # NOTE(review): ordering of the three entries is not visible here.
        self.multiply_info = [1, 1, 1]
        self.player_id = None