From 3381e96932fe8986886cc24dfa7c1954997f38f8 Mon Sep 17 00:00:00 2001 From: Vincentzyx <929403983@qq.com> Date: Tue, 7 Sep 2021 17:19:25 +0800 Subject: [PATCH] Env --- .gitignore | 115 +-- douzero/env/__init__.py | 1 + douzero/env/env.py | 1423 +++++++++++++++++++++++++++++++++ douzero/env/game.py | 414 ++++++++++ douzero/env/move_detector.py | 107 +++ douzero/env/move_generator.py | 219 +++++ douzero/env/move_selector.py | 106 +++ douzero/env/utils.py | 33 + 8 files changed, 2304 insertions(+), 114 deletions(-) create mode 100644 douzero/env/__init__.py create mode 100644 douzero/env/env.py create mode 100644 douzero/env/game.py create mode 100644 douzero/env/move_detector.py create mode 100644 douzero/env/move_generator.py create mode 100644 douzero/env/move_selector.py create mode 100644 douzero/env/utils.py diff --git a/.gitignore b/.gitignore index 510c73d..7e99e36 100644 --- a/.gitignore +++ b/.gitignore @@ -1,114 +1 @@ -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. 
-*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -.hypothesis/ -.pytest_cache/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -.python-version - -# celery beat schedule file -celerybeat-schedule - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ +*.pyc \ No newline at end of file diff --git a/douzero/env/__init__.py b/douzero/env/__init__.py new file mode 100644 index 0000000..ff52082 --- /dev/null +++ b/douzero/env/__init__.py @@ -0,0 +1 @@ +from .env import Env diff --git a/douzero/env/env.py b/douzero/env/env.py new file mode 100644 index 0000000..75ab10f --- /dev/null +++ b/douzero/env/env.py @@ -0,0 +1,1423 @@ +from collections import Counter +import numpy as np +import random +import torch +import BidModel + +from douzero.env.game import GameEnv + +env_version = "3.2" +env_url = "http://od.vcccz.com/hechuan/env.py" +Card2Column = {3: 0, 4: 1, 5: 2, 6: 3, 7: 4, 8: 5, 9: 6, 10: 7, + 11: 8, 12: 9, 13: 10, 14: 11, 17: 12} + +NumOnes2Array = {0: np.array([0, 0, 0, 0]), + 1: np.array([1, 0, 0, 0]), + 2: np.array([1, 1, 0, 0]), + 3: np.array([1, 1, 1, 0]), + 4: np.array([1, 1, 1, 1])} + +deck = [] +for i in range(3, 15): + deck.extend([i for _ in range(4)]) +deck.extend([17 for _ in range(4)]) +deck.extend([20, 30]) + + +class Env: + """ + Doudizhu multi-agent 
wrapper + """ + + def __init__(self, objective): + """ + Objective is wp/adp/logadp. It indicates whether considers + bomb in reward calculation. Here, we use dummy agents. + This is because, in the orignial game, the players + are `in` the game. Here, we want to isolate + players and environments to have a more gym style + interface. To achieve this, we use dummy players + to play. For each move, we tell the corresponding + dummy player which action to play, then the player + will perform the actual action in the game engine. + """ + self.objective = objective + + # Initialize players + # We use three dummy player for the target position + self.players = {} + for position in ['landlord', 'landlord_up', 'landlord_down']: + self.players[position] = DummyAgent(position) + + # Initialize the internal environment + self._env = GameEnv(self.players) + self.total_round = 0 + self.force_bid = 0 + self.infoset = None + + def reset(self, model, device, flags=None): + """ + Every time reset is called, the environment + will be re-initialized with a new deck of cards. + This function is usually called when a game is over. 
+ """ + self._env.reset() + + # Randomly shuffle the deck + if model is None: + _deck = deck.copy() + np.random.shuffle(_deck) + card_play_data = {'landlord': _deck[:20], + 'landlord_up': _deck[20:37], + 'landlord_down': _deck[37:54], + 'three_landlord_cards': _deck[17:20], + } + for key in card_play_data: + card_play_data[key].sort() + self._env.card_play_init(card_play_data) + self.infoset = self._game_infoset + return get_obs(self.infoset) + else: + self.total_round += 1 + bid_done = False + card_play_data = [] + landlord_cards = [] + last_bid = 0 + bid_count = 0 + player_ids = {} + bid_info = None + bid_obs_buffer = [] + multiply_obs_buffer = [] + bid_limit = 3 + force_bid = False + while not bid_done: + bid_limit -= 1 + bid_obs_buffer.clear() + multiply_obs_buffer.clear() + _deck = deck.copy() + np.random.shuffle(_deck) + card_play_data = [ + _deck[:17], + _deck[17:34], + _deck[34:51], + ] + for i in range(3): + card_play_data[i].sort() + landlord_cards = _deck[51:54] + landlord_cards.sort() + bid_info = np.array([[-1, -1, -1], + [-1, -1, -1], + [-1, -1, -1], + [-1, -1, -1]]) + bidding_player = random.randint(0, 2) + # bidding_player = 0 # debug + first_bid = -1 + last_bid = -1 + bid_count = 0 + if bid_limit <= 0: + force_bid = True + for r in range(3): + bidding_obs = _get_obs_for_bid(bidding_player, bid_info, card_play_data[bidding_player]) + with torch.no_grad(): + action = model.forward("bidding", torch.tensor(bidding_obs["z_batch"], device=device), + torch.tensor(bidding_obs["x_batch"], device=device), flags=flags) + if bid_limit <= 0: + wr = BidModel.predict_env(card_play_data[bidding_player]) + if wr >= 0.7: + action = {"action": 1} # debug + bid_limit += 1 + + bid_obs_buffer.append({ + "x_batch": bidding_obs["x_batch"][action["action"]], + "z_batch": bidding_obs["z_batch"][action["action"]], + "pid": bidding_player + }) + if action["action"] == 1: + last_bid = bidding_player + bid_count += 1 + if first_bid == -1: + first_bid = bidding_player + for p in 
range(3): + if p == bidding_player: + bid_info[r][p] = 1 + else: + bid_info[r][p] = 0 + else: + bid_info[r] = [0, 0, 0] + bidding_player = (bidding_player + 1) % 3 + one_count = np.count_nonzero(bid_info == 1) + if one_count == 0: + continue + elif one_count > 1: + r = 3 + bidding_player = first_bid + bidding_obs = _get_obs_for_bid(bidding_player, bid_info, card_play_data[bidding_player]) + with torch.no_grad(): + action = model.forward("bidding", torch.tensor(bidding_obs["z_batch"], device=device), + torch.tensor(bidding_obs["x_batch"], device=device), flags=flags) + bid_obs_buffer.append({ + "x_batch": bidding_obs["x_batch"][action["action"]], + "z_batch": bidding_obs["z_batch"][action["action"]], + "pid": bidding_player + }) + if action["action"] == 1: + last_bid = bidding_player + bid_count += 1 + for p in range(3): + if p == bidding_player: + bid_info[r][p] = 1 + else: + bid_info[r][p] = 0 + break + card_play_data[last_bid].extend(landlord_cards) + card_play_data = {'landlord': card_play_data[last_bid], + 'landlord_up': card_play_data[(last_bid - 1) % 3], + 'landlord_down': card_play_data[(last_bid + 1) % 3], + 'three_landlord_cards': landlord_cards, + } + card_play_data["landlord"].sort() + player_ids = { + 'landlord': last_bid, + 'landlord_up': (last_bid - 1) % 3, + 'landlord_down': (last_bid + 1) % 3, + } + player_positions = { + last_bid: 'landlord', + (last_bid - 1) % 3: 'landlord_up', + (last_bid + 1) % 3: 'landlord_down' + } + for bid_obs in bid_obs_buffer: + bid_obs.update({"position": player_positions[bid_obs["pid"]]}) + + # Initialize the cards + self._env.card_play_init(card_play_data) + multiply_map = [ + np.array([1, 0, 0]), + np.array([0, 1, 0]), + np.array([0, 0, 1]) + ] + for pos in ["landlord", "landlord_up", "landlord_down"]: + pid = player_ids[pos] + self._env.info_sets[pos].player_id = pid + self._env.info_sets[pos].bid_info = bid_info[:, [(pid - 1) % 3, pid, (pid + 1) % 3]] + self._env.bid_count = bid_count + # multiply_obs = 
_get_obs_for_multiply(pos, self._env.info_sets[pos].bid_info, card_play_data[pos], + # landlord_cards) + # action = model.forward(pos, torch.tensor(multiply_obs["z_batch"], device=device), + # torch.tensor(multiply_obs["x_batch"], device=device), flags=flags) + # multiply_obs_buffer.append({ + # "x_batch": multiply_obs["x_batch"][action["action"]], + # "z_batch": multiply_obs["z_batch"][action["action"]], + # "position": pos + # }) + action = {"action": 0} + self._env.info_sets[pos].multiply_info = multiply_map[action["action"]] + self._env.multiply_count[pos] = action["action"] + self.infoset = self._game_infoset + if force_bid: + self.force_bid += 1 + if self.total_round % 100 == 0: + print("发牌情况: %i/%i %.1f%%" % (self.force_bid, self.total_round, self.force_bid / self.total_round * 100)) + self.force_bid = 0 + self.total_round = 0 + return get_obs(self.infoset), { + "bid_obs_buffer": bid_obs_buffer, + "multiply_obs_buffer": multiply_obs_buffer + } + + def step(self, action): + """ + Step function takes as input the action, which + is a list of integers, and output the next obervation, + reward, and a Boolean variable indicating whether the + current game is finished. It also returns an empty + dictionary that is reserved to pass useful information. 
+ """ + assert action in self.infoset.legal_actions + self.players[self._acting_player_position].set_action(action) + self._env.step() + self.infoset = self._game_infoset + done = False + reward = 0.0 + if self._game_over: + done = True + reward = { + "play": { + "landlord": self._get_reward("landlord"), + "landlord_up": self._get_reward("landlord_up"), + "landlord_down": self._get_reward("landlord_down") + }, + "bid": { + "landlord": self._get_reward_bidding("landlord")*2, + "landlord_up": self._get_reward_bidding("landlord_up"), + "landlord_down": self._get_reward_bidding("landlord_down") + } + } + obs = None + else: + obs = get_obs(self.infoset) + return obs, reward, done, {} + + def _get_reward(self, pos): + """ + This function is called in the end of each + game. It returns either 1/-1 for win/loss, + or ADP, i.e., every bomb will double the score. + """ + winner = self._game_winner + bomb_num = self._game_bomb_num + self_bomb_num = self._env.pos_bomb_num[pos] + if winner == 'landlord': + if self.objective == 'adp': + return (1.1 - self._env.step_count * 0.0033) * 1.3 ** (bomb_num +self._env.multiply_count[pos]) /8 + elif self.objective == 'logadp': + return (1.0 - self._env.step_count * 0.0033) * 1.3**self_bomb_num * 2**self._env.multiply_count[pos] / 4 + else: + return 1.0 - self._env.step_count * 0.0033 + else: + if self.objective == 'adp': + return (-1.1 - self._env.step_count * 0.0033) * 1.3 ** (bomb_num +self._env.multiply_count[pos]) /8 + elif self.objective == 'logadp': + return (-1.0 + self._env.step_count * 0.0033) * 1.3**self_bomb_num * 2**self._env.multiply_count[pos] / 4 + else: + return -1.0 + self._env.step_count * 0.0033 + + def _get_reward_bidding(self, pos): + """ + This function is called in the end of each + game. It returns either 1/-1 for win/loss, + or ADP, i.e., every bomb will double the score. 
+ """ + winner = self._game_winner + bomb_num = self._game_bomb_num + if winner == 'landlord': + return 1.0 * 2**(self._env.bid_count-1) / 8 + else: + return -1.0 * 2**(self._env.bid_count-1) / 8 + + @property + def _game_infoset(self): + """ + Here, inforset is defined as all the information + in the current situation, incuding the hand cards + of all the players, all the historical moves, etc. + That is, it contains perferfect infomation. Later, + we will use functions to extract the observable + information from the views of the three players. + """ + return self._env.game_infoset + + @property + def _game_bomb_num(self): + """ + The number of bombs played so far. This is used as + a feature of the neural network and is also used to + calculate ADP. + """ + return self._env.get_bomb_num() + + @property + def _game_winner(self): + """ A string of landlord/peasants + """ + return self._env.get_winner() + + @property + def _acting_player_position(self): + """ + The player that is active. It can be landlord, + landlod_down, or landlord_up. + """ + return self._env.acting_player_position + + @property + def _game_over(self): + """ Returns a Boolean + """ + return self._env.game_over + + +class DummyAgent(object): + """ + Dummy agent is designed to easily interact with the + game engine. The agent will first be told what action + to perform. Then the environment will call this agent + to perform the actual action. This can help us to + isolate environment and agents towards a gym like + interface. + """ + + def __init__(self, position): + self.position = position + self.action = None + + def act(self, infoset): + """ + Simply return the action that is set previously. + """ + assert self.action in infoset.legal_actions + return self.action + + def set_action(self, action): + """ + The environment uses this function to tell + the dummy agent what to do. 
+ """ + self.action = action + + +def get_obs(infoset, use_general=True): + """ + This function obtains observations with imperfect information + from the infoset. It has three branches since we encode + different features for different positions. + + This function will return dictionary named `obs`. It contains + several fields. These fields will be used to train the model. + One can play with those features to improve the performance. + + `position` is a string that can be landlord/landlord_down/landlord_up + + `x_batch` is a batch of features (excluding the hisorical moves). + It also encodes the action feature + + `z_batch` is a batch of features with hisorical moves only. + + `legal_actions` is the legal moves + + `x_no_action`: the features (exluding the hitorical moves and + the action features). It does not have the batch dim. + + `z`: same as z_batch but not a batch. + """ + if use_general: + if infoset.player_position not in ["landlord", "landlord_up", "landlord_down"]: + raise ValueError('') + return _get_obs_general(infoset, infoset.player_position) + else: + if infoset.player_position == 'landlord': + return _get_obs_landlord(infoset) + elif infoset.player_position == 'landlord_up': + return _get_obs_landlord_up(infoset) + elif infoset.player_position == 'landlord_down': + return _get_obs_landlord_down(infoset) + else: + raise ValueError('') + + +def _get_one_hot_array(num_left_cards, max_num_cards): + """ + A utility function to obtain one-hot endoding + """ + one_hot = np.zeros(max_num_cards) + if num_left_cards > 0: + one_hot[num_left_cards - 1] = 1 + + return one_hot + + +def _cards2array(list_cards): + """ + A utility function that transforms the actions, i.e., + A list of integers into card matrix. Here we remove + the six entries that are always zero and flatten the + the representations. 
+ """ + if len(list_cards) == 0: + return np.zeros(54, dtype=np.int8) + + matrix = np.zeros([4, 13], dtype=np.int8) + jokers = np.zeros(2, dtype=np.int8) + counter = Counter(list_cards) + for card, num_times in counter.items(): + if card < 20: + matrix[:, Card2Column[card]] = NumOnes2Array[num_times] + elif card == 20: + jokers[0] = 1 + elif card == 30: + jokers[1] = 1 + return np.concatenate((matrix.flatten('F'), jokers)) + + +# def _action_seq_list2array(action_seq_list): +# """ +# A utility function to encode the historical moves. +# We encode the historical 15 actions. If there is +# no 15 actions, we pad the features with 0. Since +# three moves is a round in DouDizhu, we concatenate +# the representations for each consecutive three moves. +# Finally, we obtain a 5x162 matrix, which will be fed +# into LSTM for encoding. +# """ +# action_seq_array = np.zeros((len(action_seq_list), 54)) +# for row, list_cards in enumerate(action_seq_list): +# action_seq_array[row, :] = _cards2array(list_cards) +# # action_seq_array = action_seq_array.reshape(5, 162) +# return action_seq_array + +def _action_seq_list2array(action_seq_list, new_model=True): + """ + A utility function to encode the historical moves. + We encode the historical 15 actions. If there is + no 15 actions, we pad the features with 0. Since + three moves is a round in DouDizhu, we concatenate + the representations for each consecutive three moves. + Finally, we obtain a 5x162 matrix, which will be fed + into LSTM for encoding. 
+ """ + + if new_model: + position_map = {"landlord": 0, "landlord_up": 1, "landlord_down": 2} + action_seq_array = np.ones((len(action_seq_list), 54)) * -1 # Default Value -1 for not using area + for row, list_cards in enumerate(action_seq_list): + if list_cards != []: + action_seq_array[row, :54] = _cards2array(list_cards[1]) + else: + action_seq_array = np.zeros((len(action_seq_list), 54)) + for row, list_cards in enumerate(action_seq_list): + if list_cards != []: + action_seq_array[row, :] = _cards2array(list_cards[1]) + action_seq_array = action_seq_array.reshape(5, 162) + return action_seq_array + + # action_seq_array = np.zeros((len(action_seq_list), 54)) + # for row, list_cards in enumerate(action_seq_list): + # if list_cards != []: + # action_seq_array[row, :] = _cards2array(list_cards[1]) + # return action_seq_array + + +def _process_action_seq(sequence, length=15, new_model=True): + """ + A utility function encoding historical moves. We + encode 15 moves. If there is no 15 moves, we pad + with zeros. + """ + sequence = sequence[-length:].copy() + if new_model: + sequence = sequence[::-1] + if len(sequence) < length: + empty_sequence = [[] for _ in range(length - len(sequence))] + empty_sequence.extend(sequence) + sequence = empty_sequence + return sequence + + +def _get_one_hot_bomb(bomb_num): + """ + A utility function to encode the number of bombs + into one-hot representation. + """ + one_hot = np.zeros(15) + one_hot[bomb_num] = 1 + return one_hot + + +def _get_obs_landlord(infoset): + """ + Obttain the landlord features. 
See Table 4 in + https://arxiv.org/pdf/2106.06135.pdf + """ + num_legal_actions = len(infoset.legal_actions) + my_handcards = _cards2array(infoset.player_hand_cards) + my_handcards_batch = np.repeat(my_handcards[np.newaxis, :], + num_legal_actions, axis=0) + + other_handcards = _cards2array(infoset.other_hand_cards) + other_handcards_batch = np.repeat(other_handcards[np.newaxis, :], + num_legal_actions, axis=0) + + last_action = _cards2array(infoset.last_move) + last_action_batch = np.repeat(last_action[np.newaxis, :], + num_legal_actions, axis=0) + + my_action_batch = np.zeros(my_handcards_batch.shape) + for j, action in enumerate(infoset.legal_actions): + my_action_batch[j, :] = _cards2array(action) + + landlord_up_num_cards_left = _get_one_hot_array( + infoset.num_cards_left_dict['landlord_up'], 17) + landlord_up_num_cards_left_batch = np.repeat( + landlord_up_num_cards_left[np.newaxis, :], + num_legal_actions, axis=0) + + landlord_down_num_cards_left = _get_one_hot_array( + infoset.num_cards_left_dict['landlord_down'], 17) + landlord_down_num_cards_left_batch = np.repeat( + landlord_down_num_cards_left[np.newaxis, :], + num_legal_actions, axis=0) + + landlord_up_played_cards = _cards2array( + infoset.played_cards['landlord_up']) + landlord_up_played_cards_batch = np.repeat( + landlord_up_played_cards[np.newaxis, :], + num_legal_actions, axis=0) + + landlord_down_played_cards = _cards2array( + infoset.played_cards['landlord_down']) + landlord_down_played_cards_batch = np.repeat( + landlord_down_played_cards[np.newaxis, :], + num_legal_actions, axis=0) + + bomb_num = _get_one_hot_bomb( + infoset.bomb_num) + bomb_num_batch = np.repeat( + bomb_num[np.newaxis, :], + num_legal_actions, axis=0) + + x_batch = np.hstack((my_handcards_batch, + other_handcards_batch, + last_action_batch, + landlord_up_played_cards_batch, + landlord_down_played_cards_batch, + landlord_up_num_cards_left_batch, + landlord_down_num_cards_left_batch, + bomb_num_batch, + my_action_batch)) + 
x_no_action = np.hstack((my_handcards, + other_handcards, + last_action, + landlord_up_played_cards, + landlord_down_played_cards, + landlord_up_num_cards_left, + landlord_down_num_cards_left, + bomb_num)) + z = _action_seq_list2array(_process_action_seq( + infoset.card_play_action_seq, 15, False), False) + z_batch = np.repeat( + z[np.newaxis, :, :], + num_legal_actions, axis=0) + obs = { + 'position': 'landlord', + 'x_batch': x_batch.astype(np.float32), + 'z_batch': z_batch.astype(np.float32), + 'legal_actions': infoset.legal_actions, + 'x_no_action': x_no_action.astype(np.int8), + 'z': z.astype(np.int8), + } + return obs + +def _get_obs_landlord_up(infoset): + """ + Obttain the landlord_up features. See Table 5 in + https://arxiv.org/pdf/2106.06135.pdf + """ + num_legal_actions = len(infoset.legal_actions) + my_handcards = _cards2array(infoset.player_hand_cards) + my_handcards_batch = np.repeat(my_handcards[np.newaxis, :], + num_legal_actions, axis=0) + + other_handcards = _cards2array(infoset.other_hand_cards) + other_handcards_batch = np.repeat(other_handcards[np.newaxis, :], + num_legal_actions, axis=0) + + last_action = _cards2array(infoset.last_move) + last_action_batch = np.repeat(last_action[np.newaxis, :], + num_legal_actions, axis=0) + + my_action_batch = np.zeros(my_handcards_batch.shape) + for j, action in enumerate(infoset.legal_actions): + my_action_batch[j, :] = _cards2array(action) + + last_landlord_action = _cards2array( + infoset.last_move_dict['landlord']) + last_landlord_action_batch = np.repeat( + last_landlord_action[np.newaxis, :], + num_legal_actions, axis=0) + landlord_num_cards_left = _get_one_hot_array( + infoset.num_cards_left_dict['landlord'], 20) + landlord_num_cards_left_batch = np.repeat( + landlord_num_cards_left[np.newaxis, :], + num_legal_actions, axis=0) + + landlord_played_cards = _cards2array( + infoset.played_cards['landlord']) + landlord_played_cards_batch = np.repeat( + landlord_played_cards[np.newaxis, :], + 
num_legal_actions, axis=0) + + last_teammate_action = _cards2array( + infoset.last_move_dict['landlord_down']) + last_teammate_action_batch = np.repeat( + last_teammate_action[np.newaxis, :], + num_legal_actions, axis=0) + teammate_num_cards_left = _get_one_hot_array( + infoset.num_cards_left_dict['landlord_down'], 17) + teammate_num_cards_left_batch = np.repeat( + teammate_num_cards_left[np.newaxis, :], + num_legal_actions, axis=0) + + teammate_played_cards = _cards2array( + infoset.played_cards['landlord_down']) + teammate_played_cards_batch = np.repeat( + teammate_played_cards[np.newaxis, :], + num_legal_actions, axis=0) + + bomb_num = _get_one_hot_bomb( + infoset.bomb_num) + bomb_num_batch = np.repeat( + bomb_num[np.newaxis, :], + num_legal_actions, axis=0) + + x_batch = np.hstack((my_handcards_batch, + other_handcards_batch, + landlord_played_cards_batch, + teammate_played_cards_batch, + last_action_batch, + last_landlord_action_batch, + last_teammate_action_batch, + landlord_num_cards_left_batch, + teammate_num_cards_left_batch, + bomb_num_batch, + my_action_batch)) + x_no_action = np.hstack((my_handcards, + other_handcards, + landlord_played_cards, + teammate_played_cards, + last_action, + last_landlord_action, + last_teammate_action, + landlord_num_cards_left, + teammate_num_cards_left, + bomb_num)) + z = _action_seq_list2array(_process_action_seq( + infoset.card_play_action_seq, 15, False), False) + z_batch = np.repeat( + z[np.newaxis, :, :], + num_legal_actions, axis=0) + obs = { + 'position': 'landlord_up', + 'x_batch': x_batch.astype(np.float32), + 'z_batch': z_batch.astype(np.float32), + 'legal_actions': infoset.legal_actions, + 'x_no_action': x_no_action.astype(np.int8), + 'z': z.astype(np.int8), + } + return obs + +def _get_obs_landlord_down(infoset): + """ + Obttain the landlord_down features. 
See Table 5 in + https://arxiv.org/pdf/2106.06135.pdf + """ + num_legal_actions = len(infoset.legal_actions) + my_handcards = _cards2array(infoset.player_hand_cards) + my_handcards_batch = np.repeat(my_handcards[np.newaxis, :], + num_legal_actions, axis=0) + + other_handcards = _cards2array(infoset.other_hand_cards) + other_handcards_batch = np.repeat(other_handcards[np.newaxis, :], + num_legal_actions, axis=0) + + last_action = _cards2array(infoset.last_move) + last_action_batch = np.repeat(last_action[np.newaxis, :], + num_legal_actions, axis=0) + + my_action_batch = np.zeros(my_handcards_batch.shape) + for j, action in enumerate(infoset.legal_actions): + my_action_batch[j, :] = _cards2array(action) + + last_landlord_action = _cards2array( + infoset.last_move_dict['landlord']) + last_landlord_action_batch = np.repeat( + last_landlord_action[np.newaxis, :], + num_legal_actions, axis=0) + landlord_num_cards_left = _get_one_hot_array( + infoset.num_cards_left_dict['landlord'], 20) + landlord_num_cards_left_batch = np.repeat( + landlord_num_cards_left[np.newaxis, :], + num_legal_actions, axis=0) + + landlord_played_cards = _cards2array( + infoset.played_cards['landlord']) + landlord_played_cards_batch = np.repeat( + landlord_played_cards[np.newaxis, :], + num_legal_actions, axis=0) + + last_teammate_action = _cards2array( + infoset.last_move_dict['landlord_up']) + last_teammate_action_batch = np.repeat( + last_teammate_action[np.newaxis, :], + num_legal_actions, axis=0) + teammate_num_cards_left = _get_one_hot_array( + infoset.num_cards_left_dict['landlord_up'], 17) + teammate_num_cards_left_batch = np.repeat( + teammate_num_cards_left[np.newaxis, :], + num_legal_actions, axis=0) + + teammate_played_cards = _cards2array( + infoset.played_cards['landlord_up']) + teammate_played_cards_batch = np.repeat( + teammate_played_cards[np.newaxis, :], + num_legal_actions, axis=0) + + landlord_played_cards = _cards2array( + infoset.played_cards['landlord']) + 
landlord_played_cards_batch = np.repeat( + landlord_played_cards[np.newaxis, :], + num_legal_actions, axis=0) + + bomb_num = _get_one_hot_bomb( + infoset.bomb_num) + bomb_num_batch = np.repeat( + bomb_num[np.newaxis, :], + num_legal_actions, axis=0) + + x_batch = np.hstack((my_handcards_batch, + other_handcards_batch, + landlord_played_cards_batch, + teammate_played_cards_batch, + last_action_batch, + last_landlord_action_batch, + last_teammate_action_batch, + landlord_num_cards_left_batch, + teammate_num_cards_left_batch, + bomb_num_batch, + my_action_batch)) + x_no_action = np.hstack((my_handcards, + other_handcards, + landlord_played_cards, + teammate_played_cards, + last_action, + last_landlord_action, + last_teammate_action, + landlord_num_cards_left, + teammate_num_cards_left, + bomb_num)) + z = _action_seq_list2array(_process_action_seq( + infoset.card_play_action_seq, 15, False), False) + z_batch = np.repeat( + z[np.newaxis, :, :], + num_legal_actions, axis=0) + obs = { + 'position': 'landlord_down', + 'x_batch': x_batch.astype(np.float32), + 'z_batch': z_batch.astype(np.float32), + 'legal_actions': infoset.legal_actions, + 'x_no_action': x_no_action.astype(np.int8), + 'z': z.astype(np.int8), + } + return obs + +def _get_obs_landlord_withbid(infoset): + """ + Obttain the landlord features. 
See Table 4 in + https://arxiv.org/pdf/2106.06135.pdf + """ + num_legal_actions = len(infoset.legal_actions) + my_handcards = _cards2array(infoset.player_hand_cards) + my_handcards_batch = np.repeat(my_handcards[np.newaxis, :], + num_legal_actions, axis=0) + + other_handcards = _cards2array(infoset.other_hand_cards) + other_handcards_batch = np.repeat(other_handcards[np.newaxis, :], + num_legal_actions, axis=0) + + last_action = _cards2array(infoset.last_move) + last_action_batch = np.repeat(last_action[np.newaxis, :], + num_legal_actions, axis=0) + + my_action_batch = np.zeros(my_handcards_batch.shape) + for j, action in enumerate(infoset.legal_actions): + my_action_batch[j, :] = _cards2array(action) + + landlord_up_num_cards_left = _get_one_hot_array( + infoset.num_cards_left_dict['landlord_up'], 17) + landlord_up_num_cards_left_batch = np.repeat( + landlord_up_num_cards_left[np.newaxis, :], + num_legal_actions, axis=0) + + landlord_down_num_cards_left = _get_one_hot_array( + infoset.num_cards_left_dict['landlord_down'], 17) + landlord_down_num_cards_left_batch = np.repeat( + landlord_down_num_cards_left[np.newaxis, :], + num_legal_actions, axis=0) + + landlord_up_played_cards = _cards2array( + infoset.played_cards['landlord_up']) + landlord_up_played_cards_batch = np.repeat( + landlord_up_played_cards[np.newaxis, :], + num_legal_actions, axis=0) + + landlord_down_played_cards = _cards2array( + infoset.played_cards['landlord_down']) + landlord_down_played_cards_batch = np.repeat( + landlord_down_played_cards[np.newaxis, :], + num_legal_actions, axis=0) + + bomb_num = _get_one_hot_bomb( + infoset.bomb_num) + bomb_num_batch = np.repeat( + bomb_num[np.newaxis, :], + num_legal_actions, axis=0) + + x_batch = np.hstack((my_handcards_batch, + other_handcards_batch, + last_action_batch, + landlord_up_played_cards_batch, + landlord_down_played_cards_batch, + landlord_up_num_cards_left_batch, + landlord_down_num_cards_left_batch, + bomb_num_batch, + my_action_batch)) + 
x_no_action = np.hstack((my_handcards, + other_handcards, + last_action, + landlord_up_played_cards, + landlord_down_played_cards, + landlord_up_num_cards_left, + landlord_down_num_cards_left, + bomb_num)) + z = _action_seq_list2array(_process_action_seq( + infoset.card_play_action_seq, 15, False), False) + z_batch = np.repeat( + z[np.newaxis, :, :], + num_legal_actions, axis=0) + obs = { + 'position': 'landlord', + 'x_batch': x_batch.astype(np.float32), + 'z_batch': z_batch.astype(np.float32), + 'legal_actions': infoset.legal_actions, + 'x_no_action': x_no_action.astype(np.int8), + 'z': z.astype(np.int8), + } + return obs + + +def _get_obs_general1(infoset, position): + num_legal_actions = len(infoset.legal_actions) + my_handcards = _cards2array(infoset.player_hand_cards) + my_handcards_batch = np.repeat(my_handcards[np.newaxis, :], + num_legal_actions, axis=0) + + other_handcards = _cards2array(infoset.other_hand_cards) + other_handcards_batch = np.repeat(other_handcards[np.newaxis, :], + num_legal_actions, axis=0) + + position_map = { + "landlord": [1, 0, 0], + "landlord_up": [0, 1, 0], + "landlord_down": [0, 0, 1] + } + position_info = np.array(position_map[position]) + position_info_batch = np.repeat(position_info[np.newaxis, :], + num_legal_actions, axis=0) + + bid_info = np.array(infoset.bid_info).flatten() + bid_info_batch = np.repeat(bid_info[np.newaxis, :], + num_legal_actions, axis=0) + + multiply_info = np.array(infoset.multiply_info) + multiply_info_batch = np.repeat(multiply_info[np.newaxis, :], + num_legal_actions, axis=0) + + three_landlord_cards = _cards2array(infoset.three_landlord_cards) + three_landlord_cards_batch = np.repeat(three_landlord_cards[np.newaxis, :], + num_legal_actions, axis=0) + + last_action = _cards2array(infoset.last_move) + last_action_batch = np.repeat(last_action[np.newaxis, :], + num_legal_actions, axis=0) + + my_action_batch = np.zeros(my_handcards_batch.shape) + for j, action in enumerate(infoset.legal_actions): + 
my_action_batch[j, :] = _cards2array(action) + + landlord_num_cards_left = _get_one_hot_array( + infoset.num_cards_left_dict['landlord'], 20) + landlord_num_cards_left_batch = np.repeat( + landlord_num_cards_left[np.newaxis, :], + num_legal_actions, axis=0) + + landlord_up_num_cards_left = _get_one_hot_array( + infoset.num_cards_left_dict['landlord_up'], 17) + landlord_up_num_cards_left_batch = np.repeat( + landlord_up_num_cards_left[np.newaxis, :], + num_legal_actions, axis=0) + + landlord_down_num_cards_left = _get_one_hot_array( + infoset.num_cards_left_dict['landlord_down'], 17) + landlord_down_num_cards_left_batch = np.repeat( + landlord_down_num_cards_left[np.newaxis, :], + num_legal_actions, axis=0) + + other_handcards_left_list = [] + for pos in ["landlord", "landlord_up", "landlord_up"]: + if pos != position: + other_handcards_left_list.extend(infoset.all_handcards[pos]) + + landlord_played_cards = _cards2array( + infoset.played_cards['landlord']) + landlord_played_cards_batch = np.repeat( + landlord_played_cards[np.newaxis, :], + num_legal_actions, axis=0) + + landlord_up_played_cards = _cards2array( + infoset.played_cards['landlord_up']) + landlord_up_played_cards_batch = np.repeat( + landlord_up_played_cards[np.newaxis, :], + num_legal_actions, axis=0) + + landlord_down_played_cards = _cards2array( + infoset.played_cards['landlord_down']) + landlord_down_played_cards_batch = np.repeat( + landlord_down_played_cards[np.newaxis, :], + num_legal_actions, axis=0) + + bomb_num = _get_one_hot_bomb( + infoset.bomb_num) + bomb_num_batch = np.repeat( + bomb_num[np.newaxis, :], + num_legal_actions, axis=0) + + x_batch = np.hstack((position_info_batch, # 3 + my_handcards_batch, # 54 + other_handcards_batch, # 54 + three_landlord_cards_batch, # 54 + last_action_batch, # 54 + landlord_played_cards_batch, # 54 + landlord_up_played_cards_batch, # 54 + landlord_down_played_cards_batch, # 54 + landlord_num_cards_left_batch, # 20 + landlord_up_num_cards_left_batch, # 17 + 
landlord_down_num_cards_left_batch, # 17 + bomb_num_batch, # 15 + bid_info_batch, # 12 + multiply_info_batch, # 3 + my_action_batch)) # 54 + x_no_action = np.hstack((position_info, + my_handcards, + other_handcards, + three_landlord_cards, + last_action, + landlord_played_cards, + landlord_up_played_cards, + landlord_down_played_cards, + landlord_num_cards_left, + landlord_up_num_cards_left, + landlord_down_num_cards_left, + bomb_num, + bid_info, + multiply_info)) + z = _action_seq_list2array(_process_action_seq( + infoset.card_play_action_seq, 32)) + z_batch = np.repeat( + z[np.newaxis, :, :], + num_legal_actions, axis=0) + obs = { + 'position': position, + 'x_batch': x_batch.astype(np.float32), + 'z_batch': z_batch.astype(np.float32), + 'legal_actions': infoset.legal_actions, + 'x_no_action': x_no_action.astype(np.int8), + 'z': z.astype(np.int8), + } + return obs + +def _get_obs_general(infoset, position): + num_legal_actions = len(infoset.legal_actions) + my_handcards = _cards2array(infoset.player_hand_cards) + my_handcards_batch = np.repeat(my_handcards[np.newaxis, :], + num_legal_actions, axis=0) + + other_handcards = _cards2array(infoset.other_hand_cards) + other_handcards_batch = np.repeat(other_handcards[np.newaxis, :], + num_legal_actions, axis=0) + + position_map = { + "landlord": [1, 0, 0], + "landlord_up": [0, 1, 0], + "landlord_down": [0, 0, 1] + } + position_info = np.array(position_map[position]) + position_info_batch = np.repeat(position_info[np.newaxis, :], + num_legal_actions, axis=0) + + bid_info = np.array(infoset.bid_info).flatten() + bid_info_batch = np.repeat(bid_info[np.newaxis, :], + num_legal_actions, axis=0) + + multiply_info = np.array(infoset.multiply_info) + multiply_info_batch = np.repeat(multiply_info[np.newaxis, :], + num_legal_actions, axis=0) + + three_landlord_cards = _cards2array(infoset.three_landlord_cards) + three_landlord_cards_batch = np.repeat(three_landlord_cards[np.newaxis, :], + num_legal_actions, axis=0) + + 
last_action = _cards2array(infoset.last_move) + last_action_batch = np.repeat(last_action[np.newaxis, :], + num_legal_actions, axis=0) + + my_action_batch = np.zeros(my_handcards_batch.shape) + for j, action in enumerate(infoset.legal_actions): + my_action_batch[j, :] = _cards2array(action) + + landlord_num_cards_left = _get_one_hot_array( + infoset.num_cards_left_dict['landlord'], 20) + landlord_num_cards_left_batch = np.repeat( + landlord_num_cards_left[np.newaxis, :], + num_legal_actions, axis=0) + + landlord_up_num_cards_left = _get_one_hot_array( + infoset.num_cards_left_dict['landlord_up'], 17) + landlord_up_num_cards_left_batch = np.repeat( + landlord_up_num_cards_left[np.newaxis, :], + num_legal_actions, axis=0) + + landlord_down_num_cards_left = _get_one_hot_array( + infoset.num_cards_left_dict['landlord_down'], 17) + landlord_down_num_cards_left_batch = np.repeat( + landlord_down_num_cards_left[np.newaxis, :], + num_legal_actions, axis=0) + + other_handcards_left_list = [] + for pos in ["landlord", "landlord_up", "landlord_up"]: + if pos != position: + other_handcards_left_list.extend(infoset.all_handcards[pos]) + + landlord_played_cards = _cards2array( + infoset.played_cards['landlord']) + landlord_played_cards_batch = np.repeat( + landlord_played_cards[np.newaxis, :], + num_legal_actions, axis=0) + + landlord_up_played_cards = _cards2array( + infoset.played_cards['landlord_up']) + landlord_up_played_cards_batch = np.repeat( + landlord_up_played_cards[np.newaxis, :], + num_legal_actions, axis=0) + + landlord_down_played_cards = _cards2array( + infoset.played_cards['landlord_down']) + landlord_down_played_cards_batch = np.repeat( + landlord_down_played_cards[np.newaxis, :], + num_legal_actions, axis=0) + + bomb_num = _get_one_hot_bomb( + infoset.bomb_num) + bomb_num_batch = np.repeat( + bomb_num[np.newaxis, :], + num_legal_actions, axis=0) + num_cards_left = np.hstack(( + landlord_num_cards_left, # 20 + landlord_up_num_cards_left, # 17 + 
def gen_bid_legal_actions(player_id, bid_info):
    """Build the two candidate bid encodings for the current bidding round.

    The columns of the bid table are re-ordered to (previous seat, own seat,
    next seat).  The first row that still contains ``-1`` is taken as the
    current round and is filled in twice: once with ``[0, 0, 0]`` and once
    with ``[0, 1, 0]`` (only the own-seat column set).

    Args:
        player_id: Seat index of the acting player; used modulo 3 to pick
            the neighbouring columns.
        bid_info: 2-D bid table with three columns.  May be a NumPy array or
            a nested list; it is never modified.

    Returns:
        ``np.ndarray`` with one flattened table per option (two rows), or an
        empty array when no row contains ``-1``.
    """
    columns = [(player_id - 1) % 3, player_id, (player_id + 1) % 3]
    # np.asarray lets nested lists through; fancy indexing then returns a
    # copy, so the caller's table stays untouched.
    self_bid_info = np.asarray(bid_info)[:, columns]

    # First round that has not been fully decided yet (-1 marks "no bid yet").
    curr_round = next(
        (r for r, row in enumerate(self_bid_info) if -1 in row), -1)
    if curr_round == -1:
        return np.array([])

    bid_actions = []
    for own_bid in (0, 1):
        candidate = self_bid_info.copy()
        candidate[curr_round] = [0, own_bid, 0]
        bid_actions.append(candidate.flatten())
    return np.array(bid_actions)
other_cards.remove(card) + other_handcards = _cards2array(other_cards) + other_handcards_batch = np.repeat(other_handcards[np.newaxis, :], + num_legal_actions, axis=0) + + position_info = np.array([0, 0, 0]) + position_info_batch = np.repeat(position_info[np.newaxis, :], + num_legal_actions, axis=0) + + bid_legal_actions = gen_bid_legal_actions(player_id, bid_info) + bid_info = bid_legal_actions[0] + bid_info_batch = bid_legal_actions + + multiply_info = np.array([0, 0, 0]) + multiply_info_batch = np.repeat(multiply_info[np.newaxis, :], + num_legal_actions, axis=0) + + three_landlord_cards = _cards2array([]) + three_landlord_cards_batch = np.repeat(three_landlord_cards[np.newaxis, :], + num_legal_actions, axis=0) + + last_action = _cards2array([]) + last_action_batch = np.repeat(last_action[np.newaxis, :], + num_legal_actions, axis=0) + + my_action_batch = np.zeros(my_handcards_batch.shape) + for j in range(2): + my_action_batch[j, :] = _cards2array([]) + + landlord_num_cards_left = _get_one_hot_array(0, 20) + landlord_num_cards_left_batch = np.repeat( + landlord_num_cards_left[np.newaxis, :], + num_legal_actions, axis=0) + + landlord_up_num_cards_left = _get_one_hot_array(0, 17) + landlord_up_num_cards_left_batch = np.repeat( + landlord_up_num_cards_left[np.newaxis, :], + num_legal_actions, axis=0) + + landlord_down_num_cards_left = _get_one_hot_array(0, 17) + landlord_down_num_cards_left_batch = np.repeat( + landlord_down_num_cards_left[np.newaxis, :], + num_legal_actions, axis=0) + + landlord_played_cards = _cards2array([]) + landlord_played_cards_batch = np.repeat( + landlord_played_cards[np.newaxis, :], + num_legal_actions, axis=0) + + landlord_up_played_cards = _cards2array([]) + landlord_up_played_cards_batch = np.repeat( + landlord_up_played_cards[np.newaxis, :], + num_legal_actions, axis=0) + + landlord_down_played_cards = _cards2array([]) + landlord_down_played_cards_batch = np.repeat( + landlord_down_played_cards[np.newaxis, :], + num_legal_actions, 
def _get_obs_for_bid(player_id, bid_info, hand_cards):
    """Assemble the observation used to choose between the two bid options.

    Args:
        player_id: Seat index forwarded to ``gen_bid_legal_actions``.
        bid_info: Bid table forwarded to ``gen_bid_legal_actions``.
        hand_cards: The player's hand, encoded with ``_cards2array``.

    Returns:
        dict with
        ``'position'``        -- always the empty string,
        ``'x_batch'``         -- float32, hand encoding followed by the bid
                                 encoding repeated five times, one row per option,
        ``'z_batch'``         -- the constant placeholder ``np.array([0, 0])``,
        ``'legal_actions'``   -- the array returned by ``gen_bid_legal_actions``,
        ``'x_no_action'``     -- int8 hand encoding,
        ``'bid_info_batch'``  -- int8 copy of the repeated bid encoding.

    Raises:
        IndexError: if ``gen_bid_legal_actions`` yields no options (no row of
            the bid table contains ``-1``).
    """
    num_legal_actions = 2
    my_handcards = _cards2array(hand_cards)
    my_handcards_batch = np.repeat(my_handcards[np.newaxis, :],
                                   num_legal_actions, axis=0)

    bid_legal_actions = gen_bid_legal_actions(player_id, bid_info)
    if len(bid_legal_actions) == 0:
        # Same exception type the old ``bid_legal_actions[0]`` lookup raised,
        # but with a message that names the actual problem.
        raise IndexError("no open bidding round left in bid_info")

    # Each option's flattened bid table is laid side by side five times.
    bid_info_batch = np.hstack([bid_legal_actions for _ in range(5)])

    x_batch = np.hstack((my_handcards_batch,
                         bid_info_batch))
    obs = {
        'position': "",
        'x_batch': x_batch.astype(np.float32),
        'z_batch': np.array([0, 0]),
        'legal_actions': bid_legal_actions,
        # astype() already copies, so the hand encoding is used directly.
        'x_no_action': my_handcards.astype(np.int8),
        "bid_info_batch": bid_info_batch.astype(np.int8)
    }
    return obs
landlord_num_cards_left[np.newaxis, :], + num_legal_actions, axis=0) + + landlord_up_num_cards_left = _get_one_hot_array(0, 17) + landlord_up_num_cards_left_batch = np.repeat( + landlord_up_num_cards_left[np.newaxis, :], + num_legal_actions, axis=0) + + landlord_down_num_cards_left = _get_one_hot_array(0, 17) + landlord_down_num_cards_left_batch = np.repeat( + landlord_down_num_cards_left[np.newaxis, :], + num_legal_actions, axis=0) + + landlord_played_cards = _cards2array([]) + landlord_played_cards_batch = np.repeat( + landlord_played_cards[np.newaxis, :], + num_legal_actions, axis=0) + + landlord_up_played_cards = _cards2array([]) + landlord_up_played_cards_batch = np.repeat( + landlord_up_played_cards[np.newaxis, :], + num_legal_actions, axis=0) + + landlord_down_played_cards = _cards2array([]) + landlord_down_played_cards_batch = np.repeat( + landlord_down_played_cards[np.newaxis, :], + num_legal_actions, axis=0) + + bomb_num = _get_one_hot_bomb(0) + bomb_num_batch = np.repeat( + bomb_num[np.newaxis, :], + num_legal_actions, axis=0) + + x_batch = np.hstack((position_info_batch, + my_handcards_batch, + other_handcards_batch, + three_landlord_cards_batch, + last_action_batch, + landlord_played_cards_batch, + landlord_up_played_cards_batch, + landlord_down_played_cards_batch, + landlord_num_cards_left_batch, + landlord_up_num_cards_left_batch, + landlord_down_num_cards_left_batch, + bomb_num_batch, + bid_info_batch, + multiply_info_batch, + my_action_batch)) + x_no_action = np.hstack((position_info, + my_handcards, + other_handcards, + three_landlord_cards, + last_action, + landlord_played_cards, + landlord_up_played_cards, + landlord_down_played_cards, + landlord_num_cards_left, + landlord_up_num_cards_left, + landlord_down_num_cards_left, + bomb_num)) + z = _action_seq_list2array(_process_action_seq([], 32)) + z_batch = np.repeat( + z[np.newaxis, :, :], + num_legal_actions, axis=0) + obs = { + 'position': "", + 'x_batch': x_batch.astype(np.float32), + 
class GameEnv(object):
    """Card-play engine for one three-player game.

    Seats are ``'landlord'``, ``'landlord_down'`` and ``'landlord_up'`` and
    act in that order.  Per-game state is rebuilt by :meth:`reset`, while
    ``num_wins`` and ``num_scores`` keep accumulating across games.
    """

    # Order used whenever a per-seat dict or the "other hands" list is built.
    _POSITIONS = ('landlord', 'landlord_up', 'landlord_down')

    def __init__(self, players):
        """Store the agents and create a fresh game state.

        Args:
            players: mapping from seat name to an object exposing
                ``act(infoset)``; consulted by :meth:`step`.
        """
        self.players = players

        # Running totals; deliberately left alone by reset().
        self.num_wins = {'landlord': 0,
                         'farmer': 0}
        self.num_scores = {'landlord': 0,
                           'farmer': 0}

        self._init_game_state()

    def _init_game_state(self):
        """(Re)create every attribute that belongs to a single game."""
        self.card_play_action_seq = []

        self.three_landlord_cards = None
        self.game_over = False
        # Set by update_num_wins_scores(); None while no game has finished.
        self.winner = None

        self.acting_player_position = None
        self.player_utility_dict = None

        self.last_move_dict = {pos: [] for pos in self._POSITIONS}
        self.played_cards = {pos: [] for pos in self._POSITIONS}

        self.last_move = []
        self.last_two_moves = []

        self.info_sets = {pos: InfoSet(pos) for pos in self._POSITIONS}

        self.bomb_num = 0
        self.pos_bomb_num = {pos: 0 for pos in self._POSITIONS}
        self.last_pid = 'landlord'

        # Four bidding rounds x three seats; -1 marks "not decided yet".
        self.bid_info = [[-1, -1, -1] for _ in range(4)]
        self.bid_count = 0
        self.multiply_count = {pos: 0 for pos in self._POSITIONS}
        self.step_count = 0

    def card_play_init(self, card_play_data):
        """Deal the hands and prepare the first infoset.

        Args:
            card_play_data: dict with the keys ``'landlord'``,
                ``'landlord_up'``, ``'landlord_down'`` (hand lists) and
                ``'three_landlord_cards'``.  The lists are stored by
                reference and are mutated as cards are played.
        """
        for pos in self._POSITIONS:
            self.info_sets[pos].player_hand_cards = card_play_data[pos]
        self.three_landlord_cards = card_play_data['three_landlord_cards']
        self.get_acting_player_position()
        self.game_infoset = self.get_infoset()

    def game_done(self):
        """Finish the game as soon as any seat has an empty hand."""
        if any(len(self.info_sets[pos].player_hand_cards) == 0
               for pos in self._POSITIONS):
            # if one of the three players discards his hand,
            # then game is over.
            self.compute_player_utility()
            self.update_num_wins_scores()

            self.game_over = True

    def compute_player_utility(self):
        """Set ``player_utility_dict`` from whether the landlord's hand is empty."""
        if len(self.info_sets['landlord'].player_hand_cards) == 0:
            self.player_utility_dict = {'landlord': 2,
                                        'farmer': -1}
        else:
            self.player_utility_dict = {'landlord': -2,
                                        'farmer': 1}

    def update_num_wins_scores(self):
        """Book the result into ``num_wins``/``num_scores`` and set ``winner``.

        The score moved is the base score (2 for the landlord, 1 for the
        farmer side) doubled once per bomb played.
        """
        for pos, utility in self.player_utility_dict.items():
            base_score = 2 if pos == 'landlord' else 1
            stake = base_score * (2 ** self.bomb_num)
            if utility > 0:
                self.num_wins[pos] += 1
                self.winner = pos
                self.num_scores[pos] += stake
            else:
                self.num_scores[pos] -= stake

    def get_winner(self):
        """Return ``'landlord'`` or ``'farmer'``, or None before a game has ended."""
        return self.winner

    def get_bomb_num(self):
        """Return the number of bombs played in the current game."""
        return self.bomb_num

    def step(self):
        """Let the acting player move once and advance the game.

        Returns:
            The action (list of cards, empty for a pass) chosen by the agent.
        """
        position = self.acting_player_position
        action = self.players[position].act(self.game_infoset)
        self.step_count += 1
        if len(action) > 0:
            self.last_pid = position

        # ``bombs`` is the module-level table of bomb card lists.
        if action in bombs:
            self.bomb_num += 1
            self.pos_bomb_num[position] += 1

        self.last_move_dict[position] = action.copy()

        self.card_play_action_seq.append((position, action))
        self.update_acting_player_hand_cards(action)

        self.played_cards[position] += action

        if position == 'landlord' and \
                len(action) > 0 and \
                len(self.three_landlord_cards) > 0:
            # Drop the landlord's extra cards from the visible list as they
            # get played; stop once none are left.
            for card in action:
                if len(self.three_landlord_cards) == 0:
                    break
                if card in self.three_landlord_cards:
                    self.three_landlord_cards.remove(card)

        self.game_done()
        if not self.game_over:
            self.get_acting_player_position()
            self.game_infoset = self.get_infoset()
        return action

    def get_last_move(self):
        """Return the move the acting player has to respond to.

        This is the latest entry of the history, or the one before it when
        the latest entry is a pass.  An empty list is returned when the
        history is empty, when both of the last two entries are passes, or
        when the history consists of a single pass.
        """
        history = self.card_play_action_seq
        if len(history) == 0:
            return []
        if len(history[-1][1]) != 0:
            return history[-1][1]
        # Guard against a one-entry history: there is nothing before the pass.
        return history[-2][1] if len(history) >= 2 else []

    def get_last_two_moves(self):
        """Return the last two history entries, most recent first, padded with ``[]``."""
        last_two_moves = [[], []]
        for entry in self.card_play_action_seq[-2:]:
            last_two_moves.insert(0, entry[1])
            last_two_moves = last_two_moves[:2]
        return last_two_moves

    def get_acting_player_position(self):
        """Advance ``acting_player_position`` to the next seat and return it.

        The first call after a reset selects ``'landlord'``; afterwards the
        order is landlord -> landlord_down -> landlord_up -> landlord.
        """
        if self.acting_player_position is None:
            self.acting_player_position = 'landlord'
        elif self.acting_player_position == 'landlord':
            self.acting_player_position = 'landlord_down'
        elif self.acting_player_position == 'landlord_down':
            self.acting_player_position = 'landlord_up'
        else:
            self.acting_player_position = 'landlord'

        return self.acting_player_position

    def update_acting_player_hand_cards(self, action):
        """Remove the played cards from the acting player's hand and re-sort it."""
        if action != []:
            hand = self.info_sets[self.acting_player_position].player_hand_cards
            for card in action:
                hand.remove(card)
            hand.sort()

    def get_legal_card_play_actions(self):
        """Return every move the acting player may make now.

        The move to beat comes from :meth:`get_last_move`.  With nothing to
        beat, all moves of the hand are generated.  Otherwise the result is
        the same-type moves that pass the matching ``ms.filter_*`` check,
        plus all bombs (unless the move to beat is itself a bomb or the king
        bomb) and the pass move ``[]``.  Every returned move is sorted.
        """
        mg = MovesGener(
            self.info_sets[self.acting_player_position].player_hand_cards)

        rival_move = self.get_last_move()

        rival_type = md.get_move_type(rival_move)
        rival_move_type = rival_type['type']
        rival_move_len = rival_type.get('len', 1)
        moves = list()

        if rival_move_type == md.TYPE_0_PASS:
            moves = mg.gen_moves()

        elif rival_move_type == md.TYPE_1_SINGLE:
            all_moves = mg.gen_type_1_single()
            moves = ms.filter_type_1_single(all_moves, rival_move)

        elif rival_move_type == md.TYPE_2_PAIR:
            all_moves = mg.gen_type_2_pair()
            moves = ms.filter_type_2_pair(all_moves, rival_move)

        elif rival_move_type == md.TYPE_3_TRIPLE:
            all_moves = mg.gen_type_3_triple()
            moves = ms.filter_type_3_triple(all_moves, rival_move)

        elif rival_move_type == md.TYPE_4_BOMB:
            all_moves = mg.gen_type_4_bomb() + mg.gen_type_5_king_bomb()
            moves = ms.filter_type_4_bomb(all_moves, rival_move)

        elif rival_move_type == md.TYPE_5_KING_BOMB:
            moves = []

        elif rival_move_type == md.TYPE_6_3_1:
            all_moves = mg.gen_type_6_3_1()
            moves = ms.filter_type_6_3_1(all_moves, rival_move)

        elif rival_move_type == md.TYPE_7_3_2:
            all_moves = mg.gen_type_7_3_2()
            moves = ms.filter_type_7_3_2(all_moves, rival_move)

        elif rival_move_type == md.TYPE_8_SERIAL_SINGLE:
            all_moves = mg.gen_type_8_serial_single(repeat_num=rival_move_len)
            moves = ms.filter_type_8_serial_single(all_moves, rival_move)

        elif rival_move_type == md.TYPE_9_SERIAL_PAIR:
            all_moves = mg.gen_type_9_serial_pair(repeat_num=rival_move_len)
            moves = ms.filter_type_9_serial_pair(all_moves, rival_move)

        elif rival_move_type == md.TYPE_10_SERIAL_TRIPLE:
            all_moves = mg.gen_type_10_serial_triple(repeat_num=rival_move_len)
            moves = ms.filter_type_10_serial_triple(all_moves, rival_move)

        elif rival_move_type == md.TYPE_11_SERIAL_3_1:
            all_moves = mg.gen_type_11_serial_3_1(repeat_num=rival_move_len)
            moves = ms.filter_type_11_serial_3_1(all_moves, rival_move)

        elif rival_move_type == md.TYPE_12_SERIAL_3_2:
            all_moves = mg.gen_type_12_serial_3_2(repeat_num=rival_move_len)
            moves = ms.filter_type_12_serial_3_2(all_moves, rival_move)

        elif rival_move_type == md.TYPE_13_4_2:
            all_moves = mg.gen_type_13_4_2()
            moves = ms.filter_type_13_4_2(all_moves, rival_move)

        elif rival_move_type == md.TYPE_14_4_22:
            all_moves = mg.gen_type_14_4_22()
            moves = ms.filter_type_14_4_22(all_moves, rival_move)

        if rival_move_type not in [md.TYPE_0_PASS,
                                   md.TYPE_4_BOMB, md.TYPE_5_KING_BOMB]:
            moves = moves + mg.gen_type_4_bomb() + mg.gen_type_5_king_bomb()

        if len(rival_move) != 0:  # rival_move is not 'pass'
            moves = moves + [[]]

        for m in moves:
            m.sort()

        return moves

    def reset(self):
        """Discard the current game; ``players`` and the running totals are kept."""
        self._init_game_state()

    def get_infoset(self):
        """Refresh the acting player's infoset and return a deep copy of it."""
        infoset = self.info_sets[self.acting_player_position]

        infoset.last_pid = self.last_pid
        infoset.legal_actions = self.get_legal_card_play_actions()
        infoset.bomb_num = self.bomb_num
        infoset.last_move = self.get_last_move()
        infoset.last_two_moves = self.get_last_two_moves()
        infoset.last_move_dict = self.last_move_dict

        infoset.num_cards_left_dict = \
            {pos: len(self.info_sets[pos].player_hand_cards)
             for pos in self._POSITIONS}

        # Concatenation of the two opponents' hands, in _POSITIONS order.
        others = []
        for pos in self._POSITIONS:
            if pos != self.acting_player_position:
                others += self.info_sets[pos].player_hand_cards
        infoset.other_hand_cards = others

        infoset.played_cards = self.played_cards
        infoset.three_landlord_cards = self.three_landlord_cards
        infoset.card_play_action_seq = self.card_play_action_seq

        infoset.all_handcards = \
            {pos: self.info_sets[pos].player_hand_cards
             for pos in self._POSITIONS}

        # The agent gets a snapshot, so later engine updates cannot leak in.
        return deepcopy(infoset)
# check if move is a continuous sequence
def is_continuous_seq(move):
    """Return True when each element is exactly one greater than the previous.

    Empty and single-element sequences count as continuous.
    """
    return all(nxt - cur == 1 for cur, nxt in zip(move, move[1:]))


# return the type of the move
def get_move_type(move):
    """Classify a move and return a descriptor dict.

    Args:
        move: list of card values.  The positional checks (``move[0]``,
            ``move[2]``, the straight test) rely on the list being sorted
            in ascending order.

    Returns:
        dict with ``'type'`` (one of the ``TYPE_*`` constants), plus
        ``'rank'`` for every recognised type except pass and king bomb, and
        ``'len'`` for the serial types.  Unrecognised hands yield
        ``{'type': TYPE_15_WRONG}``.
    """
    wrong = {'type': TYPE_15_WRONG}
    move_size = len(move)
    move_dict = collections.Counter(move)

    if move_size == 0:
        return {'type': TYPE_0_PASS}

    if move_size == 1:
        return {'type': TYPE_1_SINGLE, 'rank': move[0]}

    if move_size == 2:
        if move[0] == move[1]:
            return {'type': TYPE_2_PAIR, 'rank': move[0]}
        if move == [20, 30]:  # Kings
            return {'type': TYPE_5_KING_BOMB}
        return wrong

    if move_size == 3:
        if len(move_dict) == 1:
            return {'type': TYPE_3_TRIPLE, 'rank': move[0]}
        return wrong

    if move_size == 4:
        if len(move_dict) == 1:
            return {'type': TYPE_4_BOMB, 'rank': move[0]}
        if len(move_dict) == 2 and (move[0] == move[1] == move[2]
                                    or move[1] == move[2] == move[3]):
            # In a sorted 3+1 the triple always covers index 1.
            return {'type': TYPE_6_3_1, 'rank': move[1]}
        return wrong

    if is_continuous_seq(move):
        return {'type': TYPE_8_SERIAL_SINGLE, 'rank': move[0], 'len': len(move)}

    if move_size == 5:
        # Two distinct ranks alone is not enough: 4+1 also has two ranks but
        # is not a triple-with-pair, so the multiplicities are checked.
        if sorted(move_dict.values()) == [2, 3]:
            return {'type': TYPE_7_3_2, 'rank': move[2]}
        return wrong

    # count_dict[n] == number of ranks that occur exactly n times.
    count_dict = collections.Counter(move_dict.values())

    if move_size == 6:
        if (len(move_dict) == 2 or len(move_dict) == 3) and count_dict.get(4) == 1 and \
                (count_dict.get(2) == 1 or count_dict.get(1) == 2):
            return {'type': TYPE_13_4_2, 'rank': move[2]}

    if move_size == 8 and (((len(move_dict) == 3 or len(move_dict) == 2) and
            (count_dict.get(4) == 1 and count_dict.get(2) == 2)) or count_dict.get(4) == 2):
        return {'type': TYPE_14_4_22, 'rank': max([c for c, n in move_dict.items() if n == 4])}

    mdkeys = sorted(move_dict.keys())
    if len(move_dict) == count_dict.get(2) and is_continuous_seq(mdkeys):
        return {'type': TYPE_9_SERIAL_PAIR, 'rank': mdkeys[0], 'len': len(mdkeys)}

    if len(move_dict) == count_dict.get(3) and is_continuous_seq(mdkeys):
        return {'type': TYPE_10_SERIAL_TRIPLE, 'rank': mdkeys[0], 'len': len(mdkeys)}

    # Check Type 11 (serial 3+1) and Type 12 (serial 3+2)
    if count_dict.get(3, 0) >= MIN_TRIPLES:
        serial_3 = list()
        single = list()
        pair = list()

        for k, v in move_dict.items():
            if v == 3:
                serial_3.append(k)
            elif v == 1:
                single.append(k)
            elif v == 2:
                pair.append(k)
            else:  # no other possibilities
                return wrong

        serial_3.sort()
        if is_continuous_seq(serial_3):
            if len(serial_3) == len(single)+len(pair)*2:
                return {'type': TYPE_11_SERIAL_3_1, 'rank': serial_3[0], 'len': len(serial_3)}
            if len(serial_3) == len(pair) and len(move_dict) == len(serial_3) * 2:
                return {'type': TYPE_12_SERIAL_3_2, 'rank': serial_3[0], 'len': len(serial_3)}

        # Four triples of which three are consecutive: the odd triple serves
        # as the three single kickers.  That only adds up when the hand has
        # no further singles or pairs, hence the extra emptiness check.
        if len(serial_3) == 4 and not single and not pair:
            if is_continuous_seq(serial_3[1:]):
                return {'type': TYPE_11_SERIAL_3_1, 'rank': serial_3[1], 'len': len(serial_3) - 1}
            if is_continuous_seq(serial_3[:-1]):
                return {'type': TYPE_11_SERIAL_3_1, 'rank': serial_3[0], 'len': len(serial_3) - 1}

    return wrong
<= longest: + target_moves = sorted(longest_list[index: index + steps] * repeat) + moves.append(target_moves) + index += 1 + steps += 1 + + else: # repeat_num > 0 + if longest < repeat_num: + continue + index = 0 + while index + repeat_num <= longest: + target_moves = sorted(longest_list[index: index + repeat_num] * repeat) + moves.append(target_moves) + index += 1 + + return moves + + def gen_type_1_single(self): + self.single_card_moves = [] + for i in set(self.cards_list): + self.single_card_moves.append([i]) + return self.single_card_moves + + def gen_type_2_pair(self): + self.pair_moves = [] + for k, v in self.cards_dict.items(): + if v >= 2: + self.pair_moves.append([k, k]) + return self.pair_moves + + def gen_type_3_triple(self): + self.triple_cards_moves = [] + for k, v in self.cards_dict.items(): + if v >= 3: + self.triple_cards_moves.append([k, k, k]) + return self.triple_cards_moves + + def gen_type_4_bomb(self): + self.bomb_moves = [] + for k, v in self.cards_dict.items(): + if v == 4: + self.bomb_moves.append([k, k, k, k]) + return self.bomb_moves + + def gen_type_5_king_bomb(self): + self.final_bomb_moves = [] + if 20 in self.cards_list and 30 in self.cards_list: + self.final_bomb_moves.append([20, 30]) + return self.final_bomb_moves + + def gen_type_6_3_1(self): + result = [] + for t in self.single_card_moves: + for i in self.triple_cards_moves: + if t[0] != i[0]: + result.append(t+i) + return result + + def gen_type_7_3_2(self): + result = list() + for t in self.pair_moves: + for i in self.triple_cards_moves: + if t[0] != i[0]: + result.append(t+i) + return result + + def gen_type_8_serial_single(self, repeat_num=0): + return self._gen_serial_moves(self.cards_list, MIN_SINGLE_CARDS, repeat=1, repeat_num=repeat_num) + + def gen_type_9_serial_pair(self, repeat_num=0): + single_pairs = list() + for k, v in self.cards_dict.items(): + if v >= 2: + single_pairs.append(k) + + return self._gen_serial_moves(single_pairs, MIN_PAIRS, repeat=2, 
repeat_num=repeat_num) + + def gen_type_10_serial_triple(self, repeat_num=0): + single_triples = list() + for k, v in self.cards_dict.items(): + if v >= 3: + single_triples.append(k) + + return self._gen_serial_moves(single_triples, MIN_TRIPLES, repeat=3, repeat_num=repeat_num) + + def gen_type_11_serial_3_1(self, repeat_num=0): + serial_3_moves = self.gen_type_10_serial_triple(repeat_num=repeat_num) + serial_3_1_moves = list() + + for s3 in serial_3_moves: # s3 is like [3,3,3,4,4,4] + s3_set = set(s3) + new_cards = [i for i in self.cards_list if i not in s3_set] + + # Get any s3_len items from cards + subcards = select(new_cards, len(s3_set)) + + for i in subcards: + serial_3_1_moves.append(s3 + i) + + return list(k for k, _ in itertools.groupby(serial_3_1_moves)) + + def gen_type_12_serial_3_2(self, repeat_num=0): + serial_3_moves = self.gen_type_10_serial_triple(repeat_num=repeat_num) + serial_3_2_moves = list() + pair_set = sorted([k for k, v in self.cards_dict.items() if v >= 2]) + + for s3 in serial_3_moves: + s3_set = set(s3) + pair_candidates = [i for i in pair_set if i not in s3_set] + + # Get any s3_len items from cards + subcards = select(pair_candidates, len(s3_set)) + for i in subcards: + serial_3_2_moves.append(sorted(s3 + i * 2)) + + return serial_3_2_moves + + def gen_type_13_4_2(self): + four_cards = list() + for k, v in self.cards_dict.items(): + if v == 4: + four_cards.append(k) + + result = list() + for fc in four_cards: + cards_list = [k for k in self.cards_list if k != fc] + subcards = select(cards_list, 2) + for i in subcards: + result.append([fc]*4 + i) + return list(k for k, _ in itertools.groupby(result)) + + def gen_type_14_4_22(self): + four_cards = list() + for k, v in self.cards_dict.items(): + if v == 4: + four_cards.append(k) + + result = list() + for fc in four_cards: + cards_list = [k for k, v in self.cards_dict.items() if k != fc and v>=2] + subcards = select(cards_list, 2) + for i in subcards: + result.append([fc] * 4 + [i[0], 
i[0], i[1], i[1]]) + return result + + # generate all possible moves from given cards + def gen_moves(self): + moves = [] + moves.extend(self.gen_type_1_single()) + moves.extend(self.gen_type_2_pair()) + moves.extend(self.gen_type_3_triple()) + moves.extend(self.gen_type_4_bomb()) + moves.extend(self.gen_type_5_king_bomb()) + moves.extend(self.gen_type_6_3_1()) + moves.extend(self.gen_type_7_3_2()) + moves.extend(self.gen_type_8_serial_single()) + moves.extend(self.gen_type_9_serial_pair()) + moves.extend(self.gen_type_10_serial_triple()) + moves.extend(self.gen_type_11_serial_3_1()) + moves.extend(self.gen_type_12_serial_3_2()) + moves.extend(self.gen_type_13_4_2()) + moves.extend(self.gen_type_14_4_22()) + return moves diff --git a/douzero/env/move_selector.py b/douzero/env/move_selector.py new file mode 100644 index 0000000..61b75f8 --- /dev/null +++ b/douzero/env/move_selector.py @@ -0,0 +1,106 @@ +# return all moves that can beat rivals, moves and rival_move should be same type +import collections + +def common_handle(moves, rival_move): + new_moves = list() + for move in moves: + if move[0] > rival_move[0]: + new_moves.append(move) + return new_moves + +def filter_type_1_single(moves, rival_move): + return common_handle(moves, rival_move) + + +def filter_type_2_pair(moves, rival_move): + return common_handle(moves, rival_move) + + +def filter_type_3_triple(moves, rival_move): + return common_handle(moves, rival_move) + + +def filter_type_4_bomb(moves, rival_move): + return common_handle(moves, rival_move) + +# No need to filter for type_5_king_bomb + +def filter_type_6_3_1(moves, rival_move): + rival_move.sort() + rival_rank = rival_move[1] + new_moves = list() + for move in moves: + move.sort() + my_rank = move[1] + if my_rank > rival_rank: + new_moves.append(move) + return new_moves + +def filter_type_7_3_2(moves, rival_move): + rival_move.sort() + rival_rank = rival_move[2] + new_moves = list() + for move in moves: + move.sort() + my_rank = move[2] + if 
my_rank > rival_rank: + new_moves.append(move) + return new_moves + +def filter_type_8_serial_single(moves, rival_move): + return common_handle(moves, rival_move) + +def filter_type_9_serial_pair(moves, rival_move): + return common_handle(moves, rival_move) + +def filter_type_10_serial_triple(moves, rival_move): + return common_handle(moves, rival_move) + +def filter_type_11_serial_3_1(moves, rival_move): + rival = collections.Counter(rival_move) + rival_rank = max([k for k, v in rival.items() if v == 3]) + new_moves = list() + for move in moves: + mymove = collections.Counter(move) + my_rank = max([k for k, v in mymove.items() if v == 3]) + if my_rank > rival_rank: + new_moves.append(move) + return new_moves + +def filter_type_12_serial_3_2(moves, rival_move): + rival = collections.Counter(rival_move) + rival_rank = max([k for k, v in rival.items() if v == 3]) + new_moves = list() + for move in moves: + mymove = collections.Counter(move) + my_rank = max([k for k, v in mymove.items() if v == 3]) + if my_rank > rival_rank: + new_moves.append(move) + return new_moves + +def filter_type_13_4_2(moves, rival_move): + rival_move.sort() + rival_rank = rival_move[2] + new_moves = list() + for move in moves: + move.sort() + my_rank = move[2] + if my_rank > rival_rank: + new_moves.append(move) + return new_moves + +def filter_type_14_4_22(moves, rival_move): + rival = collections.Counter(rival_move) + rival_rank = my_rank = 0 + for k, v in rival.items(): + if v == 4: + rival_rank = k + new_moves = list() + for move in moves: + mymove = collections.Counter(move) + for k, v in mymove.items(): + if v == 4: + my_rank = k + if my_rank > rival_rank: + new_moves.append(move) + return new_moves diff --git a/douzero/env/utils.py b/douzero/env/utils.py new file mode 100644 index 0000000..c3a2be7 --- /dev/null +++ b/douzero/env/utils.py @@ -0,0 +1,33 @@ +import itertools + +# global parameters +MIN_SINGLE_CARDS = 5 +MIN_PAIRS = 3 +MIN_TRIPLES = 2 + +# action types +TYPE_0_PASS = 0 
# Remaining action type ids; the number embedded in each name is its value
# (TYPE_0_PASS = 0 is defined just above).
(
    TYPE_1_SINGLE,
    TYPE_2_PAIR,
    TYPE_3_TRIPLE,
    TYPE_4_BOMB,
    TYPE_5_KING_BOMB,
    TYPE_6_3_1,
    TYPE_7_3_2,
    TYPE_8_SERIAL_SINGLE,
    TYPE_9_SERIAL_PAIR,
    TYPE_10_SERIAL_TRIPLE,
    TYPE_11_SERIAL_3_1,
    TYPE_12_SERIAL_3_2,
    TYPE_13_4_2,
    TYPE_14_4_22,
    TYPE_15_WRONG,
) = range(1, 16)

# betting round action
PASS, CALL, RAISE = range(3)


def select(cards, num):
    """Return all possible results of selecting ``num`` cards from ``cards``.

    Selection is by position, so equal-valued cards at different positions
    produce equal results more than once. Each result is a new list.
    """
    return list(map(list, itertools.combinations(cards, num)))