From 3381e96932fe8986886cc24dfa7c1954997f38f8 Mon Sep 17 00:00:00 2001
From: Vincentzyx <929403983@qq.com>
Date: Tue, 7 Sep 2021 17:19:25 +0800
Subject: [PATCH] Env

---
 .gitignore                    |  115 +--
 douzero/env/__init__.py       |    1 +
 douzero/env/env.py            | 1423 +++++++++++++++++++++++++++++++++
 douzero/env/game.py           |  414 ++++++++++
 douzero/env/move_detector.py  |  107 +++
 douzero/env/move_generator.py |  219 +++++
 douzero/env/move_selector.py  |  106 +++
 douzero/env/utils.py          |   33 +
 8 files changed, 2304 insertions(+), 114 deletions(-)
 create mode 100644 douzero/env/__init__.py
 create mode 100644 douzero/env/env.py
 create mode 100644 douzero/env/game.py
 create mode 100644 douzero/env/move_detector.py
 create mode 100644 douzero/env/move_generator.py
 create mode 100644 douzero/env/move_selector.py
 create mode 100644 douzero/env/utils.py

diff --git a/.gitignore b/.gitignore
index 510c73d..7e99e36 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,114 +1 @@
-# Byte-compiled / optimized / DLL files
-__pycache__/
-*.py[cod]
-*$py.class
-
-# C extensions
-*.so
-
-# Distribution / packaging
-.Python
-build/
-develop-eggs/
-dist/
-downloads/
-eggs/
-.eggs/
-lib/
-lib64/
-parts/
-sdist/
-var/
-wheels/
-*.egg-info/
-.installed.cfg
-*.egg
-MANIFEST
-
-# PyInstaller
-#  Usually these files are written by a python script from a template
-#  before PyInstaller builds the exe, so as to inject date/other infos into it.
-*.manifest
-*.spec
-
-# Installer logs
-pip-log.txt
-pip-delete-this-directory.txt
-
-# Unit test / coverage reports
-htmlcov/
-.tox/
-.nox/
-.coverage
-.coverage.*
-.cache
-nosetests.xml
-coverage.xml
-*.cover
-.hypothesis/
-.pytest_cache/
-
-# Translations
-*.mo
-*.pot
-
-# Django stuff:
-*.log
-local_settings.py
-db.sqlite3
-
-# Flask stuff:
-instance/
-.webassets-cache
-
-# Scrapy stuff:
-.scrapy
-
-# Sphinx documentation
-docs/_build/
-
-# PyBuilder
-target/
-
-# Jupyter Notebook
-.ipynb_checkpoints
-
-# IPython
-profile_default/
-ipython_config.py
-
-# pyenv
-.python-version
-
-# celery beat schedule file
-celerybeat-schedule
-
-# SageMath parsed files
-*.sage.py
-
-# Environments
-.env
-.venv
-env/
-venv/
-ENV/
-env.bak/
-venv.bak/
-
-# Spyder project settings
-.spyderproject
-.spyproject
-
-# Rope project settings
-.ropeproject
-
-# mkdocs documentation
-/site
-
-# mypy
-.mypy_cache/
-.dmypy.json
-dmypy.json
-
-# Pyre type checker
-.pyre/
+*.pyc
\ No newline at end of file
diff --git a/douzero/env/__init__.py b/douzero/env/__init__.py
new file mode 100644
index 0000000..ff52082
--- /dev/null
+++ b/douzero/env/__init__.py
@@ -0,0 +1 @@
+from .env import Env
diff --git a/douzero/env/env.py b/douzero/env/env.py
new file mode 100644
index 0000000..75ab10f
--- /dev/null
+++ b/douzero/env/env.py
@@ -0,0 +1,1423 @@
+from collections import Counter
+import numpy as np
+import random
+import torch
+import BidModel
+
+from douzero.env.game import GameEnv
+
+env_version = "3.2"
+env_url = "http://od.vcccz.com/hechuan/env.py"
+Card2Column = {3: 0, 4: 1, 5: 2, 6: 3, 7: 4, 8: 5, 9: 6, 10: 7,  # card value -> column of the 4x13 matrix in _cards2array
+               11: 8, 12: 9, 13: 10, 14: 11, 17: 12}
+
+NumOnes2Array = {0: np.array([0, 0, 0, 0]),  # copies of one card value -> column with that many leading ones
+                 1: np.array([1, 0, 0, 0]),
+                 2: np.array([1, 1, 0, 0]),
+                 3: np.array([1, 1, 1, 0]),
+                 4: np.array([1, 1, 1, 1])}
+
+deck = []  # the 54 card values shuffled and dealt by Env.reset
+for i in range(3, 15):  # values 3..14, four copies each
+    deck.extend([i for _ in range(4)])
+deck.extend([17 for _ in range(4)])  # four copies of 17
+deck.extend([20, 30])  # one 20 and one 30, which _cards2array encodes as the two jokers
+
+
+class Env:
+    """
+    Doudizhu multi-agent wrapper
+    """
+
+    def __init__(self, objective):
+        """
+        Build the wrapper around a GameEnv driven by dummy agents.
+
+        `objective` is stored as given; `_get_reward` compares it with
+        'adp' and 'logadp' and treats anything else as plain win/loss.
+        The three seats are filled with DummyAgent instances: for each
+        move the wrapper tells the acting dummy which action to play,
+        then that player performs the actual action in the game engine.
+        This isolates players and environment behind a gym-style interface.
+        The counters used by `reset` for its deal statistics start at zero.
+        """
+        seats = ['landlord', 'landlord_up', 'landlord_down']
+        self.objective = objective
+
+        # One dummy player per seat, keyed by position name.
+        self.players = {seat: DummyAgent(seat) for seat in seats}
+
+        # The internal game engine receives the same players mapping.
+        self._env = GameEnv(self.players)
+        # Bookkeeping read and updated by reset().
+        self.total_round = 0
+        self.force_bid = 0
+        # Most recent infoset; assigned by reset() and step().
+        self.infoset = None
+
+    def reset(self, model, device, flags=None):
+        """
+        Re-initialize the environment with a freshly shuffled deck. With `model` None the seats are dealt
+        directly and only the first observation is returned; otherwise `model` runs a bidding phase that
+        picks the landlord and the result is `(obs, {"bid_obs_buffer": ..., "multiply_obs_buffer": ...})`.
+        """
+        self._env.reset()
+
+        # No model: deal fixed seats from a shuffled deck and skip the bidding phase
+        if model is None:
+            _deck = deck.copy()
+            np.random.shuffle(_deck)
+            card_play_data = {'landlord': _deck[:20],
+                              'landlord_up': _deck[20:37],
+                              'landlord_down': _deck[37:54],
+                              'three_landlord_cards': _deck[17:20],  # lies inside the landlord's slice _deck[:20]
+                              }
+            for key in card_play_data:
+                card_play_data[key].sort()
+            self._env.card_play_init(card_play_data)
+            self.infoset = self._game_infoset
+            return get_obs(self.infoset)
+        else:
+            self.total_round += 1
+            bid_done = False  # never set to True below; the loop is left through `break`
+            card_play_data = []
+            landlord_cards = []
+            last_bid = 0
+            bid_count = 0
+            player_ids = {}
+            bid_info = None
+            bid_obs_buffer = []
+            multiply_obs_buffer = []  # stays empty: the multiply phase below is commented out
+            bid_limit = 3  # counts down once per deal; at <= 0 BidModel may force a bid
+            force_bid = False
+            while not bid_done:  # one iteration per deal; re-deals while nobody bids
+                bid_limit -= 1
+                bid_obs_buffer.clear()
+                multiply_obs_buffer.clear()
+                _deck = deck.copy()
+                np.random.shuffle(_deck)
+                card_play_data = [
+                    _deck[:17],
+                    _deck[17:34],
+                    _deck[34:51],
+                ]
+                for i in range(3):
+                    card_play_data[i].sort()
+                landlord_cards = _deck[51:54]
+                landlord_cards.sort()
+                bid_info = np.array([[-1, -1, -1],  # rows: bidding turns (row 3 = re-bid), columns: player ids, -1 = turn not taken
+                                     [-1, -1, -1],
+                                     [-1, -1, -1],
+                                     [-1, -1, -1]])
+                bidding_player = random.randint(0, 2)
+                # bidding_player = 0 # debug
+                first_bid = -1
+                last_bid = -1
+                bid_count = 0
+                if bid_limit <= 0:
+                    force_bid = True
+                for r in range(3):  # every player gets one bidding turn
+                    bidding_obs = _get_obs_for_bid(bidding_player, bid_info, card_play_data[bidding_player])
+                    with torch.no_grad():
+                        action = model.forward("bidding", torch.tensor(bidding_obs["z_batch"], device=device),
+                                               torch.tensor(bidding_obs["x_batch"], device=device), flags=flags)
+                    if bid_limit <= 0:
+                        wr = BidModel.predict_env(card_play_data[bidding_player])
+                        if wr >= 0.7:
+                            action = {"action": 1}  # override the model: bid when BidModel.predict_env returns >= 0.7
+                            bid_limit += 1  # NOTE(review): lifts the limit above 0 only if it was exactly 0 - confirm intended
+
+                    bid_obs_buffer.append({
+                        "x_batch": bidding_obs["x_batch"][action["action"]],
+                        "z_batch": bidding_obs["z_batch"][action["action"]],
+                        "pid": bidding_player
+                    })
+                    if action["action"] == 1:
+                        last_bid = bidding_player
+                        bid_count += 1
+                        if first_bid == -1:
+                            first_bid = bidding_player
+                        for p in range(3):
+                            if p == bidding_player:
+                                bid_info[r][p] = 1
+                            else:
+                                bid_info[r][p] = 0
+                    else:
+                        bid_info[r] = [0, 0, 0]
+                    bidding_player = (bidding_player + 1) % 3
+                one_count = np.count_nonzero(bid_info == 1)
+                if one_count == 0:
+                    continue  # nobody bid: deal again
+                elif one_count > 1:  # several bids: the first bidder gets one final turn, recorded in row 3
+                    r = 3
+                    bidding_player = first_bid
+                    bidding_obs = _get_obs_for_bid(bidding_player, bid_info, card_play_data[bidding_player])
+                    with torch.no_grad():
+                        action = model.forward("bidding", torch.tensor(bidding_obs["z_batch"], device=device),
+                                               torch.tensor(bidding_obs["x_batch"], device=device), flags=flags)
+                    bid_obs_buffer.append({
+                        "x_batch": bidding_obs["x_batch"][action["action"]],
+                        "z_batch": bidding_obs["z_batch"][action["action"]],
+                        "pid": bidding_player
+                    })
+                    if action["action"] == 1:
+                        last_bid = bidding_player
+                        bid_count += 1
+                        for p in range(3):
+                            if p == bidding_player:
+                                bid_info[r][p] = 1
+                            else:
+                                bid_info[r][p] = 0
+                break
+            card_play_data[last_bid].extend(landlord_cards)  # the last bidder becomes landlord and takes the three extra cards
+            card_play_data = {'landlord': card_play_data[last_bid],
+                              'landlord_up': card_play_data[(last_bid - 1) % 3],
+                              'landlord_down': card_play_data[(last_bid + 1) % 3],
+                              'three_landlord_cards': landlord_cards,
+                              }
+            card_play_data["landlord"].sort()
+            player_ids = {
+                'landlord': last_bid,
+                'landlord_up': (last_bid - 1) % 3,
+                'landlord_down': (last_bid + 1) % 3,
+            }
+            player_positions = {
+                last_bid: 'landlord',
+                (last_bid - 1) % 3: 'landlord_up',
+                (last_bid + 1) % 3: 'landlord_down'
+            }
+            for bid_obs in bid_obs_buffer:  # tag every recorded bid with the seat its player ended up in
+                bid_obs.update({"position": player_positions[bid_obs["pid"]]})
+
+            # Initialize the cards
+            self._env.card_play_init(card_play_data)
+            multiply_map = [
+                np.array([1, 0, 0]),
+                np.array([0, 1, 0]),
+                np.array([0, 0, 1])
+            ]
+            for pos in ["landlord", "landlord_up", "landlord_down"]:
+                pid = player_ids[pos]
+                self._env.info_sets[pos].player_id = pid
+                self._env.info_sets[pos].bid_info = bid_info[:, [(pid - 1) % 3, pid, (pid + 1) % 3]]  # columns reordered around pid
+                self._env.bid_count = bid_count  # same value on every iteration
+                # multiply_obs = _get_obs_for_multiply(pos, self._env.info_sets[pos].bid_info, card_play_data[pos],
+                #                                      landlord_cards)
+                # action = model.forward(pos, torch.tensor(multiply_obs["z_batch"], device=device),
+                #                        torch.tensor(multiply_obs["x_batch"], device=device), flags=flags)
+                # multiply_obs_buffer.append({
+                #     "x_batch": multiply_obs["x_batch"][action["action"]],
+                #     "z_batch": multiply_obs["z_batch"][action["action"]],
+                #     "position": pos
+                # })
+                action = {"action": 0}  # multiply phase disabled: always index 0
+                self._env.info_sets[pos].multiply_info = multiply_map[action["action"]]
+                self._env.multiply_count[pos] = action["action"]
+            self.infoset = self._game_infoset
+            if force_bid:
+                self.force_bid += 1
+            if self.total_round % 100 == 0:  # every 100 model-driven resets: print forced-deal share, then restart both counters
+                print("发牌情况: %i/%i %.1f%%" % (self.force_bid, self.total_round, self.force_bid / self.total_round * 100))
+                self.force_bid = 0
+                self.total_round = 0
+            return get_obs(self.infoset), {
+                "bid_obs_buffer": bid_obs_buffer,
+                "multiply_obs_buffer": multiply_obs_buffer
+            }
+
+    def step(self, action):
+        """
+        Play `action` for the currently acting position and advance
+        the game by one move. `action` must be one of the current
+        infoset's legal actions. Returns `(obs, reward, done, info)`:
+        while the game runs, the next observation, 0.0 and False; once it
+        is over, None, a dict of "play"/"bid" rewards per position and True.
+        """
+        # Only moves offered by the current infoset may be played.
+        assert action in self.infoset.legal_actions
+        actor = self.players[self._acting_player_position]
+        actor.set_action(action)
+        self._env.step()
+        self.infoset = self._game_infoset
+
+        # Game still running: hand back the next observation.
+        if not self._game_over:
+            return get_obs(self.infoset), 0.0, False, {}
+
+        # Game finished: no observation; rewards are reported per
+        # position, separately for card play and for bidding.
+        positions = ("landlord", "landlord_up", "landlord_down")
+        play_reward = {pos: self._get_reward(pos) for pos in positions}
+        bid_reward = {pos: self._get_reward_bidding(pos) for pos in positions}
+        # The landlord's bidding reward counts double.
+        bid_reward["landlord"] = bid_reward["landlord"] * 2
+        reward = {
+            "play": play_reward,
+            "bid": bid_reward,
+        }
+        # The trailing dict is always empty.
+        return None, reward, True, {}
+
+    def _get_reward(self, pos):
+        """
+        Card-play reward for `pos`, called at the end of a game. Positive when the landlord won,
+        negative otherwise; the base value is adjusted by `step_count * 0.0033` and, for 'adp' and
+        'logadp', scaled by powers of 1.3 (bomb counts) and of 1.3 or 2 (`multiply_count[pos]`).
+        """
+        winner = self._game_winner
+        bomb_num = self._game_bomb_num  # used by the 'adp' branches
+        self_bomb_num = self._env.pos_bomb_num[pos]  # used by the 'logadp' branches; presumably bombs played by pos - confirm
+        if winner == 'landlord':
+            if self.objective == 'adp':
+                return (1.1 - self._env.step_count * 0.0033) * 1.3 ** (bomb_num +self._env.multiply_count[pos]) /8
+            elif self.objective == 'logadp':
+                return (1.0 - self._env.step_count * 0.0033) * 1.3**self_bomb_num * 2**self._env.multiply_count[pos] / 4
+            else:
+                return 1.0 - self._env.step_count * 0.0033
+        else:
+            if self.objective == 'adp':  # NOTE(review): "-1.1 - ..." below; the other losing branches use "-1.0 + ..." - confirm sign
+                return (-1.1 - self._env.step_count * 0.0033) * 1.3 ** (bomb_num +self._env.multiply_count[pos]) /8
+            elif self.objective == 'logadp':
+                return (-1.0 + self._env.step_count * 0.0033) * 1.3**self_bomb_num * 2**self._env.multiply_count[pos] / 4
+            else:
+                return -1.0 + self._env.step_count * 0.0033
+
+    def _get_reward_bidding(self, pos):
+        """
+        Bidding reward, called at the end of each game. Returns
+        +/- 2**(bid_count - 1) / 8: positive when the landlord won,
+        negative otherwise. `pos` is accepted but does not affect it.
+        """
+        winner = self._game_winner
+        # The magnitude doubles with every bid counted during reset().
+        magnitude = 2 ** (self._env.bid_count - 1) / 8
+        if winner == 'landlord':
+            return 1.0 * magnitude
+        return -1.0 * magnitude
+
+    @property
+    def _game_infoset(self):
+        """
+        Here, infoset is defined as all the information
+        in the current situation, including the hand cards
+        of all the players, all the historical moves, etc.
+        That is, it contains perfect information. Later,
+        we will use functions to extract the observable
+        information from the views of the three players.
+        """
+        return self._env.game_infoset
+
+    @property
+    def _game_bomb_num(self):
+        """
+        The number of bombs played so far, as reported by the
+        game engine's `get_bomb_num()`. `_get_reward` uses it
+        to scale the 'adp' reward.
+        """
+        return self._env.get_bomb_num()
+
+    @property
+    def _game_winner(self):
+        """ Winner reported by the game engine's `get_winner()`; the reward code compares it with 'landlord'.
+        """
+        return self._env.get_winner()
+
+    @property
+    def _acting_player_position(self):
+        """
+        The player that is active. It can be landlord,
+        landlord_down, or landlord_up.
+        """
+        return self._env.acting_player_position
+
+    @property
+    def _game_over(self):
+        """ The game engine's `game_over` flag; `step` treats a truthy value as the end of the game.
+        """
+        return self._env.game_over
+
+
+class DummyAgent(object):
+    """
+    Placeholder player that simply replays a pre-selected move.
+    The environment first stores the move with `set_action`;
+    the game engine then obtains it through `act`. Routing
+    every move through such an agent keeps the environment
+    and the real decision makers separate, which gives a
+    gym-like interface.
+    """
+
+    def __init__(self, position):
+        self.action = None
+        self.position = position
+
+    def act(self, infoset):
+        """Return the stored action; it must be legal in `infoset`."""
+        pending = self.action
+        # Guard against replaying a move the current state does not allow.
+        assert pending in infoset.legal_actions
+        return pending
+
+    def set_action(self, action):
+        """
+        Remember `action` so that the next call to `act`
+        returns it.
+        """
+        self.action = action
+
+
+def get_obs(infoset, use_general=True):
+    """
+    Build the imperfect-information observation for the position
+    given by `infoset.player_position`. With `use_general` one shared
+    encoder is used; otherwise a position-specific one is selected.
+
+    The position-specific encoders return a dictionary `obs` with:
+
+    `position` is a string that can be landlord/landlord_down/landlord_up
+
+    `x_batch` is a batch of features (excluding the historical moves).
+    It also encodes the action feature
+
+    `z_batch` is a batch of features with historical moves only.
+
+    `legal_actions` is the legal moves
+
+    `x_no_action`: the features (excluding the historical moves and
+    the action features). It does not have the batch dim.
+
+    `z`: same as z_batch but not a batch.
+
+    Raises ValueError if the position is not one of the three seats.
+    """
+    position = infoset.player_position
+    if position not in ('landlord', 'landlord_up', 'landlord_down'):
+        # Name the offending value instead of raising an empty message.
+        raise ValueError('unknown player position: %r' % (position,))
+
+    if use_general:
+        return _get_obs_general(infoset, position)
+    # Legacy encoders, one per seat.
+    if position == 'landlord':
+        return _get_obs_landlord(infoset)
+    if position == 'landlord_up':
+        return _get_obs_landlord_up(infoset)
+    return _get_obs_landlord_down(infoset)
+
+
+def _get_one_hot_array(num_left_cards, max_num_cards):
+    """
+    One-hot encode a card count: slot `num_left_cards - 1` is set; all zeros when the count is not positive.
+    """
+    encoding = np.zeros(max_num_cards)
+    if num_left_cards <= 0:
+        return encoding
+    encoding[num_left_cards - 1] = 1
+    return encoding
+
+
+def _cards2array(list_cards):
+    """
+    Encode a list of card values as a flat int8 vector of
+    length 54: a 4x13 count matrix (one column per value in
+    Card2Column) flattened column by column, followed by two
+    joker flags for the values 20 and 30.
+    """
+    if len(list_cards) == 0:
+        return np.zeros(54, dtype=np.int8)
+
+    joker_flags = np.zeros(2, dtype=np.int8)
+    rank_matrix = np.zeros([4, 13], dtype=np.int8)
+    for card, num_times in Counter(list_cards).items():
+        if card == 20:
+            joker_flags[0] = 1
+        elif card == 30:
+            joker_flags[1] = 1
+        elif card < 20:
+            rank_matrix[:, Card2Column[card]] = NumOnes2Array[num_times]
+    columns_first = rank_matrix.flatten('F')
+    return np.concatenate((columns_first, joker_flags))
+
+
+# def _action_seq_list2array(action_seq_list):
+#     """
+#     A utility function to encode the historical moves.
+#     We encode the historical 15 actions. If there is
+#     no 15 actions, we pad the features with 0. Since
+#     three moves is a round in DouDizhu, we concatenate
+#     the representations for each consecutive three moves.
+#     Finally, we obtain a 5x162 matrix, which will be fed
+#     into LSTM for encoding.
+#     """
+#     action_seq_array = np.zeros((len(action_seq_list), 54))
+#     for row, list_cards in enumerate(action_seq_list):
+#         action_seq_array[row, :] = _cards2array(list_cards)
+#     # action_seq_array = action_seq_array.reshape(5, 162)
+#     return action_seq_array
+
+def _action_seq_list2array(action_seq_list, new_model=True):
+    """
+    Encode a padded move history as a float matrix with one
+    54-wide row per entry (see `_cards2array`); each non-empty
+    entry supplies its cards at index 1. With `new_model` the
+    rows of empty entries hold -1 and the shape is (len, 54);
+    otherwise they hold 0 and the result is reshaped to
+    (5, 162), i.e. three consecutive moves per row, which
+    requires exactly 15 entries.
+    """
+
+    # -1 marks unused rows for the new model, 0 for the old one.
+    fill_value = -1.0 if new_model else 0.0
+    action_seq_array = np.full((len(action_seq_list), 54), fill_value)
+    for row, list_cards in enumerate(action_seq_list):
+        if list_cards == []:
+            # Padding entry: keep the fill value.
+            continue
+        # Index 1 of a history entry holds the cards that were played.
+        action_seq_array[row, :] = _cards2array(list_cards[1])
+    if not new_model:
+        # Old layout: concatenate each group of three moves.
+        action_seq_array = action_seq_array.reshape(5, 162)
+    return action_seq_array
+
+    # action_seq_array = np.zeros((len(action_seq_list), 54))
+    # for row, list_cards in enumerate(action_seq_list):
+    #     if list_cards != []:
+    #         action_seq_array[row, :] = _cards2array(list_cards[1])
+    # return action_seq_array
+
+
+def _process_action_seq(sequence, length=15, new_model=True):
+    """
+    Keep the last `length` moves (newest first when `new_model`) and
+    left-pad with empty lists up to `length` entries.
+    """
+    recent = sequence[-length:].copy()
+    if new_model:
+        recent = recent[::-1]
+    shortfall = length - len(recent)
+    if shortfall <= 0:
+        return recent
+    padded = [[] for _ in range(shortfall)]
+    padded.extend(recent)
+    return padded
+
+
+def _get_one_hot_bomb(bomb_num):
+    """
+    One-hot encode the bomb count in a vector of length 15:
+    the entry at index `bomb_num` is 1, every other entry 0.
+    """
+    encoding = np.zeros(15)
+    encoding[bomb_num] = 1
+    return encoding
+
+
+def _get_obs_landlord(infoset):
+    """
+    Obtain the landlord features (encoder chosen by get_obs when use_general is False).
+    See Table 4 in https://arxiv.org/pdf/2106.06135.pdf
+    """
+    num_legal_actions = len(infoset.legal_actions)  # batch size: one row per legal action
+    my_handcards = _cards2array(infoset.player_hand_cards)
+    my_handcards_batch = np.repeat(my_handcards[np.newaxis, :],
+                                   num_legal_actions, axis=0)
+
+    other_handcards = _cards2array(infoset.other_hand_cards)
+    other_handcards_batch = np.repeat(other_handcards[np.newaxis, :],
+                                      num_legal_actions, axis=0)
+
+    last_action = _cards2array(infoset.last_move)
+    last_action_batch = np.repeat(last_action[np.newaxis, :],
+                                  num_legal_actions, axis=0)
+
+    my_action_batch = np.zeros(my_handcards_batch.shape)
+    for j, action in enumerate(infoset.legal_actions):
+        my_action_batch[j, :] = _cards2array(action)  # the only feature that differs between rows
+
+    landlord_up_num_cards_left = _get_one_hot_array(
+        infoset.num_cards_left_dict['landlord_up'], 17)
+    landlord_up_num_cards_left_batch = np.repeat(
+        landlord_up_num_cards_left[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    landlord_down_num_cards_left = _get_one_hot_array(
+        infoset.num_cards_left_dict['landlord_down'], 17)
+    landlord_down_num_cards_left_batch = np.repeat(
+        landlord_down_num_cards_left[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    landlord_up_played_cards = _cards2array(
+        infoset.played_cards['landlord_up'])
+    landlord_up_played_cards_batch = np.repeat(
+        landlord_up_played_cards[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    landlord_down_played_cards = _cards2array(
+        infoset.played_cards['landlord_down'])
+    landlord_down_played_cards_batch = np.repeat(
+        landlord_down_played_cards[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    bomb_num = _get_one_hot_bomb(
+        infoset.bomb_num)
+    bomb_num_batch = np.repeat(
+        bomb_num[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    x_batch = np.hstack((my_handcards_batch,  # shared features repeated per row, candidate action last
+                         other_handcards_batch,
+                         last_action_batch,
+                         landlord_up_played_cards_batch,
+                         landlord_down_played_cards_batch,
+                         landlord_up_num_cards_left_batch,
+                         landlord_down_num_cards_left_batch,
+                         bomb_num_batch,
+                         my_action_batch))
+    x_no_action = np.hstack((my_handcards,  # same order as x_batch without the action part and the batch axis
+                             other_handcards,
+                             last_action,
+                             landlord_up_played_cards,
+                             landlord_down_played_cards,
+                             landlord_up_num_cards_left,
+                             landlord_down_num_cards_left,
+                             bomb_num))
+    z = _action_seq_list2array(_process_action_seq(
+        infoset.card_play_action_seq, 15, False), False)  # last 15 moves in the old (5, 162) layout
+    z_batch = np.repeat(
+        z[np.newaxis, :, :],
+        num_legal_actions, axis=0)
+    obs = {
+        'position': 'landlord',
+        'x_batch': x_batch.astype(np.float32),
+        'z_batch': z_batch.astype(np.float32),
+        'legal_actions': infoset.legal_actions,
+        'x_no_action': x_no_action.astype(np.int8),
+        'z': z.astype(np.int8),
+    }
+    return obs
+
+def _get_obs_landlord_up(infoset):
+    """
+    Obtain the landlord_up features (encoder chosen by get_obs when use_general is False).
+    See Table 5 in https://arxiv.org/pdf/2106.06135.pdf
+    """
+    num_legal_actions = len(infoset.legal_actions)  # batch size: one row per legal action
+    my_handcards = _cards2array(infoset.player_hand_cards)
+    my_handcards_batch = np.repeat(my_handcards[np.newaxis, :],
+                                   num_legal_actions, axis=0)
+
+    other_handcards = _cards2array(infoset.other_hand_cards)
+    other_handcards_batch = np.repeat(other_handcards[np.newaxis, :],
+                                      num_legal_actions, axis=0)
+
+    last_action = _cards2array(infoset.last_move)
+    last_action_batch = np.repeat(last_action[np.newaxis, :],
+                                  num_legal_actions, axis=0)
+
+    my_action_batch = np.zeros(my_handcards_batch.shape)
+    for j, action in enumerate(infoset.legal_actions):
+        my_action_batch[j, :] = _cards2array(action)  # the only feature that differs between rows
+
+    last_landlord_action = _cards2array(
+        infoset.last_move_dict['landlord'])
+    last_landlord_action_batch = np.repeat(
+        last_landlord_action[np.newaxis, :],
+        num_legal_actions, axis=0)
+    landlord_num_cards_left = _get_one_hot_array(
+        infoset.num_cards_left_dict['landlord'], 20)
+    landlord_num_cards_left_batch = np.repeat(
+        landlord_num_cards_left[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    landlord_played_cards = _cards2array(
+        infoset.played_cards['landlord'])
+    landlord_played_cards_batch = np.repeat(
+        landlord_played_cards[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    last_teammate_action = _cards2array(
+        infoset.last_move_dict['landlord_down'])  # the teammate of landlord_up is landlord_down
+    last_teammate_action_batch = np.repeat(
+        last_teammate_action[np.newaxis, :],
+        num_legal_actions, axis=0)
+    teammate_num_cards_left = _get_one_hot_array(
+        infoset.num_cards_left_dict['landlord_down'], 17)
+    teammate_num_cards_left_batch = np.repeat(
+        teammate_num_cards_left[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    teammate_played_cards = _cards2array(
+        infoset.played_cards['landlord_down'])
+    teammate_played_cards_batch = np.repeat(
+        teammate_played_cards[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    bomb_num = _get_one_hot_bomb(
+        infoset.bomb_num)
+    bomb_num_batch = np.repeat(
+        bomb_num[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    x_batch = np.hstack((my_handcards_batch,  # shared features repeated per row, candidate action last
+                         other_handcards_batch,
+                         landlord_played_cards_batch,
+                         teammate_played_cards_batch,
+                         last_action_batch,
+                         last_landlord_action_batch,
+                         last_teammate_action_batch,
+                         landlord_num_cards_left_batch,
+                         teammate_num_cards_left_batch,
+                         bomb_num_batch,
+                         my_action_batch))
+    x_no_action = np.hstack((my_handcards,  # same order as x_batch without the action part and the batch axis
+                             other_handcards,
+                             landlord_played_cards,
+                             teammate_played_cards,
+                             last_action,
+                             last_landlord_action,
+                             last_teammate_action,
+                             landlord_num_cards_left,
+                             teammate_num_cards_left,
+                             bomb_num))
+    z = _action_seq_list2array(_process_action_seq(
+        infoset.card_play_action_seq, 15, False), False)  # last 15 moves in the old (5, 162) layout
+    z_batch = np.repeat(
+        z[np.newaxis, :, :],
+        num_legal_actions, axis=0)
+    obs = {
+        'position': 'landlord_up',
+        'x_batch': x_batch.astype(np.float32),
+        'z_batch': z_batch.astype(np.float32),
+        'legal_actions': infoset.legal_actions,
+        'x_no_action': x_no_action.astype(np.int8),
+        'z': z.astype(np.int8),
+    }
+    return obs
+
+def _get_obs_landlord_down(infoset):
+    """
+    Obtain the landlord_down features. See Table 5 in
+    https://arxiv.org/pdf/2106.06135.pdf
+
+    Each state vector is tiled once per legal action, so row j of
+    ``x_batch`` is the shared state followed by the encoding of
+    ``infoset.legal_actions[j]``; ``x_no_action`` is that state without
+    the action columns.  ``z`` is built from the play history
+    (``infoset.card_play_action_seq``) and tiled the same way.
+    """
+    num_legal_actions = len(infoset.legal_actions)  # rows in every *_batch
+    my_handcards = _cards2array(infoset.player_hand_cards)
+    my_handcards_batch = np.repeat(my_handcards[np.newaxis, :],
+                                   num_legal_actions, axis=0)
+
+    other_handcards = _cards2array(infoset.other_hand_cards)
+    other_handcards_batch = np.repeat(other_handcards[np.newaxis, :],
+                                      num_legal_actions, axis=0)
+
+    last_action = _cards2array(infoset.last_move)
+    last_action_batch = np.repeat(last_action[np.newaxis, :],
+                                  num_legal_actions, axis=0)
+
+    my_action_batch = np.zeros(my_handcards_batch.shape)  # filled per row below
+    for j, action in enumerate(infoset.legal_actions):
+        my_action_batch[j, :] = _cards2array(action)
+
+    last_landlord_action = _cards2array(
+        infoset.last_move_dict['landlord'])
+    last_landlord_action_batch = np.repeat(
+        last_landlord_action[np.newaxis, :],
+        num_legal_actions, axis=0)
+    landlord_num_cards_left = _get_one_hot_array(
+        infoset.num_cards_left_dict['landlord'], 20)  # presumably one-hot width
+    landlord_num_cards_left_batch = np.repeat(
+        landlord_num_cards_left[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    landlord_played_cards = _cards2array(
+        infoset.played_cards['landlord'])
+    landlord_played_cards_batch = np.repeat(
+        landlord_played_cards[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    last_teammate_action = _cards2array(
+        infoset.last_move_dict['landlord_up'])  # landlord_up is the teammate here
+    last_teammate_action_batch = np.repeat(
+        last_teammate_action[np.newaxis, :],
+        num_legal_actions, axis=0)
+    teammate_num_cards_left = _get_one_hot_array(
+        infoset.num_cards_left_dict['landlord_up'], 17)
+    teammate_num_cards_left_batch = np.repeat(
+        teammate_num_cards_left[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    teammate_played_cards = _cards2array(
+        infoset.played_cards['landlord_up'])
+    teammate_played_cards_batch = np.repeat(
+        teammate_played_cards[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    bomb_num = _get_one_hot_bomb(
+        infoset.bomb_num)
+    bomb_num_batch = np.repeat(
+        bomb_num[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    x_batch = np.hstack((my_handcards_batch,
+                         other_handcards_batch,
+                         landlord_played_cards_batch,
+                         teammate_played_cards_batch,
+                         last_action_batch,
+                         last_landlord_action_batch,
+                         last_teammate_action_batch,
+                         landlord_num_cards_left_batch,
+                         teammate_num_cards_left_batch,
+                         bomb_num_batch,
+                         my_action_batch))  # candidate action goes last
+    x_no_action = np.hstack((my_handcards,
+                             other_handcards,
+                             landlord_played_cards,
+                             teammate_played_cards,
+                             last_action,
+                             last_landlord_action,
+                             last_teammate_action,
+                             landlord_num_cards_left,
+                             teammate_num_cards_left,
+                             bomb_num))  # same order as x_batch, minus the action
+    z = _action_seq_list2array(_process_action_seq(
+        infoset.card_play_action_seq, 15, False), False)
+    z_batch = np.repeat(
+        z[np.newaxis, :, :],
+        num_legal_actions, axis=0)
+    obs = {
+        'position': 'landlord_down',
+        'x_batch': x_batch.astype(np.float32),
+        'z_batch': z_batch.astype(np.float32),
+        'legal_actions': infoset.legal_actions,
+        'x_no_action': x_no_action.astype(np.int8),
+        'z': z.astype(np.int8),
+    }
+    return obs
+
+def _get_obs_landlord_withbid(infoset):  # NOTE(review): reads no bid field
+    """
+    Obtain the landlord features. See Table 4 in
+    https://arxiv.org/pdf/2106.06135.pdf
+    """
+    num_legal_actions = len(infoset.legal_actions)  # rows in every *_batch
+    my_handcards = _cards2array(infoset.player_hand_cards)
+    my_handcards_batch = np.repeat(my_handcards[np.newaxis, :],
+                                   num_legal_actions, axis=0)
+
+    other_handcards = _cards2array(infoset.other_hand_cards)
+    other_handcards_batch = np.repeat(other_handcards[np.newaxis, :],
+                                      num_legal_actions, axis=0)
+
+    last_action = _cards2array(infoset.last_move)
+    last_action_batch = np.repeat(last_action[np.newaxis, :],
+                                  num_legal_actions, axis=0)
+
+    my_action_batch = np.zeros(my_handcards_batch.shape)  # filled per row below
+    for j, action in enumerate(infoset.legal_actions):
+        my_action_batch[j, :] = _cards2array(action)
+
+    landlord_up_num_cards_left = _get_one_hot_array(
+        infoset.num_cards_left_dict['landlord_up'], 17)  # presumably one-hot width
+    landlord_up_num_cards_left_batch = np.repeat(
+        landlord_up_num_cards_left[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    landlord_down_num_cards_left = _get_one_hot_array(
+        infoset.num_cards_left_dict['landlord_down'], 17)
+    landlord_down_num_cards_left_batch = np.repeat(
+        landlord_down_num_cards_left[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    landlord_up_played_cards = _cards2array(
+        infoset.played_cards['landlord_up'])
+    landlord_up_played_cards_batch = np.repeat(
+        landlord_up_played_cards[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    landlord_down_played_cards = _cards2array(
+        infoset.played_cards['landlord_down'])
+    landlord_down_played_cards_batch = np.repeat(
+        landlord_down_played_cards[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    bomb_num = _get_one_hot_bomb(
+        infoset.bomb_num)
+    bomb_num_batch = np.repeat(
+        bomb_num[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    x_batch = np.hstack((my_handcards_batch,
+                         other_handcards_batch,
+                         last_action_batch,
+                         landlord_up_played_cards_batch,
+                         landlord_down_played_cards_batch,
+                         landlord_up_num_cards_left_batch,
+                         landlord_down_num_cards_left_batch,
+                         bomb_num_batch,
+                         my_action_batch))  # candidate action goes last
+    x_no_action = np.hstack((my_handcards,
+                             other_handcards,
+                             last_action,
+                             landlord_up_played_cards,
+                             landlord_down_played_cards,
+                             landlord_up_num_cards_left,
+                             landlord_down_num_cards_left,
+                             bomb_num))  # same order as x_batch, minus the action
+    z = _action_seq_list2array(_process_action_seq(
+        infoset.card_play_action_seq, 15, False), False)
+    z_batch = np.repeat(
+        z[np.newaxis, :, :],
+        num_legal_actions, axis=0)  # same z repeated for every legal action
+    obs = {
+        'position': 'landlord',  # plain 'landlord', no separate withbid label
+        'x_batch': x_batch.astype(np.float32),
+        'z_batch': z_batch.astype(np.float32),
+        'legal_actions': infoset.legal_actions,
+        'x_no_action': x_no_action.astype(np.int8),
+        'z': z.astype(np.int8),
+    }
+    return obs
+
+
+def _get_obs_general1(infoset, position):
+    """
+    Build the observation for `position`: x_batch has one row per legal
+    action (shared state features followed by that action's encoding) and
+    z encodes infoset.card_play_action_seq via _process_action_seq(..., 32).
+    """
+    num_legal_actions = len(infoset.legal_actions)  # rows in every *_batch
+    my_handcards = _cards2array(infoset.player_hand_cards)
+    my_handcards_batch = np.repeat(my_handcards[np.newaxis, :],
+                                   num_legal_actions, axis=0)
+
+    other_handcards = _cards2array(infoset.other_hand_cards)
+    other_handcards_batch = np.repeat(other_handcards[np.newaxis, :],
+                                      num_legal_actions, axis=0)
+
+    position_map = {
+        "landlord": [1, 0, 0],
+        "landlord_up": [0, 1, 0],
+        "landlord_down": [0, 0, 1]
+    }
+    position_info = np.array(position_map[position])  # KeyError on other names
+    position_info_batch = np.repeat(position_info[np.newaxis, :],
+                                    num_legal_actions, axis=0)
+
+    bid_info = np.array(infoset.bid_info).flatten()
+    bid_info_batch = np.repeat(bid_info[np.newaxis, :],
+                               num_legal_actions, axis=0)
+
+    multiply_info = np.array(infoset.multiply_info)
+    multiply_info_batch = np.repeat(multiply_info[np.newaxis, :],
+                                    num_legal_actions, axis=0)
+
+    three_landlord_cards = _cards2array(infoset.three_landlord_cards)
+    three_landlord_cards_batch = np.repeat(three_landlord_cards[np.newaxis, :],
+                                           num_legal_actions, axis=0)
+
+    last_action = _cards2array(infoset.last_move)
+    last_action_batch = np.repeat(last_action[np.newaxis, :],
+                                  num_legal_actions, axis=0)
+
+    my_action_batch = np.zeros(my_handcards_batch.shape)  # filled per row below
+    for j, action in enumerate(infoset.legal_actions):
+        my_action_batch[j, :] = _cards2array(action)
+
+    landlord_num_cards_left = _get_one_hot_array(
+        infoset.num_cards_left_dict['landlord'], 20)
+    landlord_num_cards_left_batch = np.repeat(
+        landlord_num_cards_left[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    landlord_up_num_cards_left = _get_one_hot_array(
+        infoset.num_cards_left_dict['landlord_up'], 17)
+    landlord_up_num_cards_left_batch = np.repeat(
+        landlord_up_num_cards_left[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    landlord_down_num_cards_left = _get_one_hot_array(
+        infoset.num_cards_left_dict['landlord_down'], 17)
+    landlord_down_num_cards_left_batch = np.repeat(
+        landlord_down_num_cards_left[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    landlord_played_cards = _cards2array(
+        infoset.played_cards['landlord'])
+    landlord_played_cards_batch = np.repeat(
+        landlord_played_cards[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    landlord_up_played_cards = _cards2array(
+        infoset.played_cards['landlord_up'])
+    landlord_up_played_cards_batch = np.repeat(
+        landlord_up_played_cards[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    landlord_down_played_cards = _cards2array(
+        infoset.played_cards['landlord_down'])
+    landlord_down_played_cards_batch = np.repeat(
+        landlord_down_played_cards[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    bomb_num = _get_one_hot_bomb(
+        infoset.bomb_num)
+    bomb_num_batch = np.repeat(
+        bomb_num[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    x_batch = np.hstack((position_info_batch,  # 3
+                         my_handcards_batch,  # 54
+                         other_handcards_batch,  # 54
+                         three_landlord_cards_batch,  # 54
+                         last_action_batch,  # 54
+                         landlord_played_cards_batch,  # 54
+                         landlord_up_played_cards_batch,  # 54
+                         landlord_down_played_cards_batch,  # 54
+                         landlord_num_cards_left_batch,  # 20
+                         landlord_up_num_cards_left_batch,  # 17
+                         landlord_down_num_cards_left_batch,  # 17
+                         bomb_num_batch,  # 15
+                         bid_info_batch,  # 12
+                         multiply_info_batch, # 3
+                         my_action_batch))  # 54
+    x_no_action = np.hstack((position_info,
+                             my_handcards,
+                             other_handcards,
+                             three_landlord_cards,
+                             last_action,
+                             landlord_played_cards,
+                             landlord_up_played_cards,
+                             landlord_down_played_cards,
+                             landlord_num_cards_left,
+                             landlord_up_num_cards_left,
+                             landlord_down_num_cards_left,
+                             bomb_num,
+                             bid_info,
+                             multiply_info))  # same order as x_batch, minus the action
+    z = _action_seq_list2array(_process_action_seq(
+        infoset.card_play_action_seq, 32))
+    z_batch = np.repeat(
+        z[np.newaxis, :, :],
+        num_legal_actions, axis=0)
+    obs = {
+        'position': position,
+        'x_batch': x_batch.astype(np.float32),
+        'z_batch': z_batch.astype(np.float32),
+        'legal_actions': infoset.legal_actions,
+        'x_no_action': x_no_action.astype(np.int8),
+        'z': z.astype(np.int8),
+    }
+    return obs
+
+def _get_obs_general(infoset, position):
+    """
+    Build the observation for `position`.  x_batch / x_no_action carry only
+    bid and multiply info; z stacks card-count, hand, played-card and history
+    rows, and z_batch puts each legal action's row on top of z (40 x 54).
+    """
+    num_legal_actions = len(infoset.legal_actions)  # rows in every *_batch
+    my_handcards = _cards2array(infoset.player_hand_cards)
+    my_handcards_batch = np.repeat(my_handcards[np.newaxis, :],
+                                   num_legal_actions, axis=0)
+
+    other_handcards = _cards2array(infoset.other_hand_cards)
+    other_handcards_batch = np.repeat(other_handcards[np.newaxis, :],
+                                      num_legal_actions, axis=0)
+
+    position_map = {
+        "landlord": [1, 0, 0],
+        "landlord_up": [0, 1, 0],
+        "landlord_down": [0, 0, 1]
+    }
+    # NOTE(review): position, last_action and bomb features are computed
+    # below but are not part of any returned array in this function.
+    position_info = np.array(position_map[position])
+    position_info_batch = np.repeat(position_info[np.newaxis, :],
+                                    num_legal_actions, axis=0)
+    bid_info = np.array(infoset.bid_info).flatten()
+    bid_info_batch = np.repeat(bid_info[np.newaxis, :],
+                               num_legal_actions, axis=0)
+    multiply_info = np.array(infoset.multiply_info)
+    multiply_info_batch = np.repeat(multiply_info[np.newaxis, :],
+                                    num_legal_actions, axis=0)
+
+    three_landlord_cards = _cards2array(infoset.three_landlord_cards)
+    three_landlord_cards_batch = np.repeat(three_landlord_cards[np.newaxis, :],
+                                           num_legal_actions, axis=0)
+
+    last_action = _cards2array(infoset.last_move)
+    last_action_batch = np.repeat(last_action[np.newaxis, :],
+                                  num_legal_actions, axis=0)
+
+    my_action_batch = np.zeros(my_handcards_batch.shape)  # filled per row below
+    for j, action in enumerate(infoset.legal_actions):
+        my_action_batch[j, :] = _cards2array(action)
+
+    landlord_num_cards_left = _get_one_hot_array(
+        infoset.num_cards_left_dict['landlord'], 20)
+    landlord_num_cards_left_batch = np.repeat(
+        landlord_num_cards_left[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    landlord_up_num_cards_left = _get_one_hot_array(
+        infoset.num_cards_left_dict['landlord_up'], 17)
+    landlord_up_num_cards_left_batch = np.repeat(
+        landlord_up_num_cards_left[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    landlord_down_num_cards_left = _get_one_hot_array(
+        infoset.num_cards_left_dict['landlord_down'], 17)
+    landlord_down_num_cards_left_batch = np.repeat(
+        landlord_down_num_cards_left[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    landlord_played_cards = _cards2array(
+        infoset.played_cards['landlord'])
+    landlord_played_cards_batch = np.repeat(
+        landlord_played_cards[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    landlord_up_played_cards = _cards2array(
+        infoset.played_cards['landlord_up'])
+    landlord_up_played_cards_batch = np.repeat(
+        landlord_up_played_cards[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    landlord_down_played_cards = _cards2array(
+        infoset.played_cards['landlord_down'])
+    landlord_down_played_cards_batch = np.repeat(
+        landlord_down_played_cards[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    bomb_num = _get_one_hot_bomb(
+        infoset.bomb_num)
+    bomb_num_batch = np.repeat(
+        bomb_num[np.newaxis, :],
+        num_legal_actions, axis=0)
+    num_cards_left = np.hstack((
+                         landlord_num_cards_left,  # 20
+                         landlord_up_num_cards_left,  # 17
+                         landlord_down_num_cards_left))  # 17, so one 54-wide row
+
+    x_batch = np.hstack((
+                         bid_info_batch,  # 12
+                         multiply_info_batch))  # 3
+    x_no_action = np.hstack((
+                             bid_info,
+                             multiply_info))
+    z = np.vstack((
+                  num_cards_left,
+                  my_handcards,  # 54
+                  other_handcards,  # 54
+                  three_landlord_cards,  # 54
+                  landlord_played_cards,  # 54
+                  landlord_up_played_cards,  # 54
+                  landlord_down_played_cards,  # 54
+                  _action_seq_list2array(_process_action_seq(infoset.card_play_action_seq, 32))
+                  ))
+
+    _z_batch = np.repeat(
+        z[np.newaxis, :, :],
+        num_legal_actions, axis=0)
+    my_action_batch = my_action_batch[:,np.newaxis,:]  # (n, 1, 54) so each action stacks as one row
+    z_batch = np.zeros([len(_z_batch),40,54],int)  # 40 = 1 action row + rows of z
+    for i in range(0,len(_z_batch)):
+        z_batch[i] = np.vstack((my_action_batch[i],_z_batch[i]))
+    obs = {
+        'position': position,
+        'x_batch': x_batch.astype(np.float32),
+        'z_batch': z_batch.astype(np.float32),
+        'legal_actions': infoset.legal_actions,
+        'x_no_action': x_no_action.astype(np.int8),
+        'z': z.astype(np.int8),  # note: z itself has no action row
+    }
+    return obs
+
+def gen_bid_legal_actions(player_id, bid_info):  # returns the candidate bid tables, each flattened
+    self_bid_info = bid_info[:, [(player_id - 1) % 3, player_id, (player_id + 1) % 3]]  # copy; columns = prev, self, next
+    curr_round = -1  # stays -1 when no row contains a -1 entry
+    for r in range(4):  # only the first four rows are inspected
+        if -1 in self_bid_info[r]:  # first row that still holds a -1
+            curr_round = r
+            break
+    bid_actions = []
+    if curr_round != -1:
+        self_bid_info[curr_round] = [0, 0, 0]  # candidate 1: own (middle) column 0, presumably "pass"
+        bid_actions.append(np.array(self_bid_info).flatten())  # np.array copies before the row is overwritten
+        self_bid_info[curr_round] = [0, 1, 0]  # candidate 2: own (middle) column 1, presumably "bid"
+        bid_actions.append(np.array(self_bid_info).flatten())
+    return np.array(bid_actions)  # empty array when curr_round stayed -1
+
+
+def _get_obs_for_bid_legacy(player_id, bid_info, hand_cards):  # bidding obs in the _get_obs_general1 x_batch column order
+    all_cards = [3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
+                 8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, 12,
+                 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 17, 17, 17, 17, 20, 30]
+    num_legal_actions = 2  # matches the two candidates gen_bid_legal_actions appends
+    my_handcards = _cards2array(hand_cards)
+    my_handcards_batch = np.repeat(my_handcards[np.newaxis, :],
+                                   num_legal_actions, axis=0)
+    other_cards = []
+    other_cards.extend(all_cards)
+    for card in hand_cards:
+        other_cards.remove(card)  # all_cards minus own hand; ValueError if a card is absent
+    other_handcards = _cards2array(other_cards)
+    other_handcards_batch = np.repeat(other_handcards[np.newaxis, :],
+                                      num_legal_actions, axis=0)
+
+    position_info = np.array([0, 0, 0])  # all-zero: no position is encoded here
+    position_info_batch = np.repeat(position_info[np.newaxis, :],
+                                    num_legal_actions, axis=0)
+
+    bid_legal_actions = gen_bid_legal_actions(player_id, bid_info)
+    bid_info = bid_legal_actions[0]  # NOTE(review): reassigned but not read afterwards
+    bid_info_batch = bid_legal_actions  # one candidate bid table per row
+
+    multiply_info = np.array([0, 0, 0])
+    multiply_info_batch = np.repeat(multiply_info[np.newaxis, :],
+                                    num_legal_actions, axis=0)
+
+    three_landlord_cards = _cards2array([])  # empty-card encoding as placeholder
+    three_landlord_cards_batch = np.repeat(three_landlord_cards[np.newaxis, :],
+                                           num_legal_actions, axis=0)
+
+    last_action = _cards2array([])
+    last_action_batch = np.repeat(last_action[np.newaxis, :],
+                                  num_legal_actions, axis=0)
+
+    my_action_batch = np.zeros(my_handcards_batch.shape)
+    for j in range(2):  # same 2 as num_legal_actions
+        my_action_batch[j, :] = _cards2array([])
+
+    landlord_num_cards_left = _get_one_hot_array(0, 20)
+    landlord_num_cards_left_batch = np.repeat(
+        landlord_num_cards_left[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    landlord_up_num_cards_left = _get_one_hot_array(0, 17)
+    landlord_up_num_cards_left_batch = np.repeat(
+        landlord_up_num_cards_left[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    landlord_down_num_cards_left = _get_one_hot_array(0, 17)
+    landlord_down_num_cards_left_batch = np.repeat(
+        landlord_down_num_cards_left[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    landlord_played_cards = _cards2array([])
+    landlord_played_cards_batch = np.repeat(
+        landlord_played_cards[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    landlord_up_played_cards = _cards2array([])
+    landlord_up_played_cards_batch = np.repeat(
+        landlord_up_played_cards[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    landlord_down_played_cards = _cards2array([])
+    landlord_down_played_cards_batch = np.repeat(
+        landlord_down_played_cards[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    bomb_num = _get_one_hot_bomb(0)
+    bomb_num_batch = np.repeat(
+        bomb_num[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    x_batch = np.hstack((position_info_batch,
+                         my_handcards_batch,
+                         other_handcards_batch,
+                         three_landlord_cards_batch,
+                         last_action_batch,
+                         landlord_played_cards_batch,
+                         landlord_up_played_cards_batch,
+                         landlord_down_played_cards_batch,
+                         landlord_num_cards_left_batch,
+                         landlord_up_num_cards_left_batch,
+                         landlord_down_num_cards_left_batch,
+                         bomb_num_batch,
+                         bid_info_batch,  # the only columns that differ between the rows
+                         multiply_info_batch,
+                         my_action_batch))
+    x_no_action = np.hstack((position_info,
+                             my_handcards,
+                             other_handcards,
+                             three_landlord_cards,
+                             last_action,
+                             landlord_played_cards,
+                             landlord_up_played_cards,
+                             landlord_down_played_cards,
+                             landlord_num_cards_left,
+                             landlord_up_num_cards_left,
+                             landlord_down_num_cards_left,
+                             bomb_num))  # NOTE(review): no bid/multiply columns here, unlike x_batch
+    z = _action_seq_list2array(_process_action_seq([], 32))  # empty play history
+    z_batch = np.repeat(
+        z[np.newaxis, :, :],
+        num_legal_actions, axis=0)
+    obs = {
+        'position': "",  # no position is reported by this function
+        'x_batch': x_batch.astype(np.float32),
+        'z_batch': z_batch.astype(np.float32),
+        'legal_actions': bid_legal_actions,
+        'x_no_action': x_no_action.astype(np.int8),
+        'z': z.astype(np.int8),
+        "bid_info_batch": bid_info_batch.astype(np.int8),
+        "multiply_info": multiply_info.astype(np.int8)
+    }
+    return obs
+
+def _get_obs_for_bid(player_id, bid_info, hand_cards):
+    """
+    Build the 2-row bidding observation: each x_batch row is the hand
+    encoding followed by one candidate bid table repeated 5 times.
+    """
+    num_legal_actions = 2  # matches the two candidates gen_bid_legal_actions appends
+    my_handcards = _cards2array(hand_cards)
+    my_handcards_batch = np.repeat(my_handcards[np.newaxis, :],
+                                   num_legal_actions, axis=0)
+
+    bid_legal_actions = gen_bid_legal_actions(player_id, bid_info)
+    bid_info_batch = np.hstack([bid_legal_actions for _ in range(5)])  # 5 side-by-side copies
+
+    x_batch = np.hstack((my_handcards_batch,
+                         bid_info_batch))
+    x_no_action = np.hstack((my_handcards))  # not a tuple: hstack just re-joins the array
+    obs = {
+        'position': "",  # no position is reported by this function
+        'x_batch': x_batch.astype(np.float32),
+        'z_batch': np.array([0,0]),  # fixed two-element placeholder; no 'z' key is returned
+        'legal_actions': bid_legal_actions,
+        'x_no_action': x_no_action.astype(np.int8),
+        "bid_info_batch": bid_info_batch.astype(np.int8)
+    }
+    return obs
+
+def _get_obs_for_multiply(position, bid_info, hand_cards, landlord_cards):
+    """Multiply-stage observation: one x_batch row per one-hot multiply choice."""
+    all_cards = [3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
+                 8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, 12,
+                 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 17, 17, 17, 17, 20, 30]
+    num_legal_actions = 3  # one per row of multiply_info_batch below
+    my_handcards = _cards2array(hand_cards)
+    my_handcards_batch = np.repeat(my_handcards[np.newaxis, :],
+                                   num_legal_actions, axis=0)
+    other_cards = list(all_cards)  # private copy so all_cards is not mutated
+    for card in hand_cards:
+        other_cards.remove(card)  # all_cards minus own hand; ValueError if a card is absent
+    other_handcards = _cards2array(other_cards)
+    other_handcards_batch = np.repeat(other_handcards[np.newaxis, :],
+                                      num_legal_actions, axis=0)
+
+    position_map = {
+        "landlord": [1, 0, 0],
+        "landlord_up": [0, 1, 0],
+        "landlord_down": [0, 0, 1]
+    }
+    position_info = np.array(position_map[position])  # KeyError on other names
+    position_info_batch = np.repeat(position_info[np.newaxis, :],
+                                    num_legal_actions, axis=0)
+
+    bid_info = np.array(bid_info).flatten()
+    bid_info_batch = np.repeat(bid_info[np.newaxis, :],
+                               num_legal_actions, axis=0)
+
+    multiply_info = np.array([0, 0, 0])  # "nothing chosen" vector
+    multiply_info_batch = np.array([[1, 0, 0],
+                                    [0, 1, 0],
+                                    [0, 0, 1]])  # the three candidate choices, one-hot
+
+    three_landlord_cards = _cards2array(landlord_cards)
+    three_landlord_cards_batch = np.repeat(three_landlord_cards[np.newaxis, :],
+                                           num_legal_actions, axis=0)
+
+    last_action = _cards2array([])  # empty-card encoding as placeholder
+    last_action_batch = np.repeat(last_action[np.newaxis, :],
+                                  num_legal_actions, axis=0)
+
+    my_action_batch = np.zeros(my_handcards_batch.shape)
+    for j in range(num_legal_actions):
+        my_action_batch[j, :] = _cards2array([])
+
+    landlord_num_cards_left = _get_one_hot_array(0, 20)
+    landlord_num_cards_left_batch = np.repeat(
+        landlord_num_cards_left[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    landlord_up_num_cards_left = _get_one_hot_array(0, 17)
+    landlord_up_num_cards_left_batch = np.repeat(
+        landlord_up_num_cards_left[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    landlord_down_num_cards_left = _get_one_hot_array(0, 17)
+    landlord_down_num_cards_left_batch = np.repeat(
+        landlord_down_num_cards_left[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    landlord_played_cards = _cards2array([])
+    landlord_played_cards_batch = np.repeat(
+        landlord_played_cards[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    landlord_up_played_cards = _cards2array([])
+    landlord_up_played_cards_batch = np.repeat(
+        landlord_up_played_cards[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    landlord_down_played_cards = _cards2array([])
+    landlord_down_played_cards_batch = np.repeat(
+        landlord_down_played_cards[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    bomb_num = _get_one_hot_bomb(0)
+    bomb_num_batch = np.repeat(
+        bomb_num[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    x_batch = np.hstack((position_info_batch,
+                         my_handcards_batch,
+                         other_handcards_batch,
+                         three_landlord_cards_batch,
+                         last_action_batch,
+                         landlord_played_cards_batch,
+                         landlord_up_played_cards_batch,
+                         landlord_down_played_cards_batch,
+                         landlord_num_cards_left_batch,
+                         landlord_up_num_cards_left_batch,
+                         landlord_down_num_cards_left_batch,
+                         bomb_num_batch,
+                         bid_info_batch,
+                         multiply_info_batch,  # the only columns that differ between the rows
+                         my_action_batch))
+    x_no_action = np.hstack((position_info,
+                             my_handcards,
+                             other_handcards,
+                             three_landlord_cards,
+                             last_action,
+                             landlord_played_cards,
+                             landlord_up_played_cards,
+                             landlord_down_played_cards,
+                             landlord_num_cards_left,
+                             landlord_up_num_cards_left,
+                             landlord_down_num_cards_left,
+                             bomb_num))  # NOTE(review): no bid/multiply columns here, unlike x_batch
+    z = _action_seq_list2array(_process_action_seq([], 32))  # empty play history
+    z_batch = np.repeat(
+        z[np.newaxis, :, :],
+        num_legal_actions, axis=0)
+    obs = {
+        'position': "",  # NOTE(review): left empty although `position` is known
+        'x_batch': x_batch.astype(np.float32),
+        'z_batch': z_batch.astype(np.float32),
+        'legal_actions': multiply_info_batch,
+        'x_no_action': x_no_action.astype(np.int8),
+        'z': z.astype(np.int8),
+        "bid_info": bid_info.astype(np.int8),
+        "multiply_info_batch": multiply_info_batch.astype(np.int8)  # was the all-zero multiply_info
+    }
+    return obs
diff --git a/douzero/env/game.py b/douzero/env/game.py
new file mode 100644
index 0000000..f9252f5
--- /dev/null
+++ b/douzero/env/game.py
@@ -0,0 +1,414 @@
+from copy import deepcopy
+from . import move_detector as md, move_selector as ms
+from .move_generator import MovesGener
+import random
+
+EnvCard2RealCard = {3: '3', 4: '4', 5: '5', 6: '6', 7: '7',
+                    8: '8', 9: '9', 10: '10', 11: 'J', 12: 'Q',
+                    13: 'K', 14: 'A', 17: '2', 20: 'X', 30: 'D'}
+# Inverse of EnvCard2RealCard: card label -> integer card code.
+RealCard2EnvCard = {'3': 3, '4': 4, '5': 5, '6': 6, '7': 7,
+                    '8': 8, '9': 9, '10': 10, 'J': 11, 'Q': 12,
+                    'K': 13, 'A': 14, '2': 17, 'X': 20, 'D': 30}
+# Moves GameEnv.step() counts as bombs: every four-of-a-kind plus [20, 30].
+bombs = [[3, 3, 3, 3], [4, 4, 4, 4], [5, 5, 5, 5], [6, 6, 6, 6],
+         [7, 7, 7, 7], [8, 8, 8, 8], [9, 9, 9, 9], [10, 10, 10, 10],
+         [11, 11, 11, 11], [12, 12, 12, 12], [13, 13, 13, 13], [14, 14, 14, 14],
+         [17, 17, 17, 17], [20, 30]]
+
+class GameEnv(object):
+
+    def __init__(self, players):
+        """Set up a game; `players[position]` must provide `act(infoset)`."""
+        self.card_play_action_seq = []
+        self.three_landlord_cards = None
+        self.game_over = False
+
+        self.acting_player_position = None
+        self.player_utility_dict = None
+        # Defined up front so get_winner() returns None before a game ends.
+        self.winner = None
+        self.players = players
+
+        self.last_move_dict = {'landlord': [],
+                               'landlord_up': [],
+                               'landlord_down': []}
+
+        self.played_cards = {'landlord': [],
+                             'landlord_up': [],
+                             'landlord_down': []}
+
+        self.last_move = []
+        self.last_two_moves = []
+
+        self.num_wins = {'landlord': 0,
+                         'farmer': 0}
+
+        self.num_scores = {'landlord': 0,
+                           'farmer': 0}
+
+        self.info_sets = {'landlord': InfoSet('landlord'),
+                          'landlord_up': InfoSet('landlord_up'),
+                          'landlord_down': InfoSet('landlord_down')}
+
+        self.bomb_num = 0
+        self.pos_bomb_num = {
+            "landlord": 0,
+            "landlord_up": 0,
+            "landlord_down": 0
+        }
+        self.last_pid = 'landlord'
+
+        self.bid_info = [[-1, -1, -1],
+                         [-1, -1, -1],
+                         [-1, -1, -1],
+                         [-1, -1, -1]]
+        self.bid_count = 0
+        self.multiply_count = {'landlord': 0,
+                               'landlord_up': 0,
+                               'landlord_down': 0}
+        self.step_count = 0
+
+
+    def card_play_init(self, card_play_data):
+        """Load a deal, advance the acting position and build its info set."""
+        # Hand lists are stored by reference, not copied.
+        for seat in ('landlord', 'landlord_up', 'landlord_down'):
+            self.info_sets[seat].player_hand_cards = card_play_data[seat]
+        self.three_landlord_cards = card_play_data['three_landlord_cards']
+        # Pick who acts next, then cache the snapshot step() hands to them.
+        self.get_acting_player_position()
+        refreshed_view = self.get_infoset()
+        self.game_infoset = refreshed_view
+
+
+    def game_done(self):
+        """Finish the game once any player's hand is empty."""
+        seats = ('landlord', 'landlord_up', 'landlord_down')
+        someone_is_out = any(
+            len(self.info_sets[seat].player_hand_cards) == 0 for seat in seats)
+        if not someone_is_out:
+            return
+        self.compute_player_utility()
+        self.update_num_wins_scores()
+        self.game_over = True
+
+    def compute_player_utility(self):
+        """Record the +2/-1 (landlord out) or -2/+1 (otherwise) utilities."""
+        landlord_hand = self.info_sets['landlord'].player_hand_cards
+        landlord_won = len(landlord_hand) == 0
+        landlord_utility = 2 if landlord_won else -2
+        farmer_utility = -1 if landlord_won else 1
+        self.player_utility_dict = {'landlord': landlord_utility,
+                                    'farmer': farmer_utility}
+
+    def update_num_wins_scores(self):
+        """Credit the winning side and apply the stake to both scores."""
+        multiplier = 2 ** self.bomb_num
+        for side, utility in self.player_utility_dict.items():
+            stake = (2 if side == 'landlord' else 1) * multiplier
+            if utility > 0:
+                self.num_wins[side] += 1
+                self.winner = side
+            self.num_scores[side] += stake if utility > 0 else -stake
+
+    def get_winner(self):
+        return self.winner  # assigned in update_num_wins_scores(): 'landlord' or 'farmer'
+
+    def get_bomb_num(self):
+        return self.bomb_num  # incremented by step() for every action found in `bombs`
+
+    def step(self):
+        """Ask the acting player for a move, apply it, and return it.
+
+        Updates the bomb counters, the move history, the acting hand and
+        the tracked landlord cards, then either finishes the game or moves
+        the turn on and refreshes the cached info set.
+        """
+        seat = self.acting_player_position
+        action = self.players[seat].act(self.game_infoset)
+        self.step_count += 1
+        if len(action) > 0:
+            self.last_pid = seat
+        if action in bombs:
+            self.bomb_num += 1
+            self.pos_bomb_num[seat] += 1
+
+        self.last_move_dict[seat] = action.copy()
+        self.card_play_action_seq.append((seat, action))
+        self.update_acting_player_hand_cards(action)
+        self.played_cards[seat] += action
+
+        # Cards the landlord plays are dropped from three_landlord_cards.
+        if seat == 'landlord' and len(action) > 0:
+            for card in action:
+                if len(self.three_landlord_cards) == 0:
+                    break
+                if card in self.three_landlord_cards:
+                    self.three_landlord_cards.remove(card)
+
+        self.game_done()
+        if not self.game_over:
+            self.get_acting_player_position()
+            self.game_infoset = self.get_infoset()
+        return action
+
+    def get_last_move(self):
+        """Return the most recent non-pass move among the last two actions.
+
+        Returns [] when there is no history or both of them were passes.
+        """
+        for _, action in reversed(self.card_play_action_seq[-2:]):
+            if len(action) != 0:
+                return action
+        return []
+
+    def get_last_two_moves(self):
+        """Return the two latest actions, newest first, padded with []."""
+        recent = [entry[1] for entry in self.card_play_action_seq[-2:]]
+        recent.reverse()
+        padding = [[] for _ in range(2 - len(recent))]
+        return recent + padding
+
+    def get_acting_player_position(self):
+        """Advance the turn and return the position that now has to act.
+
+        An unset (None) position becomes 'landlord'; after that the turn
+        cycles landlord -> landlord_down -> landlord_up -> landlord.
+        """
+        turn_order = {'landlord': 'landlord_down',
+                      'landlord_down': 'landlord_up'}
+        if self.acting_player_position is None:
+            upcoming = 'landlord'
+        else:
+            # 'landlord_up' (or any other value) wraps back to 'landlord'.
+            upcoming = turn_order.get(self.acting_player_position, 'landlord')
+        self.acting_player_position = upcoming
+        return upcoming
+
+    def update_acting_player_hand_cards(self, action):
+        if action != []:  # a pass ([]) leaves the hand untouched
+            for card in action:
+                self.info_sets[
+                    self.acting_player_position].player_hand_cards.remove(card)  # drops one copy per played card
+            self.info_sets[self.acting_player_position].player_hand_cards.sort()  # re-sort the hand in place
+
+    def get_legal_card_play_actions(self):
+        """Return the moves the acting player may make against the last move.
+
+        A pass ([]) is offered only when there is a move to respond to.
+        """
+        mg = MovesGener(
+            self.info_sets[self.acting_player_position].player_hand_cards)
+        action_sequence = self.card_play_action_seq
+        rival_move = []
+        if len(action_sequence) != 0:
+            if len(action_sequence) > 1 and len(action_sequence[-1][1]) == 0:  # a lone pass has no predecessor
+                rival_move = action_sequence[-2][1]
+            else:
+                rival_move = action_sequence[-1][1]
+        rival_type = md.get_move_type(rival_move)
+        rival_move_type = rival_type['type']
+        rival_move_len = rival_type.get('len', 1)
+        moves = list()
+        if rival_move_type == md.TYPE_0_PASS:
+            moves = mg.gen_moves()
+
+        elif rival_move_type == md.TYPE_1_SINGLE:
+            all_moves = mg.gen_type_1_single()
+            moves = ms.filter_type_1_single(all_moves, rival_move)
+
+        elif rival_move_type == md.TYPE_2_PAIR:
+            all_moves = mg.gen_type_2_pair()
+            moves = ms.filter_type_2_pair(all_moves, rival_move)
+
+        elif rival_move_type == md.TYPE_3_TRIPLE:
+            all_moves = mg.gen_type_3_triple()
+            moves = ms.filter_type_3_triple(all_moves, rival_move)
+
+        elif rival_move_type == md.TYPE_4_BOMB:
+            all_moves = mg.gen_type_4_bomb() + mg.gen_type_5_king_bomb()
+            moves = ms.filter_type_4_bomb(all_moves, rival_move)
+
+        elif rival_move_type == md.TYPE_5_KING_BOMB:
+            moves = []
+
+        elif rival_move_type == md.TYPE_6_3_1:
+            all_moves = mg.gen_type_6_3_1()
+            moves = ms.filter_type_6_3_1(all_moves, rival_move)
+
+        elif rival_move_type == md.TYPE_7_3_2:
+            all_moves = mg.gen_type_7_3_2()
+            moves = ms.filter_type_7_3_2(all_moves, rival_move)
+
+        elif rival_move_type == md.TYPE_8_SERIAL_SINGLE:
+            all_moves = mg.gen_type_8_serial_single(repeat_num=rival_move_len)
+            moves = ms.filter_type_8_serial_single(all_moves, rival_move)
+
+        elif rival_move_type == md.TYPE_9_SERIAL_PAIR:
+            all_moves = mg.gen_type_9_serial_pair(repeat_num=rival_move_len)
+            moves = ms.filter_type_9_serial_pair(all_moves, rival_move)
+
+        elif rival_move_type == md.TYPE_10_SERIAL_TRIPLE:
+            all_moves = mg.gen_type_10_serial_triple(repeat_num=rival_move_len)
+            moves = ms.filter_type_10_serial_triple(all_moves, rival_move)
+
+        elif rival_move_type == md.TYPE_11_SERIAL_3_1:
+            all_moves = mg.gen_type_11_serial_3_1(repeat_num=rival_move_len)
+            moves = ms.filter_type_11_serial_3_1(all_moves, rival_move)
+
+        elif rival_move_type == md.TYPE_12_SERIAL_3_2:
+            all_moves = mg.gen_type_12_serial_3_2(repeat_num=rival_move_len)
+            moves = ms.filter_type_12_serial_3_2(all_moves, rival_move)
+
+        elif rival_move_type == md.TYPE_13_4_2:
+            all_moves = mg.gen_type_13_4_2()
+            moves = ms.filter_type_13_4_2(all_moves, rival_move)
+
+        elif rival_move_type == md.TYPE_14_4_22:
+            all_moves = mg.gen_type_14_4_22()
+            moves = ms.filter_type_14_4_22(all_moves, rival_move)
+
+        if rival_move_type not in [md.TYPE_0_PASS,
+                                   md.TYPE_4_BOMB, md.TYPE_5_KING_BOMB]:
+            moves = moves + mg.gen_type_4_bomb() + mg.gen_type_5_king_bomb()
+
+        if len(rival_move) != 0:  # rival_move is not 'pass'
+            moves = moves + [[]]
+
+        for m in moves:
+            m.sort()
+
+        return moves
+
+    def reset(self):
+        """Restore the per-game state so that another deal can be played.
+
+        Every container assigned here is a new object, so nothing is
+        shared with the previous game.
+
+        `players`, the cumulative `num_wins` / `num_scores` tallies and
+        `winner` (if set) are not touched here.
+        """
+        seats = ('landlord', 'landlord_up', 'landlord_down')
+
+        # Move history.
+        self.card_play_action_seq = []
+        self.last_move = []
+        self.last_two_moves = []
+        self.last_move_dict = {seat: [] for seat in seats}
+        self.played_cards = {seat: [] for seat in seats}
+        self.last_pid = 'landlord'
+        self.step_count = 0
+
+        # Deal and turn state.
+        self.three_landlord_cards = None
+        self.acting_player_position = None
+        self.info_sets = {seat: InfoSet(seat) for seat in seats}
+
+        # Outcome of the game in progress.
+        self.game_over = False
+        self.player_utility_dict = None
+
+        # Bomb counters, overall and per position.
+        self.bomb_num = 0
+        self.pos_bomb_num = {seat: 0 for seat in seats}
+
+        # Bidding and multiplier bookkeeping.
+        self.bid_info = [[-1, -1, -1],
+                         [-1, -1, -1],
+                         [-1, -1, -1],
+                         [-1, -1, -1]]
+        self.bid_count = 0
+        self.multiply_count = {seat: 0 for seat in seats}
+
+    def get_infoset(self):
+        """Refresh the acting player's info set and return a deep copy of it.
+
+        The stored info set is filled with the current legal actions, move
+        history, card counts and the hands of all three positions; callers
+        get a copy, so changing it does not affect the game state.
+        """
+        seats = ['landlord', 'landlord_up', 'landlord_down']
+        me = self.acting_player_position
+        view = self.info_sets[me]
+
+        # What has happened so far and what may be played now.
+        view.last_pid = self.last_pid
+        view.legal_actions = self.get_legal_card_play_actions()
+        view.bomb_num = self.bomb_num
+        view.last_move = self.get_last_move()
+        view.last_two_moves = self.get_last_two_moves()
+        view.last_move_dict = self.last_move_dict
+
+        # Hand sizes of every position.
+        view.num_cards_left_dict = {
+            seat: len(self.info_sets[seat].player_hand_cards)
+            for seat in seats
+        }
+
+        # The other two hands, concatenated in seat order.
+        combined = []
+        for seat in seats:
+            if seat != me:
+                combined += self.info_sets[seat].player_hand_cards
+        view.other_hand_cards = combined
+
+        view.played_cards = self.played_cards
+        view.three_landlord_cards = self.three_landlord_cards
+        view.card_play_action_seq = self.card_play_action_seq
+
+        # Full hands of all three positions.
+        view.all_handcards = {
+            seat: self.info_sets[seat].player_hand_cards
+            for seat in seats
+        }
+
+        # Hand out a copy so the caller cannot alter the stored state.
+        return deepcopy(view)
+
+class InfoSet(object):
+    """
+    The game state is described as infoset, which
+    includes all the information in the current situation,
+    such as the hand cards of the three players, the
+    historical moves, etc.
+    """
+    def __init__(self, player_position):
+        # The player position, i.e., landlord, landlord_down, or landlord_up
+        self.player_position = player_position
+        # The hand cards of the current player. A list.
+        self.player_hand_cards = None
+        # The number of cards left for each player. It is a dict with str-->int
+        self.num_cards_left_dict = None
+        # The three landlord cards. A list.
+        self.three_landlord_cards = None
+        # The historical moves. A list of (position, action) tuples.
+        self.card_play_action_seq = None
+        # The union of the hand cards of the other two players for the current player
+        self.other_hand_cards = None
+        # The legal actions for the current move. It is a list of list
+        self.legal_actions = None
+        # The most recent valid move
+        self.last_move = None
+        # The most recent two moves
+        self.last_two_moves = None
+        # The last moves for all the positions
+        self.last_move_dict = None
+        # The played cards so far. A dict mapping each position to a list.
+        self.played_cards = None
+        # The hand cards of all the players. It is a dict.
+        self.all_handcards = None
+        # Last player position that plays a valid move, i.e., not `pass`
+        self.last_pid = None
+        # The number of bombs played so far
+        self.bomb_num = None
+
+        self.bid_info = [[-1, -1, -1],
+                         [-1, -1, -1],
+                         [-1, -1, -1],
+                         [-1, -1, -1]]
+
+        self.multiply_info = [1, 0, 0]
+
+        self.player_id = None
diff --git a/douzero/env/move_detector.py b/douzero/env/move_detector.py
new file mode 100644
index 0000000..c61e2b7
--- /dev/null
+++ b/douzero/env/move_detector.py
@@ -0,0 +1,107 @@
+from douzero.env.utils import *
+import collections
+
+# check if move is a continuous sequence
+def is_continuous_seq(move):
+    """Return True if every element is exactly one more than the one before.
+
+    Sequences with fewer than two elements are continuous by definition.
+    """
+    neighbours = zip(move, move[1:])
+    return all(after - before == 1 for before, after in neighbours)
+
+# return the type of the move
+def get_move_type(move):
+    """Classify `move`, which is assumed to be sorted in ascending order.
+
+    Returns a dict with 'type' plus, where set, 'rank' and 'len'.
+    """
+    move_size = len(move)
+    move_dict = collections.Counter(move)
+    if move_size == 0:
+        return {'type': TYPE_0_PASS}
+    if move_size == 1:
+        return {'type': TYPE_1_SINGLE, 'rank': move[0]}
+    if move_size == 2:
+        if move[0] == move[1]:
+            return {'type': TYPE_2_PAIR, 'rank': move[0]}
+        elif move == [20, 30]:  # Kings
+            return {'type': TYPE_5_KING_BOMB}
+        else:
+            return {'type': TYPE_15_WRONG}
+    if move_size == 3:
+        if len(move_dict) == 1:
+            return {'type': TYPE_3_TRIPLE, 'rank': move[0]}
+        else:
+            return {'type': TYPE_15_WRONG}
+
+    if move_size == 4:
+        if len(move_dict) == 1:
+            return {'type': TYPE_4_BOMB,  'rank': move[0]}
+        elif len(move_dict) == 2:
+            if move[0] == move[1] == move[2] or move[1] == move[2] == move[3]:
+                return {'type': TYPE_6_3_1, 'rank': move[1]}
+            else:
+                return {'type': TYPE_15_WRONG}
+        else:
+            return {'type': TYPE_15_WRONG}
+
+    if is_continuous_seq(move):
+        return {'type': TYPE_8_SERIAL_SINGLE, 'rank': move[0], 'len': len(move)}
+
+    if move_size == 5:
+        if sorted(move_dict.values()) == [2, 3]:  # two ranks alone would also accept a 4+1
+            return {'type': TYPE_7_3_2, 'rank': move[2]}
+        else:
+            return {'type': TYPE_15_WRONG}
+
+    count_dict = collections.defaultdict(int)
+    for c, n in move_dict.items():
+        count_dict[n] += 1
+
+    if move_size == 6:
+        if (len(move_dict) == 2 or len(move_dict) == 3) and count_dict.get(4) == 1 and \
+                (count_dict.get(2) == 1 or count_dict.get(1) == 2):
+            return {'type': TYPE_13_4_2, 'rank': move[2]}
+
+    if move_size == 8 and (((len(move_dict) == 3 or len(move_dict) == 2) and
+            (count_dict.get(4) == 1 and count_dict.get(2) == 2)) or count_dict.get(4) == 2):
+        return {'type': TYPE_14_4_22, 'rank': max([c for c, n in move_dict.items() if n == 4])}
+
+    mdkeys = sorted(move_dict.keys())
+    if len(move_dict) == count_dict.get(2) and is_continuous_seq(mdkeys):
+        return {'type': TYPE_9_SERIAL_PAIR, 'rank': mdkeys[0], 'len': len(mdkeys)}
+
+    if len(move_dict) == count_dict.get(3) and is_continuous_seq(mdkeys):
+        return {'type': TYPE_10_SERIAL_TRIPLE, 'rank': mdkeys[0], 'len': len(mdkeys)}
+
+    # Check Type 11 (serial 3+1) and Type 12 (serial 3+2)
+    if count_dict.get(3, 0) >= MIN_TRIPLES:
+        serial_3 = list()
+        single = list()
+        pair = list()
+
+        for k, v in move_dict.items():
+            if v == 3:
+                serial_3.append(k)
+            elif v == 1:
+                single.append(k)
+            elif v == 2:
+                pair.append(k)
+            else:  # no other possibilities
+                return {'type': TYPE_15_WRONG}
+
+        serial_3.sort()
+        if is_continuous_seq(serial_3):
+            if len(serial_3) == len(single)+len(pair)*2:
+                return {'type': TYPE_11_SERIAL_3_1, 'rank': serial_3[0], 'len': len(serial_3)}
+            if len(serial_3) == len(pair) and len(move_dict) == len(serial_3) * 2:
+                return {'type': TYPE_12_SERIAL_3_2, 'rank': serial_3[0], 'len': len(serial_3)}
+        # Exactly four triples (12 cards): three in a row, the fourth as kickers.
+        if move_size == 12 and len(serial_3) == 4:
+            if is_continuous_seq(serial_3[1:]):
+                return {'type': TYPE_11_SERIAL_3_1, 'rank': serial_3[1], 'len': len(serial_3) - 1}
+            if is_continuous_seq(serial_3[:-1]):
+                return {'type': TYPE_11_SERIAL_3_1, 'rank': serial_3[0], 'len': len(serial_3) - 1}
+
+    return {'type': TYPE_15_WRONG}
diff --git a/douzero/env/move_generator.py b/douzero/env/move_generator.py
new file mode 100644
index 0000000..ecebb6c
--- /dev/null
+++ b/douzero/env/move_generator.py
@@ -0,0 +1,219 @@
+from douzero.env.utils import MIN_SINGLE_CARDS, MIN_PAIRS, MIN_TRIPLES, select
+import collections
+import itertools
+
+class MovesGener(object):
+    """
+    This is for generating the possible combinations
+    """
+    def __init__(self, cards_list):
+        """Count the cards in `cards_list` and pre-compute the basic moves.
+
+        Singles, pairs, triples, bombs and the king bomb are generated once
+        here and kept on the instance.
+        """
+        self.cards_list = cards_list
+        self.cards_dict = collections.defaultdict(int)
+        for card in cards_list:
+            self.cards_dict[card] += 1
+
+        # Each generator stores its result in the attribute named beside it.
+        self.gen_type_1_single()     # -> self.single_card_moves
+        self.gen_type_2_pair()       # -> self.pair_moves
+        self.gen_type_3_triple()     # -> self.triple_cards_moves
+        self.gen_type_4_bomb()       # -> self.bomb_moves
+        self.gen_type_5_king_bomb()  # -> self.final_bomb_moves
+
+    def _gen_serial_moves(self, cards, min_serial, repeat=1, repeat_num=0):
+        """Generate every straight that can be formed from `cards`.
+
+        A straight is a run of consecutive card values in which each value
+        appears `repeat` times.
+
+        Args:
+            cards: card values to build runs from; duplicates are ignored.
+            min_serial: smallest number of distinct values in a run.
+            repeat: copies of each value placed in a move.
+            repeat_num: exact run length wanted.  Anything below
+                `min_serial`, the default 0 included, means every length
+                from `min_serial` up to the longest run available.
+
+        Returns:
+            A list of moves, each one a sorted list of card values.
+        """
+        # A requested length below the minimum means "no fixed length".
+        if repeat_num < min_serial:
+            repeat_num = 0
+        ranks = sorted(set(cards))
+
+        # Split the distinct values into maximal runs of consecutive values.
+        runs = []
+        for rank in ranks:
+            if runs and rank - runs[-1][-1] == 1:
+                runs[-1].append(rank)
+            else:
+                runs.append([rank])
+
+        moves = []
+        for run in runs:
+            # Runs shorter than the minimum cannot produce a move.
+            if len(run) < min_serial:
+                continue
+            if repeat_num == 0:
+                lengths = range(min_serial, len(run) + 1)
+            else:
+                lengths = [repeat_num]
+            # Slide a window of every wanted length across the run.
+            for length in lengths:
+                for first in range(len(run) - length + 1):
+                    window = run[first:first + length]
+                    moves.append(sorted(window * repeat))
+
+        return moves
+
+    def gen_type_1_single(self):
+        """Rebuild, store and return one [card] move per distinct card held."""
+        distinct_cards = set(self.cards_list)
+        self.single_card_moves = [[card] for card in distinct_cards]
+        return self.single_card_moves
+
+    def gen_type_2_pair(self):
+        """Rebuild, store and return a [k, k] move per rank held 2+ times."""
+        # A rank held three or four times still yields exactly one pair move.
+        held = self.cards_dict.items()
+        self.pair_moves = [[rank] * 2 for rank, count in held if count >= 2]
+        return self.pair_moves
+
+    def gen_type_3_triple(self):
+        """Rebuild, store and return a [k, k, k] move per rank held 3+ times."""
+        # A rank held four times still yields exactly one triple move.
+        held = self.cards_dict.items()
+        self.triple_cards_moves = [[rank] * 3 for rank, count in held if count >= 3]
+        return self.triple_cards_moves
+
+    def gen_type_4_bomb(self):
+        """Rebuild, store and return a [k, k, k, k] move per rank held 4 times."""
+        # Only an exact count of four qualifies.
+        held = self.cards_dict.items()
+        self.bomb_moves = [[rank] * 4 for rank, count in held if count == 4]
+        return self.bomb_moves
+
+    def gen_type_5_king_bomb(self):
+        """Rebuild, store and return [[20, 30]] if both are held, else []."""
+        holds_both = 20 in self.cards_list and 30 in self.cards_list
+        self.final_bomb_moves = [[20, 30]] if holds_both else []
+        return self.final_bomb_moves
+
+    def gen_type_6_3_1(self):
+        """Return [kicker, t, t, t] for each stored single/triple of unlike rank."""
+        # Reads the stored single and triple moves as they currently stand.
+        return [kicker + triple
+                for kicker in self.single_card_moves
+                for triple in self.triple_cards_moves
+                if kicker[0] != triple[0]]
+
+    def gen_type_7_3_2(self):
+        """Return [p, p, t, t, t] for each stored pair/triple of unlike rank."""
+        # Reads the stored pair and triple moves as they currently stand.
+        return [pair + triple
+                for pair in self.pair_moves
+                for triple in self.triple_cards_moves
+                if pair[0] != triple[0]]
+
+    def gen_type_8_serial_single(self, repeat_num=0):
+        return self._gen_serial_moves(self.cards_list, MIN_SINGLE_CARDS, repeat=1, repeat_num=repeat_num)  # runs of single cards
+
+    def gen_type_9_serial_pair(self, repeat_num=0):
+        """Return runs of consecutive pairs, at least MIN_PAIRS ranks long.
+
+        `repeat_num` is forwarded to _gen_serial_moves() as the run length.
+        """
+        paired = [rank for rank, count in self.cards_dict.items() if count >= 2]
+        return self._gen_serial_moves(paired, MIN_PAIRS, repeat=2, repeat_num=repeat_num)
+
+    def gen_type_10_serial_triple(self, repeat_num=0):
+        """Return runs of consecutive triples, at least MIN_TRIPLES ranks long.
+
+        `repeat_num` is forwarded to _gen_serial_moves() as the run length.
+        """
+        tripled = [rank for rank, count in self.cards_dict.items() if count >= 3]
+        return self._gen_serial_moves(tripled, MIN_TRIPLES, repeat=3, repeat_num=repeat_num)
+
+    def gen_type_11_serial_3_1(self, repeat_num=0):
+        """Return every run of triples with one kicker card per triple.
+
+        Kickers are drawn from the cards whose value is not in the run.
+        Duplicates are removed wherever they occur: itertools.groupby() only
+        merged neighbours, which relied on `cards_list` being sorted.
+        """
+        unique_moves = {}
+        for s3 in self.gen_type_10_serial_triple(repeat_num=repeat_num):
+            s3_set = set(s3)  # s3 is like [3,3,3,4,4,4]
+            spare = sorted(i for i in self.cards_list if i not in s3_set)
+            for kickers in select(spare, len(s3_set)):
+                # dict keys keep first-seen order, so the output order is stable
+                unique_moves.setdefault(tuple(s3 + kickers), None)
+        return [list(move) for move in unique_moves]
+
+    def gen_type_12_serial_3_2(self, repeat_num=0):
+        """Return every run of triples with one kicker pair per triple.
+
+        Kicker pairs come from ranks held at least twice that are not part of
+        the run itself.
+        """
+        pair_ranks = sorted(k for k, v in self.cards_dict.items() if v >= 2)
+        found = []
+        for run in self.gen_type_10_serial_triple(repeat_num=repeat_num):
+            run_ranks = set(run)
+            free_pairs = [rank for rank in pair_ranks if rank not in run_ranks]
+            # One distinct kicker rank per triple; each chosen rank is doubled.
+            for chosen in select(free_pairs, len(run_ranks)):
+                found.append(sorted(run + chosen * 2))
+        return found
+
+    def gen_type_13_4_2(self):
+        """Return each four-of-a-kind combined with any two other cards.
+
+        The two kickers may be equal or different.  Duplicate moves are
+        dropped wherever they occur; itertools.groupby() only merged
+        neighbours, which relied on `cards_list` being sorted.
+        """
+        unique_moves = {}
+        for fc in (k for k, v in self.cards_dict.items() if v == 4):
+            others = sorted(k for k in self.cards_list if k != fc)
+            for kickers in select(others, 2):
+                unique_moves.setdefault(tuple([fc] * 4 + kickers), None)
+        return [list(move) for move in unique_moves]
+
+    def gen_type_14_4_22(self):
+        """Return each four-of-a-kind combined with two pairs of other ranks.
+
+        Moves are laid out as [q, q, q, q, a, a, b, b].
+        """
+        moves = []
+        for quad, held in self.cards_dict.items():
+            if held != 4:
+                continue
+            pair_ranks = [k for k, v in self.cards_dict.items() if k != quad and v >= 2]
+            for first, second in select(pair_ranks, 2):
+                moves.append([quad] * 4 + [first, first, second, second])
+        return moves
+
+    # generate all possible moves from given cards
+    def gen_moves(self):
+        """Return the moves of all fourteen types, concatenated in type order."""
+        return list(itertools.chain(
+            self.gen_type_1_single(),
+            self.gen_type_2_pair(),
+            self.gen_type_3_triple(),
+            self.gen_type_4_bomb(),
+            self.gen_type_5_king_bomb(),
+            self.gen_type_6_3_1(),
+            self.gen_type_7_3_2(),
+            self.gen_type_8_serial_single(),
+            self.gen_type_9_serial_pair(),
+            self.gen_type_10_serial_triple(),
+            self.gen_type_11_serial_3_1(),
+            self.gen_type_12_serial_3_2(),
+            self.gen_type_13_4_2(),
+            self.gen_type_14_4_22()))
diff --git a/douzero/env/move_selector.py b/douzero/env/move_selector.py
new file mode 100644
index 0000000..61b75f8
--- /dev/null
+++ b/douzero/env/move_selector.py
@@ -0,0 +1,106 @@
+# return all moves that can beat rivals, moves and rival_move should be same type
+import collections
+
+def common_handle(moves, rival_move):
+    """Keep the moves whose first card is higher than the rival's first card.
+
+    Only element 0 of each side is compared.
+    """
+    return [move for move in moves if move[0] > rival_move[0]]
+
+def filter_type_1_single(moves, rival_move):
+    return common_handle(moves, rival_move)  # keeps singles above the rival's card
+
+
+def filter_type_2_pair(moves, rival_move):
+    return common_handle(moves, rival_move)  # compares the first card of each pair
+
+
+def filter_type_3_triple(moves, rival_move):
+    return common_handle(moves, rival_move)  # compares the first card of each triple
+
+
+def filter_type_4_bomb(moves, rival_move):
+    return common_handle(moves, rival_move)  # first-card comparison; [20, 30] starts above every four-of-a-kind
+
+# No need to filter for type_5_king_bomb
+
+def filter_type_6_3_1(moves, rival_move):
+    """Keep the 3+1 moves whose triple outranks the rival's triple.
+
+    Everything is sorted in place; index 1 of a sorted 3+1 is in the triple.
+    """
+    rival_move.sort()
+    rival_rank = rival_move[1]
+    for move in moves:
+        move.sort()
+    return [move for move in moves if move[1] > rival_rank]
+
+def filter_type_7_3_2(moves, rival_move):
+    """Keep the 3+2 moves whose triple outranks the rival's triple.
+
+    Everything is sorted in place; index 2 of a sorted 3+2 is in the triple.
+    """
+    rival_move.sort()
+    rival_rank = rival_move[2]
+    for move in moves:
+        move.sort()
+    return [move for move in moves if move[2] > rival_rank]
+
+def filter_type_8_serial_single(moves, rival_move):
+    return common_handle(moves, rival_move)  # compares the first card of each run
+
+def filter_type_9_serial_pair(moves, rival_move):
+    return common_handle(moves, rival_move)  # compares the first card of each run
+
+def filter_type_10_serial_triple(moves, rival_move):
+    return common_handle(moves, rival_move)  # compares the first card of each run
+
+def filter_type_11_serial_3_1(moves, rival_move):
+    """Keep the moves whose highest triple outranks the rival's highest triple."""
+    def top_triple(cards):
+        # Highest rank occurring exactly three times; max() raises if none.
+        tally = collections.Counter(cards)
+        return max([rank for rank, count in tally.items() if count == 3])
+
+    rival_rank = top_triple(rival_move)
+    survivors = [move for move in moves if top_triple(move) > rival_rank]
+    return survivors
+
+def filter_type_12_serial_3_2(moves, rival_move):
+    """Keep the moves whose highest triple outranks the rival's highest triple."""
+    def top_triple(cards):
+        # Highest rank occurring exactly three times; max() raises if none.
+        tally = collections.Counter(cards)
+        return max([rank for rank, count in tally.items() if count == 3])
+
+    rival_rank = top_triple(rival_move)
+    survivors = [move for move in moves if top_triple(move) > rival_rank]
+    return survivors
+
+def filter_type_13_4_2(moves, rival_move):
+    """Keep the 4+2 moves whose four-of-a-kind outranks the rival's.
+
+    Everything is sorted in place; index 2 of a sorted 4+2 is in the four.
+    """
+    rival_move.sort()
+    rival_rank = rival_move[2]
+    for move in moves:
+        move.sort()
+    return [move for move in moves if move[2] > rival_rank]
+
+def filter_type_14_4_22(moves, rival_move):
+    """Keep the 4+2+2 moves whose four-of-a-kind outranks the rival's.
+
+    When a move holds two fours the higher one is its rank, which matches
+    the rank get_move_type() reports.  The rank is computed afresh for
+    every move instead of being carried over from the previous one.
+    """
+    def quad_rank(cards):
+        # Highest rank held exactly four times; 0 if there is none.
+        tally = collections.Counter(cards)
+        return max((k for k, v in tally.items() if v == 4), default=0)
+
+    rival_rank = quad_rank(rival_move)
+    survivors = [move for move in moves if quad_rank(move) > rival_rank]
+    return survivors
diff --git a/douzero/env/utils.py b/douzero/env/utils.py
new file mode 100644
index 0000000..c3a2be7
--- /dev/null
+++ b/douzero/env/utils.py
@@ -0,0 +1,33 @@
+import itertools
+
+# global parameters
+MIN_SINGLE_CARDS = 5  # shortest run of singles (min_serial in gen_type_8_serial_single)
+MIN_PAIRS = 3  # shortest run of pairs (min_serial in gen_type_9_serial_pair)
+MIN_TRIPLES = 2  # shortest run of triples; also the triple threshold in get_move_type
+
+# action types
+TYPE_0_PASS = 0
+TYPE_1_SINGLE = 1
+TYPE_2_PAIR = 2
+TYPE_3_TRIPLE = 3
+TYPE_4_BOMB = 4
+TYPE_5_KING_BOMB = 5
+TYPE_6_3_1 = 6
+TYPE_7_3_2 = 7
+TYPE_8_SERIAL_SINGLE = 8
+TYPE_9_SERIAL_PAIR = 9
+TYPE_10_SERIAL_TRIPLE = 10
+TYPE_11_SERIAL_3_1 = 11
+TYPE_12_SERIAL_3_2 = 12
+TYPE_13_4_2 = 13
+TYPE_14_4_22 = 14
+TYPE_15_WRONG = 15
+
+# betting round action
+PASS = 0
+CALL = 1
+RAISE = 2
+
+# return all possible results of selecting num cards from cards list
+def select(cards, num):
+    return list(map(list, itertools.combinations(cards, num)))