Env

2021-09-07 17:19:25 +08:00 · 2021-09-07 17:19:25 +08:00 · 3381e96932
parent e1e727a2f3
commit 3381e96932
8 changed files with 2304 additions and 114 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,114 +1 @@
-# Byte-compiled / optimized / DLL files
+*.pyc
 __pycache__/
 *.py[cod]
 *$py.class
 # C extensions
 *.so
 # Distribution / packaging
 .Python
 build/
 develop-eggs/
 dist/
 downloads/
 eggs/
 .eggs/
 lib/
 lib64/
 parts/
 sdist/
 var/
 wheels/
 *.egg-info/
 .installed.cfg
 *.egg
 MANIFEST
 # PyInstaller
 #  Usually these files are written by a python script from a template
 #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 *.manifest
 *.spec
 # Installer logs
 pip-log.txt
 pip-delete-this-directory.txt
 # Unit test / coverage reports
 htmlcov/
 .tox/
 .nox/
 .coverage
 .coverage.*
 .cache
 nosetests.xml
 coverage.xml
 *.cover
 .hypothesis/
 .pytest_cache/
 # Translations
 *.mo
 *.pot
 # Django stuff:
 *.log
 local_settings.py
 db.sqlite3
 # Flask stuff:
 instance/
 .webassets-cache
 # Scrapy stuff:
 .scrapy
 # Sphinx documentation
 docs/_build/
 # PyBuilder
 target/
 # Jupyter Notebook
 .ipynb_checkpoints
 # IPython
 profile_default/
 ipython_config.py
 # pyenv
 .python-version
 # celery beat schedule file
 celerybeat-schedule
 # SageMath parsed files
 *.sage.py
 # Environments
 .env
 .venv
 env/
 venv/
 ENV/
 env.bak/
 venv.bak/
 # Spyder project settings
 .spyderproject
 .spyproject
 # Rope project settings
 .ropeproject
 # mkdocs documentation
 /site
 # mypy
 .mypy_cache/
 .dmypy.json
 dmypy.json
 # Pyre type checker
 .pyre/
--- a/douzero/env/__init__.py
+++ b/douzero/env/__init__.py
@ -0,0 +1 @@
 from .env import Env
--- a/douzero/env/env.py
+++ b/douzero/env/env.py
--- a/douzero/env/game.py
+++ b/douzero/env/game.py
@ -0,0 +1,414 @@
 from copy import deepcopy
 from . import move_detector as md, move_selector as ms
 from .move_generator import MovesGener
 import random
 # Environment card code -> printable rank symbol.
 EnvCard2RealCard = {
    3: '3', 4: '4', 5: '5', 6: '6', 7: '7', 8: '8', 9: '9', 10: '10',
    11: 'J', 12: 'Q', 13: 'K', 14: 'A',
    17: '2',
    20: 'X', 30: 'D',
 }
 # Printable rank symbol -> environment card code (exact inverse of the above).
 RealCard2EnvCard = {symbol: code for code, symbol in EnvCard2RealCard.items()}
 # Every move that is counted as a bomb: four of a kind for each ordinary
 # rank, followed by the [20, 30] pair.
 bombs = [[rank] * 4
         for rank in (3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 17)]
 bombs.append([20, 30])
 class GameEnv(object):
    """Card-play engine for one three-seat game (landlord vs. two farmers).

    The environment asks the acting seat's agent for a move, removes the
    played cards from that seat's hand, records the history and bomb count,
    and finishes the game as soon as any hand is empty.  ``num_wins`` and
    ``num_scores`` accumulate over games; all other state is cleared by
    ``reset``.
    """

    # Seat names, in the order used when building per-seat dictionaries.
    _POSITIONS = ('landlord', 'landlord_up', 'landlord_down')
    # Turn order; any seat not listed here is followed by the landlord.
    _NEXT_POSITION = {'landlord': 'landlord_down',
                      'landlord_down': 'landlord_up'}

    def __init__(self, players):
        """Create an environment.

        Args:
            players: mapping from seat name to an agent whose
                ``act(infoset)`` returns the chosen move as a list of card
                codes (an empty list is a pass).
        """
        self.players = players
        # Cumulative statistics: intentionally NOT cleared by reset().
        self.num_wins = {'landlord': 0,
                         'farmer': 0}
        self.num_scores = {'landlord': 0,
                           'farmer': 0}
        # Defined up front so get_winner() / game_infoset can be read before
        # the first game has finished or been dealt.
        self.winner = None
        self.game_infoset = None
        self._init_round_state()

    def _init_round_state(self):
        """Set every per-game attribute to its start-of-game value."""
        self.card_play_action_seq = []
        self.three_landlord_cards = None
        self.game_over = False
        self.acting_player_position = None
        self.player_utility_dict = None
        self.last_move_dict = {pos: [] for pos in self._POSITIONS}
        self.played_cards = {pos: [] for pos in self._POSITIONS}
        self.last_move = []
        self.last_two_moves = []
        self.info_sets = {pos: InfoSet(pos) for pos in self._POSITIONS}
        self.bomb_num = 0
        self.pos_bomb_num = {pos: 0 for pos in self._POSITIONS}
        self.last_pid = 'landlord'
        self.bid_info = [[-1, -1, -1],
                         [-1, -1, -1],
                         [-1, -1, -1],
                         [-1, -1, -1]]
        self.bid_count = 0
        self.multiply_count = {pos: 0 for pos in self._POSITIONS}
        self.step_count = 0

    def card_play_init(self, card_play_data):
        """Deal the hands and prepare the first infoset.

        Args:
            card_play_data: dict with the keys 'landlord', 'landlord_up',
                'landlord_down' (each a hand, stored by reference) and
                'three_landlord_cards'.
        """
        for pos in self._POSITIONS:
            self.info_sets[pos].player_hand_cards = card_play_data[pos]
        self.three_landlord_cards = card_play_data['three_landlord_cards']
        self.get_acting_player_position()
        self.game_infoset = self.get_infoset()

    def game_done(self):
        """Finish the game if any seat has emptied its hand."""
        if any(len(self.info_sets[pos].player_hand_cards) == 0
               for pos in self._POSITIONS):
            self.compute_player_utility()
            self.update_num_wins_scores()
            self.game_over = True

    def compute_player_utility(self):
        """Set ``player_utility_dict`` from whether the landlord's hand is empty."""
        if len(self.info_sets['landlord'].player_hand_cards) == 0:
            self.player_utility_dict = {'landlord': 2,
                                        'farmer': -1}
        else:
            self.player_utility_dict = {'landlord': -2,
                                        'farmer': 1}

    def update_num_wins_scores(self):
        """Fold the finished game into ``num_wins``/``num_scores``; set ``winner``."""
        for pos, utility in self.player_utility_dict.items():
            base_score = 2 if pos == 'landlord' else 1
            # Every bomb played during the game doubles the stake.
            stake = base_score * (2 ** self.bomb_num)
            if utility > 0:
                self.num_wins[pos] += 1
                self.winner = pos
                self.num_scores[pos] += stake
            else:
                self.num_scores[pos] -= stake

    def get_winner(self):
        """Return 'landlord' or 'farmer', or None if no game has finished yet."""
        return self.winner

    def get_bomb_num(self):
        """Return the number of bombs played in the current game."""
        return self.bomb_num

    def step(self):
        """Let the acting seat play one move and advance the game.

        Returns:
            The move returned by the agent (a list of card codes).
        """
        acting = self.acting_player_position
        action = self.players[acting].act(self.game_infoset)
        self.step_count += 1
        if len(action) > 0:
            self.last_pid = acting
        if action in bombs:
            self.bomb_num += 1
            self.pos_bomb_num[acting] += 1
        self.last_move_dict[acting] = action.copy()
        self.card_play_action_seq.append((acting, action))
        self.update_acting_player_hand_cards(action)
        self.played_cards[acting] += action
        if acting == 'landlord':
            # Drop the landlord cards that have now been played.
            for card in action:
                if len(self.three_landlord_cards) == 0:
                    break
                if card in self.three_landlord_cards:
                    self.three_landlord_cards.remove(card)
        self.game_done()
        if not self.game_over:
            self.get_acting_player_position()
            self.game_infoset = self.get_infoset()
        return action

    def get_last_move(self):
        """Return the move the acting seat has to answer.

        This is the latest history entry, or the one before it when the
        latest entry is a pass.  An empty list is returned when there is no
        such entry (empty history, or a history consisting of one pass).
        """
        history = self.card_play_action_seq
        if not history:
            return []
        if len(history[-1][1]) != 0:
            return history[-1][1]
        # Guard the look-back: a lone pass has no predecessor.
        if len(history) >= 2:
            return history[-2][1]
        return []

    def get_last_two_moves(self):
        """Return the two latest moves, newest first, padded with empty lists."""
        recent = [entry[1] for entry in reversed(self.card_play_action_seq[-2:])]
        return recent + [[] for _ in range(2 - len(recent))]

    def get_acting_player_position(self):
        """Advance to the next seat and return it.

        The first call after a reset selects the landlord; afterwards the
        order is landlord -> landlord_down -> landlord_up -> landlord.
        """
        self.acting_player_position = self._NEXT_POSITION.get(
            self.acting_player_position, 'landlord')
        return self.acting_player_position

    def update_acting_player_hand_cards(self, action):
        """Remove the cards of ``action`` from the acting seat's hand."""
        if action != []:
            hand = self.info_sets[self.acting_player_position].player_hand_cards
            for card in action:
                hand.remove(card)
            hand.sort()

    def get_legal_card_play_actions(self):
        """Return every move the acting seat may play now.

        Each returned move is sorted in place.  A pass (``[]``) is included
        whenever there is a rival move to answer.
        """
        mg = MovesGener(
            self.info_sets[self.acting_player_position].player_hand_cards)
        rival_move = self.get_last_move()
        rival_type = md.get_move_type(rival_move)
        rival_move_type = rival_type['type']
        rival_move_len = rival_type.get('len', 1)
        moves = list()
        if rival_move_type == md.TYPE_0_PASS:
            moves = mg.gen_moves()
        elif rival_move_type == md.TYPE_1_SINGLE:
            all_moves = mg.gen_type_1_single()
            moves = ms.filter_type_1_single(all_moves, rival_move)
        elif rival_move_type == md.TYPE_2_PAIR:
            all_moves = mg.gen_type_2_pair()
            moves = ms.filter_type_2_pair(all_moves, rival_move)
        elif rival_move_type == md.TYPE_3_TRIPLE:
            all_moves = mg.gen_type_3_triple()
            moves = ms.filter_type_3_triple(all_moves, rival_move)
        elif rival_move_type == md.TYPE_4_BOMB:
            all_moves = mg.gen_type_4_bomb() + mg.gen_type_5_king_bomb()
            moves = ms.filter_type_4_bomb(all_moves, rival_move)
        elif rival_move_type == md.TYPE_5_KING_BOMB:
            moves = []
        elif rival_move_type == md.TYPE_6_3_1:
            all_moves = mg.gen_type_6_3_1()
            moves = ms.filter_type_6_3_1(all_moves, rival_move)
        elif rival_move_type == md.TYPE_7_3_2:
            all_moves = mg.gen_type_7_3_2()
            moves = ms.filter_type_7_3_2(all_moves, rival_move)
        elif rival_move_type == md.TYPE_8_SERIAL_SINGLE:
            all_moves = mg.gen_type_8_serial_single(repeat_num=rival_move_len)
            moves = ms.filter_type_8_serial_single(all_moves, rival_move)
        elif rival_move_type == md.TYPE_9_SERIAL_PAIR:
            all_moves = mg.gen_type_9_serial_pair(repeat_num=rival_move_len)
            moves = ms.filter_type_9_serial_pair(all_moves, rival_move)
        elif rival_move_type == md.TYPE_10_SERIAL_TRIPLE:
            all_moves = mg.gen_type_10_serial_triple(repeat_num=rival_move_len)
            moves = ms.filter_type_10_serial_triple(all_moves, rival_move)
        elif rival_move_type == md.TYPE_11_SERIAL_3_1:
            all_moves = mg.gen_type_11_serial_3_1(repeat_num=rival_move_len)
            moves = ms.filter_type_11_serial_3_1(all_moves, rival_move)
        elif rival_move_type == md.TYPE_12_SERIAL_3_2:
            all_moves = mg.gen_type_12_serial_3_2(repeat_num=rival_move_len)
            moves = ms.filter_type_12_serial_3_2(all_moves, rival_move)
        elif rival_move_type == md.TYPE_13_4_2:
            all_moves = mg.gen_type_13_4_2()
            moves = ms.filter_type_13_4_2(all_moves, rival_move)
        elif rival_move_type == md.TYPE_14_4_22:
            all_moves = mg.gen_type_14_4_22()
            moves = ms.filter_type_14_4_22(all_moves, rival_move)
        # Bombs answer every move type except pass and the bomb types,
        # which were handled (or deliberately left empty) above.
        if rival_move_type not in [md.TYPE_0_PASS,
                                   md.TYPE_4_BOMB, md.TYPE_5_KING_BOMB]:
            moves = moves + mg.gen_type_4_bomb() + mg.gen_type_5_king_bomb()
        if len(rival_move) != 0:  # rival_move is not 'pass'
            moves = moves + [[]]
        for m in moves:
            m.sort()
        return moves

    def reset(self):
        """Clear all per-game state.

        ``players``, ``num_wins``, ``num_scores``, ``winner`` and
        ``game_infoset`` are left untouched.
        """
        self._init_round_state()

    def get_infoset(self):
        """Refresh the acting seat's InfoSet and return a deep copy of it."""
        acting = self.acting_player_position
        infoset = self.info_sets[acting]
        infoset.last_pid = self.last_pid
        infoset.legal_actions = self.get_legal_card_play_actions()
        infoset.bomb_num = self.bomb_num
        infoset.last_move = self.get_last_move()
        infoset.last_two_moves = self.get_last_two_moves()
        infoset.last_move_dict = self.last_move_dict
        infoset.num_cards_left_dict = \
            {pos: len(self.info_sets[pos].player_hand_cards)
             for pos in self._POSITIONS}
        # Fresh list, so extending it never touches another seat's hand.
        infoset.other_hand_cards = []
        for pos in self._POSITIONS:
            if pos != acting:
                infoset.other_hand_cards += \
                    self.info_sets[pos].player_hand_cards
        infoset.played_cards = self.played_cards
        infoset.three_landlord_cards = self.three_landlord_cards
        infoset.card_play_action_seq = self.card_play_action_seq
        infoset.all_handcards = \
            {pos: self.info_sets[pos].player_hand_cards
             for pos in self._POSITIONS}
        # The copy keeps agents from mutating live environment state.
        return deepcopy(infoset)
 class InfoSet(object):
    """Snapshot of the game as handed to the acting player.

    Attributes (all ``None`` until the environment fills them in, unless a
    default is listed):
        player_position: 'landlord', 'landlord_down' or 'landlord_up'.
        player_hand_cards: list with this player's hand.
        num_cards_left_dict: position -> number of cards still held.
        three_landlord_cards: list with the three landlord cards.
        card_play_action_seq: history of moves played so far.
        other_hand_cards: combined hands of the other two players.
        legal_actions: list of moves (each a list) allowed right now.
        last_move: most recent move that was not a pass.
        last_two_moves: the two most recent moves.
        last_move_dict: last move of every position.
        played_cards: cards played so far (GameEnv stores one list per
            position).
        all_handcards: position -> hand, for all three players.
        last_pid: position that last played a non-pass move.
        bomb_num: number of bombs played so far.
        bid_info: 4 rows of 3 entries, each initialised to -1.
        multiply_info: initialised to [1, 0, 0].
        player_id: initialised to None.
    """

    def __init__(self, player_position):
        # Identity of the seat this snapshot belongs to.
        self.player_position = player_position

        # Hand and card counts.
        self.player_hand_cards = None
        self.num_cards_left_dict = None
        self.three_landlord_cards = None

        # History and what the other seats hold.
        self.card_play_action_seq = None
        self.other_hand_cards = None

        # What may be played now, and what was played most recently.
        self.legal_actions = None
        self.last_move = None
        self.last_two_moves = None
        self.last_move_dict = None

        # Table-wide bookkeeping.
        self.played_cards = None
        self.all_handcards = None
        self.last_pid = None
        self.bomb_num = None

        # Bidding / multiplier placeholders; every row is its own list.
        self.bid_info = [[-1 for _ in range(3)] for _ in range(4)]
        self.multiply_info = [1, 0, 0]
        self.player_id = None
--- a/douzero/env/move_detector.py
+++ b/douzero/env/move_detector.py
@ -0,0 +1,107 @@
 from douzero.env.utils import *
 import collections
 # check if move is a continuous sequence
 def is_continuous_seq(move):
    """Return True when every element is exactly one more than its predecessor.

    Sequences with fewer than two elements are trivially continuous.
    """
    return all(nxt - cur == 1 for cur, nxt in zip(move, move[1:]))
 # return the type of the move
 def get_move_type(move):
    """Classify a move.

    Args:
        move: list of card codes.  Several branches index into the list
            (``move[0]``, ``move[2]``, ``move == [20, 30]``), so the cards
            are expected in ascending order.

    Returns:
        A dict with a 'type' key holding one of the ``TYPE_*`` constants.
        Valid non-pass moves other than the king bomb also carry 'rank';
        serial moves additionally carry 'len' (number of steps in the run).
        Unrecognised card combinations yield ``TYPE_15_WRONG``.
    """
    move_size = len(move)
    move_dict = collections.Counter(move)

    if move_size == 0:
        return {'type': TYPE_0_PASS}

    if move_size == 1:
        return {'type': TYPE_1_SINGLE, 'rank': move[0]}

    if move_size == 2:
        if move[0] == move[1]:
            return {'type': TYPE_2_PAIR, 'rank': move[0]}
        elif move == [20, 30]:  # Kings
            return {'type': TYPE_5_KING_BOMB}
        else:
            return {'type': TYPE_15_WRONG}

    if move_size == 3:
        if len(move_dict) == 1:
            return {'type': TYPE_3_TRIPLE, 'rank': move[0]}
        else:
            return {'type': TYPE_15_WRONG}

    if move_size == 4:
        if len(move_dict) == 1:
            return {'type': TYPE_4_BOMB,  'rank': move[0]}
        elif len(move_dict) == 2:
            # The triple sits either at the front or at the back; index 1
            # is inside it in both cases.
            if move[0] == move[1] == move[2] or move[1] == move[2] == move[3]:
                return {'type': TYPE_6_3_1, 'rank': move[1]}
            else:
                return {'type': TYPE_15_WRONG}
        else:
            return {'type': TYPE_15_WRONG}

    # From here on the move has at least five cards.
    if is_continuous_seq(move):
        return {'type': TYPE_8_SERIAL_SINGLE, 'rank': move[0], 'len': len(move)}

    if move_size == 5:
        # A 3+2 needs exactly one triple and one pair.  Checking only for
        # "two distinct ranks" would also accept four-of-a-kind plus one
        # card, which is not a legal move.
        if sorted(move_dict.values()) == [2, 3]:
            return {'type': TYPE_7_3_2, 'rank': move[2]}
        else:
            return {'type': TYPE_15_WRONG}

    # count_dict[n] = how many ranks occur exactly n times in the move.
    count_dict = collections.defaultdict(int)
    for c, n in move_dict.items():
        count_dict[n] += 1

    if move_size == 6:
        if (len(move_dict) == 2 or len(move_dict) == 3) and count_dict.get(4) == 1 and \
                (count_dict.get(2) == 1 or count_dict.get(1) == 2):
            # In a sorted 4+2 the card at index 2 always belongs to the quad.
            return {'type': TYPE_13_4_2, 'rank': move[2]}

    if move_size == 8 and (((len(move_dict) == 3 or len(move_dict) == 2) and
            (count_dict.get(4) == 1 and count_dict.get(2) == 2)) or count_dict.get(4) == 2):
        return {'type': TYPE_14_4_22, 'rank': max([c for c, n in move_dict.items() if n == 4])}

    mdkeys = sorted(move_dict.keys())
    if len(move_dict) == count_dict.get(2) and is_continuous_seq(mdkeys):
        return {'type': TYPE_9_SERIAL_PAIR, 'rank': mdkeys[0], 'len': len(mdkeys)}

    if len(move_dict) == count_dict.get(3) and is_continuous_seq(mdkeys):
        return {'type': TYPE_10_SERIAL_TRIPLE, 'rank': mdkeys[0], 'len': len(mdkeys)}

    # Check Type 11 (serial 3+1) and Type 12 (serial 3+2)
    if count_dict.get(3, 0) >= MIN_TRIPLES:
        serial_3 = list()
        single = list()
        pair = list()

        for k, v in move_dict.items():
            if v == 3:
                serial_3.append(k)
            elif v == 1:
                single.append(k)
            elif v == 2:
                pair.append(k)
            else:  # no other possibilities
                return {'type': TYPE_15_WRONG}

        serial_3.sort()
        if is_continuous_seq(serial_3):
            # A pair among the kickers counts as two single kickers.
            if len(serial_3) == len(single)+len(pair)*2:
                return {'type': TYPE_11_SERIAL_3_1, 'rank': serial_3[0], 'len': len(serial_3)}
            if len(serial_3) == len(pair) and len(move_dict) == len(serial_3) * 2:
                return {'type': TYPE_12_SERIAL_3_2, 'rank': serial_3[0], 'len': len(serial_3)}

        if len(serial_3) == 4:
            # Four triples of which three form the run: the remaining
            # triple is being used as the three kickers.
            if is_continuous_seq(serial_3[1:]):
                return {'type': TYPE_11_SERIAL_3_1, 'rank': serial_3[1], 'len': len(serial_3) - 1}
            if is_continuous_seq(serial_3[:-1]):
                return {'type': TYPE_11_SERIAL_3_1, 'rank': serial_3[0], 'len': len(serial_3) - 1}

    return {'type': TYPE_15_WRONG}
--- a/douzero/env/move_generator.py
+++ b/douzero/env/move_generator.py
@ -0,0 +1,219 @@
 from douzero.env.utils import MIN_SINGLE_CARDS, MIN_PAIRS, MIN_TRIPLES, select
 import collections
 import itertools
 class MovesGener(object):
    """Enumerate the card combinations that can be formed from one hand.

    The single, pair, triple, bomb and king-bomb lists are computed once in
    the constructor and cached on the instance; the ``gen_type_1`` ..
    ``gen_type_5`` methods recompute and return those cached lists.
    """

    def __init__(self, cards_list):
        """Index the hand.

        Args:
            cards_list: list of card codes held by the player.
        """
        self.cards_list = cards_list
        # rank -> number of copies in the hand
        self.cards_dict = collections.defaultdict(int)
        for i in self.cards_list:
            self.cards_dict[i] += 1

        self.single_card_moves = []
        self.gen_type_1_single()
        self.pair_moves = []
        self.gen_type_2_pair()
        self.triple_cards_moves = []
        self.gen_type_3_triple()
        self.bomb_moves = []
        self.gen_type_4_bomb()
        self.final_bomb_moves = []
        self.gen_type_5_king_bomb()

    @staticmethod
    def _dedupe(moves):
        """Return ``moves`` without repeats, keeping the first occurrence of each."""
        seen = set()
        unique = []
        for move in moves:
            key = tuple(move)
            if key not in seen:
                seen.add(key)
                unique.append(move)
        return unique

    def _gen_serial_moves(self, cards, min_serial, repeat=1, repeat_num=0):
        """Generate runs of consecutive ranks.

        Args:
            cards: ranks available for the run (duplicates are ignored).
            min_serial: minimum number of consecutive ranks in a run.
            repeat: how many copies of each rank a move contains.
            repeat_num: exact run length to generate; values below
                ``min_serial`` (including the default 0) mean "every length
                from ``min_serial`` upwards".

        Returns:
            A list of sorted moves.
        """
        if repeat_num < min_serial:  # at least repeat_num is min_serial
            repeat_num = 0

        ranks = sorted(set(cards))

        # Split the distinct ranks into maximal runs of consecutive values.
        runs = []
        start = 0
        for i in range(len(ranks)):
            run_ends_here = i + 1 == len(ranks) or ranks[i + 1] - ranks[i] != 1
            if run_ends_here:
                runs.append(ranks[start:i + 1])
                start = i + 1

        moves = []
        for run in runs:
            longest = len(run)
            if longest < min_serial:
                continue
            if repeat_num == 0:  # No limitation on how many sequences
                lengths = range(min_serial, longest + 1)
            else:
                lengths = (repeat_num,)
            for length in lengths:
                # The range is empty when the run is shorter than ``length``.
                for index in range(longest - length + 1):
                    moves.append(sorted(run[index: index + length] * repeat))
        return moves

    def gen_type_1_single(self):
        """Return one single-card move per distinct rank in the hand."""
        self.single_card_moves = [[i] for i in set(self.cards_list)]
        return self.single_card_moves

    def gen_type_2_pair(self):
        """Return a pair for every rank held at least twice."""
        self.pair_moves = [[k, k] for k, v in self.cards_dict.items() if v >= 2]
        return self.pair_moves

    def gen_type_3_triple(self):
        """Return a triple for every rank held at least three times."""
        self.triple_cards_moves = [[k, k, k]
                                   for k, v in self.cards_dict.items() if v >= 3]
        return self.triple_cards_moves

    def gen_type_4_bomb(self):
        """Return a bomb for every rank held exactly four times."""
        self.bomb_moves = [[k, k, k, k]
                           for k, v in self.cards_dict.items() if v == 4]
        return self.bomb_moves

    def gen_type_5_king_bomb(self):
        """Return ``[[20, 30]]`` when both cards are in the hand, else ``[]``."""
        self.final_bomb_moves = []
        if 20 in self.cards_list and 30 in self.cards_list:
            self.final_bomb_moves.append([20, 30])
        return self.final_bomb_moves

    def gen_type_6_3_1(self):
        """Return every cached triple combined with a single of another rank."""
        result = []
        for t in self.single_card_moves:
            for i in self.triple_cards_moves:
                if t[0] != i[0]:
                    result.append(t+i)
        return result

    def gen_type_7_3_2(self):
        """Return every cached triple combined with a pair of another rank."""
        result = list()
        for t in self.pair_moves:
            for i in self.triple_cards_moves:
                if t[0] != i[0]:
                    result.append(t+i)
        return result

    def gen_type_8_serial_single(self, repeat_num=0):
        """Return runs of single cards (see ``_gen_serial_moves``)."""
        return self._gen_serial_moves(self.cards_list, MIN_SINGLE_CARDS, repeat=1, repeat_num=repeat_num)

    def gen_type_9_serial_pair(self, repeat_num=0):
        """Return runs of pairs (see ``_gen_serial_moves``)."""
        single_pairs = [k for k, v in self.cards_dict.items() if v >= 2]
        return self._gen_serial_moves(single_pairs, MIN_PAIRS, repeat=2, repeat_num=repeat_num)

    def gen_type_10_serial_triple(self, repeat_num=0):
        """Return runs of triples (see ``_gen_serial_moves``)."""
        single_triples = [k for k, v in self.cards_dict.items() if v >= 3]
        return self._gen_serial_moves(single_triples, MIN_TRIPLES, repeat=3, repeat_num=repeat_num)

    def gen_type_11_serial_3_1(self, repeat_num=0):
        """Return runs of triples, each with one kicker card per triple.

        Kickers are drawn from the cards whose rank is not part of the run.
        Because the hand can hold several copies of a rank, the same move is
        produced more than once; repeats are removed from the result.
        """
        serial_3_moves = self.gen_type_10_serial_triple(repeat_num=repeat_num)
        serial_3_1_moves = list()

        for s3 in serial_3_moves:  # s3 is like [3,3,3,4,4,4]
            s3_set = set(s3)
            new_cards = [i for i in self.cards_list if i not in s3_set]

            # Get any s3_len items from cards
            subcards = select(new_cards, len(s3_set))

            for i in subcards:
                serial_3_1_moves.append(s3 + i)

        # itertools.groupby would only drop *adjacent* repeats, and equal
        # kicker combinations are not always adjacent.
        return self._dedupe(serial_3_1_moves)

    def gen_type_12_serial_3_2(self, repeat_num=0):
        """Return runs of triples, each with one kicker pair per triple."""
        serial_3_moves = self.gen_type_10_serial_triple(repeat_num=repeat_num)
        serial_3_2_moves = list()
        pair_set = sorted([k for k, v in self.cards_dict.items() if v >= 2])

        for s3 in serial_3_moves:
            s3_set = set(s3)
            pair_candidates = [i for i in pair_set if i not in s3_set]

            # Get any s3_len items from cards
            subcards = select(pair_candidates, len(s3_set))
            for i in subcards:
                serial_3_2_moves.append(sorted(s3 + i * 2))

        return serial_3_2_moves

    def gen_type_13_4_2(self):
        """Return each four-of-a-kind with two kicker cards of other ranks.

        Repeated kicker combinations (caused by duplicate cards in the hand)
        are removed from the result.
        """
        four_cards = [k for k, v in self.cards_dict.items() if v == 4]

        result = list()
        for fc in four_cards:
            cards_list = [k for k in self.cards_list if k != fc]
            subcards = select(cards_list, 2)
            for i in subcards:
                result.append([fc]*4 + i)
        # See gen_type_11_serial_3_1: adjacent-only de-duplication is not enough.
        return self._dedupe(result)

    def gen_type_14_4_22(self):
        """Return each four-of-a-kind with two kicker pairs of other ranks."""
        four_cards = [k for k, v in self.cards_dict.items() if v == 4]

        result = list()
        for fc in four_cards:
            cards_list = [k for k, v in self.cards_dict.items() if k != fc and v>=2]
            subcards = select(cards_list, 2)
            for i in subcards:
                result.append([fc] * 4 + [i[0], i[0], i[1], i[1]])
        return result

    # generate all possible moves from given cards
    def gen_moves(self):
        """Return the concatenation of every move type, in type order."""
        moves = []
        moves.extend(self.gen_type_1_single())
        moves.extend(self.gen_type_2_pair())
        moves.extend(self.gen_type_3_triple())
        moves.extend(self.gen_type_4_bomb())
        moves.extend(self.gen_type_5_king_bomb())
        moves.extend(self.gen_type_6_3_1())
        moves.extend(self.gen_type_7_3_2())
        moves.extend(self.gen_type_8_serial_single())
        moves.extend(self.gen_type_9_serial_pair())
        moves.extend(self.gen_type_10_serial_triple())
        moves.extend(self.gen_type_11_serial_3_1())
        moves.extend(self.gen_type_12_serial_3_2())
        moves.extend(self.gen_type_13_4_2())
        moves.extend(self.gen_type_14_4_22())
        return moves
--- a/douzero/env/move_selector.py
+++ b/douzero/env/move_selector.py
@ -0,0 +1,106 @@
 # return all moves that can beat rivals, moves and rival_move should be same type
 import collections
 def common_handle(moves, rival_move):
    """Return the moves whose first card is greater than rival_move's first card.

    The relative order of ``moves`` is preserved.
    """
    return [candidate for candidate in moves if candidate[0] > rival_move[0]]
 def filter_type_1_single(moves, rival_move):
    """Return the singles that beat rival_move (first-card comparison via common_handle)."""
    return common_handle(moves, rival_move)
 def filter_type_2_pair(moves, rival_move):
    """Return the pairs that beat rival_move (first-card comparison via common_handle)."""
    return common_handle(moves, rival_move)
 def filter_type_3_triple(moves, rival_move):
    """Return the triples that beat rival_move (first-card comparison via common_handle)."""
    return common_handle(moves, rival_move)
 def filter_type_4_bomb(moves, rival_move):
    """Return the bombs that beat rival_move (first-card comparison via common_handle)."""
    return common_handle(moves, rival_move)
 # No need to filter for type_5_king_bomb
 def filter_type_6_3_1(moves, rival_move):
    """Return the 3+1 moves whose triple outranks the rival's triple.

    ``rival_move`` and every entry of ``moves`` are sorted in place; after
    sorting, index 1 of a four-card 3+1 always lies inside the triple.
    """
    rival_move.sort()
    threshold = rival_move[1]
    for candidate in moves:
        candidate.sort()
    return [candidate for candidate in moves if candidate[1] > threshold]
 def filter_type_7_3_2(moves, rival_move):
    """Return the 3+2 moves whose triple outranks the rival's triple.

    ``rival_move`` and every entry of ``moves`` are sorted in place; after
    sorting, index 2 of a five-card 3+2 always lies inside the triple.
    """
    rival_move.sort()
    threshold = rival_move[2]
    for candidate in moves:
        candidate.sort()
    return [candidate for candidate in moves if candidate[2] > threshold]
 def filter_type_8_serial_single(moves, rival_move):
    """Return the single-card runs that beat rival_move (first-card comparison via common_handle)."""
    return common_handle(moves, rival_move)
 def filter_type_9_serial_pair(moves, rival_move):
    """Return the pair runs that beat rival_move (first-card comparison via common_handle)."""
    return common_handle(moves, rival_move)
 def filter_type_10_serial_triple(moves, rival_move):
    """Return the triple runs that beat rival_move (first-card comparison via common_handle)."""
    return common_handle(moves, rival_move)
 def filter_type_11_serial_3_1(moves, rival_move):
    """Return the serial 3+1 moves whose triple run outranks the rival's run.

    A serial 3+1 of ``n`` triples has ``4 * n`` cards, and its kickers may
    themselves form a triple (e.g. 333-444-555 carrying 7-7-7).  The rank
    used for comparison is therefore the top of a run of at least ``n``
    consecutive triples, not simply the highest triple in the move.

    Args:
        moves: candidate moves (lists of card codes).
        rival_move: the move to beat.

    Returns:
        The candidates that beat ``rival_move``, in their original order.

    Raises:
        ValueError: if a move contains no rank that occurs exactly three times.
    """
    def top_of_triple_run(cards):
        run_len = len(cards) // 4
        triples = sorted(k for k, v in collections.Counter(cards).items() if v == 3)
        top = None
        streak = 0
        previous = None
        for rank in triples:
            streak = streak + 1 if previous is not None and rank - previous == 1 else 1
            previous = rank
            if streak >= run_len:
                top = rank
        if top is None:
            # No run of the expected length: fall back to the highest triple.
            top = max(triples)
        return top

    rival_rank = top_of_triple_run(rival_move)
    return [move for move in moves if top_of_triple_run(move) > rival_rank]
 def filter_type_12_serial_3_2(moves, rival_move):
    """Return the serial 3+2 moves whose highest triple outranks the rival's.

    The rank of a move is the largest card that occurs exactly three times.
    """
    def highest_triple(cards):
        tally = collections.Counter(cards)
        return max([rank for rank, copies in tally.items() if copies == 3])

    bar = highest_triple(rival_move)
    return [candidate for candidate in moves if highest_triple(candidate) > bar]
 def filter_type_13_4_2(moves, rival_move):
    """Return the 4+2 moves whose four-of-a-kind outranks the rival's.

    ``rival_move`` and every entry of ``moves`` are sorted in place; after
    sorting, index 2 of a six-card 4+2 always lies inside the four-of-a-kind.
    """
    rival_move.sort()
    threshold = rival_move[2]
    for candidate in moves:
        candidate.sort()
    return [candidate for candidate in moves if candidate[2] > threshold]
 def filter_type_14_4_22(moves, rival_move):
    """Return the 4+2+2 moves whose four-of-a-kind outranks the rival's.

    The rank of a move is the highest card that occurs exactly four times,
    or 0 when the move contains no four-of-a-kind.  The rank is computed
    afresh for every candidate, so a candidate without a four-of-a-kind can
    never inherit the rank of the candidate examined before it.

    Args:
        moves: candidate moves (lists of card codes).
        rival_move: the move to beat.

    Returns:
        The candidates that beat ``rival_move``, in their original order.
    """
    def quad_rank(cards):
        quads = [k for k, v in collections.Counter(cards).items() if v == 4]
        return max(quads) if quads else 0

    rival_rank = quad_rank(rival_move)
    return [move for move in moves if quad_rank(move) > rival_rank]
--- a/douzero/env/utils.py
+++ b/douzero/env/utils.py
@ -0,0 +1,33 @@
 import itertools
 # global parameters
 # Minimum number of consecutive ranks in a serial move; MovesGener passes
 # these as ``min_serial`` for single, pair and triple runs respectively.
 MIN_SINGLE_CARDS = 5
 MIN_PAIRS = 3
 MIN_TRIPLES = 2
 # action types
 # Identifiers stored under the 'type' key of the dict that
 # move_detector.get_move_type returns.
 TYPE_0_PASS = 0
 TYPE_1_SINGLE = 1
 TYPE_2_PAIR = 2
 TYPE_3_TRIPLE = 3
 TYPE_4_BOMB = 4
 TYPE_5_KING_BOMB = 5
 TYPE_6_3_1 = 6
 TYPE_7_3_2 = 7
 TYPE_8_SERIAL_SINGLE = 8
 TYPE_9_SERIAL_PAIR = 9
 TYPE_10_SERIAL_TRIPLE = 10
 TYPE_11_SERIAL_3_1 = 11
 TYPE_12_SERIAL_3_2 = 12
 TYPE_13_4_2 = 13
 TYPE_14_4_22 = 14
 # Returned for card combinations that match no valid move type.
 TYPE_15_WRONG = 15
 # betting round action
 # NOTE(review): not referenced by the modules shown alongside this file;
 # confirm where these are consumed.
 PASS = 0
 CALL = 1
 RAISE = 2
 # return all possible results of selecting num cards from cards list
 def select(cards, num):
    """Return every ``num``-element combination of ``cards`` as a list of lists.

    Combinations are produced in the order of itertools.combinations;
    equal cards at different positions are treated as distinct.
    """
    return list(map(list, itertools.combinations(cards, num)))