Add support for new model types

2021-09-05 01:48:08 +08:00 · 2021-09-05 01:48:08 +08:00 · 8b2b39216a
parent 34db1f5ec0
commit 8b2b39216a
7 changed files with 999 additions and 94 deletions
--- a/GameHelper.py
+++ b/GameHelper.py
@ -165,8 +165,9 @@ class GameHelper:
                self.PicsCV.update({info[0]: imgCv})

    def Screenshot(self, region=None):  # -> (im, (left, top))
+        self.Handle = win32gui.FindWindow("Hlddz", None)
        hwnd = self.Handle
-        # im = Image.open(r"C:\Users\q9294\Desktop\llc.png")
+        # im = Image.open(r"C:\Users\q9294\Desktop\Snipaste_2021-09-05_00-52-51.png")
        # im = im.resize((1796, 1047))
        # return im, (0,0)
        left, top, right, bot = win32gui.GetWindowRect(hwnd)
@ -315,11 +316,14 @@ class GameHelper:
        win32gui.PostMessage(self.Handle, WM_LBUTTONDOWN, MK_LBUTTON, lParam)
        win32gui.PostMessage(self.Handle, WM_LBUTTONUP, MK_LBUTTON, lParam)

-    def SelectCards(self, cards):
+    def SelectCards(self, cards, no_check=False):
+        print("选择牌", cards)
        cards = [card for card in cards]
        tobeSelected = []
        tobeSelected.extend(cards)
        image, windowPos = self.Screenshot()
+        while image.size[0] == 0:
+            image, windowPos = self.Screenshot()
        handCardsInfo, states = self.GetCards(image)
        cardSelectMap = []
        for card in handCardsInfo:
@ -340,11 +344,16 @@ class GameHelper:
            for i in range(0, len(clickMap)):
                if clickMap[i] == 1:
                    self.LeftClick(handCardsInfo[i][1])
+                    print("点击", handCardsInfo[i][1])
                    break
            time.sleep(0.1)
            if self.Interrupt:
                break
+            if no_check:
+                return
            image, _ = self.Screenshot()
+            while image.size[0] == 0:
+                image, windowPos = self.Screenshot()
            states = self.GetCardsState(image)
            clickMap = []
            for i in range(0, len(cardSelectMap)):
@ -352,4 +361,4 @@ class GameHelper:
                    clickMap.append(0)
                else:
                    clickMap.append(1)
-            QtWidgets.QApplication.processEvents(QEventLoop.AllEvents, 10)
+            QtWidgets.QApplication.processEvents(QEventLoop.AllEvents, 10)
--- a/baselines/resnet/把Resnet新模型放在这里
+++ b/baselines/resnet/把Resnet新模型放在这里
--- a/douzero/dmc/models.py
+++ b/douzero/dmc/models.py
@ -7,6 +7,7 @@ import numpy as np

 import torch
 from torch import nn
+import torch.nn.functional as F

 class LandlordLstmModel(nn.Module):
    def __init__(self):
@ -78,22 +79,334 @@ class FarmerLstmModel(nn.Module):
                action = torch.argmax(x,dim=0)[0]
            return dict(action=action)

+class LandlordLstmNewModel(nn.Module):
+    """LSTM + MLP network that produces one scalar score per input row.
+
+    The LSTM (input size 162, hidden size 128, batch-first) encodes ``z``;
+    its output at the last time step is concatenated with ``x`` (373
+    features, as required by ``dense1``) and fed through six dense layers.
+    Presumably used for the landlord position, judging by the class name.
+    """
+    def __init__(self):
+        super().__init__()
+        self.lstm = nn.LSTM(162, 128, batch_first=True)
+        # 373 non-sequence features + 128 LSTM features
+        self.dense1 = nn.Linear(373 + 128, 512)
+        self.dense2 = nn.Linear(512, 512)
+        self.dense3 = nn.Linear(512, 512)
+        self.dense4 = nn.Linear(512, 512)
+        self.dense5 = nn.Linear(512, 512)
+        self.dense6 = nn.Linear(512, 1)
+
+    def forward(self, z, x, return_value=False, flags=None):
+        """Score every row and either return the scores or pick an action.
+
+        Args:
+            z: batch-first sequence tensor whose last dimension is 162.
+            x: tensor whose last dimension is 373; one row per candidate.
+            return_value: when true, return ``dict(values=...)`` with the
+                raw scores instead of choosing an action.
+            flags: optional object with an ``exp_epsilon`` attribute; when
+                it is positive, a uniformly random row index is chosen
+                with that probability (epsilon-greedy).
+
+        Returns:
+            ``dict(values=scores)`` or ``dict(action=row_index)``.
+        """
+        lstm_out, (h_n, _) = self.lstm(z)
+        # Keep only the LSTM output of the last time step.
+        lstm_out = lstm_out[:,-1,:]
+        x = torch.cat([lstm_out,x], dim=-1)
+        x = self.dense1(x)
+        x = torch.relu(x)
+        x = self.dense2(x)
+        x = torch.relu(x)
+        x = self.dense3(x)
+        x = torch.relu(x)
+        x = self.dense4(x)
+        x = torch.relu(x)
+        x = self.dense5(x)
+        x = torch.relu(x)
+        x = self.dense6(x)
+        if return_value:
+            return dict(values=x)
+        else:
+            if flags is not None and flags.exp_epsilon > 0 and np.random.rand() < flags.exp_epsilon:
+                # Exploration: random index in [0, number of rows).
+                action = torch.randint(x.shape[0], (1,))[0]
+            else:
+                # Greedy: index of the highest-scoring row.
+                action = torch.argmax(x,dim=0)[0]
+            return dict(action=action)
+
+class FarmerLstmNewModel(nn.Module):
+    """LSTM + MLP network that produces one scalar score per input row.
+
+    Same layout as ``LandlordLstmNewModel`` except that ``dense1`` expects
+    484 non-sequence features instead of 373. Presumably used for the two
+    farmer positions, judging by the class name.
+    """
+    def __init__(self):
+        super().__init__()
+        self.lstm = nn.LSTM(162, 128, batch_first=True)
+        # 484 non-sequence features + 128 LSTM features
+        self.dense1 = nn.Linear(484 + 128, 512)
+        self.dense2 = nn.Linear(512, 512)
+        self.dense3 = nn.Linear(512, 512)
+        self.dense4 = nn.Linear(512, 512)
+        self.dense5 = nn.Linear(512, 512)
+        self.dense6 = nn.Linear(512, 1)
+
+    def forward(self, z, x, return_value=False, flags=None):
+        """Score every row and either return the scores or pick an action.
+
+        Args:
+            z: batch-first sequence tensor whose last dimension is 162.
+            x: tensor whose last dimension is 484; one row per candidate.
+            return_value: when true, return ``dict(values=...)`` with the
+                raw scores instead of choosing an action.
+            flags: optional object with an ``exp_epsilon`` attribute; when
+                it is positive, a uniformly random row index is chosen
+                with that probability (epsilon-greedy).
+
+        Returns:
+            ``dict(values=scores)`` or ``dict(action=row_index)``.
+        """
+        lstm_out, (h_n, _) = self.lstm(z)
+        # Keep only the LSTM output of the last time step.
+        lstm_out = lstm_out[:,-1,:]
+        x = torch.cat([lstm_out,x], dim=-1)
+        x = self.dense1(x)
+        x = torch.relu(x)
+        x = self.dense2(x)
+        x = torch.relu(x)
+        x = self.dense3(x)
+        x = torch.relu(x)
+        x = self.dense4(x)
+        x = torch.relu(x)
+        x = self.dense5(x)
+        x = torch.relu(x)
+        x = self.dense6(x)
+        if return_value:
+            return dict(values=x)
+        else:
+            if flags is not None and flags.exp_epsilon > 0 and np.random.rand() < flags.exp_epsilon:
+                # Exploration: random index in [0, number of rows).
+                action = torch.randint(x.shape[0], (1,))[0]
+            else:
+                # Greedy: index of the highest-scoring row.
+                action = torch.argmax(x,dim=0)[0]
+            return dict(action=action)
+
+class GeneralModel(nn.Module):
+    """Convolutional encoder for ``z`` followed by an MLP scoring each row.
+
+    ``z`` goes through one Conv2d and three Conv1d stages, each followed
+    by a max-pool of width 2 in ``forward``. The flattened result (1024
+    features, as required by ``dense1``) is concatenated with ``x`` (519
+    features) and reduced to a single scalar per row.
+    """
+    def __init__(self):
+        super().__init__()
+        # Expected input z: B * 32 * 57 (shape notes below assume this --
+        # TODO confirm against the caller).
+        # self.lstm = nn.LSTM(162, 512, batch_first=True)
+        self.conv_z_1 = torch.nn.Sequential(
+            nn.Conv2d(1, 64, kernel_size=(1,57)),  # B * 1 * 32 * 57 -> B * 64 * 32 * 1
+            nn.ReLU(inplace=True),
+            nn.BatchNorm2d(64),
+        )
+        # After squeeze(-1) and the first max-pool in forward(): B * 64 * 16
+        self.conv_z_2 = torch.nn.Sequential(
+            nn.Conv1d(64, 128, kernel_size=(5,), padding=2),  # B * 128 * 16
+            nn.ReLU(inplace=True),
+            nn.BatchNorm1d(128),
+        )
+        self.conv_z_3 = torch.nn.Sequential(
+            nn.Conv1d(128, 256, kernel_size=(3,), padding=1), # B * 256 * 8 (input already pooled to length 8)
+            nn.ReLU(inplace=True),
+            nn.BatchNorm1d(256),
+
+        )
+        self.conv_z_4 = torch.nn.Sequential(
+            nn.Conv1d(256, 512, kernel_size=(3,), padding=1), # B * 512 * 4 (input already pooled to length 4)
+            nn.ReLU(inplace=True),
+            nn.BatchNorm1d(512),
+
+        )
+
+        # 519 features from x + 1024 (= 512 channels * length 2) from z
+        self.dense1 = nn.Linear(519 + 1024, 1024)
+        self.dense2 = nn.Linear(1024, 512)
+        self.dense3 = nn.Linear(512, 512)
+        self.dense4 = nn.Linear(512, 512)
+        self.dense5 = nn.Linear(512, 512)
+        self.dense6 = nn.Linear(512, 1)
+
+    def forward(self, z, x, return_value=False, flags=None, debug=False):
+        """Score every row and either return the scores or pick an action.
+
+        Args:
+            z: 3-D tensor; a channel dimension is inserted at position 1
+                before the Conv2d stage.
+            x: tensor whose last dimension is 519; one row per candidate.
+            return_value: when true, return ``dict(values=...)`` with the
+                raw scores instead of choosing an action.
+            flags: optional object with an ``exp_epsilon`` attribute used
+                for epsilon-greedy exploration.
+            debug: accepted but not used in this method.
+
+        Returns:
+            ``dict(values=scores)``, or ``dict(action=row_index,
+            max_value=highest_score)``.
+        """
+        z = z.unsqueeze(1)
+        z = self.conv_z_1(z)
+        # Drop the width-1 axis left by the (1, 57) kernel.
+        z = z.squeeze(-1)
+        z = torch.max_pool1d(z, 2)
+        z = self.conv_z_2(z)
+        z = torch.max_pool1d(z, 2)
+        z = self.conv_z_3(z)
+        z = torch.max_pool1d(z, 2)
+        z = self.conv_z_4(z)
+        z = torch.max_pool1d(z, 2)
+        # Merge channel and length axes into one feature vector per row.
+        z = z.flatten(1,2)
+        x = torch.cat([z,x], dim=-1)
+        x = self.dense1(x)
+        x = torch.relu(x)
+        x = self.dense2(x)
+        x = torch.relu(x)
+        x = self.dense3(x)
+        x = torch.relu(x)
+        x = self.dense4(x)
+        x = torch.relu(x)
+        x = self.dense5(x)
+        x = torch.relu(x)
+        x = self.dense6(x)
+        if return_value:
+            return dict(values=x)
+        else:
+            if flags is not None and flags.exp_epsilon > 0 and np.random.rand() < flags.exp_epsilon:
+                # Exploration: random index in [0, number of rows).
+                action = torch.randint(x.shape[0], (1,))[0]
+            else:
+                # Greedy: index of the highest-scoring row.
+                action = torch.argmax(x,dim=0)[0]
+            # max_value is the best score regardless of which action was picked.
+            return dict(action=action, max_value=torch.max(x))
+
+
+# Residual block of the kind used in ResNet-18/34 (two 3x3 convolutions
+# there; here two kernel-size-3 Conv1d layers).
+class BasicBlock(nn.Module):
+    """1-D residual block: conv-BN-ReLU, conv-BN, add shortcut, ReLU."""
+    # Output channels = expansion * planes.
+    expansion = 1
+
+    def __init__(self, in_planes, planes, stride=1):
+        """Build the block.
+
+        Args:
+            in_planes: number of input channels.
+            planes: number of channels produced by both convolutions.
+            stride: stride of the first convolution (and of the shortcut
+                projection when one is needed).
+        """
+        super(BasicBlock, self).__init__()
+        self.conv1 = nn.Conv1d(in_planes, planes, kernel_size=(3,),
+                               stride=(stride,), padding=1, bias=False)
+        self.bn1 = nn.BatchNorm1d(planes)
+        self.conv2 = nn.Conv1d(planes, planes, kernel_size=(3,),
+                               stride=(1,), padding=1, bias=False)
+        self.bn2 = nn.BatchNorm1d(planes)
+        # Identity shortcut by default (an empty Sequential returns its input).
+        self.shortcut = nn.Sequential()
+        # The transformed x must have the same dimensions (length and depth) as x.
+        # If it does not, add a convolution + BN to project x to the same dimensions.
+        if stride != 1 or in_planes != self.expansion * planes:
+            self.shortcut = nn.Sequential(
+                nn.Conv1d(in_planes, self.expansion * planes,
+                          kernel_size=(1,), stride=(stride,), bias=False),
+                nn.BatchNorm1d(self.expansion * planes)
+            )
+
+    def forward(self, x):
+        """Return ``relu(bn2(conv2(relu(bn1(conv1(x))))) + shortcut(x))``."""
+        out = F.relu(self.bn1(self.conv1(x)))
+        out = self.bn2(self.conv2(out))
+        out += self.shortcut(x)
+        out = F.relu(out)
+        return out
+
+
+class ResnetModel(nn.Module):
+    """1-D ResNet over ``z`` followed by an MLP scoring each row.
+
+    ``z`` passes through a strided Conv1d and three residual stages of two
+    ``BasicBlock``s each; the flattened features are concatenated with four
+    copies of ``x`` and reduced to one scalar per row.
+    """
+    def __init__(self):
+        super().__init__()
+        # Channel count fed to the next residual stage; updated by _make_layer.
+        self.in_planes = 80
+        #input 1*54*41
+        # NOTE(review): conv1 takes 40 input channels, not 41 -- confirm
+        # the shape comment above against the data pipeline.
+        self.conv1 = nn.Conv1d(40, 80, kernel_size=(3,),
+                               stride=(2,), padding=1, bias=False) #1*27*80
+
+        self.bn1 = nn.BatchNorm1d(80)
+
+        self.layer1 = self._make_layer(BasicBlock, 80, 2, stride=2)#1*14*80
+        self.layer2 = self._make_layer(BasicBlock, 160, 2, stride=2)#1*7*160
+        self.layer3 = self._make_layer(BasicBlock, 320, 2, stride=2)#1*4*320
+        # self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
+        # 320 channels * length 4 from the ResNet + four copies of a
+        # 15-feature x (see the torch.cat in forward).
+        self.linear1 = nn.Linear(320 * BasicBlock.expansion * 4 + 15 * 4, 1024)
+        self.linear2 = nn.Linear(1024, 512)
+        self.linear3 = nn.Linear(512, 256)
+        self.linear4 = nn.Linear(256, 1)
+
+    def _make_layer(self, block, planes, num_blocks, stride):
+        """Build one residual stage of ``num_blocks`` blocks.
+
+        Only the first block uses ``stride``; the rest use stride 1.
+        ``self.in_planes`` is advanced to the stage's output channels.
+        """
+        strides = [stride] + [1] * (num_blocks - 1)
+        layers = []
+        for stride in strides:
+            layers.append(block(self.in_planes, planes, stride))
+            self.in_planes = planes * block.expansion
+        return nn.Sequential(*layers)
+
+    def forward(self, z, x, return_value=False, flags=None, debug=False):
+        """Score every row and either return the scores or pick an action.
+
+        Args:
+            z: tensor with 40 channels on dimension 1 (input to ``conv1``).
+            x: per-row feature tensor; repeated four times before the MLP.
+            return_value: when true, return ``dict(values=...)`` with the
+                scores instead of choosing an action.
+            flags: optional object with an ``exp_epsilon`` attribute used
+                for epsilon-greedy exploration.
+            debug: accepted but not used in this method.
+
+        Returns:
+            ``dict(values=scores)``, or ``dict(action=row_index,
+            max_value=highest_score)``.
+        """
+        out = F.relu(self.bn1(self.conv1(z)))
+        out = self.layer1(out)
+        out = self.layer2(out)
+        out = self.layer3(out)
+        # Merge channel and length axes into one feature vector per row.
+        out = out.flatten(1,2)
+        out = torch.cat([x,x,x,x,out], dim=-1)
+        out = F.leaky_relu_(self.linear1(out))
+        out = F.leaky_relu_(self.linear2(out))
+        out = F.leaky_relu_(self.linear3(out))
+        # Note: the final score also goes through leaky_relu.
+        out = F.leaky_relu_(self.linear4(out))
+        if return_value:
+            return dict(values=out)
+        else:
+            if flags is not None and flags.exp_epsilon > 0 and np.random.rand() < flags.exp_epsilon:
+                # Exploration: random index in [0, number of rows).
+                action = torch.randint(out.shape[0], (1,))[0]
+            else:
+                # Greedy: index of the highest-scoring row.
+                action = torch.argmax(out,dim=0)[0]
+            return dict(action=action, max_value=torch.max(out))
+
+
+
+
+
+class BidModel(nn.Module):
+    """Six-layer MLP that maps a 114-feature row to one scalar score."""
+    def __init__(self):
+        super().__init__()
+
+        self.dense1 = nn.Linear(114, 512)
+        self.dense2 = nn.Linear(512, 512)
+        self.dense3 = nn.Linear(512, 512)
+        self.dense4 = nn.Linear(512, 512)
+        self.dense5 = nn.Linear(512, 512)
+        self.dense6 = nn.Linear(512, 1)
+
+    def forward(self, z, x, return_value=False, flags=None, debug=False):
+        """Score every row of ``x`` and either return scores or pick a row.
+
+        Args:
+            z: not used by this model; it keeps the call signature the
+                same as the other models' ``forward``.
+            x: tensor whose last dimension is 114.
+            return_value: when true, return ``dict(values=...)`` with the
+                raw scores instead of choosing an action.
+            flags: optional object with an ``exp_epsilon`` attribute used
+                for epsilon-greedy exploration.
+            debug: accepted but not used in this method.
+
+        Returns:
+            ``dict(values=scores)``, or ``dict(action=row_index,
+            max_value=highest_score)``.
+        """
+        x = self.dense1(x)
+        x = F.leaky_relu(x)
+        # x = F.relu(x)
+        x = self.dense2(x)
+        x = F.leaky_relu(x)
+        # x = F.relu(x)
+        x = self.dense3(x)
+        x = F.leaky_relu(x)
+        # x = F.relu(x)
+        x = self.dense4(x)
+        x = F.leaky_relu(x)
+        # x = F.relu(x)
+        x = self.dense5(x)
+        # x = F.relu(x)
+        x = F.leaky_relu(x)
+        x = self.dense6(x)
+        if return_value:
+            return dict(values=x)
+        else:
+            if flags is not None and flags.exp_epsilon > 0 and np.random.rand() < flags.exp_epsilon:
+                # Exploration: random index in [0, number of rows).
+                action = torch.randint(x.shape[0], (1,))[0]
+            else:
+                # Greedy: index of the highest-scoring row.
+                action = torch.argmax(x,dim=0)[0]
+            return dict(action=action, max_value=torch.max(x))
+
+
 # Model dict is only used in evaluation but not training
 model_dict = {}
 model_dict['landlord'] = LandlordLstmModel
 model_dict['landlord_up'] = FarmerLstmModel
 model_dict['landlord_down'] = FarmerLstmModel
+# Position name -> model class for the ResNet-based networks; the extra
+# 'bidding' key maps to BidModel.
+model_dict_resnet = {}
+model_dict_resnet['landlord'] = ResnetModel
+model_dict_resnet['landlord_up'] = ResnetModel
+model_dict_resnet['landlord_down'] = ResnetModel
+model_dict_resnet['bidding'] = BidModel
+# Position name -> model class for the GeneralModel networks; the extra
+# 'bidding' key maps to BidModel.
+model_dict_general = {}
+model_dict_general['landlord'] = GeneralModel
+model_dict_general['landlord_up'] = GeneralModel
+model_dict_general['landlord_down'] = GeneralModel
+model_dict_general['bidding'] = BidModel

-class Model:
+
+class General_Model:
    """
    The wrapper for the three models. We also wrap several
    interfaces such as share_memory, eval, etc.
    """
    def __init__(self, device=0):
        self.models = {}
-        self.models['landlord'] = LandlordLstmModel().to(torch.device('cuda:'+str(device)))
-        self.models['landlord_up'] = FarmerLstmModel().to(torch.device('cuda:'+str(device)))
-        self.models['landlord_down'] = FarmerLstmModel().to(torch.device('cuda:'+str(device)))
+        if not device == "cpu":
+            device = 'cuda:' + str(device)
+        # model = GeneralModel().to(torch.device(device))
+        self.models['landlord'] = GeneralModel1().to(torch.device(device))
+        self.models['landlord_up'] = GeneralModel1().to(torch.device(device))
+        self.models['landlord_down'] = GeneralModel1().to(torch.device(device))
+        self.models['bidding'] = BidModel().to(torch.device(device))
+
+    def forward(self, position, z, x, training=False, flags=None, debug=False):
+        model = self.models[position]
+        return model.forward(z, x, training, flags, debug)
+
+    def share_memory(self):
+        self.models['landlord'].share_memory()
+        self.models['landlord_up'].share_memory()
+        self.models['landlord_down'].share_memory()
+        self.models['bidding'].share_memory()
+
+    def eval(self):
+        self.models['landlord'].eval()
+        self.models['landlord_up'].eval()
+        self.models['landlord_down'].eval()
+        self.models['bidding'].eval()
+
+    def parameters(self, position):
+        return self.models[position].parameters()
+
+    def get_model(self, position):
+        return self.models[position]
+
+    def get_models(self):
+        return self.models
+
+class OldModel:
+    """
+    The wrapper for the three models. We also wrap several
+    interfaces such as share_memory, eval, etc.
+    """
+    def __init__(self, device=0):
+        self.models = {}
+        if not device == "cpu":
+            device = 'cuda:' + str(device)
+        self.models['landlord'] = LandlordLstmModel().to(torch.device(device))
+        self.models['landlord_up'] = FarmerLstmModel().to(torch.device(device))
+        self.models['landlord_down'] = FarmerLstmModel().to(torch.device(device))

    def forward(self, position, z, x, training=False, flags=None):
        model = self.models[position]
@ -117,3 +430,44 @@ class Model:

    def get_models(self):
        return self.models
+
+
+class Model:
+    """
+    The wrapper for the three models. We also wrap several
+    interfaces such as share_memory, eval, etc.
+
+    This variant holds a ResnetModel for each of the three playing
+    positions plus a BidModel under the 'bidding' key.
+    """
+    def __init__(self, device=0):
+        self.models = {}
+        # Anything other than the literal "cpu" is treated as a CUDA
+        # device index, e.g. 0 -> "cuda:0".
+        if not device == "cpu":
+            device = 'cuda:' + str(device)
+        # model = GeneralModel().to(torch.device(device))
+        self.models['landlord'] = ResnetModel().to(torch.device(device))
+        self.models['landlord_up'] = ResnetModel().to(torch.device(device))
+        self.models['landlord_down'] = ResnetModel().to(torch.device(device))
+        self.models['bidding'] = BidModel().to(torch.device(device))
+
+    def forward(self, position, z, x, training=False, flags=None, debug=False):
+        # `training` is passed positionally as the model's `return_value`
+        # argument: a true value returns dict(values=...) instead of an action.
+        model = self.models[position]
+        return model.forward(z, x, training, flags, debug)
+
+    def share_memory(self):
+        # Delegates to share_memory() on each of the four wrapped models.
+        self.models['landlord'].share_memory()
+        self.models['landlord_up'].share_memory()
+        self.models['landlord_down'].share_memory()
+        self.models['bidding'].share_memory()
+
+    def eval(self):
+        # Switches each of the four wrapped models to evaluation mode.
+        self.models['landlord'].eval()
+        self.models['landlord_up'].eval()
+        self.models['landlord_down'].eval()
+        self.models['bidding'].eval()
+
+    def parameters(self, position):
+        # Parameters of the single model registered for `position`.
+        return self.models[position].parameters()
+
+    def get_model(self, position):
+        return self.models[position]
+
+    def get_models(self):
+        return self.models
--- a/douzero/env/env.py
+++ b/douzero/env/env.py
@ -181,38 +181,25 @@ class DummyAgent(object):
        """
        self.action = action

-def get_obs(infoset):
-    """
-    This function obtains observations with imperfect information
-    from the infoset. It has three branches since we encode
-    different features for different positions.
-    
-    This function will return dictionary named `obs`. It contains
-    several fields. These fields will be used to train the model.
-    One can play with those features to improve the performance.

-    `position` is a string that can be landlord/landlord_down/landlord_up
-
-    `x_batch` is a batch of features (excluding the hisorical moves).
-    It also encodes the action feature
-
-    `z_batch` is a batch of features with hisorical moves only.
-
-    `legal_actions` is the legal moves
-
-    `x_no_action`: the features (exluding the hitorical moves and
-    the action features). It does not have the batch dim.
-
-    `z`: same as z_batch but not a batch.
-    """
-    if infoset.player_position == 'landlord':
-        return _get_obs_landlord(infoset)
-    elif infoset.player_position == 'landlord_up':
-        return _get_obs_landlord_up(infoset)
-    elif infoset.player_position == 'landlord_down':
-        return _get_obs_landlord_down(infoset)
+def get_obs(infoset, model_type="old"):
+    if model_type == "general":
+        if infoset.player_position not in ["landlord", "landlord_up", "landlord_down"]:
+            raise ValueError('')
+        return _get_obs_general(infoset, infoset.player_position)
+    elif model_type == "resnet":
+        if infoset.player_position not in ["landlord", "landlord_up", "landlord_down"]:
+            raise ValueError('')
+        return _get_obs_resnet(infoset, infoset.player_position)
    else:
-        raise ValueError('')
+        if infoset.player_position == 'landlord':
+            return _get_obs_landlord(infoset)
+        elif infoset.player_position == 'landlord_up':
+            return _get_obs_landlord_up(infoset)
+        elif infoset.player_position == 'landlord_down':
+            return _get_obs_landlord_down(infoset)
+        else:
+            raise ValueError('')

 def _get_one_hot_array(num_left_cards, max_num_cards):
    """
@ -245,29 +232,41 @@ def _cards2array(list_cards):
            jokers[1] = 1
    return np.concatenate((matrix.flatten('F'), jokers))

-def _action_seq_list2array(action_seq_list):
-    """
-    A utility function to encode the historical moves.
-    We encode the historical 15 actions. If there is
-    no 15 actions, we pad the features with 0. Since
-    three moves is a round in DouDizhu, we concatenate
-    the representations for each consecutive three moves.
-    Finally, we obtain a 5x162 matrix, which will be fed
-    into LSTM for encoding.
-    """
-    action_seq_array = np.zeros((len(action_seq_list), 54))
-    for row, list_cards in enumerate(action_seq_list):
-        action_seq_array[row, :] = _cards2array(list_cards)
-    action_seq_array = action_seq_array.reshape(5, 162)
+
+def _action_seq_list2array(action_seq_list, model_type="old"):
+    """
+    Encode a list of historical moves as a 2-D array, one row per move.
+
+    Each non-empty entry is indexed as `entry[0]` (position name, read
+    only in "general" mode) and `entry[1]` (the cards, encoded with
+    `_cards2array`). Empty entries are padding and keep the fill value.
+
+    "general": shape (len, 57), filled with -1; columns 0-53 hold the
+        card encoding and columns 54-56 a one-hot of the position
+        (landlord, landlord_up, landlord_down).
+    "resnet": shape (len, 54), filled with -1.
+    otherwise: zero-filled (len, 54) reshaped to (5, 162), which requires
+        exactly 15 entries.
+    """
+    if model_type == "general":
+        position_map = {"landlord": 0, "landlord_up": 1, "landlord_down": 2}
+        action_seq_array = np.ones((len(action_seq_list), 57)) * -1  # Default Value -1 for not using area
+        for row, list_cards in enumerate(action_seq_list):
+            if list_cards:
+                action_seq_array[row, :54] = _cards2array(list_cards[1])
+                # One-hot of the acting position in the last three columns.
+                for pos in position_map:
+                    if list_cards[0] == pos:
+                        action_seq_array[row, 54 + position_map[pos]] = 1
+                    else:
+                        action_seq_array[row, 54 + position_map[pos]] = 0
+    elif model_type == "resnet":
+        action_seq_array = np.ones((len(action_seq_list), 54)) * -1  # Default Value -1 for not using area
+        for row, list_cards in enumerate(action_seq_list):
+            if list_cards:
+                action_seq_array[row, :] = _cards2array(list_cards[1])
+    else:
+        action_seq_array = np.zeros((len(action_seq_list), 54))
+        for row, list_cards in enumerate(action_seq_list):
+            if list_cards:
+                action_seq_array[row, :] = _cards2array(list_cards[1])
+        # Group every three consecutive moves into one 162-wide row.
+        action_seq_array = action_seq_array.reshape(5, 162)
    return action_seq_array

-def _process_action_seq(sequence, length=15):
+def _process_action_seq(sequence, length=15, new_model=True):
    """
    A utility function encoding historical moves. We
    encode 15 moves. If there is no 15 moves, we pad
    with zeros.
    """
    sequence = sequence[-length:].copy()
+    if new_model:
+        sequence = sequence[::-1]
    if len(sequence) < length:
        empty_sequence = [[] for _ in range(length - len(sequence))]
        empty_sequence.extend(sequence)
@ -353,18 +352,18 @@ def _get_obs_landlord(infoset):
                             landlord_down_num_cards_left,
                             bomb_num))
    z = _action_seq_list2array(_process_action_seq(
-        infoset.card_play_action_seq))
+    infoset.card_play_action_seq, 15, False), "old")
    z_batch = np.repeat(
        z[np.newaxis, :, :],
        num_legal_actions, axis=0)
    obs = {
-            'position': 'landlord',
-            'x_batch': x_batch.astype(np.float32),
-            'z_batch': z_batch.astype(np.float32),
-            'legal_actions': infoset.legal_actions,
-            'x_no_action': x_no_action.astype(np.int8),
-            'z': z.astype(np.int8),
-          }
+        'position': 'landlord',
+        'x_batch': x_batch.astype(np.float32),
+        'z_batch': z_batch.astype(np.float32),
+        'legal_actions': infoset.legal_actions,
+        'x_no_action': x_no_action.astype(np.int8),
+        'z': z.astype(np.int8),
+    }
    return obs

 def _get_obs_landlord_up(infoset):
@ -451,18 +450,18 @@ def _get_obs_landlord_up(infoset):
                             teammate_num_cards_left,
                             bomb_num))
    z = _action_seq_list2array(_process_action_seq(
-        infoset.card_play_action_seq))
+    infoset.card_play_action_seq, 15, False), "old")
    z_batch = np.repeat(
        z[np.newaxis, :, :],
        num_legal_actions, axis=0)
    obs = {
-            'position': 'landlord_up',
-            'x_batch': x_batch.astype(np.float32),
-            'z_batch': z_batch.astype(np.float32),
-            'legal_actions': infoset.legal_actions,
-            'x_no_action': x_no_action.astype(np.int8),
-            'z': z.astype(np.int8),
-          }
+        'position': 'landlord_up',
+        'x_batch': x_batch.astype(np.float32),
+        'z_batch': z_batch.astype(np.float32),
+        'legal_actions': infoset.legal_actions,
+        'x_no_action': x_no_action.astype(np.int8),
+        'z': z.astype(np.int8),
+    }
    return obs

 def _get_obs_landlord_down(infoset):
@ -555,16 +554,530 @@ def _get_obs_landlord_down(infoset):
                             teammate_num_cards_left,
                             bomb_num))
    z = _action_seq_list2array(_process_action_seq(
-        infoset.card_play_action_seq))
+    infoset.card_play_action_seq, 15, False), "old")
    z_batch = np.repeat(
        z[np.newaxis, :, :],
        num_legal_actions, axis=0)
    obs = {
-            'position': 'landlord_down',
-            'x_batch': x_batch.astype(np.float32),
-            'z_batch': z_batch.astype(np.float32),
-            'legal_actions': infoset.legal_actions,
-            'x_no_action': x_no_action.astype(np.int8),
-            'z': z.astype(np.int8),
-          }
+        'position': 'landlord_down',
+        'x_batch': x_batch.astype(np.float32),
+        'z_batch': z_batch.astype(np.float32),
+        'legal_actions': infoset.legal_actions,
+        'x_no_action': x_no_action.astype(np.int8),
+        'z': z.astype(np.int8),
+    }
+    return obs
+
+def _get_obs_resnet(infoset, position):
+    num_legal_actions = len(infoset.legal_actions)
+    my_handcards = _cards2array(infoset.player_hand_cards)
+    my_handcards_batch = np.repeat(my_handcards[np.newaxis, :],
+                                   num_legal_actions, axis=0)
+
+    other_handcards = _cards2array(infoset.other_hand_cards)
+    other_handcards_batch = np.repeat(other_handcards[np.newaxis, :],
+                                      num_legal_actions, axis=0)
+
+    position_map = {
+        "landlord": [1, 0, 0],
+        "landlord_up": [0, 1, 0],
+        "landlord_down": [0, 0, 1]
+    }
+    position_info = np.array(position_map[position])
+    position_info_batch = np.repeat(position_info[np.newaxis, :],
+                                    num_legal_actions, axis=0)
+
+    bid_info = np.array(infoset.bid_info).flatten()
+    bid_info_batch = np.repeat(bid_info[np.newaxis, :],
+                               num_legal_actions, axis=0)
+
+    multiply_info = np.array(infoset.multiply_info)
+    multiply_info_batch = np.repeat(multiply_info[np.newaxis, :],
+                                    num_legal_actions, axis=0)
+
+    three_landlord_cards = _cards2array(infoset.three_landlord_cards)
+    three_landlord_cards_batch = np.repeat(three_landlord_cards[np.newaxis, :],
+                                           num_legal_actions, axis=0)
+
+    last_action = _cards2array(infoset.last_move)
+    last_action_batch = np.repeat(last_action[np.newaxis, :],
+                                  num_legal_actions, axis=0)
+
+    my_action_batch = np.zeros(my_handcards_batch.shape)
+    for j, action in enumerate(infoset.legal_actions):
+        my_action_batch[j, :] = _cards2array(action)
+
+    landlord_num_cards_left = _get_one_hot_array(
+        infoset.num_cards_left_dict['landlord'], 20)
+    landlord_num_cards_left_batch = np.repeat(
+        landlord_num_cards_left[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    landlord_up_num_cards_left = _get_one_hot_array(
+        infoset.num_cards_left_dict['landlord_up'], 17)
+    landlord_up_num_cards_left_batch = np.repeat(
+        landlord_up_num_cards_left[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    landlord_down_num_cards_left = _get_one_hot_array(
+        infoset.num_cards_left_dict['landlord_down'], 17)
+    landlord_down_num_cards_left_batch = np.repeat(
+        landlord_down_num_cards_left[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    other_handcards_left_list = []
+    for pos in ["landlord", "landlord_up", "landlord_up"]:
+        if pos != position:
+            other_handcards_left_list.extend(infoset.all_handcards[pos])
+
+    landlord_played_cards = _cards2array(
+        infoset.played_cards['landlord'])
+    landlord_played_cards_batch = np.repeat(
+        landlord_played_cards[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    landlord_up_played_cards = _cards2array(
+        infoset.played_cards['landlord_up'])
+    landlord_up_played_cards_batch = np.repeat(
+        landlord_up_played_cards[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    landlord_down_played_cards = _cards2array(
+        infoset.played_cards['landlord_down'])
+    landlord_down_played_cards_batch = np.repeat(
+        landlord_down_played_cards[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    bomb_num = _get_one_hot_bomb(
+        infoset.bomb_num)
+    bomb_num_batch = np.repeat(
+        bomb_num[np.newaxis, :],
+        num_legal_actions, axis=0)
+    num_cards_left = np.hstack((
+        landlord_num_cards_left,  # 20
+        landlord_up_num_cards_left,  # 17
+        landlord_down_num_cards_left))
+
+    x_batch = np.hstack((
+        bid_info_batch,  # 12
+        multiply_info_batch))  # 3
+    x_no_action = np.hstack((
+        bid_info,
+        multiply_info))
+    z =np.vstack((
+        num_cards_left,
+        my_handcards,  # 54
+        other_handcards,  # 54
+        three_landlord_cards,  # 54
+        landlord_played_cards,  # 54
+        landlord_up_played_cards,  # 54
+        landlord_down_played_cards,  # 54
+        _action_seq_list2array(_process_action_seq(infoset.card_play_action_seq, 32), model_type="resnet")
+    ))
+
+    _z_batch = np.repeat(
+        z[np.newaxis, :, :],
+        num_legal_actions, axis=0)
+    my_action_batch = my_action_batch[:,np.newaxis,:]
+    z_batch = np.zeros([len(_z_batch),40,54],int)
+    for i in range(0,len(_z_batch)):
+        z_batch[i] = np.vstack((my_action_batch[i],_z_batch[i]))
+    obs = {
+        'position': position,
+        'x_batch': x_batch.astype(np.float32),
+        'z_batch': z_batch.astype(np.float32),
+        'legal_actions': infoset.legal_actions,
+        'x_no_action': x_no_action.astype(np.int8),
+        'z': z.astype(np.int8),
+    }
+    return obs
+
+def _get_obs_general(infoset, position):
+    num_legal_actions = len(infoset.legal_actions)
+    my_handcards = _cards2array(infoset.player_hand_cards)
+    my_handcards_batch = np.repeat(my_handcards[np.newaxis, :],
+                                   num_legal_actions, axis=0)
+
+    other_handcards = _cards2array(infoset.other_hand_cards)
+    other_handcards_batch = np.repeat(other_handcards[np.newaxis, :],
+                                      num_legal_actions, axis=0)
+
+    position_map = {
+        "landlord": [1, 0, 0],
+        "landlord_up": [0, 1, 0],
+        "landlord_down": [0, 0, 1]
+    }
+    position_info = np.array(position_map[position])
+    position_info_batch = np.repeat(position_info[np.newaxis, :],
+                                    num_legal_actions, axis=0)
+
+    bid_info = np.array(infoset.bid_info).flatten()
+    bid_info_batch = np.repeat(bid_info[np.newaxis, :],
+                               num_legal_actions, axis=0)
+
+    multiply_info = np.array(infoset.multiply_info)
+    multiply_info_batch = np.repeat(multiply_info[np.newaxis, :],
+                                    num_legal_actions, axis=0)
+
+    three_landlord_cards = _cards2array(infoset.three_landlord_cards)
+    three_landlord_cards_batch = np.repeat(three_landlord_cards[np.newaxis, :],
+                                           num_legal_actions, axis=0)
+
+    last_action = _cards2array(infoset.last_move)
+    last_action_batch = np.repeat(last_action[np.newaxis, :],
+                                  num_legal_actions, axis=0)
+
+    my_action_batch = np.zeros(my_handcards_batch.shape)
+    for j, action in enumerate(infoset.legal_actions):
+        my_action_batch[j, :] = _cards2array(action)
+
+    landlord_num_cards_left = _get_one_hot_array(
+        infoset.num_cards_left_dict['landlord'], 20)
+    landlord_num_cards_left_batch = np.repeat(
+        landlord_num_cards_left[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    landlord_up_num_cards_left = _get_one_hot_array(
+        infoset.num_cards_left_dict['landlord_up'], 17)
+    landlord_up_num_cards_left_batch = np.repeat(
+        landlord_up_num_cards_left[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    landlord_down_num_cards_left = _get_one_hot_array(
+        infoset.num_cards_left_dict['landlord_down'], 17)
+    landlord_down_num_cards_left_batch = np.repeat(
+        landlord_down_num_cards_left[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    other_handcards_left_list = []
+    for pos in ["landlord", "landlord_up", "landlord_up"]:
+        if pos != position:
+            other_handcards_left_list.extend(infoset.all_handcards[pos])
+
+    landlord_played_cards = _cards2array(
+        infoset.played_cards['landlord'])
+    landlord_played_cards_batch = np.repeat(
+        landlord_played_cards[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    landlord_up_played_cards = _cards2array(
+        infoset.played_cards['landlord_up'])
+    landlord_up_played_cards_batch = np.repeat(
+        landlord_up_played_cards[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    landlord_down_played_cards = _cards2array(
+        infoset.played_cards['landlord_down'])
+    landlord_down_played_cards_batch = np.repeat(
+        landlord_down_played_cards[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    bomb_num = _get_one_hot_bomb(
+        infoset.bomb_num)
+    bomb_num_batch = np.repeat(
+        bomb_num[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    x_batch = np.hstack((position_info_batch,  # 3
+                         my_handcards_batch,  # 54
+                         other_handcards_batch,  # 54
+                         three_landlord_cards_batch,  # 54
+                         last_action_batch,  # 54
+                         landlord_played_cards_batch,  # 54
+                         landlord_up_played_cards_batch,  # 54
+                         landlord_down_played_cards_batch,  # 54
+                         landlord_num_cards_left_batch,  # 20
+                         landlord_up_num_cards_left_batch,  # 17
+                         landlord_down_num_cards_left_batch,  # 17
+                         bomb_num_batch,  # 15
+                         bid_info_batch,  # 12
+                         multiply_info_batch, # 3
+                         my_action_batch))  # 54
+    x_no_action = np.hstack((position_info,
+                             my_handcards,
+                             other_handcards,
+                             three_landlord_cards,
+                             last_action,
+                             landlord_played_cards,
+                             landlord_up_played_cards,
+                             landlord_down_played_cards,
+                             landlord_num_cards_left,
+                             landlord_up_num_cards_left,
+                             landlord_down_num_cards_left,
+                             bomb_num,
+                             bid_info,
+                             multiply_info))
+    z = _action_seq_list2array(_process_action_seq(
+        infoset.card_play_action_seq, 32), "general")
+    z_batch = np.repeat(
+        z[np.newaxis, :, :],
+        num_legal_actions, axis=0)
+    obs = {
+        'position': position,
+        'x_batch': x_batch.astype(np.float32),
+        'z_batch': z_batch.astype(np.float32),
+        'legal_actions': infoset.legal_actions,
+        'x_no_action': x_no_action.astype(np.int8),
+        'z': z.astype(np.int8),
+    }
+    return obs
+
+
+def gen_bid_legal_actions(player_id, bid_info):
+    # Build the candidate bid tables for the first bidding round that still
+    # contains an unfilled (-1) entry.
+    #
+    # Args:
+    #     player_id: seat index of the bidding player. It is used directly as
+    #         a column index (only its neighbours are wrapped with % 3), so it
+    #         is presumably 0, 1 or 2 -- TODO confirm against the caller.
+    #     bid_info: bid history; it must support numpy-style 2-D indexing
+    #         (bid_info[:, [cols]]). Rows 0..3 are scanned.
+    #
+    # Returns:
+    #     np.array of two flattened tables (row set to [0, 0, 0] and row set
+    #     to [0, 1, 0]) when an open round exists, otherwise an empty array.
+
+    # Reorder the columns to (previous seat, this seat, next seat) so the
+    # bidding player always sits in the middle column. With a numpy array,
+    # indexing by a list yields a new array, so the writes below do not
+    # modify the caller's bid_info.
+    self_bid_info = bid_info[:, [(player_id - 1) % 3, player_id, (player_id + 1) % 3]]
+    curr_round = -1
+    # -1 presumably marks a slot nobody has bid in yet (verify); the first row
+    # containing one is treated as the current round.
+    for r in range(4):
+        if -1 in self_bid_info[r]:
+            curr_round = r
+            break
+    bid_actions = []
+    if curr_round != -1:
+        # NOTE(review): the whole row is overwritten, so entries the other two
+        # seats may already have in this round are reset to 0 -- confirm this
+        # is intended.
+        # Candidate 1: middle (own) column 0 -- presumably "do not bid".
+        self_bid_info[curr_round] = [0, 0, 0]
+        # flatten() returns a copy, so the next assignment cannot alter the
+        # candidate that was just stored.
+        bid_actions.append(np.array(self_bid_info).flatten())
+        # Candidate 2: middle (own) column 1 -- presumably "bid".
+        self_bid_info[curr_round] = [0, 1, 0]
+        bid_actions.append(np.array(self_bid_info).flatten())
+    return np.array(bid_actions)
+
+
+def _get_obs_for_bid(player_id, bid_info, hand_cards):
+    # Build the observation dict used to score the two bidding candidates
+    # produced by gen_bid_legal_actions.
+    #
+    # Every card-play feature (position, landlord cards, last action, played
+    # cards, cards-left counters, bomb count, multiply info, action history)
+    # is filled with its "nothing yet" value; only the hand cards, the
+    # remaining deck and the bid tables carry information.
+    #
+    # Args:
+    #     player_id: seat index forwarded to gen_bid_legal_actions.
+    #     bid_info: bid history forwarded to gen_bid_legal_actions.
+    #     hand_cards: the player's cards as values taken from all_cards below.
+    #
+    # Returns:
+    #     dict with keys 'position', 'x_batch', 'z_batch', 'legal_actions',
+    #     'x_no_action', 'z', 'bid_info_batch' and 'multiply_info'.
+    #
+    # Raises:
+    #     ValueError: if hand_cards holds a card that is not (or no longer)
+    #         available in the deck (list.remove).
+    #     IndexError: if gen_bid_legal_actions returns no candidates.
+
+    # Full 54-card deck: four each of 3..14 and 17, plus the single cards 20
+    # and 30.
+    all_cards = [3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
+                 8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, 12,
+                 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 17, 17, 17, 17, 20, 30]
+    # One batch row per bidding candidate.
+    num_legal_actions = 2
+    my_handcards = _cards2array(hand_cards)
+    my_handcards_batch = np.repeat(my_handcards[np.newaxis, :],
+                                   num_legal_actions, axis=0)
+    # Everything not in the player's hand: the full deck minus hand_cards.
+    other_cards = []
+    other_cards.extend(all_cards)
+    for card in hand_cards:
+        other_cards.remove(card)
+    other_handcards = _cards2array(other_cards)
+    other_handcards_batch = np.repeat(other_handcards[np.newaxis, :],
+                                      num_legal_actions, axis=0)
+
+    # No position is encoded here: all three position slots stay 0.
+    position_info = np.array([0, 0, 0])
+    position_info_batch = np.repeat(position_info[np.newaxis, :],
+                                    num_legal_actions, axis=0)
+
+    # bid_info is rebound to the first candidate table; the batch holds both
+    # candidates, one per row.
+    # NOTE(review): gen_bid_legal_actions returns an empty array when no row
+    # contains -1, in which case the [0] below raises IndexError.
+    bid_legal_actions = gen_bid_legal_actions(player_id, bid_info)
+    bid_info = bid_legal_actions[0]
+    bid_info_batch = bid_legal_actions
+
+    multiply_info = np.array([0, 0, 0])
+    multiply_info_batch = np.repeat(multiply_info[np.newaxis, :],
+                                    num_legal_actions, axis=0)
+
+    # The remaining card features are all encoded from an empty card list.
+    three_landlord_cards = _cards2array([])
+    three_landlord_cards_batch = np.repeat(three_landlord_cards[np.newaxis, :],
+                                           num_legal_actions, axis=0)
+
+    last_action = _cards2array([])
+    last_action_batch = np.repeat(last_action[np.newaxis, :],
+                                  num_legal_actions, axis=0)
+
+    # Both candidates get the encoding of an empty card list as their action;
+    # range(2) matches num_legal_actions.
+    my_action_batch = np.zeros(my_handcards_batch.shape)
+    for j in range(2):
+        my_action_batch[j, :] = _cards2array([])
+
+    # Cards-left counters are encoded for a count of 0.
+    landlord_num_cards_left = _get_one_hot_array(0, 20)
+    landlord_num_cards_left_batch = np.repeat(
+        landlord_num_cards_left[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    landlord_up_num_cards_left = _get_one_hot_array(0, 17)
+    landlord_up_num_cards_left_batch = np.repeat(
+        landlord_up_num_cards_left[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    landlord_down_num_cards_left = _get_one_hot_array(0, 17)
+    landlord_down_num_cards_left_batch = np.repeat(
+        landlord_down_num_cards_left[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    landlord_played_cards = _cards2array([])
+    landlord_played_cards_batch = np.repeat(
+        landlord_played_cards[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    landlord_up_played_cards = _cards2array([])
+    landlord_up_played_cards_batch = np.repeat(
+        landlord_up_played_cards[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    landlord_down_played_cards = _cards2array([])
+    landlord_down_played_cards_batch = np.repeat(
+        landlord_down_played_cards[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    bomb_num = _get_one_hot_bomb(0)
+    bomb_num_batch = np.repeat(
+        bomb_num[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    # Per-candidate feature matrix: all batch features joined column-wise.
+    x_batch = np.hstack((position_info_batch,
+                         my_handcards_batch,
+                         other_handcards_batch,
+                         three_landlord_cards_batch,
+                         last_action_batch,
+                         landlord_played_cards_batch,
+                         landlord_up_played_cards_batch,
+                         landlord_down_played_cards_batch,
+                         landlord_num_cards_left_batch,
+                         landlord_up_num_cards_left_batch,
+                         landlord_down_num_cards_left_batch,
+                         bomb_num_batch,
+                         bid_info_batch,
+                         multiply_info_batch,
+                         my_action_batch))
+    # NOTE(review): unlike x_batch, this vector leaves out bid_info and
+    # multiply_info -- confirm the consumer expects that layout.
+    x_no_action = np.hstack((position_info,
+                             my_handcards,
+                             other_handcards,
+                             three_landlord_cards,
+                             last_action,
+                             landlord_played_cards,
+                             landlord_up_played_cards,
+                             landlord_down_played_cards,
+                             landlord_num_cards_left,
+                             landlord_up_num_cards_left,
+                             landlord_down_num_cards_left,
+                             bomb_num))
+    # Action-history feature built from an empty move sequence.
+    z = _action_seq_list2array(_process_action_seq([], 32))
+    z_batch = np.repeat(
+        z[np.newaxis, :, :],
+        num_legal_actions, axis=0)
+    obs = {
+        'position': "",
+        'x_batch': x_batch.astype(np.float32),
+        'z_batch': z_batch.astype(np.float32),
+        'legal_actions': bid_legal_actions,
+        'x_no_action': x_no_action.astype(np.int8),
+        'z': z.astype(np.int8),
+        "bid_info_batch": bid_info_batch.astype(np.int8),
+        "multiply_info": multiply_info.astype(np.int8)
+    }
+    return obs
+
+
+def _get_obs_for_multiply(position, bid_info, hand_cards, landlord_cards):
+    # Build the observation dict used to score the three multiply candidates.
+    #
+    # The position, bid table, hand cards, remaining deck and landlord cards
+    # are encoded from the arguments; every other card-play feature (last
+    # action, played cards, cards-left counters, bomb count, action history)
+    # is filled with its "nothing yet" value.
+    #
+    # Args:
+    #     position: "landlord", "landlord_up" or "landlord_down".
+    #     bid_info: bid table; it is converted with np.array(...).flatten().
+    #     hand_cards: the player's cards as values taken from all_cards below.
+    #     landlord_cards: cards passed to _cards2array for the landlord-cards
+    #         feature.
+    #
+    # Returns:
+    #     dict with keys 'position', 'x_batch', 'z_batch', 'legal_actions',
+    #     'x_no_action', 'z', 'bid_info' and 'multiply_info_batch'.
+    #
+    # Raises:
+    #     KeyError: if position is not a key of position_map.
+    #     ValueError: if hand_cards holds a card that is not (or no longer)
+    #         available in the deck (list.remove).
+
+    # Full 54-card deck: four each of 3..14 and 17, plus the single cards 20
+    # and 30.
+    all_cards = [3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
+                 8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, 12,
+                 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 17, 17, 17, 17, 20, 30]
+    # One batch row per multiply candidate.
+    num_legal_actions = 3
+    my_handcards = _cards2array(hand_cards)
+    my_handcards_batch = np.repeat(my_handcards[np.newaxis, :],
+                                   num_legal_actions, axis=0)
+    # Everything not in the player's hand: the full deck minus hand_cards.
+    other_cards = []
+    other_cards.extend(all_cards)
+    for card in hand_cards:
+        other_cards.remove(card)
+    other_handcards = _cards2array(other_cards)
+    other_handcards_batch = np.repeat(other_handcards[np.newaxis, :],
+                                      num_legal_actions, axis=0)
+
+    # One-hot encoding of the player's position.
+    position_map = {
+        "landlord": [1, 0, 0],
+        "landlord_up": [0, 1, 0],
+        "landlord_down": [0, 0, 1]
+    }
+    position_info = np.array(position_map[position])
+    position_info_batch = np.repeat(position_info[np.newaxis, :],
+                                    num_legal_actions, axis=0)
+
+    bid_info = np.array(bid_info).flatten()
+    bid_info_batch = np.repeat(bid_info[np.newaxis, :],
+                               num_legal_actions, axis=0)
+
+    # The candidates are the three one-hot multiply vectors (rows of the
+    # batch); the un-batched multiply_info stays all zero.
+    multiply_info = np.array([0, 0, 0])
+    multiply_info_batch = np.array([[1, 0, 0],
+                                    [0, 1, 0],
+                                    [0, 0, 1]])
+
+    three_landlord_cards = _cards2array(landlord_cards)
+    three_landlord_cards_batch = np.repeat(three_landlord_cards[np.newaxis, :],
+                                           num_legal_actions, axis=0)
+
+    # The remaining card features are all encoded from an empty card list.
+    last_action = _cards2array([])
+    last_action_batch = np.repeat(last_action[np.newaxis, :],
+                                  num_legal_actions, axis=0)
+
+    # Every candidate gets the encoding of an empty card list as its action.
+    my_action_batch = np.zeros(my_handcards_batch.shape)
+    for j in range(num_legal_actions):
+        my_action_batch[j, :] = _cards2array([])
+
+    # Cards-left counters are encoded for a count of 0.
+    landlord_num_cards_left = _get_one_hot_array(0, 20)
+    landlord_num_cards_left_batch = np.repeat(
+        landlord_num_cards_left[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    landlord_up_num_cards_left = _get_one_hot_array(0, 17)
+    landlord_up_num_cards_left_batch = np.repeat(
+        landlord_up_num_cards_left[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    landlord_down_num_cards_left = _get_one_hot_array(0, 17)
+    landlord_down_num_cards_left_batch = np.repeat(
+        landlord_down_num_cards_left[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    landlord_played_cards = _cards2array([])
+    landlord_played_cards_batch = np.repeat(
+        landlord_played_cards[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    landlord_up_played_cards = _cards2array([])
+    landlord_up_played_cards_batch = np.repeat(
+        landlord_up_played_cards[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    landlord_down_played_cards = _cards2array([])
+    landlord_down_played_cards_batch = np.repeat(
+        landlord_down_played_cards[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    bomb_num = _get_one_hot_bomb(0)
+    bomb_num_batch = np.repeat(
+        bomb_num[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    # Per-candidate feature matrix: all batch features joined column-wise.
+    x_batch = np.hstack((position_info_batch,
+                         my_handcards_batch,
+                         other_handcards_batch,
+                         three_landlord_cards_batch,
+                         last_action_batch,
+                         landlord_played_cards_batch,
+                         landlord_up_played_cards_batch,
+                         landlord_down_played_cards_batch,
+                         landlord_num_cards_left_batch,
+                         landlord_up_num_cards_left_batch,
+                         landlord_down_num_cards_left_batch,
+                         bomb_num_batch,
+                         bid_info_batch,
+                         multiply_info_batch,
+                         my_action_batch))
+    # NOTE(review): unlike x_batch, this vector leaves out bid_info and
+    # multiply_info -- confirm the consumer expects that layout.
+    x_no_action = np.hstack((position_info,
+                             my_handcards,
+                             other_handcards,
+                             three_landlord_cards,
+                             last_action,
+                             landlord_played_cards,
+                             landlord_up_played_cards,
+                             landlord_down_played_cards,
+                             landlord_num_cards_left,
+                             landlord_up_num_cards_left,
+                             landlord_down_num_cards_left,
+                             bomb_num))
+    # Action-history feature built from an empty move sequence.
+    z = _action_seq_list2array(_process_action_seq([], 32))
+    z_batch = np.repeat(
+        z[np.newaxis, :, :],
+        num_legal_actions, axis=0)
+    # NOTE(review): 'position' is returned as "" although a position was
+    # supplied, and the "multiply_info_batch" key stores the all-zero
+    # multiply_info vector rather than multiply_info_batch -- confirm both
+    # against the code that reads this dict.
+    obs = {
+        'position': "",
+        'x_batch': x_batch.astype(np.float32),
+        'z_batch': z_batch.astype(np.float32),
+        'legal_actions': multiply_info_batch,
+        'x_no_action': x_no_action.astype(np.int8),
+        'z': z.astype(np.int8),
+        "bid_info": bid_info.astype(np.int8),
+        "multiply_info_batch": multiply_info.astype(np.int8)
+    }
    return obs
--- a/douzero/env/game.py
+++ b/douzero/env/game.py
@ -401,13 +401,13 @@ class InfoSet(object):
        self.player_position = player_position
        # The hand cands of the current player. A list.
        self.player_hand_cards = None
-        # The number of cards left for each player. It is a dict with str-->int 
+        # The number of cards left for each player. It is a dict with str-->int
        self.num_cards_left_dict = None
        # The three landload cards. A list.
        self.three_landlord_cards = None
        # The historical moves. It is a list of list
        self.card_play_action_seq = None
-        # The union of the hand cards of the other two players for the current player 
+        # The union of the hand cards of the other two players for the current player
        self.other_hand_cards = None
        # The legal actions for the current move. It is a list of list
        self.legal_actions = None
@ -419,9 +419,18 @@ class InfoSet(object):
        self.last_move_dict = None
        # The played cands so far. It is a list.
        self.played_cards = None
-        # The hand cards of all the players. It is a dict. 
+        # The hand cards of all the players. It is a dict.
        self.all_handcards = None
        # Last player position that plays a valid move, i.e., not `pass`
        self.last_pid = None
        # The number of bombs played so far
        self.bomb_num = None
+
+        self.bid_info = [[-1, -1, -1],
+                         [-1, -1, -1],
+                         [-1, -1, -1],
+                         [-1, -1, -1]]
+
+        self.multiply_info = [1, 0, 0]
+
+        self.player_id = None
--- a/douzero/evaluation/deep_agent.py
+++ b/douzero/evaluation/deep_agent.py
@ -3,9 +3,15 @@ import numpy as np

 from douzero.env.env import get_obs

-def _load_model(position, model_path):
-    from douzero.dmc.models import model_dict
-    model = model_dict[position]()
+def _load_model(position, model_path, model_type):
+    from douzero.dmc.models import model_dict, model_dict_resnet, model_dict_general
+    print(position, "loads", model_type, "model: ", model_path)
+    if model_type == "general":
+        model = model_dict_general[position]()
+    elif model_type == "resnet":
+        model = model_dict_resnet[position]()
+    else:
+        model = model_dict[position]()
    model_state_dict = model.state_dict()
    if torch.cuda.is_available():
        pretrained = torch.load(model_path, map_location='cuda:0')
@ -22,14 +28,19 @@ def _load_model(position, model_path):
 class DeepAgent:

    def __init__(self, position, model_path):
-        self.model = _load_model(position, model_path)
+        self.model_type = "old"
+        if "general" in model_path:
+            self.model_type = "general"
+        elif "resnet" in model_path:
+            self.model_type = "resnet"
+        self.model = _load_model(position, model_path, self.model_type)

    def act(self, infoset):
        # 只有一个合法动作时直接返回，这样会得不到胜率信息
        # if len(infoset.legal_actions) == 1:
        #     return infoset.legal_actions[0], 0

-        obs = get_obs(infoset)
+        obs = get_obs(infoset, model_type=self.model_type)
        z_batch = torch.from_numpy(obs['z_batch']).float()
        x_batch = torch.from_numpy(obs['x_batch']).float()
        if torch.cuda.is_available():
--- a/main.py
+++ b/main.py
@ -21,6 +21,7 @@ from MainWindow import Ui_Form

 from douzero.env.game import GameEnv
 from douzero.evaluation.deep_agent import DeepAgent
+import traceback

 import BidModel
 import LandlordModel
@ -83,7 +84,7 @@ class MyPyQT_Form(QtWidgets.QWidget, Ui_Form):
        self.shouldExit = 0  # 通知上一轮记牌结束
        self.canRecord = threading.Lock()  # 开始记牌
        self.card_play_model_path_dict = {
-            'landlord': "baselines/douzero_ADP/landlord.ckpt",
+            'landlord': "baselines/resnet/resnet_landlord_1613536300.ckpt",
            'landlord_up': "baselines/douzero_ADP/landlord_up.ckpt",
            'landlord_down': "baselines/douzero_ADP/landlord_down.ckpt"
        }
@ -199,9 +200,13 @@ class MyPyQT_Form(QtWidgets.QWidget, Ui_Form):
        ai_players[1] = DeepAgent(self.user_position, self.card_play_model_path_dict[self.user_position])

        self.env = GameEnv(ai_players)
+
        try:
            self.start()
-        except:
+        except Exception as e:
+            exc_type, exc_obj, exc_tb = sys.exc_info()
+            print(e)
+            traceback.print_tb(exc_tb)
            self.stop()

    def sleep(self, ms):
@ -225,12 +230,16 @@ class MyPyQT_Form(QtWidgets.QWidget, Ui_Form):
                self.WinRate.setText("评分：" + action_message["win_rate"])
                print("\n手牌：", str(''.join(
                    [EnvCard2RealCard[c] for c in self.env.info_sets[self.user_position].player_hand_cards])))
-                print("出牌：", action_message["action"] if action_message["action"] else "不出", "， 胜率：",
+                print("出牌：", action_message["action"] if action_message["action"] else "不出", "，得分：",
                      action_message["win_rate"])
                if action_message["action"] == "":
                    helper.ClickOnImage("pass_btn", region=self.PassBtnPos)
                else:
-                    helper.SelectCards(action_message["action"])
+                    hand_cards_str = ''.join([EnvCard2RealCard[c] for c in self.env.info_sets[self.user_position].player_hand_cards])
+                    if len(hand_cards_str) == 0 and len(action_message["action"]) == 1:
+                        helper.SelectCards(action_message["action"], True)
+                    else:
+                        helper.SelectCards(action_message["action"])
                    tryCount = 20
                    result = helper.LocateOnScreen("play_card", region=self.PassBtnPos, confidence=0.85)
                    while result is None and tryCount > 0:
@ -248,14 +257,14 @@ class MyPyQT_Form(QtWidgets.QWidget, Ui_Form):
            elif self.play_order == 1:
                self.RPlayedCard.setText("...")
                pass_flag = helper.LocateOnScreen('pass',
-                                                       region=self.RPlayedCardsPos,
-                                                       confidence=self.PassConfidence)
+                                                  region=self.RPlayedCardsPos,
+                                                  confidence=self.PassConfidence)
                self.detect_start_btn()
                while self.RunGame and self.have_white(self.RPlayedCardsPos) == 0 and pass_flag is None:
                    print("等待下家出牌")
                    self.sleep(100)
                    pass_flag = helper.LocateOnScreen('pass', region=self.RPlayedCardsPos,
-                                                           confidence=self.PassConfidence)
+                                                      confidence=self.PassConfidence)
                    self.detect_start_btn()
                self.sleep(200)
                # 未找到"不出"
@ -281,13 +290,13 @@ class MyPyQT_Form(QtWidgets.QWidget, Ui_Form):
                self.LPlayedCard.setText("...")
                self.detect_start_btn()
                pass_flag = helper.LocateOnScreen('pass', region=self.LPlayedCardsPos,
-                                                       confidence=self.PassConfidence)
+                                                  confidence=self.PassConfidence)
                while self.RunGame and self.have_white(self.LPlayedCardsPos) == 0 and pass_flag is None:
                    print("等待上家出牌")
                    self.detect_start_btn()
                    self.sleep(100)
                    pass_flag = helper.LocateOnScreen('pass', region=self.LPlayedCardsPos,
-                                                           confidence=self.PassConfidence)
+                                                      confidence=self.PassConfidence)
                self.sleep(200)
                # 不出
                # 未找到"不出"
@ -319,7 +328,7 @@ class MyPyQT_Form(QtWidgets.QWidget, Ui_Form):
    def find_landlord(self, landlord_flag_pos):
        for pos in landlord_flag_pos:
            result = helper.LocateOnScreen("landlord_words", region=pos,
-                                                confidence=self.LandlordFlagConfidence)
+                                           confidence=self.LandlordFlagConfidence)
            if result is not None:
                return landlord_flag_pos.index(pos)
        return None