From aab93d66c67205309f162005e6736f58672a48a7 Mon Sep 17 00:00:00 2001
From: zhiyang7 <zhiyang7@iflytek.com>
Date: Sun, 5 Dec 2021 12:03:30 +0800
Subject: [PATCH] =?UTF-8?q?=E6=94=B9=E9=80=A0=E4=B8=BA4=E4=BA=BA=E6=96=97?=
 =?UTF-8?q?=E5=9C=B0=E4=B8=BB?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 douzero/dmc/dmc.py               |  27 +-
 douzero/dmc/models.py            |  80 ++---
 douzero/dmc/utils.py             |  38 ++-
 douzero/env/env.py               | 487 ++++++++++++++++++++++---------
 douzero/env/game.py              | 199 ++++++++-----
 douzero/env/move_generator.py    |  24 +-
 douzero/env/utils.py             |  15 +-
 douzero/evaluation/simulation.py |  56 ++--
 evaluate.py                      |   5 +-
 generate_eval_data.py            |  15 +-
 10 files changed, 629 insertions(+), 317 deletions(-)

diff --git a/douzero/dmc/dmc.py b/douzero/dmc/dmc.py
index 55c4a54..175a1e4 100644
--- a/douzero/dmc/dmc.py
+++ b/douzero/dmc/dmc.py
@@ -16,7 +16,7 @@ from .file_writer import FileWriter
 from .models import Model, OldModel
 from .utils import get_batch, log, create_env, create_optimizers, act
 
-mean_episode_return_buf = {p:deque(maxlen=100) for p in ['landlord', 'landlord_up', 'landlord_down', 'bidding']}
+mean_episode_return_buf = {p:deque(maxlen=100) for p in ['landlord', 'landlord_up', 'landlord_front', 'landlord_down', 'bidding']}
 
 def compute_loss(logits, targets):
     loss = ((logits.squeeze(-1) - targets)**2).mean()
@@ -27,7 +27,7 @@ def compute_loss_for_bid(outputs, reward):
 
 def learn(position, actor_models, model, batch, optimizer, flags, lock):
     """Performs a learning (optimization) step."""
-    position_index = {"landlord": 31, "landlord_up": 32, "landlord_down": 33}
+    position_index = {"landlord": 31, "landlord_up": 32, 'landlord_front': 33, "landlord_down": 34}
     print("Learn", position)
     if flags.training_device != "cpu":
         device = torch.device('cuda:'+str(flags.training_device))
@@ -46,7 +46,8 @@ def learn(position, actor_models, model, batch, optimizer, flags, lock):
     with lock:
         learner_outputs = model(obs_z, obs_x, return_value=True)
         if position == "bidding":
-            pass
+            loss = compute_loss(learner_outputs['values'], target)
+            # pass
         else:
             loss = compute_loss(learner_outputs['values'], target)
         stats = {
@@ -101,7 +102,7 @@ def train(flags):
     # Initialize queues
     actor_processes = []
     ctx = mp.get_context('spawn')
-    batch_queues = {"landlord": ctx.SimpleQueue(), "landlord_up": ctx.SimpleQueue(), "landlord_down": ctx.SimpleQueue(), "bidding": ctx.SimpleQueue()}
+    batch_queues = {"landlord": ctx.SimpleQueue(), "landlord_up": ctx.SimpleQueue(), 'landlord_front': ctx.SimpleQueue(), "landlord_down": ctx.SimpleQueue(), "bidding": ctx.SimpleQueue()}
 
     # Learner model for training
     learner_model = Model(device=flags.training_device)
@@ -115,20 +116,22 @@ def train(flags):
         'loss_landlord',
         'mean_episode_return_landlord_up',
         'loss_landlord_up',
+        'mean_episode_return_landlord_front',
+        'loss_landlord_front',
         'mean_episode_return_landlord_down',
         'loss_landlord_down',
         'mean_episode_return_bidding',
         'loss_bidding',
     ]
     frames, stats = 0, {k: 0 for k in stat_keys}
-    position_frames = {'landlord':0, 'landlord_up':0, 'landlord_down':0, 'bidding': 0}
+    position_frames = {'landlord':0, 'landlord_up':0, 'landlord_front':0, 'landlord_down':0, 'bidding': 0}
 
     # Load models if any
     if flags.load_model and os.path.exists(checkpointpath):
         checkpoint_states = torch.load(
             checkpointpath, map_location=("cuda:"+str(flags.training_device) if flags.training_device != "cpu" else "cpu")
         )
-        for k in ['landlord', 'landlord_up', 'landlord_down', 'bidding']: # ['landlord', 'landlord_up', 'landlord_down']
+        for k in ['landlord', 'landlord_up', 'landlord_front', 'landlord_down', 'bidding']: # ['landlord', 'landlord_up', 'landlord_down']
             learner_model.get_model(k).load_state_dict(checkpoint_states["model_state_dict"][k])
             optimizers[k].load_state_dict(checkpoint_states["optimizer_state_dict"][k])
             for device in device_iterator:
@@ -176,12 +179,12 @@ def train(flags):
     threads = []
     locks = {}
     for device in device_iterator:
-        locks[device] = {'landlord': threading.Lock(), 'landlord_up': threading.Lock(), 'landlord_down': threading.Lock(), 'bidding': threading.Lock()}
-    position_locks = {'landlord': threading.Lock(), 'landlord_up': threading.Lock(), 'landlord_down': threading.Lock(), 'bidding': threading.Lock()}
+        locks[device] = {'landlord': threading.Lock(), 'landlord_up': threading.Lock(), 'landlord_front': threading.Lock(), 'landlord_down': threading.Lock(), 'bidding': threading.Lock()}
+    position_locks = {'landlord': threading.Lock(), 'landlord_up': threading.Lock(), 'landlord_front': threading.Lock(), 'landlord_down': threading.Lock(), 'bidding': threading.Lock()}
 
     for device in device_iterator:
         for i in range(flags.num_threads):
-            for position in ['landlord', 'landlord_up', 'landlord_down', 'bidding']:
+            for position in ['landlord', 'landlord_up', 'landlord_front', 'landlord_down', 'bidding']:
                 thread = threading.Thread(
                     target=batch_and_learn, name='batch-and-learn-%d' % i, args=(i,device,position,locks[device][position],position_locks[position]))
                 thread.start()
@@ -202,7 +205,7 @@ def train(flags):
         }, checkpointpath)
 
         # Save the weights for evaluation purpose
-        for position in ['landlord', 'landlord_up', 'landlord_down', 'bidding']: # ['landlord', 'landlord_up', 'landlord_down']
+        for position in ['landlord', 'landlord_up', 'landlord_front', 'landlord_down', 'bidding']: # ['landlord', 'landlord_up', 'landlord_front', 'landlord_down']
             model_weights_dir = os.path.expandvars(os.path.expanduser(
                 '%s/%s/%s' % (flags.savedir, flags.xpid, "general_"+position+'_'+str(frames)+'.ckpt')))
             torch.save(learner_model.get_model(position).state_dict(), model_weights_dir)
@@ -229,15 +232,17 @@ def train(flags):
             fps_avg = np.mean(fps_log)
 
             position_fps = {k:(position_frames[k]-position_start_frames[k])/(end_time-start_time) for k in position_frames}
-            log.info('After %i (L:%i U:%i D:%i) frames: @ %.1f fps (avg@ %.1f fps) (L:%.1f U:%.1f D:%.1f) Stats:\n%s',
+            log.info('After %i (L:%i U:%i F:%i D:%i) frames: @ %.1f fps (avg@ %.1f fps) (L:%.1f U:%.1f F:%.1f D:%.1f) Stats:\n%s',
                      frames,
                      position_frames['landlord'],
                      position_frames['landlord_up'],
+                     position_frames['landlord_front'],
                      position_frames['landlord_down'],
                      fps,
                      fps_avg,
                      position_fps['landlord'],
                      position_fps['landlord_up'],
+                     position_fps['landlord_front'],
                      position_fps['landlord_down'],
                      pprint.pformat(stats))
 
diff --git a/douzero/dmc/models.py b/douzero/dmc/models.py
index c333115..5a6ec2e 100644
--- a/douzero/dmc/models.py
+++ b/douzero/dmc/models.py
@@ -12,13 +12,13 @@ import torch.nn.functional as F
 class LandlordLstmModel(nn.Module):
     def __init__(self):
         super().__init__()
-        self.lstm = nn.LSTM(162, 128, batch_first=True)
-        self.dense1 = nn.Linear(373 + 128, 512)
-        self.dense2 = nn.Linear(512, 512)
-        self.dense3 = nn.Linear(512, 512)
-        self.dense4 = nn.Linear(512, 512)
-        self.dense5 = nn.Linear(512, 512)
-        self.dense6 = nn.Linear(512, 1)
+        self.lstm = nn.LSTM(432, 128, batch_first=True)
+        self.dense1 = nn.Linear(846 + 128, 1024)
+        self.dense2 = nn.Linear(1024, 1024)
+        self.dense3 = nn.Linear(1024, 768)
+        self.dense4 = nn.Linear(768, 512)
+        self.dense5 = nn.Linear(512, 256)
+        self.dense6 = nn.Linear(256, 1)
 
     def forward(self, z, x, return_value=False, flags=None):
         lstm_out, (h_n, _) = self.lstm(z)
@@ -47,13 +47,13 @@ class LandlordLstmModel(nn.Module):
 class FarmerLstmModel(nn.Module):
     def __init__(self):
         super().__init__()
-        self.lstm = nn.LSTM(162, 128, batch_first=True)
-        self.dense1 = nn.Linear(484 + 128, 512)
-        self.dense2 = nn.Linear(512, 512)
-        self.dense3 = nn.Linear(512, 512)
-        self.dense4 = nn.Linear(512, 512)
-        self.dense5 = nn.Linear(512, 512)
-        self.dense6 = nn.Linear(512, 1)
+        self.lstm = nn.LSTM(432, 128, batch_first=True)
+        self.dense1 = nn.Linear(1178 + 128, 1024)
+        self.dense2 = nn.Linear(1024, 1024)
+        self.dense3 = nn.Linear(1024, 768)
+        self.dense4 = nn.Linear(768, 512)
+        self.dense5 = nn.Linear(512, 256)
+        self.dense6 = nn.Linear(256, 1)
 
     def forward(self, z, x, return_value=False, flags=None):
         lstm_out, (h_n, _) = self.lstm(z)
@@ -82,8 +82,8 @@ class FarmerLstmModel(nn.Module):
 class LandlordLstmNewModel(nn.Module):
     def __init__(self):
         super().__init__()
-        self.lstm = nn.LSTM(162, 128, batch_first=True)
-        self.dense1 = nn.Linear(373 + 128, 512)
+        self.lstm = nn.LSTM(432, 128, batch_first=True)
+        self.dense1 = nn.Linear(846 + 128, 512)
         self.dense2 = nn.Linear(512, 512)
         self.dense3 = nn.Linear(512, 512)
         self.dense4 = nn.Linear(512, 512)
@@ -117,8 +117,8 @@ class LandlordLstmNewModel(nn.Module):
 class FarmerLstmNewModel(nn.Module):
     def __init__(self):
         super().__init__()
-        self.lstm = nn.LSTM(162, 128, batch_first=True)
-        self.dense1 = nn.Linear(484 + 128, 512)
+        self.lstm = nn.LSTM(432, 128, batch_first=True)
+        self.dense1 = nn.Linear(1178 + 128, 512)
         self.dense2 = nn.Linear(512, 512)
         self.dense3 = nn.Linear(512, 512)
         self.dense4 = nn.Linear(512, 512)
@@ -253,20 +253,22 @@ class GeneralModel(nn.Module):
     def __init__(self):
         super().__init__()
         self.in_planes = 80
-        #input 1*54*41
+        #input 1*108*41
         self.conv1 = nn.Conv1d(40, 80, kernel_size=(3,),
-                               stride=(2,), padding=1, bias=False) #1*27*80
+                               stride=(2,), padding=1, bias=False) #1*54*80
 
         self.bn1 = nn.BatchNorm1d(80)
 
-        self.layer1 = self._make_layer(BasicBlock, 80, 2, stride=2)#1*14*80
-        self.layer2 = self._make_layer(BasicBlock, 160, 2, stride=2)#1*7*160
-        self.layer3 = self._make_layer(BasicBlock, 320, 2, stride=2)#1*4*320
+        self.layer1 = self._make_layer(BasicBlock, 80, 2, stride=2)#1*27*80
+        self.layer2 = self._make_layer(BasicBlock, 160, 2, stride=2)#1*14*160
+        self.layer3 = self._make_layer(BasicBlock, 320, 2, stride=2)#1*7*320
+        self.layer4 = self._make_layer(BasicBlock, 640, 2, stride=2)#1*4*640
         # self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
-        self.linear1 = nn.Linear(320 * BasicBlock.expansion * 4 + 15 * 4, 1024)
-        self.linear2 = nn.Linear(1024, 512)
-        self.linear3 = nn.Linear(512, 256)
-        self.linear4 = nn.Linear(256, 1)
+        self.linear1 = nn.Linear(640 * BasicBlock.expansion * 4 + 24 * 4, 2048)
+        self.linear2 = nn.Linear(2048, 1024)
+        self.linear3 = nn.Linear(1024, 512)
+        self.linear4 = nn.Linear(512, 256)
+        self.linear5 = nn.Linear(256, 1)
 
     def _make_layer(self, block, planes, num_blocks, stride):
         strides = [stride] + [1] * (num_blocks - 1)
@@ -281,12 +283,14 @@ class GeneralModel(nn.Module):
         out = self.layer1(out)
         out = self.layer2(out)
         out = self.layer3(out)
+        out = self.layer4(out)
         out = out.flatten(1,2)
         out = torch.cat([x,x,x,x,out], dim=-1)
         out = F.leaky_relu_(self.linear1(out))
         out = F.leaky_relu_(self.linear2(out))
         out = F.leaky_relu_(self.linear3(out))
         out = F.leaky_relu_(self.linear4(out))
+        out = F.leaky_relu_(self.linear5(out))
         if return_value:
             return dict(values=out)
         else:
@@ -304,7 +308,7 @@ class BidModel(nn.Module):
     def __init__(self):
         super().__init__()
 
-        self.dense1 = nn.Linear(114, 512)
+        self.dense1 = nn.Linear(208, 512)
         self.dense2 = nn.Linear(512, 512)
         self.dense3 = nn.Linear(512, 512)
         self.dense4 = nn.Linear(512, 512)
@@ -342,15 +346,18 @@ class BidModel(nn.Module):
 model_dict = {}
 model_dict['landlord'] = LandlordLstmModel
 model_dict['landlord_up'] = FarmerLstmModel
+model_dict['landlord_front'] = FarmerLstmModel
 model_dict['landlord_down'] = FarmerLstmModel
 model_dict_new = {}
 model_dict_new['landlord'] = GeneralModel
 model_dict_new['landlord_up'] = GeneralModel
+model_dict_new['landlord_front'] = GeneralModel
 model_dict_new['landlord_down'] = GeneralModel
 model_dict_new['bidding'] = BidModel
 model_dict_lstm = {}
 model_dict_lstm['landlord'] = GeneralModel
 model_dict_lstm['landlord_up'] = GeneralModel
+model_dict_lstm['landlord_front'] = GeneralModel
 model_dict_lstm['landlord_down'] = GeneralModel
 
 class General_Model:
@@ -365,6 +372,7 @@ class General_Model:
         # model = GeneralModel().to(torch.device(device))
         self.models['landlord'] = GeneralModel1().to(torch.device(device))
         self.models['landlord_up'] = GeneralModel1().to(torch.device(device))
+        self.models['landlord_front'] = GeneralModel1().to(torch.device(device))
         self.models['landlord_down'] = GeneralModel1().to(torch.device(device))
         self.models['bidding'] = BidModel().to(torch.device(device))
 
@@ -375,12 +383,14 @@ class General_Model:
     def share_memory(self):
         self.models['landlord'].share_memory()
         self.models['landlord_up'].share_memory()
+        self.models['landlord_front'].share_memory()
         self.models['landlord_down'].share_memory()
         self.models['bidding'].share_memory()
 
     def eval(self):
         self.models['landlord'].eval()
         self.models['landlord_up'].eval()
+        self.models['landlord_front'].eval()
         self.models['landlord_down'].eval()
         self.models['bidding'].eval()
 
@@ -404,6 +414,7 @@ class OldModel:
             device = 'cuda:' + str(device)
         self.models['landlord'] = LandlordLstmModel().to(torch.device(device))
         self.models['landlord_up'] = FarmerLstmModel().to(torch.device(device))
+        self.models['landlord_front'] = FarmerLstmModel().to(torch.device(device))
         self.models['landlord_down'] = FarmerLstmModel().to(torch.device(device))
 
     def forward(self, position, z, x, training=False, flags=None):
@@ -413,11 +424,13 @@ class OldModel:
     def share_memory(self):
         self.models['landlord'].share_memory()
         self.models['landlord_up'].share_memory()
+        self.models['landlord_front'].share_memory()
         self.models['landlord_down'].share_memory()
 
     def eval(self):
         self.models['landlord'].eval()
         self.models['landlord_up'].eval()
+        self.models['landlord_front'].eval()
         self.models['landlord_down'].eval()
 
     def parameters(self, position):
@@ -442,6 +455,7 @@ class Model:
         # model = GeneralModel().to(torch.device(device))
         self.models['landlord'] = GeneralModel().to(torch.device(device))
         self.models['landlord_up'] = GeneralModel().to(torch.device(device))
+        self.models['landlord_front'] = GeneralModel().to(torch.device(device))
         self.models['landlord_down'] = GeneralModel().to(torch.device(device))
         self.models['bidding'] = BidModel().to(torch.device(device))
 
@@ -452,12 +466,14 @@ class Model:
     def share_memory(self):
         self.models['landlord'].share_memory()
         self.models['landlord_up'].share_memory()
+        self.models['landlord_front'].share_memory()
         self.models['landlord_down'].share_memory()
         self.models['bidding'].share_memory()
 
     def eval(self):
         self.models['landlord'].eval()
         self.models['landlord_up'].eval()
+        self.models['landlord_front'].eval()
         self.models['landlord_down'].eval()
         self.models['bidding'].eval()
 
@@ -470,11 +486,3 @@ class Model:
     def get_models(self):
         return self.models
 
-
-
-
-
-
-
-
-
diff --git a/douzero/dmc/utils.py b/douzero/dmc/utils.py
index 1ed0c7d..70eced7 100644
--- a/douzero/dmc/utils.py
+++ b/douzero/dmc/utils.py
@@ -16,11 +16,15 @@ from douzero.env import Env
 Card2Column = {3: 0, 4: 1, 5: 2, 6: 3, 7: 4, 8: 5, 9: 6, 10: 7,
                11: 8, 12: 9, 13: 10, 14: 11, 17: 12}
 
-NumOnes2Array = {0: np.array([0, 0, 0, 0]),
-                 1: np.array([1, 0, 0, 0]),
-                 2: np.array([1, 1, 0, 0]),
-                 3: np.array([1, 1, 1, 0]),
-                 4: np.array([1, 1, 1, 1])}
+NumOnes2Array = {0: np.array([0, 0, 0, 0, 0, 0, 0, 0]),
+                 1: np.array([1, 0, 0, 0, 0, 0, 0, 0]),
+                 2: np.array([1, 1, 0, 0, 0, 0, 0, 0]),
+                 3: np.array([1, 1, 1, 0, 0, 0, 0, 0]),
+                 4: np.array([1, 1, 1, 1, 0, 0, 0, 0]),
+                 5: np.array([1, 1, 1, 1, 1, 0, 0, 0]),
+                 6: np.array([1, 1, 1, 1, 1, 1, 0, 0]),
+                 7: np.array([1, 1, 1, 1, 1, 1, 1, 0]),
+                 8: np.array([1, 1, 1, 1, 1, 1, 1, 1])}
 
 shandle = logging.StreamHandler()
 shandle.setFormatter(
@@ -60,7 +64,7 @@ def create_optimizers(flags, learner_model):
     """
-    Create three optimizers for the three positions
+    Create one optimizer for each position (the four seats plus bidding)
     """
-    positions = ['landlord', 'landlord_up', 'landlord_down', 'bidding']
+    positions = ['landlord', 'landlord_up', 'landlord_front', 'landlord_down', 'bidding']
     optimizers = {}
     for position in positions:
         optimizer = RAdam(
@@ -72,7 +76,7 @@ def create_optimizers(flags, learner_model):
 
 
 def act(i, device, batch_queues, model, flags):
-    positions = ['landlord', 'landlord_up', 'landlord_down', 'bidding']
+    positions = ['landlord', 'landlord_up', 'landlord_front', 'landlord_down', 'bidding']
     for pos in positions:
         model.models[pos].to(torch.device(device if device == "cpu" else ("cuda:"+str(device))))
     try:
@@ -90,9 +94,9 @@ def act(i, device, batch_queues, model, flags):
         type_buf = {p: [] for p in positions}
         obs_x_batch_buf = {p: [] for p in positions}
 
-        position_index = {"landlord": 31, "landlord_up": 32, "landlord_down": 33}
-        bid_type_index = {"landlord": 41, "landlord_up": 42, "landlord_down": 43}
-        bid_type_map = {41: "landlord", 42: "landlord_up", 43: "landlord_down"}
+        position_index = {"landlord": 31, "landlord_up": 32, "landlord_front": 33, "landlord_down": 34}
+        bid_type_index = {"landlord": 41, "landlord_up": 42, "landlord_front": 43, "landlord_down": 44}  # must stay the inverse of bid_type_map
+        bid_type_map = {41: "landlord", 42: "landlord_up", 43: "landlord_front", 44: "landlord_down"}
 
         position, obs, env_output = env.initial(model, device, flags=flags)
         bid_obs_buffer = env_output["begin_buf"]["bid_obs_buffer"]
@@ -149,7 +153,7 @@ def act(i, device, batch_queues, model, flags):
                                     target_buf[p].append(episode_return)
                     break
             for p in positions:
-                if size[p] > T:
+                while size[p] > T:
                     # print(p, "epr", torch.stack([torch.tensor(ndarr, device="cpu") for ndarr in episode_return_buf[p][:T]]),)
                     batch_queues[p].put({
                         "done": torch.stack([torch.tensor(ndarr, device="cpu") for ndarr in done_buf[p][:T]]),
@@ -182,18 +186,22 @@ def _cards2tensor(list_cards):
     See Figure 2 in https://arxiv.org/pdf/2106.06135.pdf
     """
     if len(list_cards) == 0:
-        return torch.zeros(54, dtype=torch.int8)
+        return torch.zeros(108, dtype=torch.int8)
 
-    matrix = np.zeros([4, 13], dtype=np.int8)
-    jokers = np.zeros(2, dtype=np.int8)
+    matrix = np.zeros([8, 13], dtype=np.int8)
+    jokers = np.zeros(4, dtype=np.int8)
     counter = Counter(list_cards)
     for card, num_times in counter.items():
         if card < 20:
             matrix[:, Card2Column[card]] = NumOnes2Array[num_times]
         elif card == 20:
             jokers[0] = 1
+            if num_times == 2:
+                jokers[1] = 1
         elif card == 30:
-            jokers[1] = 1
+            jokers[2] = 1
+            if num_times == 2:
+                jokers[3] = 1
     matrix = np.concatenate((matrix.flatten('F'), jokers))
     matrix = torch.from_numpy(matrix)
     return matrix
diff --git a/douzero/env/env.py b/douzero/env/env.py
index 75ab10f..91947e2 100644
--- a/douzero/env/env.py
+++ b/douzero/env/env.py
@@ -11,17 +11,22 @@ env_url = "http://od.vcccz.com/hechuan/env.py"
 Card2Column = {3: 0, 4: 1, 5: 2, 6: 3, 7: 4, 8: 5, 9: 6, 10: 7,
                11: 8, 12: 9, 13: 10, 14: 11, 17: 12}
 
-NumOnes2Array = {0: np.array([0, 0, 0, 0]),
-                 1: np.array([1, 0, 0, 0]),
-                 2: np.array([1, 1, 0, 0]),
-                 3: np.array([1, 1, 1, 0]),
-                 4: np.array([1, 1, 1, 1])}
+NumOnes2Array = {0: np.array([0, 0, 0, 0, 0, 0, 0, 0]),
+                 1: np.array([1, 0, 0, 0, 0, 0, 0, 0]),
+                 2: np.array([1, 1, 0, 0, 0, 0, 0, 0]),
+                 3: np.array([1, 1, 1, 0, 0, 0, 0, 0]),
+                 4: np.array([1, 1, 1, 1, 0, 0, 0, 0]),
+                 5: np.array([1, 1, 1, 1, 1, 0, 0, 0]),
+                 6: np.array([1, 1, 1, 1, 1, 1, 0, 0]),
+                 7: np.array([1, 1, 1, 1, 1, 1, 1, 0]),
+                 8: np.array([1, 1, 1, 1, 1, 1, 1, 1])}
+
 
 deck = []
 for i in range(3, 15):
-    deck.extend([i for _ in range(4)])
-deck.extend([17 for _ in range(4)])
-deck.extend([20, 30])
+    deck.extend([i for _ in range(8)])
+deck.extend([17 for _ in range(8)])
+deck.extend([20, 20, 30, 30])
 
 
 class Env:
@@ -46,7 +51,7 @@ class Env:
         # Initialize players
         # We use three dummy player for the target position
         self.players = {}
-        for position in ['landlord', 'landlord_up', 'landlord_down']:
+        for position in ['landlord', 'landlord_up', 'landlord_front', 'landlord_down']:
             self.players[position] = DummyAgent(position)
 
         # Initialize the internal environment
@@ -67,10 +72,11 @@ class Env:
         if model is None:
             _deck = deck.copy()
             np.random.shuffle(_deck)
-            card_play_data = {'landlord': _deck[:20],
-                              'landlord_up': _deck[20:37],
-                              'landlord_down': _deck[37:54],
-                              'three_landlord_cards': _deck[17:20],
+            card_play_data = {'landlord': _deck[:33],
+                              'landlord_up': _deck[33:58],
+                              'landlord_front': _deck[58:83],
+                              'landlord_down': _deck[83:108],
+                            #   'three_landlord_cards': _deck[17:20],
                               }
             for key in card_play_data:
                 card_play_data[key].sort()
@@ -97,18 +103,20 @@ class Env:
                 _deck = deck.copy()
                 np.random.shuffle(_deck)
                 card_play_data = [
-                    _deck[:17],
-                    _deck[17:34],
-                    _deck[34:51],
+                    _deck[:25],
+                    _deck[25:50],
+                    _deck[50:75],
+                    _deck[75:100],
                 ]
-                for i in range(3):
+                for i in range(4):
                     card_play_data[i].sort()
-                landlord_cards = _deck[51:54]
+                landlord_cards = _deck[100:108]
                 landlord_cards.sort()
-                bid_info = np.array([[-1, -1, -1],
-                                     [-1, -1, -1],
-                                     [-1, -1, -1],
-                                     [-1, -1, -1]])
+                bid_info = np.array([[-1, -1, -1, -1],
+                                     [-1, -1, -1, -1],
+                                     [-1, -1, -1, -1],
+                                     [-1, -1, -1, -1],
+                                     [-1, -1, -1, -1]])
                 bidding_player = random.randint(0, 2)
                 # bidding_player = 0 # debug
                 first_bid = -1
@@ -116,7 +124,7 @@ class Env:
                 bid_count = 0
                 if bid_limit <= 0:
                     force_bid = True
-                for r in range(3):
+                for r in range(4):
                     bidding_obs = _get_obs_for_bid(bidding_player, bid_info, card_play_data[bidding_player])
                     with torch.no_grad():
                         action = model.forward("bidding", torch.tensor(bidding_obs["z_batch"], device=device),
@@ -137,19 +145,19 @@ class Env:
                         bid_count += 1
                         if first_bid == -1:
                             first_bid = bidding_player
-                        for p in range(3):
+                        for p in range(4):
                             if p == bidding_player:
                                 bid_info[r][p] = 1
                             else:
                                 bid_info[r][p] = 0
                     else:
-                        bid_info[r] = [0, 0, 0]
-                    bidding_player = (bidding_player + 1) % 3
+                        bid_info[r] = [0, 0, 0, 0]
+                    bidding_player = (bidding_player + 1) % 4
                 one_count = np.count_nonzero(bid_info == 1)
                 if one_count == 0:
                     continue
                 elif one_count > 1:
-                    r = 3
+                    r = 4
                     bidding_player = first_bid
                     bidding_obs = _get_obs_for_bid(bidding_player, bid_info, card_play_data[bidding_player])
                     with torch.no_grad():
@@ -163,7 +171,7 @@ class Env:
                     if action["action"] == 1:
                         last_bid = bidding_player
                         bid_count += 1
-                        for p in range(3):
+                        for p in range(4):
                             if p == bidding_player:
                                 bid_info[r][p] = 1
                             else:
@@ -171,20 +179,23 @@ class Env:
                 break
             card_play_data[last_bid].extend(landlord_cards)
             card_play_data = {'landlord': card_play_data[last_bid],
-                              'landlord_up': card_play_data[(last_bid - 1) % 3],
-                              'landlord_down': card_play_data[(last_bid + 1) % 3],
-                              'three_landlord_cards': landlord_cards,
+                              'landlord_up': card_play_data[(last_bid - 1) % 4],
+                              'landlord_down': card_play_data[(last_bid + 1) % 4],
+                              'landlord_front': card_play_data[(last_bid + 2) % 4],
+                            #   'three_landlord_cards': landlord_cards,
                               }
             card_play_data["landlord"].sort()
             player_ids = {
                 'landlord': last_bid,
-                'landlord_up': (last_bid - 1) % 3,
-                'landlord_down': (last_bid + 1) % 3,
+                'landlord_up': (last_bid - 1) % 4,
+                'landlord_down': (last_bid + 1) % 4,
+                'landlord_front': (last_bid + 2) % 4,
             }
             player_positions = {
                 last_bid: 'landlord',
-                (last_bid - 1) % 3: 'landlord_up',
-                (last_bid + 1) % 3: 'landlord_down'
+                (last_bid - 1) % 4: 'landlord_up',
+                (last_bid + 1) % 4: 'landlord_down',
+                (last_bid + 2) % 4: 'landlord_front',
             }
             for bid_obs in bid_obs_buffer:
                 bid_obs.update({"position": player_positions[bid_obs["pid"]]})
@@ -192,14 +203,15 @@ class Env:
             # Initialize the cards
             self._env.card_play_init(card_play_data)
             multiply_map = [
-                np.array([1, 0, 0]),
-                np.array([0, 1, 0]),
-                np.array([0, 0, 1])
+                np.array([1, 0, 0, 0]),
+                np.array([0, 1, 0, 0]),
+                np.array([0, 0, 1, 0]),
+                np.array([0, 0, 0, 1])
             ]
-            for pos in ["landlord", "landlord_up", "landlord_down"]:
+            for pos in ["landlord", "landlord_up", "landlord_front", "landlord_down"]:
                 pid = player_ids[pos]
                 self._env.info_sets[pos].player_id = pid
-                self._env.info_sets[pos].bid_info = bid_info[:, [(pid - 1) % 3, pid, (pid + 1) % 3]]
+                self._env.info_sets[pos].bid_info = bid_info[:, [(pid - 1) % 4, pid, (pid + 1) % 4, (pid + 2) % 4]]
                 self._env.bid_count = bid_count
                 # multiply_obs = _get_obs_for_multiply(pos, self._env.info_sets[pos].bid_info, card_play_data[pos],
                 #                                      landlord_cards)
@@ -245,11 +257,13 @@ class Env:
                 "play": {
                     "landlord": self._get_reward("landlord"),
                     "landlord_up": self._get_reward("landlord_up"),
+                    "landlord_front": self._get_reward("landlord_front"),
                     "landlord_down": self._get_reward("landlord_down")
                 },
                 "bid": {
-                    "landlord": self._get_reward_bidding("landlord")*2,
+                    "landlord": self._get_reward_bidding("landlord")*3,
                     "landlord_up": self._get_reward_bidding("landlord_up"),
+                    "landlord_front": self._get_reward_bidding("landlord_front"),
                     "landlord_down": self._get_reward_bidding("landlord_down")
                 }
             }
@@ -269,16 +283,16 @@ class Env:
         self_bomb_num = self._env.pos_bomb_num[pos]
         if winner == 'landlord':
             if self.objective == 'adp':
-                return (1.1 - self._env.step_count * 0.0033) * 1.3 ** (bomb_num +self._env.multiply_count[pos]) /8
+                return (1.1 - self._env.step_count * 0.0033) * 1.3 ** (bomb_num[0] + bomb_num[1] + self._env.multiply_count[pos]) /8
             elif self.objective == 'logadp':
                 return (1.0 - self._env.step_count * 0.0033) * 1.3**self_bomb_num * 2**self._env.multiply_count[pos] / 4
             else:
                 return 1.0 - self._env.step_count * 0.0033
         else:
             if self.objective == 'adp':
-                return (-1.1 - self._env.step_count * 0.0033) * 1.3 ** (bomb_num +self._env.multiply_count[pos]) /8
+                return (-1.1 - self._env.step_count * 0.0033) * 1.3 ** (bomb_num[0] + bomb_num[1] +self._env.multiply_count[pos]) /8
             elif self.objective == 'logadp':
-                return (-1.0 + self._env.step_count * 0.0033) * 1.3**self_bomb_num * 2**self._env.multiply_count[pos] / 4
+                return (-1.0 + self._env.step_count * 0.0033) * 1.3**(self_bomb_num) * 2**self._env.multiply_count[pos] / 4
             else:
                 return -1.0 + self._env.step_count * 0.0033
 
@@ -371,12 +388,12 @@ def get_obs(infoset, use_general=True):
     This function obtains observations with imperfect information
-    from the infoset. It has three branches since we encode
+    from the infoset. It has four branches since we encode
     different features for different positions.
 
     This function will return dictionary named `obs`. It contains
     several fields. These fields will be used to train the model.
     One can play with those features to improve the performance.
 
-    `position` is a string that can be landlord/landlord_down/landlord_up
+    `position` is a string that can be landlord/landlord_down/landlord_front/landlord_up
 
-    `x_batch` is a batch of features (excluding the hisorical moves).
+    `x_batch` is a batch of features (excluding the historical moves).
     It also encodes the action feature
@@ -391,7 +408,7 @@ def get_obs(infoset, use_general=True):
     `z`: same as z_batch but not a batch.
     """
     if use_general:
-        if infoset.player_position not in ["landlord", "landlord_up", "landlord_down"]:
+        if infoset.player_position not in ["landlord", "landlord_up", "landlord_front", "landlord_down"]:
             raise ValueError('')
         return _get_obs_general(infoset, infoset.player_position)
     else:
@@ -399,6 +416,8 @@ def get_obs(infoset, use_general=True):
             return _get_obs_landlord(infoset)
         elif infoset.player_position == 'landlord_up':
             return _get_obs_landlord_up(infoset)
+        elif infoset.player_position == 'landlord_front':
+            return _get_obs_landlord_front(infoset)
         elif infoset.player_position == 'landlord_down':
             return _get_obs_landlord_down(infoset)
         else:
@@ -424,18 +443,22 @@ def _cards2array(list_cards):
     the representations.
     """
     if len(list_cards) == 0:
-        return np.zeros(54, dtype=np.int8)
+        return np.zeros(108, dtype=np.int8)
 
-    matrix = np.zeros([4, 13], dtype=np.int8)
-    jokers = np.zeros(2, dtype=np.int8)
+    matrix = np.zeros([8, 13], dtype=np.int8)
+    jokers = np.zeros(4, dtype=np.int8)
     counter = Counter(list_cards)
     for card, num_times in counter.items():
         if card < 20:
             matrix[:, Card2Column[card]] = NumOnes2Array[num_times]
         elif card == 20:
             jokers[0] = 1
+            if num_times == 2:
+                jokers[1] = 1
         elif card == 30:
-            jokers[1] = 1
+            jokers[2] = 1
+            if num_times == 2:
+                jokers[3] = 1
     return np.concatenate((matrix.flatten('F'), jokers))
 
 
@@ -449,7 +472,7 @@ def _cards2array(list_cards):
 #     Finally, we obtain a 5x162 matrix, which will be fed
 #     into LSTM for encoding.
 #     """
-#     action_seq_array = np.zeros((len(action_seq_list), 54))
+#     action_seq_array = np.zeros((len(action_seq_list), 108))
 #     for row, list_cards in enumerate(action_seq_list):
 #         action_seq_array[row, :] = _cards2array(list_cards)
 #     # action_seq_array = action_seq_array.reshape(5, 162)
@@ -458,26 +481,26 @@ def _cards2array(list_cards):
 def _action_seq_list2array(action_seq_list, new_model=True):
     """
     A utility function to encode the historical moves.
-    We encode the historical 15 actions. If there is
-    no 15 actions, we pad the features with 0. Since
-    three moves is a round in DouDizhu, we concatenate
-    the representations for each consecutive three moves.
-    Finally, we obtain a 5x162 matrix, which will be fed
+    We encode the historical 20 actions. If there are
+    fewer than 20 actions, we pad the features with 0. Since
+    four moves make a round in four-player DouDizhu, we concatenate
+    the representations for each consecutive four moves.
+    Finally, we obtain a 5x432 matrix, which will be fed
     into LSTM for encoding.
     """
 
     if new_model:
-        position_map = {"landlord": 0, "landlord_up": 1, "landlord_down": 2}
-        action_seq_array = np.ones((len(action_seq_list), 54)) * -1  # Default Value -1 for not using area
+        # position_map = {"landlord": 0, "landlord_up": 1, "landlord_front": 2, "landlord_down": 3}
+        action_seq_array = np.ones((len(action_seq_list), 108)) * -1  # Default Value -1 for not using area
         for row, list_cards in enumerate(action_seq_list):
             if list_cards != []:
-                action_seq_array[row, :54] = _cards2array(list_cards[1])
+                action_seq_array[row, :108] = _cards2array(list_cards[1])
     else:
-        action_seq_array = np.zeros((len(action_seq_list), 54))
+        action_seq_array = np.zeros((len(action_seq_list), 108))
         for row, list_cards in enumerate(action_seq_list):
             if list_cards != []:
                 action_seq_array[row, :] = _cards2array(list_cards[1])
-        action_seq_array = action_seq_array.reshape(5, 162)
+        action_seq_array = action_seq_array.reshape(5, 432)
     return action_seq_array
 
     # action_seq_array = np.zeros((len(action_seq_list), 54))
@@ -487,10 +510,10 @@ def _action_seq_list2array(action_seq_list, new_model=True):
     # return action_seq_array
 
 
-def _process_action_seq(sequence, length=15, new_model=True):
+def _process_action_seq(sequence, length=20, new_model=True):
     """
     A utility function encoding historical moves. We
-    encode 15 moves. If there is no 15 moves, we pad
+    encode 20 moves. If there is no 20 moves, we pad
     with zeros.
     """
     sequence = sequence[-length:].copy()
@@ -508,8 +531,8 @@ def _get_one_hot_bomb(bomb_num):
     A utility function to encode the number of bombs
     into one-hot representation.
     """
-    one_hot = np.zeros(15)
-    one_hot[bomb_num] = 1
+    one_hot = np.zeros(29)
+    one_hot[bomb_num[0] + bomb_num[1]] = 1
     return one_hot
 
 
@@ -536,13 +559,19 @@ def _get_obs_landlord(infoset):
         my_action_batch[j, :] = _cards2array(action)
 
     landlord_up_num_cards_left = _get_one_hot_array(
-        infoset.num_cards_left_dict['landlord_up'], 17)
+        infoset.num_cards_left_dict['landlord_up'], 25)
     landlord_up_num_cards_left_batch = np.repeat(
         landlord_up_num_cards_left[np.newaxis, :],
         num_legal_actions, axis=0)
 
+    landlord_front_num_cards_left = _get_one_hot_array(
+        infoset.num_cards_left_dict['landlord_front'], 25)
+    landlord_front_num_cards_left_batch = np.repeat(
+        landlord_front_num_cards_left[np.newaxis, :],
+        num_legal_actions, axis=0)
+
     landlord_down_num_cards_left = _get_one_hot_array(
-        infoset.num_cards_left_dict['landlord_down'], 17)
+        infoset.num_cards_left_dict['landlord_down'], 25)
     landlord_down_num_cards_left_batch = np.repeat(
         landlord_down_num_cards_left[np.newaxis, :],
         num_legal_actions, axis=0)
@@ -553,6 +582,12 @@ def _get_obs_landlord(infoset):
         landlord_up_played_cards[np.newaxis, :],
         num_legal_actions, axis=0)
 
+    landlord_front_played_cards = _cards2array(
+        infoset.played_cards['landlord_front'])
+    landlord_front_played_cards_batch = np.repeat(
+        landlord_front_played_cards[np.newaxis, :],
+        num_legal_actions, axis=0)
+
     landlord_down_played_cards = _cards2array(
         infoset.played_cards['landlord_down'])
     landlord_down_played_cards_batch = np.repeat(
@@ -569,8 +604,10 @@ def _get_obs_landlord(infoset):
                          other_handcards_batch,
                          last_action_batch,
                          landlord_up_played_cards_batch,
+                         landlord_front_played_cards_batch,
                          landlord_down_played_cards_batch,
                          landlord_up_num_cards_left_batch,
+                         landlord_front_num_cards_left_batch,
                          landlord_down_num_cards_left_batch,
                          bomb_num_batch,
                          my_action_batch))
@@ -578,23 +615,25 @@ def _get_obs_landlord(infoset):
                              other_handcards,
                              last_action,
                              landlord_up_played_cards,
+                             landlord_front_played_cards,
                              landlord_down_played_cards,
                              landlord_up_num_cards_left,
+                             landlord_front_num_cards_left,
                              landlord_down_num_cards_left,
                              bomb_num))
     z = _action_seq_list2array(_process_action_seq(
-        infoset.card_play_action_seq, 15, False), False)
+        infoset.card_play_action_seq, 20, False), False)
     z_batch = np.repeat(
         z[np.newaxis, :, :],
         num_legal_actions, axis=0)
     obs = {
-        'position': 'landlord',
-        'x_batch': x_batch.astype(np.float32),
-        'z_batch': z_batch.astype(np.float32),
-        'legal_actions': infoset.legal_actions,
-        'x_no_action': x_no_action.astype(np.int8),
-        'z': z.astype(np.int8),
-    }
+            'position': 'landlord',
+            'x_batch': x_batch.astype(np.float32),
+            'z_batch': z_batch.astype(np.float32),
+            'legal_actions': infoset.legal_actions,
+            'x_no_action': x_no_action.astype(np.int8),
+            'z': z.astype(np.int8),
+          }
     return obs
 
 def _get_obs_landlord_up(infoset):
@@ -625,7 +664,7 @@ def _get_obs_landlord_up(infoset):
         last_landlord_action[np.newaxis, :],
         num_legal_actions, axis=0)
     landlord_num_cards_left = _get_one_hot_array(
-        infoset.num_cards_left_dict['landlord'], 20)
+        infoset.num_cards_left_dict['landlord'], 33)
     landlord_num_cards_left_batch = np.repeat(
         landlord_num_cards_left[np.newaxis, :],
         num_legal_actions, axis=0)
@@ -642,7 +681,7 @@ def _get_obs_landlord_up(infoset):
         last_teammate_action[np.newaxis, :],
         num_legal_actions, axis=0)
     teammate_num_cards_left = _get_one_hot_array(
-        infoset.num_cards_left_dict['landlord_down'], 17)
+        infoset.num_cards_left_dict['landlord_down'], 25)
     teammate_num_cards_left_batch = np.repeat(
         teammate_num_cards_left[np.newaxis, :],
         num_legal_actions, axis=0)
@@ -653,6 +692,144 @@ def _get_obs_landlord_up(infoset):
         teammate_played_cards[np.newaxis, :],
         num_legal_actions, axis=0)
 
+    last_teammate_front_action = _cards2array(
+        infoset.last_move_dict['landlord_front'])
+    last_teammate_front_action_batch = np.repeat(
+        last_teammate_front_action[np.newaxis, :],
+        num_legal_actions, axis=0)
+    teammate_front_num_cards_left = _get_one_hot_array(
+        infoset.num_cards_left_dict['landlord_front'], 25)
+    teammate_front_num_cards_left_batch = np.repeat(
+        teammate_front_num_cards_left[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    teammate_front_played_cards = _cards2array(
+        infoset.played_cards['landlord_front'])
+    teammate_front_played_cards_batch = np.repeat(
+        teammate_front_played_cards[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    bomb_num = _get_one_hot_bomb(
+        infoset.bomb_num)
+    bomb_num_batch = np.repeat(
+        bomb_num[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    x_batch = np.hstack((my_handcards_batch,
+                         other_handcards_batch,
+                         landlord_played_cards_batch,
+                         teammate_played_cards_batch,
+                         teammate_front_played_cards_batch,
+                         last_action_batch,
+                         last_landlord_action_batch,
+                         last_teammate_action_batch,
+                         last_teammate_front_action_batch,
+                         landlord_num_cards_left_batch,
+                         teammate_num_cards_left_batch,
+                         teammate_front_num_cards_left_batch,
+                         bomb_num_batch,
+                         my_action_batch))
+    x_no_action = np.hstack((my_handcards,
+                             other_handcards,
+                             landlord_played_cards,
+                             teammate_played_cards,
+                             teammate_front_played_cards,
+                             last_action,
+                             last_landlord_action,
+                             last_teammate_action,
+                             last_teammate_front_action,
+                             landlord_num_cards_left,
+                             teammate_num_cards_left,
+                             teammate_front_num_cards_left,
+                             bomb_num))
+    z = _action_seq_list2array(_process_action_seq(
+        infoset.card_play_action_seq, 20, False), False)
+    z_batch = np.repeat(
+        z[np.newaxis, :, :],
+        num_legal_actions, axis=0)
+    obs = {
+            'position': 'landlord_up',
+            'x_batch': x_batch.astype(np.float32),
+            'z_batch': z_batch.astype(np.float32),
+            'legal_actions': infoset.legal_actions,
+            'x_no_action': x_no_action.astype(np.int8),
+            'z': z.astype(np.int8),
+          }
+    return obs
+
+def _get_obs_landlord_front(infoset):
+    """
+    Obtain the landlord_front features. See Table 5 in
+    https://arxiv.org/pdf/2106.06135.pdf
+    """
+    num_legal_actions = len(infoset.legal_actions)
+    my_handcards = _cards2array(infoset.player_hand_cards)
+    my_handcards_batch = np.repeat(my_handcards[np.newaxis, :],
+                                   num_legal_actions, axis=0)
+
+    other_handcards = _cards2array(infoset.other_hand_cards)
+    other_handcards_batch = np.repeat(other_handcards[np.newaxis, :],
+                                      num_legal_actions, axis=0)
+
+    last_action = _cards2array(infoset.last_move)
+    last_action_batch = np.repeat(last_action[np.newaxis, :],
+                                  num_legal_actions, axis=0)
+
+    my_action_batch = np.zeros(my_handcards_batch.shape)
+    for j, action in enumerate(infoset.legal_actions):
+        my_action_batch[j, :] = _cards2array(action)
+
+    last_landlord_action = _cards2array(
+        infoset.last_move_dict['landlord'])
+    last_landlord_action_batch = np.repeat(
+        last_landlord_action[np.newaxis, :],
+        num_legal_actions, axis=0)
+    landlord_num_cards_left = _get_one_hot_array(
+        infoset.num_cards_left_dict['landlord'], 33)
+    landlord_num_cards_left_batch = np.repeat(
+        landlord_num_cards_left[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    landlord_played_cards = _cards2array(
+        infoset.played_cards['landlord'])
+    landlord_played_cards_batch = np.repeat(
+        landlord_played_cards[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    last_teammate_action = _cards2array(
+        infoset.last_move_dict['landlord_down'])
+    last_teammate_action_batch = np.repeat(
+        last_teammate_action[np.newaxis, :],
+        num_legal_actions, axis=0)
+    teammate_num_cards_left = _get_one_hot_array(
+        infoset.num_cards_left_dict['landlord_down'], 25)
+    teammate_num_cards_left_batch = np.repeat(
+        teammate_num_cards_left[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    teammate_played_cards = _cards2array(
+        infoset.played_cards['landlord_down'])
+    teammate_played_cards_batch = np.repeat(
+        teammate_played_cards[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    last_teammate_front_action = _cards2array(
+        infoset.last_move_dict['landlord_front'])  # NOTE(review): 'landlord_front' is this function's own seat; 'landlord_up' may be intended -- confirm
+    last_teammate_front_action_batch = np.repeat(
+        last_teammate_front_action[np.newaxis, :],
+        num_legal_actions, axis=0)
+    teammate_front_num_cards_left = _get_one_hot_array(
+        infoset.num_cards_left_dict['landlord_front'], 25)
+    teammate_front_num_cards_left_batch = np.repeat(
+        teammate_front_num_cards_left[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    teammate_front_played_cards = _cards2array(
+        infoset.played_cards['landlord_front'])
+    teammate_front_played_cards_batch = np.repeat(
+        teammate_front_played_cards[np.newaxis, :],  # must mirror the array stacked into x_no_action, not landlord_down's
+        num_legal_actions, axis=0)
+
     bomb_num = _get_one_hot_bomb(
         infoset.bomb_num)
     bomb_num_batch = np.repeat(
@@ -663,36 +840,42 @@ def _get_obs_landlord_up(infoset):
                          other_handcards_batch,
                          landlord_played_cards_batch,
                          teammate_played_cards_batch,
+                         teammate_front_played_cards_batch,
                          last_action_batch,
                          last_landlord_action_batch,
                          last_teammate_action_batch,
+                         last_teammate_front_action_batch,
                          landlord_num_cards_left_batch,
                          teammate_num_cards_left_batch,
+                         teammate_front_num_cards_left_batch,
                          bomb_num_batch,
                          my_action_batch))
     x_no_action = np.hstack((my_handcards,
                              other_handcards,
                              landlord_played_cards,
                              teammate_played_cards,
+                             teammate_front_played_cards,
                              last_action,
                              last_landlord_action,
                              last_teammate_action,
+                             last_teammate_front_action,
                              landlord_num_cards_left,
                              teammate_num_cards_left,
+                             teammate_front_num_cards_left,
                              bomb_num))
     z = _action_seq_list2array(_process_action_seq(
-        infoset.card_play_action_seq, 15, False), False)
+        infoset.card_play_action_seq, 20, False), False)
     z_batch = np.repeat(
         z[np.newaxis, :, :],
         num_legal_actions, axis=0)
     obs = {
-        'position': 'landlord_up',
-        'x_batch': x_batch.astype(np.float32),
-        'z_batch': z_batch.astype(np.float32),
-        'legal_actions': infoset.legal_actions,
-        'x_no_action': x_no_action.astype(np.int8),
-        'z': z.astype(np.int8),
-    }
+            'position': 'landlord_front',
+            'x_batch': x_batch.astype(np.float32),
+            'z_batch': z_batch.astype(np.float32),
+            'legal_actions': infoset.legal_actions,
+            'x_no_action': x_no_action.astype(np.int8),
+            'z': z.astype(np.int8),
+          }
     return obs
 
 def _get_obs_landlord_down(infoset):
@@ -723,7 +906,7 @@ def _get_obs_landlord_down(infoset):
         last_landlord_action[np.newaxis, :],
         num_legal_actions, axis=0)
     landlord_num_cards_left = _get_one_hot_array(
-        infoset.num_cards_left_dict['landlord'], 20)
+        infoset.num_cards_left_dict['landlord'], 33)
     landlord_num_cards_left_batch = np.repeat(
         landlord_num_cards_left[np.newaxis, :],
         num_legal_actions, axis=0)
@@ -740,7 +923,7 @@ def _get_obs_landlord_down(infoset):
         last_teammate_action[np.newaxis, :],
         num_legal_actions, axis=0)
     teammate_num_cards_left = _get_one_hot_array(
-        infoset.num_cards_left_dict['landlord_up'], 17)
+        infoset.num_cards_left_dict['landlord_up'], 25)
     teammate_num_cards_left_batch = np.repeat(
         teammate_num_cards_left[np.newaxis, :],
         num_legal_actions, axis=0)
@@ -751,10 +934,21 @@ def _get_obs_landlord_down(infoset):
         teammate_played_cards[np.newaxis, :],
         num_legal_actions, axis=0)
 
-    landlord_played_cards = _cards2array(
-        infoset.played_cards['landlord'])
-    landlord_played_cards_batch = np.repeat(
-        landlord_played_cards[np.newaxis, :],
+    last_teammate_front_action = _cards2array(
+        infoset.last_move_dict['landlord_front'])
+    last_teammate_front_action_batch = np.repeat(
+        last_teammate_front_action[np.newaxis, :],
+        num_legal_actions, axis=0)
+    teammate_front_num_cards_left = _get_one_hot_array(
+        infoset.num_cards_left_dict['landlord_front'], 25)
+    teammate_front_num_cards_left_batch = np.repeat(
+        teammate_front_num_cards_left[np.newaxis, :],
+        num_legal_actions, axis=0)
+
+    teammate_front_played_cards = _cards2array(
+        infoset.played_cards['landlord_front'])
+    teammate_front_played_cards_batch = np.repeat(
+        teammate_front_played_cards[np.newaxis, :],
         num_legal_actions, axis=0)
 
     bomb_num = _get_one_hot_bomb(
@@ -767,36 +961,42 @@ def _get_obs_landlord_down(infoset):
                          other_handcards_batch,
                          landlord_played_cards_batch,
                          teammate_played_cards_batch,
+                         teammate_front_played_cards_batch,
                          last_action_batch,
                          last_landlord_action_batch,
                          last_teammate_action_batch,
+                         last_teammate_front_action_batch,
                          landlord_num_cards_left_batch,
                          teammate_num_cards_left_batch,
+                         teammate_front_num_cards_left_batch,
                          bomb_num_batch,
                          my_action_batch))
     x_no_action = np.hstack((my_handcards,
                              other_handcards,
                              landlord_played_cards,
                              teammate_played_cards,
+                             teammate_front_played_cards,
                              last_action,
                              last_landlord_action,
                              last_teammate_action,
+                             last_teammate_front_action,
                              landlord_num_cards_left,
                              teammate_num_cards_left,
+                             teammate_front_num_cards_left,
                              bomb_num))
     z = _action_seq_list2array(_process_action_seq(
-        infoset.card_play_action_seq, 15, False), False)
+        infoset.card_play_action_seq, 20, False), False)
     z_batch = np.repeat(
         z[np.newaxis, :, :],
         num_legal_actions, axis=0)
     obs = {
-        'position': 'landlord_down',
-        'x_batch': x_batch.astype(np.float32),
-        'z_batch': z_batch.astype(np.float32),
-        'legal_actions': infoset.legal_actions,
-        'x_no_action': x_no_action.astype(np.int8),
-        'z': z.astype(np.int8),
-    }
+            'position': 'landlord_down',
+            'x_batch': x_batch.astype(np.float32),
+            'z_batch': z_batch.astype(np.float32),
+            'legal_actions': infoset.legal_actions,
+            'x_no_action': x_no_action.astype(np.int8),
+            'z': z.astype(np.int8),
+          }
     return obs
 
 def _get_obs_landlord_withbid(infoset):
@@ -869,7 +1069,7 @@ def _get_obs_landlord_withbid(infoset):
                              landlord_down_num_cards_left,
                              bomb_num))
     z = _action_seq_list2array(_process_action_seq(
-        infoset.card_play_action_seq, 15, False), False)
+        infoset.card_play_action_seq, 20, False), False)
     z_batch = np.repeat(
         z[np.newaxis, :, :],
         num_legal_actions, axis=0)
@@ -970,21 +1170,21 @@ def _get_obs_general1(infoset, position):
         bomb_num[np.newaxis, :],
         num_legal_actions, axis=0)
 
-    x_batch = np.hstack((position_info_batch,  # 3
-                         my_handcards_batch,  # 54
-                         other_handcards_batch,  # 54
-                         three_landlord_cards_batch,  # 54
-                         last_action_batch,  # 54
-                         landlord_played_cards_batch,  # 54
-                         landlord_up_played_cards_batch,  # 54
-                         landlord_down_played_cards_batch,  # 54
+    x_batch = np.hstack((position_info_batch,  # 4
+                         my_handcards_batch,  # 108
+                         other_handcards_batch,  # 108
+                         three_landlord_cards_batch,  # 108
+                         last_action_batch,  # 108
+                         landlord_played_cards_batch,  # 108
+                         landlord_up_played_cards_batch,  # 108
+                         landlord_down_played_cards_batch,  # 108
                          landlord_num_cards_left_batch,  # 20
                          landlord_up_num_cards_left_batch,  # 17
                          landlord_down_num_cards_left_batch,  # 17
                          bomb_num_batch,  # 15
                          bid_info_batch,  # 12
                          multiply_info_batch, # 3
-                         my_action_batch))  # 54
+                         my_action_batch))  # 108
     x_no_action = np.hstack((position_info,
                              my_handcards,
                              other_handcards,
@@ -1025,9 +1225,10 @@ def _get_obs_general(infoset, position):
                                       num_legal_actions, axis=0)
 
     position_map = {
-        "landlord": [1, 0, 0],
-        "landlord_up": [0, 1, 0],
-        "landlord_down": [0, 0, 1]
+        "landlord": [1, 0, 0, 0],
+        "landlord_up": [0, 1, 0, 0],
+        "landlord_front": [0, 0, 1, 0],
+        "landlord_down": [0, 0, 0, 1]
     }
     position_info = np.array(position_map[position])
     position_info_batch = np.repeat(position_info[np.newaxis, :],
@@ -1041,9 +1242,9 @@ def _get_obs_general(infoset, position):
     multiply_info_batch = np.repeat(multiply_info[np.newaxis, :],
                                     num_legal_actions, axis=0)
 
-    three_landlord_cards = _cards2array(infoset.three_landlord_cards)
-    three_landlord_cards_batch = np.repeat(three_landlord_cards[np.newaxis, :],
-                                           num_legal_actions, axis=0)
+    # three_landlord_cards = _cards2array(infoset.three_landlord_cards)
+    # three_landlord_cards_batch = np.repeat(three_landlord_cards[np.newaxis, :],
+    #                                        num_legal_actions, axis=0)
 
     last_action = _cards2array(infoset.last_move)
     last_action_batch = np.repeat(last_action[np.newaxis, :],
@@ -1054,25 +1255,31 @@ def _get_obs_general(infoset, position):
         my_action_batch[j, :] = _cards2array(action)
 
     landlord_num_cards_left = _get_one_hot_array(
-        infoset.num_cards_left_dict['landlord'], 20)
+        infoset.num_cards_left_dict['landlord'], 33)
     landlord_num_cards_left_batch = np.repeat(
         landlord_num_cards_left[np.newaxis, :],
         num_legal_actions, axis=0)
 
     landlord_up_num_cards_left = _get_one_hot_array(
-        infoset.num_cards_left_dict['landlord_up'], 17)
+        infoset.num_cards_left_dict['landlord_up'], 25)
     landlord_up_num_cards_left_batch = np.repeat(
         landlord_up_num_cards_left[np.newaxis, :],
         num_legal_actions, axis=0)
 
+    landlord_front_num_cards_left = _get_one_hot_array(
+        infoset.num_cards_left_dict['landlord_front'], 25)
+    landlord_front_num_cards_left_batch = np.repeat(
+        landlord_front_num_cards_left[np.newaxis, :],
+        num_legal_actions, axis=0)
+
     landlord_down_num_cards_left = _get_one_hot_array(
-        infoset.num_cards_left_dict['landlord_down'], 17)
+        infoset.num_cards_left_dict['landlord_down'], 25)
     landlord_down_num_cards_left_batch = np.repeat(
         landlord_down_num_cards_left[np.newaxis, :],
         num_legal_actions, axis=0)
 
     other_handcards_left_list = []
-    for pos in ["landlord", "landlord_up", "landlord_up"]:
+    for pos in ["landlord", "landlord_up", "landlord_front", "landlord_down"]:
         if pos != position:
             other_handcards_left_list.extend(infoset.all_handcards[pos])
 
@@ -1088,6 +1295,12 @@ def _get_obs_general(infoset, position):
         landlord_up_played_cards[np.newaxis, :],
         num_legal_actions, axis=0)
 
+    landlord_front_played_cards = _cards2array(
+        infoset.played_cards['landlord_front'])
+    landlord_front_played_cards_batch = np.repeat(
+        landlord_front_played_cards[np.newaxis, :],
+        num_legal_actions, axis=0)
+
     landlord_down_played_cards = _cards2array(
         infoset.played_cards['landlord_down'])
     landlord_down_played_cards_batch = np.repeat(
@@ -1100,24 +1313,26 @@ def _get_obs_general(infoset, position):
         bomb_num[np.newaxis, :],
         num_legal_actions, axis=0)
     num_cards_left = np.hstack((
-                         landlord_num_cards_left,  # 20
-                         landlord_up_num_cards_left,  # 17
+                         landlord_num_cards_left,  # 33
+                         landlord_up_num_cards_left,  # 25
+                         landlord_front_num_cards_left,  # 25
                          landlord_down_num_cards_left))
 
     x_batch = np.hstack((
-                         bid_info_batch,  # 12
-                         multiply_info_batch))  # 3
+                         bid_info_batch,  # 16
+                         multiply_info_batch))  # 4
     x_no_action = np.hstack((
                              bid_info,
                              multiply_info))
     z =np.vstack((
                   num_cards_left,
-                  my_handcards,  # 54
-                  other_handcards,  # 54
-                  three_landlord_cards,  # 54
-                  landlord_played_cards,  # 54
-                  landlord_up_played_cards,  # 54
-                  landlord_down_played_cards,  # 54
+                  my_handcards,  # 108
+                  other_handcards,  # 108
+                #   three_landlord_cards,  # 108
+                  landlord_played_cards,  # 108
+                  landlord_up_played_cards,  # 108
+                  landlord_front_played_cards,  # 108
+                  landlord_down_played_cards,  # 108
                   _action_seq_list2array(_process_action_seq(infoset.card_play_action_seq, 32))
                   ))
 
@@ -1125,7 +1340,7 @@ def _get_obs_general(infoset, position):
         z[np.newaxis, :, :],
         num_legal_actions, axis=0)
     my_action_batch = my_action_batch[:,np.newaxis,:]
-    z_batch = np.zeros([len(_z_batch),40,54],int)
+    z_batch = np.zeros([len(_z_batch),40,108],int)
     for i in range(0,len(_z_batch)):
         z_batch[i] = np.vstack((my_action_batch[i],_z_batch[i]))
     obs = {
@@ -1139,17 +1354,17 @@ def _get_obs_general(infoset, position):
     return obs
 
 def gen_bid_legal_actions(player_id, bid_info):
-    self_bid_info = bid_info[:, [(player_id - 1) % 3, player_id, (player_id + 1) % 3]]
+    self_bid_info = bid_info[:, [(player_id - 1) % 4, player_id, (player_id + 1) % 4, (player_id + 2) % 4]]
     curr_round = -1
-    for r in range(4):
+    for r in range(5):
         if -1 in self_bid_info[r]:
             curr_round = r
             break
     bid_actions = []
     if curr_round != -1:
-        self_bid_info[curr_round] = [0, 0, 0]
+        self_bid_info[curr_round] = [0, 0, 0, 0]
         bid_actions.append(np.array(self_bid_info).flatten())
-        self_bid_info[curr_round] = [0, 1, 0]
+        self_bid_info[curr_round] = [0, 1, 0, 0]
         bid_actions.append(np.array(self_bid_info).flatten())
     return np.array(bid_actions)
 
@@ -1273,9 +1488,9 @@ def _get_obs_for_bid_legacy(player_id, bid_info, hand_cards):
     return obs
 
 def _get_obs_for_bid(player_id, bid_info, hand_cards):
-    all_cards = [3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
-                 8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, 12,
-                 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 17, 17, 17, 17, 20, 30]
+    # all_cards = [3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
+    #              8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, 12,
+    #              12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 17, 17, 17, 17, 20, 30]
     num_legal_actions = 2
     my_handcards = _cards2array(hand_cards)
     my_handcards_batch = np.repeat(my_handcards[np.newaxis, :],
diff --git a/douzero/env/game.py b/douzero/env/game.py
index f9252f5..1b716f7 100644
--- a/douzero/env/game.py
+++ b/douzero/env/game.py
@@ -11,10 +11,19 @@ RealCard2EnvCard = {'3': 3, '4': 4, '5': 5, '6': 6, '7': 7,
                     '8': 8, '9': 9, '10': 10, 'J': 11, 'Q': 12,
                     'K': 13, 'A': 14, '2': 17, 'X': 20, 'D': 30}
 
-bombs = [[3, 3, 3, 3], [4, 4, 4, 4], [5, 5, 5, 5], [6, 6, 6, 6],
-         [7, 7, 7, 7], [8, 8, 8, 8], [9, 9, 9, 9], [10, 10, 10, 10],
-         [11, 11, 11, 11], [12, 12, 12, 12], [13, 13, 13, 13], [14, 14, 14, 14],
-         [17, 17, 17, 17], [20, 30]]
+bombs = [
+    [[3, 3, 3, 3, 3, 3], [4, 4, 4, 4, 4, 4], [5, 5, 5, 5, 5, 5], [6, 6, 6, 6, 6, 6], [7, 7, 7, 7, 7, 7],
+     [8, 8, 8, 8, 8, 8], [9, 9, 9, 9, 9, 9], [10, 10, 10, 10, 10, 10], [11, 11, 11, 11, 11, 11],
+     [12, 12, 12, 12, 12, 12], [13, 13, 13, 13, 13, 13], [14, 14, 14, 14, 14, 14], [17, 17, 17, 17, 17, 17],
+     [3, 3, 3, 3, 3, 3, 3], [4, 4, 4, 4, 4, 4, 4], [5, 5, 5, 5, 5, 5, 5], [6, 6, 6, 6, 6, 6, 6], [7, 7, 7, 7, 7, 7, 7],
+     [8, 8, 8, 8, 8, 8, 8], [9, 9, 9, 9, 9, 9, 9], [10, 10, 10, 10, 10, 10, 10], [11, 11, 11, 11, 11, 11, 11],
+     [12, 12, 12, 12, 12, 12, 12], [13, 13, 13, 13, 13, 13, 13], [14, 14, 14, 14, 14, 14, 14],
+     [17, 17, 17, 17, 17, 17, 17]],
+    [[3, 3, 3, 3, 3, 3, 3, 3], [4, 4, 4, 4, 4, 4, 4, 4], [5, 5, 5, 5, 5, 5, 5, 5], [6, 6, 6, 6, 6, 6, 6, 6],
+     [7, 7, 7, 7, 7, 7, 7, 7], [8, 8, 8, 8, 8, 8, 8, 8], [9, 9, 9, 9, 9, 9, 9, 9], [10, 10, 10, 10, 10, 10, 10, 10],
+     [11, 11, 11, 11, 11, 11, 11, 11], [12, 12, 12, 12, 12, 12, 12, 12], [13, 13, 13, 13, 13, 13, 13, 13],
+     [14, 14, 14, 14, 14, 14, 14, 14], [17, 17, 17, 17, 17, 17, 17, 17],
+     [20, 20, 30, 30]]]
 
 class GameEnv(object):
 
@@ -22,7 +31,7 @@ class GameEnv(object):
 
         self.card_play_action_seq = []
 
-        self.three_landlord_cards = None
+        # self.three_landlord_cards = None
         self.game_over = False
 
         self.acting_player_position = None
@@ -32,10 +41,12 @@ class GameEnv(object):
 
         self.last_move_dict = {'landlord': [],
                                'landlord_up': [],
+                               'landlord_front': [],
                                'landlord_down': []}
 
         self.played_cards = {'landlord': [],
                              'landlord_up': [],
+                             'landlord_front': [],
                              'landlord_down': []}
 
         self.last_move = []
@@ -48,24 +59,28 @@ class GameEnv(object):
                            'farmer': 0}
 
         self.info_sets = {'landlord': InfoSet('landlord'),
-                          'landlord_up': InfoSet('landlord_up'),
-                          'landlord_down': InfoSet('landlord_down')}
+                         'landlord_up': InfoSet('landlord_up'),
+                         'landlord_front': InfoSet('landlord_front'),
+                         'landlord_down': InfoSet('landlord_down')}
 
-        self.bomb_num = 0
+        self.bomb_num = [0, 0]
         self.pos_bomb_num = {
             "landlord": 0,
             "landlord_up": 0,
+            "landlord_front": 0,
             "landlord_down": 0
         }
         self.last_pid = 'landlord'
 
-        self.bid_info = [[-1, -1, -1],
-                         [-1, -1, -1],
-                         [-1, -1, -1],
-                         [-1, -1, -1]]
+        self.bid_info = [[-1, -1, -1, -1],
+                         [-1, -1, -1, -1],
+                         [-1, -1, -1, -1],
+                         [-1, -1, -1, -1],
+                         [-1, -1, -1, -1]]
         self.bid_count = 0
         self.multiply_count = {'landlord': 0,
                                'landlord_up': 0,
+                               'landlord_front': 0,
                                'landlord_down': 0}
         self.step_count = 0
 
@@ -75,9 +90,11 @@ class GameEnv(object):
             card_play_data['landlord']
         self.info_sets['landlord_up'].player_hand_cards = \
             card_play_data['landlord_up']
+        self.info_sets['landlord_front'].player_hand_cards = \
+            card_play_data['landlord_front']
         self.info_sets['landlord_down'].player_hand_cards = \
             card_play_data['landlord_down']
-        self.three_landlord_cards = card_play_data['three_landlord_cards']
+        # self.three_landlord_cards = card_play_data['three_landlord_cards']
         self.get_acting_player_position()
         self.game_infoset = self.get_infoset()
 
@@ -85,6 +102,7 @@ class GameEnv(object):
     def game_done(self):
         if len(self.info_sets['landlord'].player_hand_cards) == 0 or \
                 len(self.info_sets['landlord_up'].player_hand_cards) == 0 or \
+                len(self.info_sets['landlord_front'].player_hand_cards) == 0 or \
                 len(self.info_sets['landlord_down'].player_hand_cards) == 0:
             # if one of the three players discards his hand,
             # then game is over.
@@ -96,21 +114,21 @@ class GameEnv(object):
     def compute_player_utility(self):
 
         if len(self.info_sets['landlord'].player_hand_cards) == 0:
-            self.player_utility_dict = {'landlord': 2,
+            self.player_utility_dict = {'landlord': 3,
                                         'farmer': -1}
         else:
-            self.player_utility_dict = {'landlord': -2,
+            self.player_utility_dict = {'landlord': -3,
                                         'farmer': 1}
 
     def update_num_wins_scores(self):
         for pos, utility in self.player_utility_dict.items():
-            base_score = 2 if pos == 'landlord' else 1
+            base_score = 3 if pos == 'landlord' else 1
             if utility > 0:
                 self.num_wins[pos] += 1
                 self.winner = pos
-                self.num_scores[pos] += base_score * (2 ** self.bomb_num)
+                self.num_scores[pos] += base_score * (2 ** self.bomb_num[0]) * (3 ** self.bomb_num[1])
             else:
-                self.num_scores[pos] -= base_score * (2 ** self.bomb_num)
+                self.num_scores[pos] -= base_score * (2 ** self.bomb_num[0]) * (3 ** self.bomb_num[1])
 
     def get_winner(self):
         return self.winner
@@ -121,12 +139,17 @@ class GameEnv(object):
     def step(self):
         action = self.players[self.acting_player_position].act(
             self.game_infoset)
-        self.step_count += 1
+        assert action in self.game_infoset.legal_actions
+
         if len(action) > 0:
             self.last_pid = self.acting_player_position
 
-        if action in bombs:
-            self.bomb_num += 1
+        if action in bombs[0]:
+            self.bomb_num[0] += 1
+            self.pos_bomb_num[self.acting_player_position] += 1
+
+        if action in bombs[1]:
+            self.bomb_num[1] += 1
             self.pos_bomb_num[self.acting_player_position] += 1
 
         self.last_move_dict[
@@ -137,15 +160,15 @@ class GameEnv(object):
 
         self.played_cards[self.acting_player_position] += action
 
-        if self.acting_player_position == 'landlord' and \
-                len(action) > 0 and \
-                len(self.three_landlord_cards) > 0:
-            for card in action:
-                if len(self.three_landlord_cards) > 0:
-                    if card in self.three_landlord_cards:
-                        self.three_landlord_cards.remove(card)
-                else:
-                    break
+        # if self.acting_player_position == 'landlord' and \
+        #         len(action) > 0 and \
+        #         len(self.three_landlord_cards) > 0:
+        #     for card in action:
+        #         if len(self.three_landlord_cards) > 0:
+        #             if card in self.three_landlord_cards:
+        #                 self.three_landlord_cards.remove(card)
+        #         else:
+        #             break
 
         self.game_done()
         if not self.game_over:
@@ -156,7 +179,7 @@ class GameEnv(object):
     def get_last_move(self):
         last_move = []
         if len(self.card_play_action_seq) != 0:
-            if len(self.card_play_action_seq[-1][1]) == 0:
+            if len(self.card_play_action_seq[-1][1]) == 0:  # test the move itself; entries are read as [..][1] on the lines below
                 last_move = self.card_play_action_seq[-2][1]
             else:
                 last_move = self.card_play_action_seq[-1][1]
@@ -166,7 +189,7 @@ class GameEnv(object):
     def get_last_two_moves(self):
         last_two_moves = [[], []]
         for card in self.card_play_action_seq[-2:]:
-            last_two_moves.insert(0, card[1])
+            last_two_moves.insert(0, card[1])  # keep only the move, matching the [..][1] access used by get_last_move
             last_two_moves = last_two_moves[:2]
         return last_two_moves
 
@@ -179,6 +202,9 @@ class GameEnv(object):
                 self.acting_player_position = 'landlord_down'
 
             elif self.acting_player_position == 'landlord_down':
+                self.acting_player_position = 'landlord_front'
+
+            elif self.acting_player_position == 'landlord_front':
                 self.acting_player_position = 'landlord_up'
 
             else:
@@ -202,7 +228,10 @@ class GameEnv(object):
         rival_move = []
         if len(action_sequence) != 0:
             if len(action_sequence[-1][1]) == 0:
-                rival_move = action_sequence[-2][1]
+                if len(action_sequence[-2][1]) == 0:
+                    rival_move = action_sequence[-3][1]
+                else:
+                    rival_move = action_sequence[-2][1]
             else:
                 rival_move = action_sequence[-1][1]
 
@@ -227,15 +256,36 @@ class GameEnv(object):
             moves = ms.filter_type_3_triple(all_moves, rival_move)
 
         elif rival_move_type == md.TYPE_4_BOMB:
-            all_moves = mg.gen_type_4_bomb() + mg.gen_type_5_king_bomb()
+            all_moves = mg.gen_type_4_bomb(4)
             moves = ms.filter_type_4_bomb(all_moves, rival_move)
+            moves = moves + mg.gen_type_4_bomb(5) + mg.gen_type_4_bomb(6) + mg.gen_type_4_bomb(7) + mg.gen_type_4_bomb(8) + mg.gen_type_5_king_bomb()  # longer bombs must land in `moves`; extending all_moves after filtering was discarded
+
+        elif rival_move_type == md.TYPE_4_BOMB5:
+            all_moves = mg.gen_type_4_bomb(5)
+            moves = ms.filter_type_4_bomb(all_moves, rival_move)
+            moves = moves + mg.gen_type_4_bomb(6) + mg.gen_type_4_bomb(7) + mg.gen_type_4_bomb(8) + mg.gen_type_5_king_bomb()  # add every longer bomb plus the king bomb
+
+        elif rival_move_type == md.TYPE_4_BOMB6:
+            all_moves = mg.gen_type_4_bomb(6)
+            moves = ms.filter_type_4_bomb(all_moves, rival_move)
+            moves = moves + mg.gen_type_4_bomb(7) + mg.gen_type_4_bomb(8) + mg.gen_type_5_king_bomb()  # add every longer bomb plus the king bomb
+
+        elif rival_move_type == md.TYPE_4_BOMB7:
+            all_moves = mg.gen_type_4_bomb(7)
+            moves = ms.filter_type_4_bomb(all_moves, rival_move)
+            moves = moves + mg.gen_type_4_bomb(8) + mg.gen_type_5_king_bomb()  # add the 8-card bombs plus the king bomb
+
+        elif rival_move_type == md.TYPE_4_BOMB8:
+            all_moves = mg.gen_type_4_bomb(8)
+            moves = ms.filter_type_4_bomb(all_moves, rival_move)
+            moves = moves + mg.gen_type_5_king_bomb()  # only the king bomb is added on top of the filtered 8-card bombs
 
         elif rival_move_type == md.TYPE_5_KING_BOMB:
             moves = []
 
-        elif rival_move_type == md.TYPE_6_3_1:
-            all_moves = mg.gen_type_6_3_1()
-            moves = ms.filter_type_6_3_1(all_moves, rival_move)
+        # elif rival_move_type == md.TYPE_6_3_1:
+        #     all_moves = mg.gen_type_6_3_1()
+        #     moves = ms.filter_type_6_3_1(all_moves, rival_move)
 
         elif rival_move_type == md.TYPE_7_3_2:
             all_moves = mg.gen_type_7_3_2()
@@ -253,25 +303,24 @@ class GameEnv(object):
             all_moves = mg.gen_type_10_serial_triple(repeat_num=rival_move_len)
             moves = ms.filter_type_10_serial_triple(all_moves, rival_move)
 
-        elif rival_move_type == md.TYPE_11_SERIAL_3_1:
-            all_moves = mg.gen_type_11_serial_3_1(repeat_num=rival_move_len)
-            moves = ms.filter_type_11_serial_3_1(all_moves, rival_move)
+        # elif rival_move_type == md.TYPE_11_SERIAL_3_1:
+        #     all_moves = mg.gen_type_11_serial_3_1(repeat_num=rival_move_len)
+        #     moves = ms.filter_type_11_serial_3_1(all_moves, rival_move)
 
         elif rival_move_type == md.TYPE_12_SERIAL_3_2:
             all_moves = mg.gen_type_12_serial_3_2(repeat_num=rival_move_len)
             moves = ms.filter_type_12_serial_3_2(all_moves, rival_move)
 
-        elif rival_move_type == md.TYPE_13_4_2:
-            all_moves = mg.gen_type_13_4_2()
-            moves = ms.filter_type_13_4_2(all_moves, rival_move)
+        # elif rival_move_type == md.TYPE_13_4_2:
+        #     all_moves = mg.gen_type_13_4_2()
+        #     moves = ms.filter_type_13_4_2(all_moves, rival_move)
 
-        elif rival_move_type == md.TYPE_14_4_22:
-            all_moves = mg.gen_type_14_4_22()
-            moves = ms.filter_type_14_4_22(all_moves, rival_move)
+        # elif rival_move_type == md.TYPE_14_4_22:
+        #     all_moves = mg.gen_type_14_4_22()
+        #     moves = ms.filter_type_14_4_22(all_moves, rival_move)
 
-        if rival_move_type not in [md.TYPE_0_PASS,
-                                   md.TYPE_4_BOMB, md.TYPE_5_KING_BOMB]:
-            moves = moves + mg.gen_type_4_bomb() + mg.gen_type_5_king_bomb()
+        if rival_move_type != md.TYPE_0_PASS and rival_move_type < md.TYPE_4_BOMB:
+            moves = moves + mg.gen_type_4_bomb(4) + mg.gen_type_4_bomb(5) + mg.gen_type_4_bomb(6) + mg.gen_type_4_bomb(7) + mg.gen_type_4_bomb(8) + mg.gen_type_5_king_bomb()
 
         if len(rival_move) != 0:  # rival_move is not 'pass'
             moves = moves + [[]]
@@ -284,7 +333,7 @@ class GameEnv(object):
     def reset(self):
         self.card_play_action_seq = []
 
-        self.three_landlord_cards = None
+        # self.three_landlord_cards = None
         self.game_over = False
 
         self.acting_player_position = None
@@ -292,33 +341,40 @@ class GameEnv(object):
 
         self.last_move_dict = {'landlord': [],
                                'landlord_up': [],
+                               'landlord_front': [],
                                'landlord_down': []}
 
         self.played_cards = {'landlord': [],
                              'landlord_up': [],
+                             'landlord_front': [],
                              'landlord_down': []}
 
         self.last_move = []
         self.last_two_moves = []
 
         self.info_sets = {'landlord': InfoSet('landlord'),
-                          'landlord_up': InfoSet('landlord_up'),
-                          'landlord_down': InfoSet('landlord_down')}
+                         'landlord_up': InfoSet('landlord_up'),
+                         'landlord_front': InfoSet('landlord_front'),
+                         'landlord_down': InfoSet('landlord_down')}
 
-        self.bomb_num = 0
+        self.bomb_num = [0, 0]
         self.pos_bomb_num = {
             "landlord": 0,
             "landlord_up": 0,
+            "landlord_front": 0,
             "landlord_down": 0
         }
         self.last_pid = 'landlord'
-        self.bid_info = [[-1, -1, -1],
-                         [-1, -1, -1],
-                         [-1, -1, -1],
-                         [-1, -1, -1]]
+
+        self.bid_info = [[-1, -1, -1, -1],
+                         [-1, -1, -1, -1],
+                         [-1, -1, -1, -1],
+                         [-1, -1, -1, -1],
+                         [-1, -1, -1, -1]]
         self.bid_count = 0
         self.multiply_count = {'landlord': 0,
                                'landlord_up': 0,
+                               'landlord_front': 0,
                                'landlord_down': 0}
         self.step_count = 0
 
@@ -344,10 +400,10 @@ class GameEnv(object):
 
         self.info_sets[self.acting_player_position].num_cards_left_dict = \
             {pos: len(self.info_sets[pos].player_hand_cards)
-             for pos in ['landlord', 'landlord_up', 'landlord_down']}
+             for pos in ['landlord', 'landlord_up', 'landlord_front', 'landlord_down']}
 
         self.info_sets[self.acting_player_position].other_hand_cards = []
-        for pos in ['landlord', 'landlord_up', 'landlord_down']:
+        for pos in ['landlord', 'landlord_up', 'landlord_front', 'landlord_down']:
             if pos != self.acting_player_position:
                 self.info_sets[
                     self.acting_player_position].other_hand_cards += \
@@ -355,15 +411,15 @@ class GameEnv(object):
 
         self.info_sets[self.acting_player_position].played_cards = \
             self.played_cards
-        self.info_sets[self.acting_player_position].three_landlord_cards = \
-            self.three_landlord_cards
+        # self.info_sets[self.acting_player_position].three_landlord_cards = \
+        #     self.three_landlord_cards
         self.info_sets[self.acting_player_position].card_play_action_seq = \
             self.card_play_action_seq
 
         self.info_sets[
             self.acting_player_position].all_handcards = \
             {pos: self.info_sets[pos].player_hand_cards
-             for pos in ['landlord', 'landlord_up', 'landlord_down']}
+             for pos in ['landlord', 'landlord_up', 'landlord_front', 'landlord_down']}
 
         return deepcopy(self.info_sets[self.acting_player_position])
 
@@ -379,13 +435,13 @@ class InfoSet(object):
         self.player_position = player_position
         # The hand cands of the current player. A list.
         self.player_hand_cards = None
-        # The number of cards left for each player. It is a dict with str-->int
+        # The number of cards left for each player. It is a dict with str-->int
         self.num_cards_left_dict = None
         # The three landload cards. A list.
-        self.three_landlord_cards = None
+        # self.three_landlord_cards = None
         # The historical moves. It is a list of list
         self.card_play_action_seq = None
-        # The union of the hand cards of the other two players for the current player
+        # The union of the hand cards of the other three players for the current player
         self.other_hand_cards = None
         # The legal actions for the current move. It is a list of list
         self.legal_actions = None
@@ -397,18 +453,19 @@ class InfoSet(object):
         self.last_move_dict = None
         # The played cands so far. It is a list.
         self.played_cards = None
-        # The hand cards of all the players. It is a dict.
+        # The hand cards of all the players. It is a dict.
         self.all_handcards = None
         # Last player position that plays a valid move, i.e., not `pass`
         self.last_pid = None
         # The number of bombs played so far
         self.bomb_num = None
 
-        self.bid_info = [[-1, -1, -1],
-                         [-1, -1, -1],
-                         [-1, -1, -1],
-                         [-1, -1, -1]]
+        self.bid_info = [[-1, -1, -1, -1],
+                         [-1, -1, -1, -1],
+                         [-1, -1, -1, -1],
+                         [-1, -1, -1, -1],
+                         [-1, -1, -1, -1]]
 
-        self.multiply_info = [1, 0, 0]
+        self.multiply_info = [1, 0, 0, 0]
 
         self.player_id = None
diff --git a/douzero/env/move_generator.py b/douzero/env/move_generator.py
index ecebb6c..65cdafd 100644
--- a/douzero/env/move_generator.py
+++ b/douzero/env/move_generator.py
@@ -91,17 +91,17 @@ class MovesGener(object):
                 self.triple_cards_moves.append([k, k, k])
         return self.triple_cards_moves
 
-    def gen_type_4_bomb(self):
+    def gen_type_4_bomb(self, num = 4):
         self.bomb_moves = []
         for k, v in self.cards_dict.items():
-            if v == 4:
-                self.bomb_moves.append([k, k, k, k])
+            if v == num:
+                self.bomb_moves.append([k] * num)
         return self.bomb_moves
 
     def gen_type_5_king_bomb(self):
         self.final_bomb_moves = []
-        if 20 in self.cards_list and 30 in self.cards_list:
-            self.final_bomb_moves.append([20, 30])
+        if 20 in self.cards_list and self.cards_dict[20] == 2 and 30 in self.cards_list and self.cards_dict[30] == 2:
+            self.final_bomb_moves.append([20, 20, 30, 30])
         return self.final_bomb_moves
 
     def gen_type_6_3_1(self):
@@ -205,15 +205,19 @@ class MovesGener(object):
         moves.extend(self.gen_type_1_single())
         moves.extend(self.gen_type_2_pair())
         moves.extend(self.gen_type_3_triple())
-        moves.extend(self.gen_type_4_bomb())
+        moves.extend(self.gen_type_4_bomb(4))
+        moves.extend(self.gen_type_4_bomb(5))
+        moves.extend(self.gen_type_4_bomb(6))
+        moves.extend(self.gen_type_4_bomb(7))
+        moves.extend(self.gen_type_4_bomb(8))
         moves.extend(self.gen_type_5_king_bomb())
-        moves.extend(self.gen_type_6_3_1())
+        # moves.extend(self.gen_type_6_3_1())
         moves.extend(self.gen_type_7_3_2())
         moves.extend(self.gen_type_8_serial_single())
         moves.extend(self.gen_type_9_serial_pair())
         moves.extend(self.gen_type_10_serial_triple())
-        moves.extend(self.gen_type_11_serial_3_1())
+        # moves.extend(self.gen_type_11_serial_3_1())
         moves.extend(self.gen_type_12_serial_3_2())
-        moves.extend(self.gen_type_13_4_2())
-        moves.extend(self.gen_type_14_4_22())
+        # moves.extend(self.gen_type_13_4_2())
+        # moves.extend(self.gen_type_14_4_22())
         return moves
diff --git a/douzero/env/utils.py b/douzero/env/utils.py
index c3a2be7..2e83150 100644
--- a/douzero/env/utils.py
+++ b/douzero/env/utils.py
@@ -10,17 +10,22 @@ TYPE_0_PASS = 0
 TYPE_1_SINGLE = 1
 TYPE_2_PAIR = 2
 TYPE_3_TRIPLE = 3
-TYPE_4_BOMB = 4
-TYPE_5_KING_BOMB = 5
+TYPE_4_BOMB = 44
+TYPE_4_BOMB5 = 45
+TYPE_4_BOMB6 = 46
+TYPE_4_BOMB7 = 47
+TYPE_4_BOMB8 = 48
+TYPE_5_KING_BOMB = 50
+#TYPE_6_3_1 = 6
 TYPE_6_3_1 = 6
 TYPE_7_3_2 = 7
 TYPE_8_SERIAL_SINGLE = 8
 TYPE_9_SERIAL_PAIR = 9
 TYPE_10_SERIAL_TRIPLE = 10
-TYPE_11_SERIAL_3_1 = 11
+# TYPE_11_SERIAL_3_1 = 11
 TYPE_12_SERIAL_3_2 = 12
-TYPE_13_4_2 = 13
-TYPE_14_4_22 = 14
+# TYPE_13_4_2 = 13
+# TYPE_14_4_22 = 14
 TYPE_15_WRONG = 15
 
 # betting round action
diff --git a/douzero/evaluation/simulation.py b/douzero/evaluation/simulation.py
index bac5cbd..1468746 100644
--- a/douzero/evaluation/simulation.py
+++ b/douzero/evaluation/simulation.py
@@ -10,7 +10,7 @@ import BidModel
 def load_card_play_models(card_play_model_path_dict):
     players = {}
 
-    for position in ['landlord', 'landlord_up', 'landlord_down']:
+    for position in ['landlord', 'landlord_up', 'landlord_front', 'landlord_down']:
         if card_play_model_path_dict[position] == 'rlcard':
             from .rlcard_agent import RLCardAgent
             players[position] = RLCardAgent(position)
@@ -43,30 +43,34 @@ def mp_simulate(card_play_data_list, card_play_model_path_dict, q, output, bid_o
             bid_results = []
             bid_values = []
             bid_info_list = [
-                np.array([[-1,-1,-1],
-                          [-1,-1,-1],
-                          [-1,-1,-1],
-                          [-1,-1,-1]]),
-                np.array([[0,0,0],
-                          [-1,-1,-1],
-                          [-1,-1,-1],
-                          [-1,-1,-1]]),
-                np.array([[1,0,0],
-                          [-1,-1,-1],
-                          [-1,-1,-1],
-                          [-1,-1,-1]]),
-                np.array([[0,0,0],
-                          [0,0,0],
-                          [-1,-1,-1],
-                          [-1,-1,-1]]),
-                np.array([[0,0,1],
-                          [1,0,0],
-                          [-1,-1,-1],
-                          [-1,-1,-1]]),
-                np.array([[0,1,0],
-                          [0,0,1],
-                          [1,0,0],
-                          [-1,-1,-1]]),
+                np.array([[-1,-1,-1,-1],
+                          [-1,-1,-1,-1],
+                          [-1,-1,-1,-1],
+                          [-1,-1,-1,-1]]),
+                np.array([[0,0,0,0],
+                          [-1,-1,-1,-1],
+                          [-1,-1,-1,-1],
+                          [-1,-1,-1,-1]]),
+                np.array([[1,0,0,0],
+                          [-1,-1,-1,-1],
+                          [-1,-1,-1,-1],
+                          [-1,-1,-1,-1]]),
+                np.array([[0,0,0,0],
+                          [0,0,0,0],
+                          [-1,-1,-1,-1],
+                          [-1,-1,-1,-1]]),
+                np.array([[0,0,1,0],
+                          [0,0,0,1],
+                          [-1,-1,-1,-1],
+                          [-1,-1,-1,-1]]),
+                np.array([[0,1,0,0],
+                          [0,0,1,0],
+                          [0,0,0,1],
+                          [-1,-1,-1,-1]]),
+                np.array([[0,1,0,0],
+                          [0,0,1,0],
+                          [1,0,0,0],
+                          [-1,-1,-1,-1]]),
             ]
             for bid_info in bid_info_list:
                 bid_obs = douzero.env.env._get_obs_for_bid(1, bid_info, card_play_data["landlord"])
@@ -82,6 +86,7 @@ def mp_simulate(card_play_data_list, card_play_model_path_dict, q, output, bid_o
             print("\nStart ------- " + title)
             print ("".join([EnvCard2RealCard[c] for c in card_play_data["landlord"]]))
             print ("".join([EnvCard2RealCard[c] for c in card_play_data["landlord_down"]]))
+            print ("".join([EnvCard2RealCard[c] for c in card_play_data["landlord_front"]]))
             print ("".join([EnvCard2RealCard[c] for c in card_play_data["landlord_up"]]))
         # print(card_play_data)
         count = 0
@@ -127,6 +132,7 @@ def evaluate(landlord, landlord_up, landlord_down, eval_data, num_workers, outpu
     card_play_model_path_dict = {
         'landlord': landlord,
         'landlord_up': landlord_up,
+        'landlord_front': landlord_up,  # NOTE(review): reuses the landlord_up model path; the visible evaluate() signature has no landlord_front parameter - confirm this is intended
         'landlord_down': landlord_down}
 
     num_landlord_wins = 0
diff --git a/evaluate.py b/evaluate.py
index 5a838e4..45ea06e 100644
--- a/evaluate.py
+++ b/evaluate.py
@@ -10,6 +10,8 @@ if __name__ == '__main__':
             default='baselines/douzero_12/landlord_weights_39762328900.ckpt')
     parser.add_argument('--landlord_up', type=str,
             default='baselines/douzero_12/landlord_up_weights_39762328900.ckpt')
+    parser.add_argument('--landlord_front', type=str,
+            default='baselines/douzero_12/landlord_front_weights_39762328900.ckpt')
     parser.add_argument('--landlord_down', type=str,
             default='baselines/douzero_12/landlord_down_weights_39762328900.ckpt')
     parser.add_argument('--eval_data', type=str,
@@ -25,7 +27,7 @@ if __name__ == '__main__':
     if args.output or args.bid:
         args.num_workers = 1
     t = 3
-    frame = 3085177900
+    frame = 64000
     adp_frame = 2511184300
     # args.landlord = 'baselines/resnet_landlord_%i.ckpt' % frame
     args.landlord_up = 'baselines/resnet_landlord_up_%i.ckpt' % frame
@@ -44,6 +46,7 @@ if __name__ == '__main__':
     elif t == 3:
         args.landlord = 'baselines/resnet_landlord_%i.ckpt' % frame
         args.landlord_up = 'baselines/resnet_landlord_up_%i.ckpt' % frame
+        args.landlord_front = 'baselines/resnet_landlord_front_%i.ckpt' % frame
         args.landlord_down = 'baselines/resnet_landlord_down_%i.ckpt' % frame
     elif t == 4:
         args.landlord = 'baselines/douzero_ADP/landlord.ckpt'
diff --git a/generate_eval_data.py b/generate_eval_data.py
index 6d0ff2a..a4d0afa 100644
--- a/generate_eval_data.py
+++ b/generate_eval_data.py
@@ -4,9 +4,9 @@ import numpy as np
 
 deck = []
 for i in range(3, 15):
-    deck.extend([i for _ in range(4)])
-deck.extend([17 for _ in range(4)])
-deck.extend([20, 30])
+    deck.extend([i for _ in range(8)])
+deck.extend([17 for _ in range(8)])
+deck.extend([20, 20, 30, 30])
 
 def get_parser():
     parser = argparse.ArgumentParser(description='DouZero: random data generator')
@@ -17,10 +17,11 @@ def get_parser():
 def generate():
     _deck = deck.copy()
     np.random.shuffle(_deck)
-    card_play_data = {'landlord': _deck[:20],
-                      'landlord_up': _deck[20:37],
-                      'landlord_down': _deck[37:54],
-                      'three_landlord_cards': _deck[17:20],
+    card_play_data = {'landlord': _deck[:33],
+                      'landlord_up': _deck[33:58],
+                      'landlord_front': _deck[58:83],
+                      'landlord_down': _deck[83:108],
+                    #   'three_landlord_cards': _deck[25:33],
                       }
     for key in card_play_data:
         card_play_data[key].sort()