Add support for new model types
This commit is contained in: parent 34db1f5ec0 · commit 8b2b39216a
@@ -165,8 +165,9 @@ class GameHelper:
        self.PicsCV.update({info[0]: imgCv})

    def Screenshot(self, region=None):  # -> (im, (left, top))
        self.Handle = win32gui.FindWindow("Hlddz", None)
        hwnd = self.Handle
        # im = Image.open(r"C:\Users\q9294\Desktop\llc.png")
        # im = Image.open(r"C:\Users\q9294\Desktop\Snipaste_2021-09-05_00-52-51.png")
        # im = im.resize((1796, 1047))
        # return im, (0,0)
        left, top, right, bot = win32gui.GetWindowRect(hwnd)

@@ -315,11 +316,14 @@ class GameHelper:
        win32gui.PostMessage(self.Handle, WM_LBUTTONDOWN, MK_LBUTTON, lParam)
        win32gui.PostMessage(self.Handle, WM_LBUTTONUP, MK_LBUTTON, lParam)

    def SelectCards(self, cards):
    def SelectCards(self, cards, no_check=False):
        print("选择牌", cards)
        cards = [card for card in cards]
        tobeSelected = []
        tobeSelected.extend(cards)
        image, windowPos = self.Screenshot()
        while image.size[0] == 0:
            image, windowPos = self.Screenshot()
        handCardsInfo, states = self.GetCards(image)
        cardSelectMap = []
        for card in handCardsInfo:

@@ -340,11 +344,16 @@ class GameHelper:
            for i in range(0, len(clickMap)):
                if clickMap[i] == 1:
                    self.LeftClick(handCardsInfo[i][1])
                    print("点击", handCardsInfo[i][1])
                    break
            time.sleep(0.1)
            if self.Interrupt:
                break
            if no_check:
                return
            image, _ = self.Screenshot()
            while image.size[0] == 0:
                image, windowPos = self.Screenshot()
            states = self.GetCardsState(image)
            clickMap = []
            for i in range(0, len(cardSelectMap)):

@@ -352,4 +361,4 @@ class GameHelper:
                    clickMap.append(0)
                else:
                    clickMap.append(1)
            QtWidgets.QApplication.processEvents(QEventLoop.AllEvents, 10)
        QtWidgets.QApplication.processEvents(QEventLoop.AllEvents, 10)
@@ -7,6 +7,7 @@ import numpy as np

import torch
from torch import nn
import torch.nn.functional as F

class LandlordLstmModel(nn.Module):
    def __init__(self):

@@ -78,22 +79,334 @@ class FarmerLstmModel(nn.Module):
                action = torch.argmax(x,dim=0)[0]
            return dict(action=action)

class LandlordLstmNewModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.lstm = nn.LSTM(162, 128, batch_first=True)
        self.dense1 = nn.Linear(373 + 128, 512)
        self.dense2 = nn.Linear(512, 512)
        self.dense3 = nn.Linear(512, 512)
        self.dense4 = nn.Linear(512, 512)
        self.dense5 = nn.Linear(512, 512)
        self.dense6 = nn.Linear(512, 1)

    def forward(self, z, x, return_value=False, flags=None):
        lstm_out, (h_n, _) = self.lstm(z)
        lstm_out = lstm_out[:,-1,:]
        x = torch.cat([lstm_out,x], dim=-1)
        x = self.dense1(x)
        x = torch.relu(x)
        x = self.dense2(x)
        x = torch.relu(x)
        x = self.dense3(x)
        x = torch.relu(x)
        x = self.dense4(x)
        x = torch.relu(x)
        x = self.dense5(x)
        x = torch.relu(x)
        x = self.dense6(x)
        if return_value:
            return dict(values=x)
        else:
            if flags is not None and flags.exp_epsilon > 0 and np.random.rand() < flags.exp_epsilon:
                action = torch.randint(x.shape[0], (1,))[0]
            else:
                action = torch.argmax(x,dim=0)[0]
            return dict(action=action)

class FarmerLstmNewModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.lstm = nn.LSTM(162, 128, batch_first=True)
        self.dense1 = nn.Linear(484 + 128, 512)
        self.dense2 = nn.Linear(512, 512)
        self.dense3 = nn.Linear(512, 512)
        self.dense4 = nn.Linear(512, 512)
        self.dense5 = nn.Linear(512, 512)
        self.dense6 = nn.Linear(512, 1)

    def forward(self, z, x, return_value=False, flags=None):
        lstm_out, (h_n, _) = self.lstm(z)
        lstm_out = lstm_out[:,-1,:]
        x = torch.cat([lstm_out,x], dim=-1)
        x = self.dense1(x)
        x = torch.relu(x)
        x = self.dense2(x)
        x = torch.relu(x)
        x = self.dense3(x)
        x = torch.relu(x)
        x = self.dense4(x)
        x = torch.relu(x)
        x = self.dense5(x)
        x = torch.relu(x)
        x = self.dense6(x)
        if return_value:
            return dict(values=x)
        else:
            if flags is not None and flags.exp_epsilon > 0 and np.random.rand() < flags.exp_epsilon:
                action = torch.randint(x.shape[0], (1,))[0]
            else:
                action = torch.argmax(x,dim=0)[0]
            return dict(action=action)

class GeneralModel(nn.Module):
    def __init__(self):
        super().__init__()
        # input: B * 32 * 57
        # self.lstm = nn.LSTM(162, 512, batch_first=True)
        self.conv_z_1 = torch.nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=(1,57)),  # B * 1 * 64 * 32
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(64),
        )
        # Squeeze(-1) B * 64 * 16
        self.conv_z_2 = torch.nn.Sequential(
            nn.Conv1d(64, 128, kernel_size=(5,), padding=2),  # 128 * 16
            nn.ReLU(inplace=True),
            nn.BatchNorm1d(128),
        )
        self.conv_z_3 = torch.nn.Sequential(
            nn.Conv1d(128, 256, kernel_size=(3,), padding=1),  # 256 * 8
            nn.ReLU(inplace=True),
            nn.BatchNorm1d(256),
        )
        self.conv_z_4 = torch.nn.Sequential(
            nn.Conv1d(256, 512, kernel_size=(3,), padding=1),  # 512 * 4
            nn.ReLU(inplace=True),
            nn.BatchNorm1d(512),
        )

        self.dense1 = nn.Linear(519 + 1024, 1024)
        self.dense2 = nn.Linear(1024, 512)
        self.dense3 = nn.Linear(512, 512)
        self.dense4 = nn.Linear(512, 512)
        self.dense5 = nn.Linear(512, 512)
        self.dense6 = nn.Linear(512, 1)

    def forward(self, z, x, return_value=False, flags=None, debug=False):
        z = z.unsqueeze(1)
        z = self.conv_z_1(z)
        z = z.squeeze(-1)
        z = torch.max_pool1d(z, 2)
        z = self.conv_z_2(z)
        z = torch.max_pool1d(z, 2)
        z = self.conv_z_3(z)
        z = torch.max_pool1d(z, 2)
        z = self.conv_z_4(z)
        z = torch.max_pool1d(z, 2)
        z = z.flatten(1,2)
        x = torch.cat([z,x], dim=-1)
        x = self.dense1(x)
        x = torch.relu(x)
        x = self.dense2(x)
        x = torch.relu(x)
        x = self.dense3(x)
        x = torch.relu(x)
        x = self.dense4(x)
        x = torch.relu(x)
        x = self.dense5(x)
        x = torch.relu(x)
        x = self.dense6(x)
        if return_value:
            return dict(values=x)
        else:
            if flags is not None and flags.exp_epsilon > 0 and np.random.rand() < flags.exp_epsilon:
                action = torch.randint(x.shape[0], (1,))[0]
            else:
                action = torch.argmax(x,dim=0)[0]
            return dict(action=action, max_value=torch.max(x))


# Residual block used by ResNet-18/34: two 3x3 convolutions
class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv1d(in_planes, planes, kernel_size=(3,),
                               stride=(stride,), padding=1, bias=False)
        self.bn1 = nn.BatchNorm1d(planes)
        self.conv2 = nn.Conv1d(planes, planes, kernel_size=(3,),
                               stride=(1,), padding=1, bias=False)
        self.bn2 = nn.BatchNorm1d(planes)
        self.shortcut = nn.Sequential()
        # The processed x must have the same dimensions as the input (size and depth);
        # if they differ, add a conv + BN on the shortcut to map it to the same dimensions
        if stride != 1 or in_planes != self.expansion * planes:
            self.shortcut = nn.Sequential(
                nn.Conv1d(in_planes, self.expansion * planes,
                          kernel_size=(1,), stride=(stride,), bias=False),
                nn.BatchNorm1d(self.expansion * planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class ResnetModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.in_planes = 80
        # input 1*54*41
        self.conv1 = nn.Conv1d(40, 80, kernel_size=(3,),
                               stride=(2,), padding=1, bias=False)  # 1*27*80

        self.bn1 = nn.BatchNorm1d(80)

        self.layer1 = self._make_layer(BasicBlock, 80, 2, stride=2)   # 1*14*80
        self.layer2 = self._make_layer(BasicBlock, 160, 2, stride=2)  # 1*7*160
        self.layer3 = self._make_layer(BasicBlock, 320, 2, stride=2)  # 1*4*320
        # self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear1 = nn.Linear(320 * BasicBlock.expansion * 4 + 15 * 4, 1024)
        self.linear2 = nn.Linear(1024, 512)
        self.linear3 = nn.Linear(512, 256)
        self.linear4 = nn.Linear(256, 1)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, z, x, return_value=False, flags=None, debug=False):
        out = F.relu(self.bn1(self.conv1(z)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = out.flatten(1,2)
        out = torch.cat([x,x,x,x,out], dim=-1)
        out = F.leaky_relu_(self.linear1(out))
        out = F.leaky_relu_(self.linear2(out))
        out = F.leaky_relu_(self.linear3(out))
        out = F.leaky_relu_(self.linear4(out))
        if return_value:
            return dict(values=out)
        else:
            if flags is not None and flags.exp_epsilon > 0 and np.random.rand() < flags.exp_epsilon:
                action = torch.randint(out.shape[0], (1,))[0]
            else:
                action = torch.argmax(out,dim=0)[0]
            return dict(action=action, max_value=torch.max(out))


class BidModel(nn.Module):
    def __init__(self):
        super().__init__()

        self.dense1 = nn.Linear(114, 512)
        self.dense2 = nn.Linear(512, 512)
        self.dense3 = nn.Linear(512, 512)
        self.dense4 = nn.Linear(512, 512)
        self.dense5 = nn.Linear(512, 512)
        self.dense6 = nn.Linear(512, 1)

    def forward(self, z, x, return_value=False, flags=None, debug=False):
        x = self.dense1(x)
        x = F.leaky_relu(x)
        # x = F.relu(x)
        x = self.dense2(x)
        x = F.leaky_relu(x)
        # x = F.relu(x)
        x = self.dense3(x)
        x = F.leaky_relu(x)
        # x = F.relu(x)
        x = self.dense4(x)
        x = F.leaky_relu(x)
        # x = F.relu(x)
        x = self.dense5(x)
        # x = F.relu(x)
        x = F.leaky_relu(x)
        x = self.dense6(x)
        if return_value:
            return dict(values=x)
        else:
            if flags is not None and flags.exp_epsilon > 0 and np.random.rand() < flags.exp_epsilon:
                action = torch.randint(x.shape[0], (1,))[0]
            else:
                action = torch.argmax(x,dim=0)[0]
            return dict(action=action, max_value=torch.max(x))


# Model dict is only used in evaluation but not training
model_dict = {}
model_dict['landlord'] = LandlordLstmModel
model_dict['landlord_up'] = FarmerLstmModel
model_dict['landlord_down'] = FarmerLstmModel
model_dict_resnet = {}
model_dict_resnet['landlord'] = ResnetModel
model_dict_resnet['landlord_up'] = ResnetModel
model_dict_resnet['landlord_down'] = ResnetModel
model_dict_resnet['bidding'] = BidModel
model_dict_general = {}
model_dict_general['landlord'] = GeneralModel
model_dict_general['landlord_up'] = GeneralModel
model_dict_general['landlord_down'] = GeneralModel
model_dict_general['bidding'] = BidModel

class Model:
class General_Model:
    """
    The wrapper for the three models. We also wrap several
    interfaces such as share_memory, eval, etc.
    """
    def __init__(self, device=0):
        self.models = {}
        self.models['landlord'] = LandlordLstmModel().to(torch.device('cuda:'+str(device)))
        self.models['landlord_up'] = FarmerLstmModel().to(torch.device('cuda:'+str(device)))
        self.models['landlord_down'] = FarmerLstmModel().to(torch.device('cuda:'+str(device)))
        if not device == "cpu":
            device = 'cuda:' + str(device)
        # model = GeneralModel().to(torch.device(device))
        self.models['landlord'] = GeneralModel1().to(torch.device(device))
        self.models['landlord_up'] = GeneralModel1().to(torch.device(device))
        self.models['landlord_down'] = GeneralModel1().to(torch.device(device))
        self.models['bidding'] = BidModel().to(torch.device(device))

    def forward(self, position, z, x, training=False, flags=None, debug=False):
        model = self.models[position]
        return model.forward(z, x, training, flags, debug)

    def share_memory(self):
        self.models['landlord'].share_memory()
        self.models['landlord_up'].share_memory()
        self.models['landlord_down'].share_memory()
        self.models['bidding'].share_memory()

    def eval(self):
        self.models['landlord'].eval()
        self.models['landlord_up'].eval()
        self.models['landlord_down'].eval()
        self.models['bidding'].eval()

    def parameters(self, position):
        return self.models[position].parameters()

    def get_model(self, position):
        return self.models[position]

    def get_models(self):
        return self.models

class OldModel:
    """
    The wrapper for the three models. We also wrap several
    interfaces such as share_memory, eval, etc.
    """
    def __init__(self, device=0):
        self.models = {}
        if not device == "cpu":
            device = 'cuda:' + str(device)
        self.models['landlord'] = LandlordLstmModel().to(torch.device(device))
        self.models['landlord_up'] = FarmerLstmModel().to(torch.device(device))
        self.models['landlord_down'] = FarmerLstmModel().to(torch.device(device))

    def forward(self, position, z, x, training=False, flags=None):
        model = self.models[position]

@@ -117,3 +430,44 @@ class Model:

    def get_models(self):
        return self.models


class Model:
    """
    The wrapper for the three models. We also wrap several
    interfaces such as share_memory, eval, etc.
    """
    def __init__(self, device=0):
        self.models = {}
        if not device == "cpu":
            device = 'cuda:' + str(device)
        # model = GeneralModel().to(torch.device(device))
        self.models['landlord'] = ResnetModel().to(torch.device(device))
        self.models['landlord_up'] = ResnetModel().to(torch.device(device))
        self.models['landlord_down'] = ResnetModel().to(torch.device(device))
        self.models['bidding'] = BidModel().to(torch.device(device))

    def forward(self, position, z, x, training=False, flags=None, debug=False):
        model = self.models[position]
        return model.forward(z, x, training, flags, debug)

    def share_memory(self):
        self.models['landlord'].share_memory()
        self.models['landlord_up'].share_memory()
        self.models['landlord_down'].share_memory()
        self.models['bidding'].share_memory()

    def eval(self):
        self.models['landlord'].eval()
        self.models['landlord_up'].eval()
        self.models['landlord_down'].eval()
        self.models['bidding'].eval()

    def parameters(self, position):
        return self.models[position].parameters()

    def get_model(self, position):
        return self.models[position]

    def get_models(self):
        return self.models
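For orientation, here is a quick shape check of the two new heads, written as a standalone sketch rather than code from this commit; the input sizes (32x57 history planes plus a 519-dim flat vector for GeneralModel, 40x54 card planes plus a 15-dim flat vector for ResnetModel) are read off the layer definitions above.

# Sketch only (not part of the commit): dummy-tensor sanity check of the new heads.
import torch
from douzero.dmc.models import GeneralModel, ResnetModel

general = GeneralModel()
z = torch.zeros(3, 32, 57)   # 3 candidate actions x 32 history rows x 57 features
x = torch.zeros(3, 519)      # dense1 expects 519 + 1024 inputs after the conv trunk
print(general.forward(z, x, return_value=True)['values'].shape)  # torch.Size([3, 1])

resnet = ResnetModel()
z = torch.zeros(3, 40, 54)   # 40 stacked 54-bit card planes per candidate action
x = torch.zeros(3, 15)       # x is tiled four times before linear1 (15 * 4 = 60)
print(resnet.forward(z, x, return_value=True)['values'].shape)   # torch.Size([3, 1])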
@@ -181,38 +181,25 @@ class DummyAgent(object):
        """
        self.action = action

def get_obs(infoset):
    """
    This function obtains observations with imperfect information
    from the infoset. It has three branches since we encode
    different features for different positions.

    This function will return dictionary named `obs`. It contains
    several fields. These fields will be used to train the model.
    One can play with those features to improve the performance.

    `position` is a string that can be landlord/landlord_down/landlord_up

    `x_batch` is a batch of features (excluding the hisorical moves).
    It also encodes the action feature

    `z_batch` is a batch of features with hisorical moves only.

    `legal_actions` is the legal moves

    `x_no_action`: the features (exluding the hitorical moves and
    the action features). It does not have the batch dim.

    `z`: same as z_batch but not a batch.
    """
    if infoset.player_position == 'landlord':
        return _get_obs_landlord(infoset)
    elif infoset.player_position == 'landlord_up':
        return _get_obs_landlord_up(infoset)
    elif infoset.player_position == 'landlord_down':
        return _get_obs_landlord_down(infoset)
    else:
        raise ValueError('')
def get_obs(infoset, model_type="old"):
    if model_type == "general":
        if infoset.player_position not in ["landlord", "landlord_up", "landlord_down"]:
            raise ValueError('')
        return _get_obs_general(infoset, infoset.player_position)
    elif model_type == "resnet":
        if infoset.player_position not in ["landlord", "landlord_up", "landlord_down"]:
            raise ValueError('')
        return _get_obs_resnet(infoset, infoset.player_position)
    else:
        if infoset.player_position == 'landlord':
            return _get_obs_landlord(infoset)
        elif infoset.player_position == 'landlord_up':
            return _get_obs_landlord_up(infoset)
        elif infoset.player_position == 'landlord_down':
            return _get_obs_landlord_down(infoset)
        else:
            raise ValueError('')

def _get_one_hot_array(num_left_cards, max_num_cards):
    """

@@ -245,29 +232,41 @@ def _cards2array(list_cards):
        jokers[1] = 1
    return np.concatenate((matrix.flatten('F'), jokers))

def _action_seq_list2array(action_seq_list):
    """
    A utility function to encode the historical moves.
    We encode the historical 15 actions. If there is
    no 15 actions, we pad the features with 0. Since
    three moves is a round in DouDizhu, we concatenate
    the representations for each consecutive three moves.
    Finally, we obtain a 5x162 matrix, which will be fed
    into LSTM for encoding.
    """
    action_seq_array = np.zeros((len(action_seq_list), 54))
    for row, list_cards in enumerate(action_seq_list):
        action_seq_array[row, :] = _cards2array(list_cards)
    action_seq_array = action_seq_array.reshape(5, 162)

def _action_seq_list2array(action_seq_list, model_type="old"):
    if model_type == "general":
        position_map = {"landlord": 0, "landlord_up": 1, "landlord_down": 2}
        action_seq_array = np.ones((len(action_seq_list), 57)) * -1  # Default value -1 for unused slots
        for row, list_cards in enumerate(action_seq_list):
            if list_cards:
                action_seq_array[row, :54] = _cards2array(list_cards[1])
                for pos in position_map:
                    if list_cards[0] == pos:
                        action_seq_array[row, 54 + position_map[pos]] = 1
                    else:
                        action_seq_array[row, 54 + position_map[pos]] = 0
    elif model_type == "resnet":
        action_seq_array = np.ones((len(action_seq_list), 54)) * -1  # Default value -1 for unused slots
        for row, list_cards in enumerate(action_seq_list):
            if list_cards:
                action_seq_array[row, :] = _cards2array(list_cards[1])
    else:
        action_seq_array = np.zeros((len(action_seq_list), 54))
        for row, list_cards in enumerate(action_seq_list):
            if list_cards:
                action_seq_array[row, :] = _cards2array(list_cards[1])
        action_seq_array = action_seq_array.reshape(5, 162)
    return action_seq_array

def _process_action_seq(sequence, length=15):
def _process_action_seq(sequence, length=15, new_model=True):
    """
    A utility function encoding historical moves. We
    encode 15 moves. If there is no 15 moves, we pad
    with zeros.
    """
    sequence = sequence[-length:].copy()
    if new_model:
        sequence = sequence[::-1]
    if len(sequence) < length:
        empty_sequence = [[] for _ in range(length - len(sequence))]
        empty_sequence.extend(sequence)
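As a hedged illustration of what the new "general" history encoding produces (assuming, as the indexing above implies, that card_play_action_seq stores [position, cards] pairs), a minimal sketch to be run inside douzero/env/env.py where these private helpers live:

# Sketch only: the "general" path yields one 57-wide row per historical move,
# 54 card bits plus a 3-bit seat indicator; padded slots stay at -1.
seq = [["landlord", [3, 3, 4]], ["landlord_down", []]]
arr = _action_seq_list2array(_process_action_seq(seq, 32), model_type="general")
print(arr.shape)    # (32, 57)
print(arr[0][0])    # -1.0 -> an unused (padded) slot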
@@ -353,18 +352,18 @@ def _get_obs_landlord(infoset):
                             landlord_down_num_cards_left,
                             bomb_num))
    z = _action_seq_list2array(_process_action_seq(
        infoset.card_play_action_seq))
        infoset.card_play_action_seq, 15, False), "old")
    z_batch = np.repeat(
        z[np.newaxis, :, :],
        num_legal_actions, axis=0)
    obs = {
            'position': 'landlord',
            'x_batch': x_batch.astype(np.float32),
            'z_batch': z_batch.astype(np.float32),
            'legal_actions': infoset.legal_actions,
            'x_no_action': x_no_action.astype(np.int8),
            'z': z.astype(np.int8),
          }
        'position': 'landlord',
        'x_batch': x_batch.astype(np.float32),
        'z_batch': z_batch.astype(np.float32),
        'legal_actions': infoset.legal_actions,
        'x_no_action': x_no_action.astype(np.int8),
        'z': z.astype(np.int8),
    }
    return obs

def _get_obs_landlord_up(infoset):

@@ -451,18 +450,18 @@ def _get_obs_landlord_up(infoset):
                             teammate_num_cards_left,
                             bomb_num))
    z = _action_seq_list2array(_process_action_seq(
        infoset.card_play_action_seq))
        infoset.card_play_action_seq, 15, False), "old")
    z_batch = np.repeat(
        z[np.newaxis, :, :],
        num_legal_actions, axis=0)
    obs = {
            'position': 'landlord_up',
            'x_batch': x_batch.astype(np.float32),
            'z_batch': z_batch.astype(np.float32),
            'legal_actions': infoset.legal_actions,
            'x_no_action': x_no_action.astype(np.int8),
            'z': z.astype(np.int8),
          }
        'position': 'landlord_up',
        'x_batch': x_batch.astype(np.float32),
        'z_batch': z_batch.astype(np.float32),
        'legal_actions': infoset.legal_actions,
        'x_no_action': x_no_action.astype(np.int8),
        'z': z.astype(np.int8),
    }
    return obs

def _get_obs_landlord_down(infoset):

@@ -555,16 +554,530 @@ def _get_obs_landlord_down(infoset):
                             teammate_num_cards_left,
                             bomb_num))
    z = _action_seq_list2array(_process_action_seq(
        infoset.card_play_action_seq))
        infoset.card_play_action_seq, 15, False), "old")
    z_batch = np.repeat(
        z[np.newaxis, :, :],
        num_legal_actions, axis=0)
    obs = {
            'position': 'landlord_down',
            'x_batch': x_batch.astype(np.float32),
            'z_batch': z_batch.astype(np.float32),
            'legal_actions': infoset.legal_actions,
            'x_no_action': x_no_action.astype(np.int8),
            'z': z.astype(np.int8),
          }
        'position': 'landlord_down',
        'x_batch': x_batch.astype(np.float32),
        'z_batch': z_batch.astype(np.float32),
        'legal_actions': infoset.legal_actions,
        'x_no_action': x_no_action.astype(np.int8),
        'z': z.astype(np.int8),
    }
    return obs
def _get_obs_resnet(infoset, position):
    num_legal_actions = len(infoset.legal_actions)
    my_handcards = _cards2array(infoset.player_hand_cards)
    my_handcards_batch = np.repeat(my_handcards[np.newaxis, :],
                                   num_legal_actions, axis=0)

    other_handcards = _cards2array(infoset.other_hand_cards)
    other_handcards_batch = np.repeat(other_handcards[np.newaxis, :],
                                      num_legal_actions, axis=0)

    position_map = {
        "landlord": [1, 0, 0],
        "landlord_up": [0, 1, 0],
        "landlord_down": [0, 0, 1]
    }
    position_info = np.array(position_map[position])
    position_info_batch = np.repeat(position_info[np.newaxis, :],
                                    num_legal_actions, axis=0)

    bid_info = np.array(infoset.bid_info).flatten()
    bid_info_batch = np.repeat(bid_info[np.newaxis, :],
                               num_legal_actions, axis=0)

    multiply_info = np.array(infoset.multiply_info)
    multiply_info_batch = np.repeat(multiply_info[np.newaxis, :],
                                    num_legal_actions, axis=0)

    three_landlord_cards = _cards2array(infoset.three_landlord_cards)
    three_landlord_cards_batch = np.repeat(three_landlord_cards[np.newaxis, :],
                                           num_legal_actions, axis=0)

    last_action = _cards2array(infoset.last_move)
    last_action_batch = np.repeat(last_action[np.newaxis, :],
                                  num_legal_actions, axis=0)

    my_action_batch = np.zeros(my_handcards_batch.shape)
    for j, action in enumerate(infoset.legal_actions):
        my_action_batch[j, :] = _cards2array(action)

    landlord_num_cards_left = _get_one_hot_array(
        infoset.num_cards_left_dict['landlord'], 20)
    landlord_num_cards_left_batch = np.repeat(
        landlord_num_cards_left[np.newaxis, :],
        num_legal_actions, axis=0)

    landlord_up_num_cards_left = _get_one_hot_array(
        infoset.num_cards_left_dict['landlord_up'], 17)
    landlord_up_num_cards_left_batch = np.repeat(
        landlord_up_num_cards_left[np.newaxis, :],
        num_legal_actions, axis=0)

    landlord_down_num_cards_left = _get_one_hot_array(
        infoset.num_cards_left_dict['landlord_down'], 17)
    landlord_down_num_cards_left_batch = np.repeat(
        landlord_down_num_cards_left[np.newaxis, :],
        num_legal_actions, axis=0)

    other_handcards_left_list = []
    for pos in ["landlord", "landlord_up", "landlord_up"]:
        if pos != position:
            other_handcards_left_list.extend(infoset.all_handcards[pos])

    landlord_played_cards = _cards2array(
        infoset.played_cards['landlord'])
    landlord_played_cards_batch = np.repeat(
        landlord_played_cards[np.newaxis, :],
        num_legal_actions, axis=0)

    landlord_up_played_cards = _cards2array(
        infoset.played_cards['landlord_up'])
    landlord_up_played_cards_batch = np.repeat(
        landlord_up_played_cards[np.newaxis, :],
        num_legal_actions, axis=0)

    landlord_down_played_cards = _cards2array(
        infoset.played_cards['landlord_down'])
    landlord_down_played_cards_batch = np.repeat(
        landlord_down_played_cards[np.newaxis, :],
        num_legal_actions, axis=0)

    bomb_num = _get_one_hot_bomb(
        infoset.bomb_num)
    bomb_num_batch = np.repeat(
        bomb_num[np.newaxis, :],
        num_legal_actions, axis=0)
    num_cards_left = np.hstack((
        landlord_num_cards_left,  # 20
        landlord_up_num_cards_left,  # 17
        landlord_down_num_cards_left))

    x_batch = np.hstack((
        bid_info_batch,  # 12
        multiply_info_batch))  # 3
    x_no_action = np.hstack((
        bid_info,
        multiply_info))
    z = np.vstack((
        num_cards_left,
        my_handcards,  # 54
        other_handcards,  # 54
        three_landlord_cards,  # 54
        landlord_played_cards,  # 54
        landlord_up_played_cards,  # 54
        landlord_down_played_cards,  # 54
        _action_seq_list2array(_process_action_seq(infoset.card_play_action_seq, 32), model_type="resnet")
    ))

    _z_batch = np.repeat(
        z[np.newaxis, :, :],
        num_legal_actions, axis=0)
    my_action_batch = my_action_batch[:, np.newaxis, :]
    z_batch = np.zeros([len(_z_batch), 40, 54], int)
    for i in range(0, len(_z_batch)):
        z_batch[i] = np.vstack((my_action_batch[i], _z_batch[i]))
    obs = {
        'position': position,
        'x_batch': x_batch.astype(np.float32),
        'z_batch': z_batch.astype(np.float32),
        'legal_actions': infoset.legal_actions,
        'x_no_action': x_no_action.astype(np.int8),
        'z': z.astype(np.int8),
    }
    return obs

def _get_obs_general(infoset, position):
    num_legal_actions = len(infoset.legal_actions)
    my_handcards = _cards2array(infoset.player_hand_cards)
    my_handcards_batch = np.repeat(my_handcards[np.newaxis, :],
                                   num_legal_actions, axis=0)

    other_handcards = _cards2array(infoset.other_hand_cards)
    other_handcards_batch = np.repeat(other_handcards[np.newaxis, :],
                                      num_legal_actions, axis=0)

    position_map = {
        "landlord": [1, 0, 0],
        "landlord_up": [0, 1, 0],
        "landlord_down": [0, 0, 1]
    }
    position_info = np.array(position_map[position])
    position_info_batch = np.repeat(position_info[np.newaxis, :],
                                    num_legal_actions, axis=0)

    bid_info = np.array(infoset.bid_info).flatten()
    bid_info_batch = np.repeat(bid_info[np.newaxis, :],
                               num_legal_actions, axis=0)

    multiply_info = np.array(infoset.multiply_info)
    multiply_info_batch = np.repeat(multiply_info[np.newaxis, :],
                                    num_legal_actions, axis=0)

    three_landlord_cards = _cards2array(infoset.three_landlord_cards)
    three_landlord_cards_batch = np.repeat(three_landlord_cards[np.newaxis, :],
                                           num_legal_actions, axis=0)

    last_action = _cards2array(infoset.last_move)
    last_action_batch = np.repeat(last_action[np.newaxis, :],
                                  num_legal_actions, axis=0)

    my_action_batch = np.zeros(my_handcards_batch.shape)
    for j, action in enumerate(infoset.legal_actions):
        my_action_batch[j, :] = _cards2array(action)

    landlord_num_cards_left = _get_one_hot_array(
        infoset.num_cards_left_dict['landlord'], 20)
    landlord_num_cards_left_batch = np.repeat(
        landlord_num_cards_left[np.newaxis, :],
        num_legal_actions, axis=0)

    landlord_up_num_cards_left = _get_one_hot_array(
        infoset.num_cards_left_dict['landlord_up'], 17)
    landlord_up_num_cards_left_batch = np.repeat(
        landlord_up_num_cards_left[np.newaxis, :],
        num_legal_actions, axis=0)

    landlord_down_num_cards_left = _get_one_hot_array(
        infoset.num_cards_left_dict['landlord_down'], 17)
    landlord_down_num_cards_left_batch = np.repeat(
        landlord_down_num_cards_left[np.newaxis, :],
        num_legal_actions, axis=0)

    other_handcards_left_list = []
    for pos in ["landlord", "landlord_up", "landlord_up"]:
        if pos != position:
            other_handcards_left_list.extend(infoset.all_handcards[pos])

    landlord_played_cards = _cards2array(
        infoset.played_cards['landlord'])
    landlord_played_cards_batch = np.repeat(
        landlord_played_cards[np.newaxis, :],
        num_legal_actions, axis=0)

    landlord_up_played_cards = _cards2array(
        infoset.played_cards['landlord_up'])
    landlord_up_played_cards_batch = np.repeat(
        landlord_up_played_cards[np.newaxis, :],
        num_legal_actions, axis=0)

    landlord_down_played_cards = _cards2array(
        infoset.played_cards['landlord_down'])
    landlord_down_played_cards_batch = np.repeat(
        landlord_down_played_cards[np.newaxis, :],
        num_legal_actions, axis=0)

    bomb_num = _get_one_hot_bomb(
        infoset.bomb_num)
    bomb_num_batch = np.repeat(
        bomb_num[np.newaxis, :],
        num_legal_actions, axis=0)

    x_batch = np.hstack((position_info_batch,  # 3
                         my_handcards_batch,  # 54
                         other_handcards_batch,  # 54
                         three_landlord_cards_batch,  # 54
                         last_action_batch,  # 54
                         landlord_played_cards_batch,  # 54
                         landlord_up_played_cards_batch,  # 54
                         landlord_down_played_cards_batch,  # 54
                         landlord_num_cards_left_batch,  # 20
                         landlord_up_num_cards_left_batch,  # 17
                         landlord_down_num_cards_left_batch,  # 17
                         bomb_num_batch,  # 15
                         bid_info_batch,  # 12
                         multiply_info_batch,  # 3
                         my_action_batch))  # 54
    x_no_action = np.hstack((position_info,
                             my_handcards,
                             other_handcards,
                             three_landlord_cards,
                             last_action,
                             landlord_played_cards,
                             landlord_up_played_cards,
                             landlord_down_played_cards,
                             landlord_num_cards_left,
                             landlord_up_num_cards_left,
                             landlord_down_num_cards_left,
                             bomb_num,
                             bid_info,
                             multiply_info))
    z = _action_seq_list2array(_process_action_seq(
        infoset.card_play_action_seq, 32), "general")
    z_batch = np.repeat(
        z[np.newaxis, :, :],
        num_legal_actions, axis=0)
    obs = {
        'position': position,
        'x_batch': x_batch.astype(np.float32),
        'z_batch': z_batch.astype(np.float32),
        'legal_actions': infoset.legal_actions,
        'x_no_action': x_no_action.astype(np.int8),
        'z': z.astype(np.int8),
    }
    return obs


def gen_bid_legal_actions(player_id, bid_info):
    self_bid_info = bid_info[:, [(player_id - 1) % 3, player_id, (player_id + 1) % 3]]
    curr_round = -1
    for r in range(4):
        if -1 in self_bid_info[r]:
            curr_round = r
            break
    bid_actions = []
    if curr_round != -1:
        self_bid_info[curr_round] = [0, 0, 0]
        bid_actions.append(np.array(self_bid_info).flatten())
        self_bid_info[curr_round] = [0, 1, 0]
        bid_actions.append(np.array(self_bid_info).flatten())
    return np.array(bid_actions)


def _get_obs_for_bid(player_id, bid_info, hand_cards):
    all_cards = [3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
                 8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, 12,
                 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 17, 17, 17, 17, 20, 30]
    num_legal_actions = 2
    my_handcards = _cards2array(hand_cards)
    my_handcards_batch = np.repeat(my_handcards[np.newaxis, :],
                                   num_legal_actions, axis=0)
    other_cards = []
    other_cards.extend(all_cards)
    for card in hand_cards:
        other_cards.remove(card)
    other_handcards = _cards2array(other_cards)
    other_handcards_batch = np.repeat(other_handcards[np.newaxis, :],
                                      num_legal_actions, axis=0)

    position_info = np.array([0, 0, 0])
    position_info_batch = np.repeat(position_info[np.newaxis, :],
                                    num_legal_actions, axis=0)

    bid_legal_actions = gen_bid_legal_actions(player_id, bid_info)
    bid_info = bid_legal_actions[0]
    bid_info_batch = bid_legal_actions

    multiply_info = np.array([0, 0, 0])
    multiply_info_batch = np.repeat(multiply_info[np.newaxis, :],
                                    num_legal_actions, axis=0)

    three_landlord_cards = _cards2array([])
    three_landlord_cards_batch = np.repeat(three_landlord_cards[np.newaxis, :],
                                           num_legal_actions, axis=0)

    last_action = _cards2array([])
    last_action_batch = np.repeat(last_action[np.newaxis, :],
                                  num_legal_actions, axis=0)

    my_action_batch = np.zeros(my_handcards_batch.shape)
    for j in range(2):
        my_action_batch[j, :] = _cards2array([])

    landlord_num_cards_left = _get_one_hot_array(0, 20)
    landlord_num_cards_left_batch = np.repeat(
        landlord_num_cards_left[np.newaxis, :],
        num_legal_actions, axis=0)

    landlord_up_num_cards_left = _get_one_hot_array(0, 17)
    landlord_up_num_cards_left_batch = np.repeat(
        landlord_up_num_cards_left[np.newaxis, :],
        num_legal_actions, axis=0)

    landlord_down_num_cards_left = _get_one_hot_array(0, 17)
    landlord_down_num_cards_left_batch = np.repeat(
        landlord_down_num_cards_left[np.newaxis, :],
        num_legal_actions, axis=0)

    landlord_played_cards = _cards2array([])
    landlord_played_cards_batch = np.repeat(
        landlord_played_cards[np.newaxis, :],
        num_legal_actions, axis=0)

    landlord_up_played_cards = _cards2array([])
    landlord_up_played_cards_batch = np.repeat(
        landlord_up_played_cards[np.newaxis, :],
        num_legal_actions, axis=0)

    landlord_down_played_cards = _cards2array([])
    landlord_down_played_cards_batch = np.repeat(
        landlord_down_played_cards[np.newaxis, :],
        num_legal_actions, axis=0)

    bomb_num = _get_one_hot_bomb(0)
    bomb_num_batch = np.repeat(
        bomb_num[np.newaxis, :],
        num_legal_actions, axis=0)

    x_batch = np.hstack((position_info_batch,
                         my_handcards_batch,
                         other_handcards_batch,
                         three_landlord_cards_batch,
                         last_action_batch,
                         landlord_played_cards_batch,
                         landlord_up_played_cards_batch,
                         landlord_down_played_cards_batch,
                         landlord_num_cards_left_batch,
                         landlord_up_num_cards_left_batch,
                         landlord_down_num_cards_left_batch,
                         bomb_num_batch,
                         bid_info_batch,
                         multiply_info_batch,
                         my_action_batch))
    x_no_action = np.hstack((position_info,
                             my_handcards,
                             other_handcards,
                             three_landlord_cards,
                             last_action,
                             landlord_played_cards,
                             landlord_up_played_cards,
                             landlord_down_played_cards,
                             landlord_num_cards_left,
                             landlord_up_num_cards_left,
                             landlord_down_num_cards_left,
                             bomb_num))
    z = _action_seq_list2array(_process_action_seq([], 32))
    z_batch = np.repeat(
        z[np.newaxis, :, :],
        num_legal_actions, axis=0)
    obs = {
        'position': "",
        'x_batch': x_batch.astype(np.float32),
        'z_batch': z_batch.astype(np.float32),
        'legal_actions': bid_legal_actions,
        'x_no_action': x_no_action.astype(np.int8),
        'z': z.astype(np.int8),
        "bid_info_batch": bid_info_batch.astype(np.int8),
        "multiply_info": multiply_info.astype(np.int8)
    }
    return obs

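A small usage sketch (not from the commit) of the bidding helper above; with an untouched 4x3 bid grid it returns exactly two flattened candidates, one for "pass" and one for "bid":

# Sketch only: two legal bid actions for a player who has not acted yet.
import numpy as np

bid_info = np.full((4, 3), -1)
actions = gen_bid_legal_actions(player_id=1, bid_info=bid_info)
print(actions.shape)  # (2, 12)
print(actions[1])     # second candidate marks a 1 in this player's own column for round 0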
def _get_obs_for_multiply(position, bid_info, hand_cards, landlord_cards):
    all_cards = [3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
                 8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, 12,
                 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 17, 17, 17, 17, 20, 30]
    num_legal_actions = 3
    my_handcards = _cards2array(hand_cards)
    my_handcards_batch = np.repeat(my_handcards[np.newaxis, :],
                                   num_legal_actions, axis=0)
    other_cards = []
    other_cards.extend(all_cards)
    for card in hand_cards:
        other_cards.remove(card)
    other_handcards = _cards2array(other_cards)
    other_handcards_batch = np.repeat(other_handcards[np.newaxis, :],
                                      num_legal_actions, axis=0)

    position_map = {
        "landlord": [1, 0, 0],
        "landlord_up": [0, 1, 0],
        "landlord_down": [0, 0, 1]
    }
    position_info = np.array(position_map[position])
    position_info_batch = np.repeat(position_info[np.newaxis, :],
                                    num_legal_actions, axis=0)

    bid_info = np.array(bid_info).flatten()
    bid_info_batch = np.repeat(bid_info[np.newaxis, :],
                               num_legal_actions, axis=0)

    multiply_info = np.array([0, 0, 0])
    multiply_info_batch = np.array([[1, 0, 0],
                                    [0, 1, 0],
                                    [0, 0, 1]])

    three_landlord_cards = _cards2array(landlord_cards)
    three_landlord_cards_batch = np.repeat(three_landlord_cards[np.newaxis, :],
                                           num_legal_actions, axis=0)

    last_action = _cards2array([])
    last_action_batch = np.repeat(last_action[np.newaxis, :],
                                  num_legal_actions, axis=0)

    my_action_batch = np.zeros(my_handcards_batch.shape)
    for j in range(num_legal_actions):
        my_action_batch[j, :] = _cards2array([])

    landlord_num_cards_left = _get_one_hot_array(0, 20)
    landlord_num_cards_left_batch = np.repeat(
        landlord_num_cards_left[np.newaxis, :],
        num_legal_actions, axis=0)

    landlord_up_num_cards_left = _get_one_hot_array(0, 17)
    landlord_up_num_cards_left_batch = np.repeat(
        landlord_up_num_cards_left[np.newaxis, :],
        num_legal_actions, axis=0)

    landlord_down_num_cards_left = _get_one_hot_array(0, 17)
    landlord_down_num_cards_left_batch = np.repeat(
        landlord_down_num_cards_left[np.newaxis, :],
        num_legal_actions, axis=0)

    landlord_played_cards = _cards2array([])
    landlord_played_cards_batch = np.repeat(
        landlord_played_cards[np.newaxis, :],
        num_legal_actions, axis=0)

    landlord_up_played_cards = _cards2array([])
    landlord_up_played_cards_batch = np.repeat(
        landlord_up_played_cards[np.newaxis, :],
        num_legal_actions, axis=0)

    landlord_down_played_cards = _cards2array([])
    landlord_down_played_cards_batch = np.repeat(
        landlord_down_played_cards[np.newaxis, :],
        num_legal_actions, axis=0)

    bomb_num = _get_one_hot_bomb(0)
    bomb_num_batch = np.repeat(
        bomb_num[np.newaxis, :],
        num_legal_actions, axis=0)

    x_batch = np.hstack((position_info_batch,
                         my_handcards_batch,
                         other_handcards_batch,
                         three_landlord_cards_batch,
                         last_action_batch,
                         landlord_played_cards_batch,
                         landlord_up_played_cards_batch,
                         landlord_down_played_cards_batch,
                         landlord_num_cards_left_batch,
                         landlord_up_num_cards_left_batch,
                         landlord_down_num_cards_left_batch,
                         bomb_num_batch,
                         bid_info_batch,
                         multiply_info_batch,
                         my_action_batch))
    x_no_action = np.hstack((position_info,
                             my_handcards,
                             other_handcards,
                             three_landlord_cards,
                             last_action,
                             landlord_played_cards,
                             landlord_up_played_cards,
                             landlord_down_played_cards,
                             landlord_num_cards_left,
                             landlord_up_num_cards_left,
                             landlord_down_num_cards_left,
                             bomb_num))
    z = _action_seq_list2array(_process_action_seq([], 32))
    z_batch = np.repeat(
        z[np.newaxis, :, :],
        num_legal_actions, axis=0)
    obs = {
        'position': "",
        'x_batch': x_batch.astype(np.float32),
        'z_batch': z_batch.astype(np.float32),
        'legal_actions': multiply_info_batch,
        'x_no_action': x_no_action.astype(np.int8),
        'z': z.astype(np.int8),
        "bid_info": bid_info.astype(np.int8),
        "multiply_info_batch": multiply_info.astype(np.int8)
    }
    return obs

@@ -401,13 +401,13 @@ class InfoSet(object):
        self.player_position = player_position
        # The hand cands of the current player. A list.
        self.player_hand_cards = None
        # The number of cards left for each player. It is a dict with str-->int
        # The number of cards left for each player. It is a dict with str-->int
        self.num_cards_left_dict = None
        # The three landload cards. A list.
        self.three_landlord_cards = None
        # The historical moves. It is a list of list
        self.card_play_action_seq = None
        # The union of the hand cards of the other two players for the current player
        # The union of the hand cards of the other two players for the current player
        self.other_hand_cards = None
        # The legal actions for the current move. It is a list of list
        self.legal_actions = None

@@ -419,9 +419,18 @@ class InfoSet(object):
        self.last_move_dict = None
        # The played cands so far. It is a list.
        self.played_cards = None
        # The hand cards of all the players. It is a dict.
        # The hand cards of all the players. It is a dict.
        self.all_handcards = None
        # Last player position that plays a valid move, i.e., not `pass`
        self.last_pid = None
        # The number of bombs played so far
        self.bomb_num = None

        self.bid_info = [[-1, -1, -1],
                         [-1, -1, -1],
                         [-1, -1, -1],
                         [-1, -1, -1]]

        self.multiply_info = [1, 0, 0]

        self.player_id = None

@@ -3,9 +3,15 @@ import numpy as np

from douzero.env.env import get_obs

def _load_model(position, model_path):
    from douzero.dmc.models import model_dict
    model = model_dict[position]()
def _load_model(position, model_path, model_type):
    from douzero.dmc.models import model_dict, model_dict_resnet, model_dict_general
    print(position, "loads", model_type, "model: ", model_path)
    if model_type == "general":
        model = model_dict_general[position]()
    elif model_type == "resnet":
        model = model_dict_resnet[position]()
    else:
        model = model_dict[position]()
    model_state_dict = model.state_dict()
    if torch.cuda.is_available():
        pretrained = torch.load(model_path, map_location='cuda:0')

@@ -22,14 +28,19 @@ def _load_model(position, model_path):
class DeepAgent:

    def __init__(self, position, model_path):
        self.model = _load_model(position, model_path)
        self.model_type = "old"
        if "general" in model_path:
            self.model_type = "general"
        elif "resnet" in model_path:
            self.model_type = "resnet"
        self.model = _load_model(position, model_path, self.model_type)

    def act(self, infoset):
        # Returning directly when there is only one legal action would lose the win-rate info
        # if len(infoset.legal_actions) == 1:
        #     return infoset.legal_actions[0], 0

        obs = get_obs(infoset)
        obs = get_obs(infoset, model_type=self.model_type)
        z_batch = torch.from_numpy(obs['z_batch']).float()
        x_batch = torch.from_numpy(obs['x_batch']).float()
        if torch.cuda.is_available():
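The checkpoint-name convention DeepAgent relies on can be summarised as a standalone sketch (illustrative only; detect_model_type is not a function in the repo):

# Sketch only: mirror of the substring checks in DeepAgent.__init__ above.
def detect_model_type(model_path: str) -> str:
    if "general" in model_path:
        return "general"
    if "resnet" in model_path:
        return "resnet"
    return "old"

assert detect_model_type("baselines/resnet/resnet_landlord_1613536300.ckpt") == "resnet"
assert detect_model_type("baselines/douzero_ADP/landlord_up.ckpt") == "old"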
main.py (29 changed lines)
@@ -21,6 +21,7 @@ from MainWindow import Ui_Form

from douzero.env.game import GameEnv
from douzero.evaluation.deep_agent import DeepAgent
import traceback

import BidModel
import LandlordModel

@@ -83,7 +84,7 @@ class MyPyQT_Form(QtWidgets.QWidget, Ui_Form):
        self.shouldExit = 0  # Signal the previous card-recording round to finish
        self.canRecord = threading.Lock()  # Start recording cards
        self.card_play_model_path_dict = {
            'landlord': "baselines/douzero_ADP/landlord.ckpt",
            'landlord': "baselines/resnet/resnet_landlord_1613536300.ckpt",
            'landlord_up': "baselines/douzero_ADP/landlord_up.ckpt",
            'landlord_down': "baselines/douzero_ADP/landlord_down.ckpt"
        }

@@ -199,9 +200,13 @@ class MyPyQT_Form(QtWidgets.QWidget, Ui_Form):
        ai_players[1] = DeepAgent(self.user_position, self.card_play_model_path_dict[self.user_position])

        self.env = GameEnv(ai_players)

        try:
            self.start()
        except:
        except Exception as e:
            exc_type, exc_obj, exc_tb = sys.exc_info()
            print(e)
            traceback.print_tb(exc_tb)
            self.stop()

    def sleep(self, ms):

@@ -225,12 +230,16 @@ class MyPyQT_Form(QtWidgets.QWidget, Ui_Form):
            self.WinRate.setText("评分:" + action_message["win_rate"])
            print("\n手牌:", str(''.join(
                [EnvCard2RealCard[c] for c in self.env.info_sets[self.user_position].player_hand_cards])))
            print("出牌:", action_message["action"] if action_message["action"] else "不出", ", 胜率:",
            print("出牌:", action_message["action"] if action_message["action"] else "不出", ",得分:",
                  action_message["win_rate"])
            if action_message["action"] == "":
                helper.ClickOnImage("pass_btn", region=self.PassBtnPos)
            else:
                helper.SelectCards(action_message["action"])
                hand_cards_str = ''.join([EnvCard2RealCard[c] for c in self.env.info_sets[self.user_position].player_hand_cards])
                if len(hand_cards_str) == 0 and len(action_message["action"]) == 1:
                    helper.SelectCards(action_message["action"], True)
                else:
                    helper.SelectCards(action_message["action"])
            tryCount = 20
            result = helper.LocateOnScreen("play_card", region=self.PassBtnPos, confidence=0.85)
            while result is None and tryCount > 0:

@@ -248,14 +257,14 @@ class MyPyQT_Form(QtWidgets.QWidget, Ui_Form):
        elif self.play_order == 1:
            self.RPlayedCard.setText("...")
            pass_flag = helper.LocateOnScreen('pass',
                                              region=self.RPlayedCardsPos,
                                              confidence=self.PassConfidence)
                                              region=self.RPlayedCardsPos,
                                              confidence=self.PassConfidence)
            self.detect_start_btn()
            while self.RunGame and self.have_white(self.RPlayedCardsPos) == 0 and pass_flag is None:
                print("等待下家出牌")
                self.sleep(100)
                pass_flag = helper.LocateOnScreen('pass', region=self.RPlayedCardsPos,
                                                  confidence=self.PassConfidence)
                                                  confidence=self.PassConfidence)
                self.detect_start_btn()
            self.sleep(200)
            # "Pass" label not found

@@ -281,13 +290,13 @@ class MyPyQT_Form(QtWidgets.QWidget, Ui_Form):
            self.LPlayedCard.setText("...")
            self.detect_start_btn()
            pass_flag = helper.LocateOnScreen('pass', region=self.LPlayedCardsPos,
                                              confidence=self.PassConfidence)
                                              confidence=self.PassConfidence)
            while self.RunGame and self.have_white(self.LPlayedCardsPos) == 0 and pass_flag is None:
                print("等待上家出牌")
                self.detect_start_btn()
                self.sleep(100)
                pass_flag = helper.LocateOnScreen('pass', region=self.LPlayedCardsPos,
                                                  confidence=self.PassConfidence)
                                                  confidence=self.PassConfidence)
            self.sleep(200)
            # Pass
            # "Pass" label not found

@@ -319,7 +328,7 @@ class MyPyQT_Form(QtWidgets.QWidget, Ui_Form):
    def find_landlord(self, landlord_flag_pos):
        for pos in landlord_flag_pos:
            result = helper.LocateOnScreen("landlord_words", region=pos,
                                           confidence=self.LandlordFlagConfidence)
                                           confidence=self.LandlordFlagConfidence)
            if result is not None:
                return landlord_flag_pos.index(pos)
        return None