Add support for new model types

This commit is contained in:
Vincentzyx 2021-09-05 01:48:08 +08:00
parent 34db1f5ec0
commit 8b2b39216a
7 changed files with 999 additions and 94 deletions

View File

@ -165,8 +165,9 @@ class GameHelper:
self.PicsCV.update({info[0]: imgCv})
def Screenshot(self, region=None): # -> (im, (left, top))
self.Handle = win32gui.FindWindow("Hlddz", None)
hwnd = self.Handle
# im = Image.open(r"C:\Users\q9294\Desktop\llc.png")
# im = Image.open(r"C:\Users\q9294\Desktop\Snipaste_2021-09-05_00-52-51.png")
# im = im.resize((1796, 1047))
# return im, (0,0)
left, top, right, bot = win32gui.GetWindowRect(hwnd)
@ -315,11 +316,14 @@ class GameHelper:
win32gui.PostMessage(self.Handle, WM_LBUTTONDOWN, MK_LBUTTON, lParam)
win32gui.PostMessage(self.Handle, WM_LBUTTONUP, MK_LBUTTON, lParam)
def SelectCards(self, cards):
def SelectCards(self, cards, no_check=False):
print("选择牌", cards)
cards = [card for card in cards]
tobeSelected = []
tobeSelected.extend(cards)
image, windowPos = self.Screenshot()
while image.size[0] == 0:
image, windowPos = self.Screenshot()
handCardsInfo, states = self.GetCards(image)
cardSelectMap = []
for card in handCardsInfo:
@ -340,11 +344,16 @@ class GameHelper:
for i in range(0, len(clickMap)):
if clickMap[i] == 1:
self.LeftClick(handCardsInfo[i][1])
print("点击", handCardsInfo[i][1])
break
time.sleep(0.1)
if self.Interrupt:
break
if no_check:
return
image, _ = self.Screenshot()
while image.size[0] == 0:
image, windowPos = self.Screenshot()
states = self.GetCardsState(image)
clickMap = []
for i in range(0, len(cardSelectMap)):
@ -352,4 +361,4 @@ class GameHelper:
clickMap.append(0)
else:
clickMap.append(1)
QtWidgets.QApplication.processEvents(QEventLoop.AllEvents, 10)
QtWidgets.QApplication.processEvents(QEventLoop.AllEvents, 10)

View File

@ -7,6 +7,7 @@ import numpy as np
import torch
from torch import nn
import torch.nn.functional as F
class LandlordLstmModel(nn.Module):
def __init__(self):
@ -78,22 +79,334 @@ class FarmerLstmModel(nn.Module):
action = torch.argmax(x,dim=0)[0]
return dict(action=action)
class LandlordLstmNewModel(nn.Module):
    """Scoring network: an LSTM over ``z`` followed by a six-layer MLP.

    The LSTM takes 162-wide steps (``batch_first=True``) and produces a
    128-wide output; its last time step is concatenated with the 373-wide
    ``x`` and reduced to one scalar per row.
    """

    def __init__(self):
        super().__init__()
        self.lstm = nn.LSTM(162, 128, batch_first=True)
        self.dense1 = nn.Linear(373 + 128, 512)
        self.dense2 = nn.Linear(512, 512)
        self.dense3 = nn.Linear(512, 512)
        self.dense4 = nn.Linear(512, 512)
        self.dense5 = nn.Linear(512, 512)
        self.dense6 = nn.Linear(512, 1)

    def forward(self, z, x, return_value=False, flags=None):
        """Score every row of the batch and optionally pick one.

        Args:
            z: tensor of shape (batch, steps, 162) fed to the LSTM.
            x: tensor of shape (batch, 373) appended to the LSTM summary.
            return_value: when true, return the raw scores instead of an
                action index.
            flags: optional object with an ``exp_epsilon`` attribute; when
                positive, a uniformly random row is chosen with that
                probability (epsilon-greedy).

        Returns:
            ``dict(values=scores)`` if ``return_value`` else
            ``dict(action=row_index)``.
        """
        sequence_out, _ = self.lstm(z)
        hidden = torch.cat([sequence_out[:, -1, :], x], dim=-1)
        for layer in (self.dense1, self.dense2, self.dense3,
                      self.dense4, self.dense5):
            hidden = torch.relu(layer(hidden))
        scores = self.dense6(hidden)

        if return_value:
            return dict(values=scores)

        # The random draw only happens when exploration is enabled.
        explore = (flags is not None
                   and flags.exp_epsilon > 0
                   and np.random.rand() < flags.exp_epsilon)
        if explore:
            action = torch.randint(scores.shape[0], (1,))[0]
        else:
            action = torch.argmax(scores, dim=0)[0]
        return dict(action=action)
class FarmerLstmNewModel(nn.Module):
    """Scoring network with the same layout as the landlord variant.

    Only the width of ``x`` differs: 484 features are concatenated with
    the 128-wide LSTM output before the six dense layers.
    """

    def __init__(self):
        super().__init__()
        self.lstm = nn.LSTM(162, 128, batch_first=True)
        self.dense1 = nn.Linear(484 + 128, 512)
        self.dense2 = nn.Linear(512, 512)
        self.dense3 = nn.Linear(512, 512)
        self.dense4 = nn.Linear(512, 512)
        self.dense5 = nn.Linear(512, 512)
        self.dense6 = nn.Linear(512, 1)

    def forward(self, z, x, return_value=False, flags=None):
        """Score every row of the batch and optionally pick one.

        Args:
            z: tensor of shape (batch, steps, 162) fed to the LSTM.
            x: tensor of shape (batch, 484) appended to the LSTM summary.
            return_value: when true, return the raw scores instead of an
                action index.
            flags: optional object with an ``exp_epsilon`` attribute used
                for epsilon-greedy random selection.

        Returns:
            ``dict(values=scores)`` if ``return_value`` else
            ``dict(action=row_index)``.
        """
        sequence_out, _ = self.lstm(z)
        last_step = sequence_out[:, -1, :]
        hidden = torch.cat([last_step, x], dim=-1)
        hidden_layers = (self.dense1, self.dense2, self.dense3,
                         self.dense4, self.dense5)
        for layer in hidden_layers:
            hidden = torch.relu(layer(hidden))
        scores = self.dense6(hidden)

        if return_value:
            return dict(values=scores)

        if (flags is not None and flags.exp_epsilon > 0
                and np.random.rand() < flags.exp_epsilon):
            # Exploration: choose a row uniformly at random.
            action = torch.randint(scores.shape[0], (1,))[0]
        else:
            # Exploitation: choose the best-scoring row.
            action = torch.argmax(scores, dim=0)[0]
        return dict(action=action)
class GeneralModel(nn.Module):
    """Convolutional scoring network over the move history ``z``.

    ``z`` is treated as a single-channel image whose rows are moves and
    whose 57 columns are collapsed by the first convolution. Three further
    1-D convolutions follow, each stage ending in a max-pool that halves
    the sequence length. The flattened result (1024 values, which matches a
    32-move history: 32 -> 16 -> 8 -> 4 -> 2 positions of 512 channels) is
    concatenated with the 519-wide ``x`` and passed through six dense
    layers.
    """

    def __init__(self):
        super().__init__()
        # (B, 1, L, 57) -> (B, 64, L, 1): one kernel spans a whole move row.
        self.conv_z_1 = torch.nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=(1, 57)),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(64),
        )
        # Length-preserving 1-D stages; pooling happens in forward().
        self.conv_z_2 = self._conv1d_stage(64, 128, 5)
        self.conv_z_3 = self._conv1d_stage(128, 256, 3)
        self.conv_z_4 = self._conv1d_stage(256, 512, 3)
        self.dense1 = nn.Linear(519 + 1024, 1024)
        self.dense2 = nn.Linear(1024, 512)
        self.dense3 = nn.Linear(512, 512)
        self.dense4 = nn.Linear(512, 512)
        self.dense5 = nn.Linear(512, 512)
        self.dense6 = nn.Linear(512, 1)

    @staticmethod
    def _conv1d_stage(in_channels, out_channels, width):
        """Build a conv -> ReLU -> batch-norm stage with 'same' padding."""
        return torch.nn.Sequential(
            nn.Conv1d(in_channels, out_channels, kernel_size=(width,),
                      padding=width // 2),
            nn.ReLU(inplace=True),
            nn.BatchNorm1d(out_channels),
        )

    def forward(self, z, x, return_value=False, flags=None, debug=False):
        """Score every row of the batch and optionally pick one.

        Args:
            z: tensor of shape (batch, moves, 57).
            x: tensor of shape (batch, 519).
            return_value: when true, return the raw scores.
            flags: optional object with ``exp_epsilon`` for epsilon-greedy
                random selection.
            debug: accepted for interface compatibility; not used here.

        Returns:
            ``dict(values=scores)`` if ``return_value`` else
            ``dict(action=row_index, max_value=scores.max())``.
        """
        history = self.conv_z_1(z.unsqueeze(1)).squeeze(-1)
        history = torch.max_pool1d(history, 2)
        for stage in (self.conv_z_2, self.conv_z_3, self.conv_z_4):
            history = torch.max_pool1d(stage(history), 2)

        hidden = torch.cat([history.flatten(1, 2), x], dim=-1)
        for layer in (self.dense1, self.dense2, self.dense3,
                      self.dense4, self.dense5):
            hidden = torch.relu(layer(hidden))
        scores = self.dense6(hidden)

        if return_value:
            return dict(values=scores)

        explore = (flags is not None
                   and flags.exp_epsilon > 0
                   and np.random.rand() < flags.exp_epsilon)
        if explore:
            action = torch.randint(scores.shape[0], (1,))[0]
        else:
            action = torch.argmax(scores, dim=0)[0]
        return dict(action=action, max_value=torch.max(scores))
# Residual block in the ResNet-18/34 style: two kernel-size-3 convolutions
# (1-D here) plus a skip connection.
class BasicBlock(nn.Module):
    """Two conv/batch-norm layers whose output is added to a shortcut of the input."""

    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        """Build the block.

        Args:
            in_planes: number of input channels.
            planes: number of output channels (times ``expansion``).
            stride: stride of the first convolution and of the projection
                shortcut, if one is needed.
        """
        super().__init__()
        out_planes = self.expansion * planes

        self.conv1 = nn.Conv1d(in_planes, planes, kernel_size=(3,),
                               stride=(stride,), padding=1, bias=False)
        self.bn1 = nn.BatchNorm1d(planes)
        self.conv2 = nn.Conv1d(planes, planes, kernel_size=(3,),
                               stride=(1,), padding=1, bias=False)
        self.bn2 = nn.BatchNorm1d(planes)

        # The residual sum needs both operands to share length and channel
        # count; when the main path changes either, project the input with
        # a 1x1 convolution + batch norm. Otherwise the shortcut is the
        # identity (an empty Sequential).
        if stride != 1 or in_planes != out_planes:
            self.shortcut = nn.Sequential(
                nn.Conv1d(in_planes, out_planes,
                          kernel_size=(1,), stride=(stride,), bias=False),
                nn.BatchNorm1d(out_planes),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        out = self.conv1(x)
        out = F.relu(self.bn1(out))
        out = self.conv2(out)
        out = self.bn2(out)
        out += self.shortcut(x)
        return F.relu(out)
class ResnetModel(nn.Module):
    """1-D residual scoring network.

    ``z`` enters with 40 channels; a strided stem convolution and three
    strided residual stages each halve the length (a length-54 input goes
    54 -> 27 -> 14 -> 7 -> 4). The flattened 320 x 4 feature map is
    concatenated with four copies of the 15-wide ``x`` and reduced to one
    scalar per row by four linear layers.
    """

    def __init__(self):
        super().__init__()
        # Channel count fed to the next residual block; advanced by
        # _make_layer as stages are built.
        self.in_planes = 80
        self.conv1 = nn.Conv1d(40, 80, kernel_size=(3,),
                               stride=(2,), padding=1, bias=False)
        self.bn1 = nn.BatchNorm1d(80)
        self.layer1 = self._make_layer(BasicBlock, 80, 2, stride=2)
        self.layer2 = self._make_layer(BasicBlock, 160, 2, stride=2)
        self.layer3 = self._make_layer(BasicBlock, 320, 2, stride=2)
        self.linear1 = nn.Linear(320 * BasicBlock.expansion * 4 + 15 * 4, 1024)
        self.linear2 = nn.Linear(1024, 512)
        self.linear3 = nn.Linear(512, 256)
        self.linear4 = nn.Linear(256, 1)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Stack ``num_blocks`` blocks; only the first one uses ``stride``."""
        stage = []
        for block_stride in [stride] + [1] * (num_blocks - 1):
            stage.append(block(self.in_planes, planes, block_stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*stage)

    def forward(self, z, x, return_value=False, flags=None, debug=False):
        """Score every row of the batch and optionally pick one.

        Args:
            z: tensor of shape (batch, 40, length).
            x: tensor of shape (batch, 15); repeated four times before the
                linear head.
            return_value: when true, return the raw scores.
            flags: optional object with ``exp_epsilon`` for epsilon-greedy
                random selection.
            debug: accepted for interface compatibility; not used here.

        Returns:
            ``dict(values=scores)`` if ``return_value`` else
            ``dict(action=row_index, max_value=scores.max())``.
        """
        out = F.relu(self.bn1(self.conv1(z)))
        for stage in (self.layer1, self.layer2, self.layer3):
            out = stage(out)
        out = torch.cat([x, x, x, x, out.flatten(1, 2)], dim=-1)
        # Note: the leaky ReLU is also applied to the final scalar output.
        for head in (self.linear1, self.linear2, self.linear3, self.linear4):
            out = F.leaky_relu_(head(out))

        if return_value:
            return dict(values=out)

        explore = (flags is not None
                   and flags.exp_epsilon > 0
                   and np.random.rand() < flags.exp_epsilon)
        if explore:
            action = torch.randint(out.shape[0], (1,))[0]
        else:
            action = torch.argmax(out, dim=0)[0]
        return dict(action=action, max_value=torch.max(out))
class BidModel(nn.Module):
    """Six-layer MLP mapping a 114-wide feature row to one scalar score."""

    def __init__(self):
        super().__init__()
        self.dense1 = nn.Linear(114, 512)
        self.dense2 = nn.Linear(512, 512)
        self.dense3 = nn.Linear(512, 512)
        self.dense4 = nn.Linear(512, 512)
        self.dense5 = nn.Linear(512, 512)
        self.dense6 = nn.Linear(512, 1)

    def forward(self, z, x, return_value=False, flags=None, debug=False):
        """Score every row of ``x`` and optionally pick one.

        Args:
            z: ignored; present so the signature matches the other models.
            x: tensor of shape (batch, 114).
            return_value: when true, return the raw scores.
            flags: optional object with ``exp_epsilon`` for epsilon-greedy
                random selection.
            debug: accepted for interface compatibility; not used here.

        Returns:
            ``dict(values=scores)`` if ``return_value`` else
            ``dict(action=row_index, max_value=scores.max())``.
        """
        hidden = x
        for layer in (self.dense1, self.dense2, self.dense3,
                      self.dense4, self.dense5):
            hidden = F.leaky_relu(layer(hidden))
        scores = self.dense6(hidden)

        if return_value:
            return dict(values=scores)

        explore = (flags is not None
                   and flags.exp_epsilon > 0
                   and np.random.rand() < flags.exp_epsilon)
        if explore:
            action = torch.randint(scores.shape[0], (1,))[0]
        else:
            action = torch.argmax(scores, dim=0)[0]
        return dict(action=action, max_value=torch.max(scores))
# Position -> model-class lookup tables.
# Model dict is only used in evaluation but not training.
model_dict = {
    'landlord': LandlordLstmModel,
    'landlord_up': FarmerLstmModel,
    'landlord_down': FarmerLstmModel,
}

# Same lookup for the residual architecture, plus the bidding network.
model_dict_resnet = {
    'landlord': ResnetModel,
    'landlord_up': ResnetModel,
    'landlord_down': ResnetModel,
    'bidding': BidModel,
}

# Same lookup for the convolutional "general" architecture.
model_dict_general = {
    'landlord': GeneralModel,
    'landlord_up': GeneralModel,
    'landlord_down': GeneralModel,
    'bidding': BidModel,
}
class General_Model:
    """
    The wrapper for the three card-play models and the bidding model,
    using the GeneralModel architecture. We also wrap several
    interfaces such as share_memory, eval, etc.
    """

    def __init__(self, device=0):
        """Instantiate all four networks on ``device``.

        Args:
            device: ``"cpu"``, or a CUDA device index that is expanded to
                ``"cuda:<index>"``.
        """
        if not device == "cpu":
            device = 'cuda:' + str(device)
        target = torch.device(device)
        self.models = {}
        # GeneralModel is the architecture registered in model_dict_general;
        # the previously referenced `GeneralModel1` is not defined in this
        # module and raised NameError on construction.
        self.models['landlord'] = GeneralModel().to(target)
        self.models['landlord_up'] = GeneralModel().to(target)
        self.models['landlord_down'] = GeneralModel().to(target)
        self.models['bidding'] = BidModel().to(target)

    def forward(self, position, z, x, training=False, flags=None, debug=False):
        """Run the network registered for ``position``.

        ``training`` is forwarded positionally as the model's
        ``return_value`` argument, so a true value yields raw scores
        rather than an action.
        """
        model = self.models[position]
        return model.forward(z, x, training, flags, debug)

    def share_memory(self):
        """Call ``share_memory()`` on each of the four networks."""
        for key in ('landlord', 'landlord_up', 'landlord_down', 'bidding'):
            self.models[key].share_memory()

    def eval(self):
        """Switch each of the four networks to evaluation mode."""
        for key in ('landlord', 'landlord_up', 'landlord_down', 'bidding'):
            self.models[key].eval()

    def parameters(self, position):
        """Return the parameters of the network for ``position``."""
        return self.models[position].parameters()

    def get_model(self, position):
        """Return the network for ``position``."""
        return self.models[position]

    def get_models(self):
        """Return the position -> network dictionary."""
        return self.models
class OldModel:
"""
The wrapper for the three models. We also wrap several
interfaces such as share_memory, eval, etc.
"""
def __init__(self, device=0):
self.models = {}
if not device == "cpu":
device = 'cuda:' + str(device)
self.models['landlord'] = LandlordLstmModel().to(torch.device(device))
self.models['landlord_up'] = FarmerLstmModel().to(torch.device(device))
self.models['landlord_down'] = FarmerLstmModel().to(torch.device(device))
def forward(self, position, z, x, training=False, flags=None):
model = self.models[position]
@ -117,3 +430,44 @@ class Model:
def get_models(self):
return self.models
class Model:
    """
    The wrapper for the three card-play models and the bidding model,
    using the ResnetModel architecture. We also wrap several
    interfaces such as share_memory, eval, etc.
    """

    def __init__(self, device=0):
        """Instantiate all four networks on ``device``.

        Args:
            device: ``"cpu"``, or a CUDA device index that is expanded to
                ``"cuda:<index>"``.
        """
        if device != "cpu":
            device = 'cuda:' + str(device)
        target = torch.device(device)
        self.models = {}
        for role in ('landlord', 'landlord_up', 'landlord_down'):
            self.models[role] = ResnetModel().to(target)
        self.models['bidding'] = BidModel().to(target)

    def forward(self, position, z, x, training=False, flags=None, debug=False):
        """Run the network registered for ``position``.

        ``training`` is forwarded positionally as the model's
        ``return_value`` argument.
        """
        return self.models[position].forward(z, x, training, flags, debug)

    def share_memory(self):
        """Call ``share_memory()`` on each of the four networks."""
        for role in ('landlord', 'landlord_up', 'landlord_down', 'bidding'):
            self.models[role].share_memory()

    def eval(self):
        """Switch each of the four networks to evaluation mode."""
        for role in ('landlord', 'landlord_up', 'landlord_down', 'bidding'):
            self.models[role].eval()

    def parameters(self, position):
        """Return the parameters of the network for ``position``."""
        network = self.models[position]
        return network.parameters()

    def get_model(self, position):
        """Return the network for ``position``."""
        return self.models[position]

    def get_models(self):
        """Return the position -> network dictionary."""
        return self.models

651
douzero/env/env.py vendored
View File

@ -181,38 +181,25 @@ class DummyAgent(object):
"""
self.action = action
def get_obs(infoset, model_type="old"):
    """
    This function obtains observations with imperfect information
    from the infoset. The encoder is selected by `model_type`:
    "general" and "resnet" each use one encoder for all positions,
    while any other value uses the original per-position encoders.

    This function will return dictionary named `obs`. It contains
    several fields. These fields will be used to train the model.
    One can play with those features to improve the performance.

    `position` is a string that can be landlord/landlord_down/landlord_up

    `x_batch` is a batch of features (excluding the historical moves).
    It also encodes the action feature

    `z_batch` is a batch of features with historical moves only.

    `legal_actions` is the legal moves

    `x_no_action`: the features (excluding the historical moves and
    the action features). It does not have the batch dim.

    `z`: same as z_batch but not a batch.

    Raises:
        ValueError: if `infoset.player_position` is not one of
            landlord/landlord_up/landlord_down.
    """
    position = infoset.player_position
    # Validate once up front so every branch reports the same, non-empty
    # error for an unknown seat.
    if position not in ('landlord', 'landlord_up', 'landlord_down'):
        raise ValueError('unknown player position: %r' % (position,))

    if model_type == "general":
        return _get_obs_general(infoset, position)
    if model_type == "resnet":
        return _get_obs_resnet(infoset, position)

    # Any other model_type falls back to the original encoders.
    if position == 'landlord':
        return _get_obs_landlord(infoset)
    if position == 'landlord_up':
        return _get_obs_landlord_up(infoset)
    return _get_obs_landlord_down(infoset)
def _get_one_hot_array(num_left_cards, max_num_cards):
"""
@ -245,29 +232,41 @@ def _cards2array(list_cards):
jokers[1] = 1
return np.concatenate((matrix.flatten('F'), jokers))
def _action_seq_list2array(action_seq_list):
"""
A utility function to encode the historical moves.
We encode the historical 15 actions. If there is
no 15 actions, we pad the features with 0. Since
three moves is a round in DouDizhu, we concatenate
the representations for each consecutive three moves.
Finally, we obtain a 5x162 matrix, which will be fed
into LSTM for encoding.
"""
action_seq_array = np.zeros((len(action_seq_list), 54))
for row, list_cards in enumerate(action_seq_list):
action_seq_array[row, :] = _cards2array(list_cards)
action_seq_array = action_seq_array.reshape(5, 162)
def _action_seq_list2array(action_seq_list, model_type="old"):
if model_type == "general":
position_map = {"landlord": 0, "landlord_up": 1, "landlord_down": 2}
action_seq_array = np.ones((len(action_seq_list), 57)) * -1 # Default Value -1 for not using area
for row, list_cards in enumerate(action_seq_list):
if list_cards:
action_seq_array[row, :54] = _cards2array(list_cards[1])
for pos in position_map:
if list_cards[0] == pos:
action_seq_array[row, 54 + position_map[pos]] = 1
else:
action_seq_array[row, 54 + position_map[pos]] = 0
elif model_type == "resnet":
action_seq_array = np.ones((len(action_seq_list), 54)) * -1 # Default Value -1 for not using area
for row, list_cards in enumerate(action_seq_list):
if list_cards:
action_seq_array[row, :] = _cards2array(list_cards[1])
else:
action_seq_array = np.zeros((len(action_seq_list), 54))
for row, list_cards in enumerate(action_seq_list):
if list_cards:
action_seq_array[row, :] = _cards2array(list_cards[1])
action_seq_array = action_seq_array.reshape(5, 162)
return action_seq_array
def _process_action_seq(sequence, length=15):
def _process_action_seq(sequence, length=15, new_model=True):
"""
A utility function encoding historical moves. We
encode 15 moves. If there is no 15 moves, we pad
with zeros.
"""
sequence = sequence[-length:].copy()
if new_model:
sequence = sequence[::-1]
if len(sequence) < length:
empty_sequence = [[] for _ in range(length - len(sequence))]
empty_sequence.extend(sequence)
@ -353,18 +352,18 @@ def _get_obs_landlord(infoset):
landlord_down_num_cards_left,
bomb_num))
z = _action_seq_list2array(_process_action_seq(
infoset.card_play_action_seq))
infoset.card_play_action_seq, 15, False), "old")
z_batch = np.repeat(
z[np.newaxis, :, :],
num_legal_actions, axis=0)
obs = {
'position': 'landlord',
'x_batch': x_batch.astype(np.float32),
'z_batch': z_batch.astype(np.float32),
'legal_actions': infoset.legal_actions,
'x_no_action': x_no_action.astype(np.int8),
'z': z.astype(np.int8),
}
'position': 'landlord',
'x_batch': x_batch.astype(np.float32),
'z_batch': z_batch.astype(np.float32),
'legal_actions': infoset.legal_actions,
'x_no_action': x_no_action.astype(np.int8),
'z': z.astype(np.int8),
}
return obs
def _get_obs_landlord_up(infoset):
@ -451,18 +450,18 @@ def _get_obs_landlord_up(infoset):
teammate_num_cards_left,
bomb_num))
z = _action_seq_list2array(_process_action_seq(
infoset.card_play_action_seq))
infoset.card_play_action_seq, 15, False), "old")
z_batch = np.repeat(
z[np.newaxis, :, :],
num_legal_actions, axis=0)
obs = {
'position': 'landlord_up',
'x_batch': x_batch.astype(np.float32),
'z_batch': z_batch.astype(np.float32),
'legal_actions': infoset.legal_actions,
'x_no_action': x_no_action.astype(np.int8),
'z': z.astype(np.int8),
}
'position': 'landlord_up',
'x_batch': x_batch.astype(np.float32),
'z_batch': z_batch.astype(np.float32),
'legal_actions': infoset.legal_actions,
'x_no_action': x_no_action.astype(np.int8),
'z': z.astype(np.int8),
}
return obs
def _get_obs_landlord_down(infoset):
@ -555,16 +554,530 @@ def _get_obs_landlord_down(infoset):
teammate_num_cards_left,
bomb_num))
z = _action_seq_list2array(_process_action_seq(
infoset.card_play_action_seq))
infoset.card_play_action_seq, 15, False), "old")
z_batch = np.repeat(
z[np.newaxis, :, :],
num_legal_actions, axis=0)
obs = {
'position': 'landlord_down',
'x_batch': x_batch.astype(np.float32),
'z_batch': z_batch.astype(np.float32),
'legal_actions': infoset.legal_actions,
'x_no_action': x_no_action.astype(np.int8),
'z': z.astype(np.int8),
}
'position': 'landlord_down',
'x_batch': x_batch.astype(np.float32),
'z_batch': z_batch.astype(np.float32),
'legal_actions': infoset.legal_actions,
'x_no_action': x_no_action.astype(np.int8),
'z': z.astype(np.int8),
}
return obs
def _get_obs_resnet(infoset, position):
    """
    Build the observation for the resnet-style model.

    `x` carries only the bidding and multiplier information; everything
    else is stacked row-wise into `z` (54 columns per row):
    cards-left one-hots (20 + 17 + 17), own hand, other players' cards,
    the three landlord cards, the cards played by each position, and
    the 32 most recent moves. For `z_batch`, the encoding of each
    legal action is placed in front of `z`, giving 40 rows per action.

    Raises:
        KeyError: if `position` is not landlord/landlord_up/landlord_down.
    """
    # Kept from the previous implementation, which looked the position up
    # in a one-hot table and therefore raised KeyError on unknown seats.
    if position not in ("landlord", "landlord_up", "landlord_down"):
        raise KeyError(position)

    num_legal_actions = len(infoset.legal_actions)

    def _tile(row):
        # One copy of `row` per legal action.
        return np.repeat(row[np.newaxis, :], num_legal_actions, axis=0)

    # NOTE: a previous loop collected `infoset.all_handcards` for
    # ["landlord", "landlord_up", "landlord_up"] (the last name looks like
    # a typo for "landlord_down") into a list that was never used; it and
    # the other unused per-action arrays have been removed.

    my_handcards = _cards2array(infoset.player_hand_cards)
    other_handcards = _cards2array(infoset.other_hand_cards)
    three_landlord_cards = _cards2array(infoset.three_landlord_cards)

    landlord_played_cards = _cards2array(infoset.played_cards['landlord'])
    landlord_up_played_cards = _cards2array(
        infoset.played_cards['landlord_up'])
    landlord_down_played_cards = _cards2array(
        infoset.played_cards['landlord_down'])

    num_cards_left = np.hstack((
        _get_one_hot_array(infoset.num_cards_left_dict['landlord'], 20),
        _get_one_hot_array(infoset.num_cards_left_dict['landlord_up'], 17),
        _get_one_hot_array(infoset.num_cards_left_dict['landlord_down'], 17),
    ))

    bid_info = np.array(infoset.bid_info).flatten()
    multiply_info = np.array(infoset.multiply_info)

    x_batch = np.hstack((
        _tile(bid_info),        # 12
        _tile(multiply_info)))  # 3
    x_no_action = np.hstack((
        bid_info,
        multiply_info))

    z = np.vstack((
        num_cards_left,
        my_handcards,                # 54
        other_handcards,             # 54
        three_landlord_cards,        # 54
        landlord_played_cards,       # 54
        landlord_up_played_cards,    # 54
        landlord_down_played_cards,  # 54
        _action_seq_list2array(
            _process_action_seq(infoset.card_play_action_seq, 32),
            model_type="resnet"),
    ))

    my_action_batch = np.zeros((num_legal_actions, my_handcards.shape[0]))
    for j, action in enumerate(infoset.legal_actions):
        my_action_batch[j, :] = _cards2array(action)

    # Row 0 of every sample is the candidate action; rows 1..39 are the
    # shared state `z`. The integer buffer matches the previous
    # implementation (all encoded values are -1, 0 or 1).
    z_batch = np.zeros([num_legal_actions, 40, 54], int)
    z_batch[:, 0, :] = my_action_batch
    z_batch[:, 1:, :] = z

    obs = {
        'position': position,
        'x_batch': x_batch.astype(np.float32),
        'z_batch': z_batch.astype(np.float32),
        'legal_actions': infoset.legal_actions,
        'x_no_action': x_no_action.astype(np.int8),
        'z': z.astype(np.int8),
    }
    return obs
def _get_obs_general(infoset, position):
    """
    Build the observation for the "general" model.

    `x_no_action` concatenates, in order: position one-hot (3), own hand,
    other players' cards, the three landlord cards, the last move, the
    cards played by each position, the cards-left one-hots (20/17/17),
    the bomb count one-hot, the bidding info and the multiplier info.
    `x_batch` repeats that row once per legal action and appends the
    encoding of that action. `z` is the 32-move history encoded with the
    "general" layout.

    Raises:
        KeyError: if `position` is not landlord/landlord_up/landlord_down.
    """
    num_legal_actions = len(infoset.legal_actions)

    def _tile(row):
        # One copy of `row` per legal action.
        return np.repeat(row[np.newaxis, :], num_legal_actions, axis=0)

    position_map = {
        "landlord": [1, 0, 0],
        "landlord_up": [0, 1, 0],
        "landlord_down": [0, 0, 1]
    }
    position_info = np.array(position_map[position])

    # NOTE: a previous loop collected `infoset.all_handcards` for
    # ["landlord", "landlord_up", "landlord_up"] (the last name looks like
    # a typo for "landlord_down") into a list that was never used; it has
    # been removed.

    my_handcards = _cards2array(infoset.player_hand_cards)
    other_handcards = _cards2array(infoset.other_hand_cards)
    bid_info = np.array(infoset.bid_info).flatten()
    multiply_info = np.array(infoset.multiply_info)
    three_landlord_cards = _cards2array(infoset.three_landlord_cards)
    last_action = _cards2array(infoset.last_move)

    my_action_batch = np.zeros((num_legal_actions, my_handcards.shape[0]))
    for j, action in enumerate(infoset.legal_actions):
        my_action_batch[j, :] = _cards2array(action)

    landlord_num_cards_left = _get_one_hot_array(
        infoset.num_cards_left_dict['landlord'], 20)
    landlord_up_num_cards_left = _get_one_hot_array(
        infoset.num_cards_left_dict['landlord_up'], 17)
    landlord_down_num_cards_left = _get_one_hot_array(
        infoset.num_cards_left_dict['landlord_down'], 17)

    landlord_played_cards = _cards2array(
        infoset.played_cards['landlord'])
    landlord_up_played_cards = _cards2array(
        infoset.played_cards['landlord_up'])
    landlord_down_played_cards = _cards2array(
        infoset.played_cards['landlord_down'])

    bomb_num = _get_one_hot_bomb(infoset.bomb_num)

    # Order matters: it defines the column layout the model was trained on.
    state_features = (
        position_info,                 # 3
        my_handcards,                  # 54
        other_handcards,               # 54
        three_landlord_cards,          # 54
        last_action,                   # 54
        landlord_played_cards,         # 54
        landlord_up_played_cards,      # 54
        landlord_down_played_cards,    # 54
        landlord_num_cards_left,       # 20
        landlord_up_num_cards_left,    # 17
        landlord_down_num_cards_left,  # 17
        bomb_num,                      # 15
        bid_info,                      # 12
        multiply_info,                 # 3
    )
    x_batch = np.hstack(
        tuple(_tile(feature) for feature in state_features)
        + (my_action_batch,))          # 54
    x_no_action = np.hstack(state_features)

    z = _action_seq_list2array(_process_action_seq(
        infoset.card_play_action_seq, 32), "general")
    z_batch = _tile(z) if z.ndim == 1 else np.repeat(
        z[np.newaxis, :, :],
        num_legal_actions, axis=0)

    obs = {
        'position': position,
        'x_batch': x_batch.astype(np.float32),
        'z_batch': z_batch.astype(np.float32),
        'legal_actions': infoset.legal_actions,
        'x_no_action': x_no_action.astype(np.int8),
        'z': z.astype(np.int8),
    }
    return obs
def gen_bid_legal_actions(player_id, bid_info):
    """
    Enumerate the two bidding choices for `player_id`.

    The columns of `bid_info` are reordered to (previous player, this
    player, next player). The first of the four rounds that still
    contains -1 is treated as the current round; it is filled in once
    with [0, 0, 0] and once with [0, 1, 0], and each variant is returned
    flattened.

    Returns an array with one row per choice, or an empty array when no
    round contains -1. `bid_info` itself is not modified.
    """
    seat_order = [(player_id - 1) % 3, player_id, (player_id + 1) % 3]
    # Indexing with a list yields a copy, so the edits below stay local.
    rounds = bid_info[:, seat_order]

    open_round = next((r for r in range(4) if -1 in rounds[r]), -1)
    if open_round == -1:
        return np.array([])

    choices = []
    for own_bid in (0, 1):
        rounds[open_round] = [0, own_bid, 0]
        choices.append(np.array(rounds).flatten())
    return np.array(choices)
def _get_obs_for_bid(player_id, bid_info, hand_cards):
    """
    Build the observation used while bidding.

    Nothing has been played yet, so every play-related feature (landlord
    cards, last move, played cards, cards left, bombs, multiplier, the
    action itself and the move history) is encoded from empty/zero
    inputs. The two rows of the batch differ only in `bid_info_batch`,
    which comes from `gen_bid_legal_actions`.

    Args:
        player_id: seat index passed to `gen_bid_legal_actions`.
        bid_info: bidding table passed to `gen_bid_legal_actions`.
        hand_cards: the player's cards; each must occur in the full deck.

    Returns:
        The usual `obs` dictionary plus `bid_info_batch` and
        `multiply_info`. Note that `x_no_action` does not include the
        bidding or multiplier features.

    Raises:
        IndexError: if `bid_info` has no open round (no legal actions).
        ValueError: if `hand_cards` contains a card not left in the deck.
    """
    all_cards = [3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
                 8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, 12,
                 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 17, 17, 17, 17, 20, 30]
    num_legal_actions = 2

    def _tile(row):
        # One copy of `row` per legal action.
        return np.repeat(row[np.newaxis, :], num_legal_actions, axis=0)

    my_handcards = _cards2array(hand_cards)

    # Everything not in hand is "other" cards at this stage.
    other_cards = list(all_cards)
    for card in hand_cards:
        other_cards.remove(card)
    other_handcards = _cards2array(other_cards)

    position_info = np.array([0, 0, 0])

    bid_legal_actions = gen_bid_legal_actions(player_id, bid_info)
    if len(bid_legal_actions) == 0:
        # Previously surfaced as an IndexError from `bid_legal_actions[0]`.
        raise IndexError('no open bidding round in bid_info')
    bid_info_batch = bid_legal_actions

    multiply_info = np.array([0, 0, 0])

    three_landlord_cards = _cards2array([])
    last_action = _cards2array([])

    my_action_batch = np.zeros((num_legal_actions, my_handcards.shape[0]))
    for j in range(num_legal_actions):
        my_action_batch[j, :] = _cards2array([])

    landlord_num_cards_left = _get_one_hot_array(0, 20)
    landlord_up_num_cards_left = _get_one_hot_array(0, 17)
    landlord_down_num_cards_left = _get_one_hot_array(0, 17)

    landlord_played_cards = _cards2array([])
    landlord_up_played_cards = _cards2array([])
    landlord_down_played_cards = _cards2array([])

    bomb_num = _get_one_hot_bomb(0)

    state_features = (
        position_info,
        my_handcards,
        other_handcards,
        three_landlord_cards,
        last_action,
        landlord_played_cards,
        landlord_up_played_cards,
        landlord_down_played_cards,
        landlord_num_cards_left,
        landlord_up_num_cards_left,
        landlord_down_num_cards_left,
        bomb_num,
    )
    x_batch = np.hstack(
        tuple(_tile(feature) for feature in state_features)
        + (bid_info_batch,
           _tile(multiply_info),
           my_action_batch))
    x_no_action = np.hstack(state_features)

    # The default model_type ("old") reshapes the history to 5x162, which
    # cannot hold a 32-move history and raised ValueError on every call.
    # NOTE(review): "general" is chosen because the x layout above mirrors
    # _get_obs_general; confirm this is the encoding the caller expects.
    z = _action_seq_list2array(_process_action_seq([], 32), "general")
    z_batch = np.repeat(
        z[np.newaxis, :, :],
        num_legal_actions, axis=0)

    obs = {
        'position': "",
        'x_batch': x_batch.astype(np.float32),
        'z_batch': z_batch.astype(np.float32),
        'legal_actions': bid_legal_actions,
        'x_no_action': x_no_action.astype(np.int8),
        'z': z.astype(np.int8),
        "bid_info_batch": bid_info_batch.astype(np.int8),
        "multiply_info": multiply_info.astype(np.int8)
    }
    return obs
def _get_obs_for_multiply(position, bid_info, hand_cards, landlord_cards):
    """Build the observation used to score the three "multiply" options.

    Three candidate actions are encoded as the one-hot rows of
    ``multiply_info_batch``. Every game-progress feature (last action,
    played cards, cards-left counters, bomb counter, action sequence) is
    built from empty/zero inputs, i.e. nothing has been played yet.

    Args:
        position: One of ``"landlord"``, ``"landlord_up"``,
            ``"landlord_down"``.
        bid_info: Array-like bidding record; it is flattened to 1-D.
        hand_cards: Card values held by the current player. Each one is
            removed from a full 54-card deck to obtain the unseen cards.
        landlord_cards: The landlord cards, encoded with ``_cards2array``.

    Returns:
        dict with keys ``position``, ``x_batch``, ``z_batch``,
        ``legal_actions``, ``x_no_action``, ``z``, ``bid_info`` and
        ``multiply_info_batch``.

    Raises:
        KeyError: If ``position`` is not one of the three known positions.
        ValueError: If ``hand_cards`` contains a card that is not (or no
            longer) present in the 54-card deck.
    """
    all_cards = [3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
                 8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, 12,
                 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 17, 17, 17, 17,
                 20, 30]
    num_legal_actions = 3

    def _batch(row):
        # Stack one copy of the 1-D feature `row` per candidate action.
        return np.repeat(row[np.newaxis, :], num_legal_actions, axis=0)

    my_handcards = _cards2array(hand_cards)
    my_handcards_batch = _batch(my_handcards)

    # Everything not in our hand is "other" cards; list.remove drops one
    # copy per held card and raises ValueError on an impossible hand.
    other_cards = list(all_cards)
    for card in hand_cards:
        other_cards.remove(card)
    other_handcards = _cards2array(other_cards)
    other_handcards_batch = _batch(other_handcards)

    position_map = {
        "landlord": [1, 0, 0],
        "landlord_up": [0, 1, 0],
        "landlord_down": [0, 0, 1]
    }
    position_info = np.array(position_map[position])
    position_info_batch = _batch(position_info)

    bid_info = np.array(bid_info).flatten()
    bid_info_batch = _batch(bid_info)

    # The un-batched multiply feature is all zeros; the batch holds one
    # one-hot row per candidate and doubles as the legal-action list.
    multiply_info = np.array([0, 0, 0])
    multiply_info_batch = np.array([[1, 0, 0],
                                    [0, 1, 0],
                                    [0, 0, 1]])

    three_landlord_cards = _cards2array(landlord_cards)
    three_landlord_cards_batch = _batch(three_landlord_cards)

    last_action = _cards2array([])
    last_action_batch = _batch(last_action)

    # Every candidate row carries the same "no cards" action encoding.
    my_action_batch = np.zeros(my_handcards_batch.shape)
    my_action_batch[:] = _cards2array([])

    landlord_num_cards_left = _get_one_hot_array(0, 20)
    landlord_num_cards_left_batch = _batch(landlord_num_cards_left)

    landlord_up_num_cards_left = _get_one_hot_array(0, 17)
    landlord_up_num_cards_left_batch = _batch(landlord_up_num_cards_left)

    landlord_down_num_cards_left = _get_one_hot_array(0, 17)
    landlord_down_num_cards_left_batch = _batch(landlord_down_num_cards_left)

    landlord_played_cards = _cards2array([])
    landlord_played_cards_batch = _batch(landlord_played_cards)

    landlord_up_played_cards = _cards2array([])
    landlord_up_played_cards_batch = _batch(landlord_up_played_cards)

    landlord_down_played_cards = _cards2array([])
    landlord_down_played_cards_batch = _batch(landlord_down_played_cards)

    bomb_num = _get_one_hot_bomb(0)
    bomb_num_batch = _batch(bomb_num)

    # Column order must stay exactly as below: it is the model input layout.
    x_batch = np.hstack((position_info_batch,
                         my_handcards_batch,
                         other_handcards_batch,
                         three_landlord_cards_batch,
                         last_action_batch,
                         landlord_played_cards_batch,
                         landlord_up_played_cards_batch,
                         landlord_down_played_cards_batch,
                         landlord_num_cards_left_batch,
                         landlord_up_num_cards_left_batch,
                         landlord_down_num_cards_left_batch,
                         bomb_num_batch,
                         bid_info_batch,
                         multiply_info_batch,
                         my_action_batch))
    x_no_action = np.hstack((position_info,
                             my_handcards,
                             other_handcards,
                             three_landlord_cards,
                             last_action,
                             landlord_played_cards,
                             landlord_up_played_cards,
                             landlord_down_played_cards,
                             landlord_num_cards_left,
                             landlord_up_num_cards_left,
                             landlord_down_num_cards_left,
                             bomb_num))

    z = _action_seq_list2array(_process_action_seq([], 32))
    z_batch = np.repeat(
        z[np.newaxis, :, :],
        num_legal_actions, axis=0)

    obs = {
        'position': "",
        'x_batch': x_batch.astype(np.float32),
        'z_batch': z_batch.astype(np.float32),
        'legal_actions': multiply_info_batch,
        'x_no_action': x_no_action.astype(np.int8),
        'z': z.astype(np.int8),
        "bid_info": bid_info.astype(np.int8),
        # Fixed: this key previously held the un-batched all-zero
        # `multiply_info`; it now holds the per-candidate batch its name
        # promises.
        "multiply_info_batch": multiply_info_batch.astype(np.int8)
    }
    return obs

15
douzero/env/game.py vendored
View File

@ -401,13 +401,13 @@ class InfoSet(object):
self.player_position = player_position
# The hand cards of the current player. A list.
self.player_hand_cards = None
# The number of cards left for each player. It is a dict with str-->int
# The number of cards left for each player. It is a dict with str-->int
self.num_cards_left_dict = None
# The three landlord cards. A list.
self.three_landlord_cards = None
# The historical moves. It is a list of list
self.card_play_action_seq = None
# The union of the hand cards of the other two players for the current player
# The union of the hand cards of the other two players for the current player
self.other_hand_cards = None
# The legal actions for the current move. It is a list of list
self.legal_actions = None
@ -419,9 +419,18 @@ class InfoSet(object):
self.last_move_dict = None
# The played cards so far. It is a list.
self.played_cards = None
# The hand cards of all the players. It is a dict.
# The hand cards of all the players. It is a dict.
self.all_handcards = None
# Last player position that plays a valid move, i.e., not `pass`
self.last_pid = None
# The number of bombs played so far
self.bomb_num = None
self.bid_info = [[-1, -1, -1],
[-1, -1, -1],
[-1, -1, -1],
[-1, -1, -1]]
self.multiply_info = [1, 0, 0]
self.player_id = None

View File

@ -3,9 +3,15 @@ import numpy as np
from douzero.env.env import get_obs
def _load_model(position, model_path):
from douzero.dmc.models import model_dict
model = model_dict[position]()
def _load_model(position, model_path, model_type):
from douzero.dmc.models import model_dict, model_dict_resnet, model_dict_general
print(position, "loads", model_type, "model: ", model_path)
if model_type == "general":
model = model_dict_general[position]()
elif model_type == "resnet":
model = model_dict_resnet[position]()
else:
model = model_dict[position]()
model_state_dict = model.state_dict()
if torch.cuda.is_available():
pretrained = torch.load(model_path, map_location='cuda:0')
@ -22,14 +28,19 @@ def _load_model(position, model_path):
class DeepAgent:
def __init__(self, position, model_path):
    """Choose the model architecture from the checkpoint path and load it.

    The architecture is inferred by substring match on ``model_path``:
    "general" is tested first, then "resnet"; any other path falls back
    to "old". The result is stored in ``self.model_type`` and passed to
    ``_load_model`` along with ``position`` and ``model_path``.
    """
    # The model is loaded exactly once, after the type is known; the
    # earlier two-argument _load_model(position, model_path) call does
    # not match the three-argument signature and has been dropped.
    if "general" in model_path:
        self.model_type = "general"
    elif "resnet" in model_path:
        self.model_type = "resnet"
    else:
        self.model_type = "old"
    self.model = _load_model(position, model_path, self.model_type)
def act(self, infoset):
# 只有一个合法动作时直接返回,这样会得不到胜率信息
# if len(infoset.legal_actions) == 1:
# return infoset.legal_actions[0], 0
obs = get_obs(infoset)
obs = get_obs(infoset, model_type=self.model_type)
z_batch = torch.from_numpy(obs['z_batch']).float()
x_batch = torch.from_numpy(obs['x_batch']).float()
if torch.cuda.is_available():

29
main.py
View File

@ -21,6 +21,7 @@ from MainWindow import Ui_Form
from douzero.env.game import GameEnv
from douzero.evaluation.deep_agent import DeepAgent
import traceback
import BidModel
import LandlordModel
@ -83,7 +84,7 @@ class MyPyQT_Form(QtWidgets.QWidget, Ui_Form):
self.shouldExit = 0 # 通知上一轮记牌结束
self.canRecord = threading.Lock() # 开始记牌
self.card_play_model_path_dict = {
'landlord': "baselines/douzero_ADP/landlord.ckpt",
'landlord': "baselines/resnet/resnet_landlord_1613536300.ckpt",
'landlord_up': "baselines/douzero_ADP/landlord_up.ckpt",
'landlord_down': "baselines/douzero_ADP/landlord_down.ckpt"
}
@ -199,9 +200,13 @@ class MyPyQT_Form(QtWidgets.QWidget, Ui_Form):
ai_players[1] = DeepAgent(self.user_position, self.card_play_model_path_dict[self.user_position])
self.env = GameEnv(ai_players)
try:
self.start()
except:
except Exception as e:
exc_type, exc_obj, exc_tb = sys.exc_info()
print(e)
traceback.print_tb(exc_tb)
self.stop()
def sleep(self, ms):
@ -225,12 +230,16 @@ class MyPyQT_Form(QtWidgets.QWidget, Ui_Form):
self.WinRate.setText("评分:" + action_message["win_rate"])
print("\n手牌:", str(''.join(
[EnvCard2RealCard[c] for c in self.env.info_sets[self.user_position].player_hand_cards])))
print("出牌:", action_message["action"] if action_message["action"] else "不出", " 胜率",
print("出牌:", action_message["action"] if action_message["action"] else "不出", "得分",
action_message["win_rate"])
if action_message["action"] == "":
helper.ClickOnImage("pass_btn", region=self.PassBtnPos)
else:
helper.SelectCards(action_message["action"])
hand_cards_str = ''.join([EnvCard2RealCard[c] for c in self.env.info_sets[self.user_position].player_hand_cards])
if len(hand_cards_str) == 0 and len(action_message["action"]) == 1:
helper.SelectCards(action_message["action"], True)
else:
helper.SelectCards(action_message["action"])
tryCount = 20
result = helper.LocateOnScreen("play_card", region=self.PassBtnPos, confidence=0.85)
while result is None and tryCount > 0:
@ -248,14 +257,14 @@ class MyPyQT_Form(QtWidgets.QWidget, Ui_Form):
elif self.play_order == 1:
self.RPlayedCard.setText("...")
pass_flag = helper.LocateOnScreen('pass',
region=self.RPlayedCardsPos,
confidence=self.PassConfidence)
region=self.RPlayedCardsPos,
confidence=self.PassConfidence)
self.detect_start_btn()
while self.RunGame and self.have_white(self.RPlayedCardsPos) == 0 and pass_flag is None:
print("等待下家出牌")
self.sleep(100)
pass_flag = helper.LocateOnScreen('pass', region=self.RPlayedCardsPos,
confidence=self.PassConfidence)
confidence=self.PassConfidence)
self.detect_start_btn()
self.sleep(200)
# 未找到"不出"
@ -281,13 +290,13 @@ class MyPyQT_Form(QtWidgets.QWidget, Ui_Form):
self.LPlayedCard.setText("...")
self.detect_start_btn()
pass_flag = helper.LocateOnScreen('pass', region=self.LPlayedCardsPos,
confidence=self.PassConfidence)
confidence=self.PassConfidence)
while self.RunGame and self.have_white(self.LPlayedCardsPos) == 0 and pass_flag is None:
print("等待上家出牌")
self.detect_start_btn()
self.sleep(100)
pass_flag = helper.LocateOnScreen('pass', region=self.LPlayedCardsPos,
confidence=self.PassConfidence)
confidence=self.PassConfidence)
self.sleep(200)
# 不出
# 未找到"不出"
@ -319,7 +328,7 @@ class MyPyQT_Form(QtWidgets.QWidget, Ui_Form):
def find_landlord(self, landlord_flag_pos):
    """Return the index of the first region showing the landlord marker.

    Each entry of ``landlord_flag_pos`` is passed as ``region`` to
    ``helper.LocateOnScreen`` with the "landlord_words" template and
    ``self.LandlordFlagConfidence``. Returns ``None`` when no region
    produces a match.
    """
    # enumerate() supplies the index of the region being tested, so the
    # list is not rescanned with .index() (which reports the first *equal*
    # entry rather than the one that actually matched).
    for idx, pos in enumerate(landlord_flag_pos):
        result = helper.LocateOnScreen("landlord_words", region=pos,
                                       confidence=self.LandlordFlagConfidence)
        if result is not None:
            return idx
    return None