Merge branch 'unified' of ssh://git@git.zaneyork.cn:2222/douzero/Douzero_Resnet.git into main

This commit is contained in:
ZaneYork 2022-01-05 11:04:48 +08:00
commit f0883bbf31
10 changed files with 443 additions and 53 deletions

View File

@ -31,6 +31,8 @@ parser.add_argument('--load_model', action='store_true',
help='Load an existing model') help='Load an existing model')
parser.add_argument('--old_model', action='store_true', parser.add_argument('--old_model', action='store_true',
help='Use vanilla model') help='Use vanilla model')
parser.add_argument('--unified_model', action='store_true',
help='Use unified model')
parser.add_argument('--lite_model', action='store_true', parser.add_argument('--lite_model', action='store_true',
help='Use lite card model') help='Use lite card model')
parser.add_argument('--lagacy_model', action='store_true', parser.add_argument('--lagacy_model', action='store_true',

View File

@ -13,8 +13,8 @@ from torch import nn
import douzero.dmc.models import douzero.dmc.models
import douzero.env.env import douzero.env.env
from .file_writer import FileWriter from .file_writer import FileWriter
from .models import Model, OldModel from .models import Model, OldModel, UnifiedModel
from .utils import get_batch, log, create_env, create_optimizers, act, infer_logic from .utils import get_batch, log, create_optimizers, act, infer_logic
import psutil import psutil
import shutil import shutil
import requests import requests
@ -87,6 +87,8 @@ def train(flags):
# Initialize actor models # Initialize actor models
if flags.old_model: if flags.old_model:
actor_model = OldModel(device="cpu", flags = flags, lite_model = flags.lite_model) actor_model = OldModel(device="cpu", flags = flags, lite_model = flags.lite_model)
elif flags.unified_model:
actor_model = UnifiedModel(device="cpu", flags = flags, lite_model = flags.lite_model)
else: else:
actor_model = Model(device="cpu", flags = flags, lite_model = flags.lite_model) actor_model = Model(device="cpu", flags = flags, lite_model = flags.lite_model)
actor_model.eval() actor_model.eval()
@ -100,6 +102,8 @@ def train(flags):
# Learner model for training # Learner model for training
if flags.old_model: if flags.old_model:
learner_model = OldModel(device=flags.training_device, lite_model = flags.lite_model) learner_model = OldModel(device=flags.training_device, lite_model = flags.lite_model)
elif flags.unified_model:
learner_model = UnifiedModel(device=flags.training_device, lite_model = flags.lite_model)
else: else:
learner_model = Model(device=flags.training_device, lite_model = flags.lite_model) learner_model = Model(device=flags.training_device, lite_model = flags.lite_model)
@ -116,13 +120,18 @@ def train(flags):
] ]
frames, stats = 0, {k: 0 for k in stat_keys} frames, stats = 0, {k: 0 for k in stat_keys}
position_frames = {'landlord':0, 'landlord_up':0, 'landlord_front':0, 'landlord_down':0} position_frames = {'landlord':0, 'landlord_up':0, 'landlord_front':0, 'landlord_down':0}
if flags.unified_model:
lock = threading.Lock()
position_locks = {'landlord': lock, 'landlord_up': lock, 'landlord_front': lock, 'landlord_down': lock, 'uni': lock}
else:
position_locks = {'landlord': threading.Lock(), 'landlord_up': threading.Lock(), 'landlord_front': threading.Lock(), 'landlord_down': threading.Lock()} position_locks = {'landlord': threading.Lock(), 'landlord_up': threading.Lock(), 'landlord_front': threading.Lock(), 'landlord_down': threading.Lock()}
def sync_onnx_model(frames): def sync_onnx_model(frames):
p_path = '%s/%s' % (flags.onnx_model_path, flags.xpid) p_path = '%s/%s' % (flags.onnx_model_path, flags.xpid)
if not os.path.exists(p_path): if not os.path.exists(p_path):
os.makedirs(p_path) os.makedirs(p_path)
for position in ['landlord', 'landlord_up', 'landlord_front', 'landlord_down']: positions = ['uni'] if flags.unified_model else ['landlord', 'landlord_up', 'landlord_front', 'landlord_down']
for position in positions:
if flags.enable_onnx: if flags.enable_onnx:
model_path = '%s/%s/model_%s.onnx' % (flags.onnx_model_path, flags.xpid, position) model_path = '%s/%s/model_%s.onnx' % (flags.onnx_model_path, flags.xpid, position)
onnx_params = learner_model.get_model(position)\ onnx_params = learner_model.get_model(position)\
@ -149,6 +158,12 @@ def train(flags):
checkpoint_states = torch.load( checkpoint_states = torch.load(
checkpointpath, map_location=("cuda:"+str(flags.training_device) if flags.training_device != "cpu" else "cpu") checkpointpath, map_location=("cuda:"+str(flags.training_device) if flags.training_device != "cpu" else "cpu")
) )
if flags.unified_model:
learner_model.get_model('uni').load_state_dict(checkpoint_states["model_state_dict"]['uni'])
optimizers['uni'].load_state_dict(checkpoint_states["optimizer_state_dict"]['uni'])
if not flags.enable_onnx:
actor_model.get_model('uni').load_state_dict(checkpoint_states["model_state_dict"]['uni'])
else:
for k in ['landlord', 'landlord_up', 'landlord_front', 'landlord_down']: # ['landlord', 'landlord_up', 'landlord_down'] for k in ['landlord', 'landlord_up', 'landlord_front', 'landlord_down']: # ['landlord', 'landlord_up', 'landlord_down']
learner_model.get_model(k).load_state_dict(checkpoint_states["model_state_dict"][k]) learner_model.get_model(k).load_state_dict(checkpoint_states["model_state_dict"][k])
optimizers[k].load_state_dict(checkpoint_states["optimizer_state_dict"][k]) optimizers[k].load_state_dict(checkpoint_states["optimizer_state_dict"][k])
@ -206,7 +221,7 @@ def train(flags):
while frames < flags.total_frames: while frames < flags.total_frames:
batch = get_batch(batch_queues, position, flags, local_lock) batch = get_batch(batch_queues, position, flags, local_lock)
_stats = learn(position, actor_model, learner_model.get_model(position), batch, _stats = learn(position, actor_model, learner_model.get_model(position), batch,
optimizers[position], flags, position_lock) optimizers['uni'], flags, position_lock)
with lock: with lock:
for k in _stats: for k in _stats:
stats[k] = _stats[k] stats[k] = _stats[k]
@ -244,7 +259,7 @@ def train(flags):
}, checkpointpath + '.new') }, checkpointpath + '.new')
# Save the weights for evaluation purpose # Save the weights for evaluation purpose
for position in ['landlord', 'landlord_up', 'landlord_front', 'landlord_down']: for position in ['uni'] if flags.unified_model else ['landlord', 'landlord_up', 'landlord_front', 'landlord_down']:
model_weights_dir = os.path.expandvars(os.path.expanduser( model_weights_dir = os.path.expandvars(os.path.expanduser(
'%s/%s/%s' % (flags.savedir, flags.xpid, "general_" + position + '_' + str(frames) + '.ckpt'))) '%s/%s/%s' % (flags.savedir, flags.xpid, "general_" + position + '_' + str(frames) + '.ckpt')))
torch.save(learner_model.get_model(position).state_dict(), model_weights_dir) torch.save(learner_model.get_model(position).state_dict(), model_weights_dir)
@ -255,13 +270,18 @@ def train(flags):
type = '' type = ''
if flags.old_model: if flags.old_model:
type += 'vanilla' type += 'vanilla'
elif flags.unified_model:
type += 'unified'
else: else:
type += 'resnet' type += 'resnet'
try:
requests.post(flags.upload_url, data={ requests.post(flags.upload_url, data={
'type': type, 'type': type,
'position': position, 'position': position,
'frame': frames 'frame': frames
}, files = {'model_file':('model.ckpt', open(model_weights_dir, 'rb'))}) }, files = {'model_file':('model.ckpt', open(model_weights_dir, 'rb'))})
except:
print("模型上传失败")
os.remove(model_weights_dir) os.remove(model_weights_dir)
shutil.move(checkpointpath + '.new', checkpointpath) shutil.move(checkpointpath + '.new', checkpointpath)

View File

@ -400,6 +400,74 @@ class GeneralModelLite(nn.Module):
return dict(values=out) return dict(values=out)
class UnifiedModelLite(nn.Module):
def __init__(self):
super().__init__()
self.in_planes = 30
#input 1*69*15
self.conv1 = nn.Conv1d(15, 30, kernel_size=(3,),
stride=(2,), padding=1, bias=False) #1*35*30
self.bn1 = nn.BatchNorm1d(30)
self.layer1 = self._make_layer(BasicBlock, 30, 2, stride=2)#1*18*30
self.layer2 = self._make_layer(BasicBlock, 60, 2, stride=2)#1*9*60
self.layer3 = self._make_layer(BasicBlock, 120, 2, stride=2)#1*5*120
# self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
self.lstm = nn.LSTM(276, 128, batch_first=True)
self.linear1 = nn.Linear(120 * BasicBlock.expansion * 5 + 128, 2048)
self.linear2 = nn.Linear(2048, 1024)
self.linear3 = nn.Linear(1024, 512)
self.linear4 = nn.Linear(512, 256)
self.linear5 = nn.Linear(256, 1)
def _make_layer(self, block, planes, num_blocks, stride):
strides = [stride] + [1] * (num_blocks - 1)
layers = []
for stride in strides:
layers.append(block(self.in_planes, planes, stride))
self.in_planes = planes * block.expansion
return nn.Sequential(*layers)
def get_onnx_params(self, device=None):
return {
'args': (
torch.randn(1, 15, 69, requires_grad=True, device=device),
torch.randn(1, 24, 276, requires_grad=True, device=device),
),
'input_names': ['z_batch','x_batch'],
'output_names': ['values'],
'dynamic_axes': {
'z_batch': {
0: "batch_size"
},
'x_batch': {
0: "batch_size"
},
'values': {
0: "batch_size"
}
}
}
def forward(self, z, x):
out = F.relu(self.bn1(self.conv1(z)))
out = self.layer1(out)
out = self.layer2(out)
out = self.layer3(out)
out = out.flatten(1,2)
lstm_out, (h_n, _) = self.lstm(x)
lstm_out = lstm_out[:,-1,:]
out = torch.cat([lstm_out,out], dim=1)
out = F.leaky_relu_(self.linear1(out))
out = F.leaky_relu_(self.linear2(out))
out = F.leaky_relu_(self.linear3(out))
out = F.leaky_relu_(self.linear4(out))
out = F.leaky_relu_(self.linear5(out))
return dict(values=out)
class GeneralModel(nn.Module): class GeneralModel(nn.Module):
def __init__(self): def __init__(self):
super().__init__() super().__init__()
@ -492,6 +560,11 @@ model_dict_new_lite['landlord'] = GeneralModelLite
model_dict_new_lite['landlord_up'] = GeneralModelLite model_dict_new_lite['landlord_up'] = GeneralModelLite
model_dict_new_lite['landlord_front'] = GeneralModelLite model_dict_new_lite['landlord_front'] = GeneralModelLite
model_dict_new_lite['landlord_down'] = GeneralModelLite model_dict_new_lite['landlord_down'] = GeneralModelLite
model_dict_uni_lite = {}
model_dict_uni_lite['landlord'] = UnifiedModelLite
model_dict_uni_lite['landlord_up'] = UnifiedModelLite
model_dict_uni_lite['landlord_front'] = UnifiedModelLite
model_dict_uni_lite['landlord_down'] = UnifiedModelLite
def forward_logic(self_model, position, z, x, device='cpu', return_value=False, flags=None): def forward_logic(self_model, position, z, x, device='cpu', return_value=False, flags=None):
legal_count = len(z) legal_count = len(z)
@ -675,3 +748,64 @@ class Model:
def get_models(self): def get_models(self):
return self.models return self.models
class UnifiedModel:
"""
The wrapper for the three models. We also wrap several
interfaces such as share_memory, eval, etc.
"""
def __init__(self, device=0, flags=None, lite_model = False):
self.onnx_models = {}
self.model = None
self.models = {}
self.flags = flags
if not device == "cpu":
device = 'cuda:' + str(device)
self.device = torch.device(device)
positions = ['landlord', 'landlord_up', 'landlord_front', 'landlord_down']
if flags is not None and flags.enable_onnx:
self.model = None
else:
if lite_model:
self.model = UnifiedModelLite().to(self.device)
for position in positions:
self.models[position] = self.model
else:
self.model = GeneralModel().to(self.device)
self.onnx_model = None
def set_onnx_model(self, device='cpu'):
model_path = os.path.abspath('%s/%s/model_uni.onnx' % (self.flags.onnx_model_path, self.flags.xpid))
positions = ['landlord', 'landlord_up', 'landlord_front', 'landlord_down']
if device == 'cpu':
self.onnx_model = onnxruntime.InferenceSession(get_example(model_path), providers=['CPUExecutionProvider'])
else:
self.onnx_model = onnxruntime.InferenceSession(get_example(model_path), providers=['CUDAExecutionProvider'])
for position in positions:
self.onnx_models[position] = self.onnx_model
def get_onnx_params(self, position):
self.model.get_onnx_params(self.device)
def forward(self, position, z, x, device='cpu', return_value=False, flags=None):
return forward_logic(self, position, z, x, device, return_value, flags)
def share_memory(self):
if self.model is not None:
self.model.share_memory()
def eval(self):
if self.model is not None:
self.model.eval()
def parameters(self, position):
return self.model.parameters()
def get_model(self, position):
return self.model
def get_models(self):
return {
'uni' : self.model
}

View File

@ -73,7 +73,7 @@ log.setLevel(logging.INFO)
Buffers = typing.Dict[str, typing.List[torch.Tensor]] Buffers = typing.Dict[str, typing.List[torch.Tensor]]
def create_env(flags): def create_env(flags):
return Env(flags.objective, flags.old_model, flags.lagacy_model, flags.lite_model) return Env(flags.objective, flags.old_model, flags.lagacy_model, flags.lite_model, flags.unified_model)
def get_batch(b_queues, position, flags, lock): def get_batch(b_queues, position, flags, lock):
""" """
@ -104,6 +104,14 @@ def create_optimizers(flags, learner_model):
""" """
positions = ['landlord', 'landlord_up', 'landlord_front', 'landlord_down'] positions = ['landlord', 'landlord_up', 'landlord_front', 'landlord_down']
optimizers = {} optimizers = {}
if flags.unified_model:
position = 'uni'
optimizer = RAdam(
learner_model.parameters(position),
lr=flags.learning_rate,
eps=flags.epsilon)
optimizers[position] = optimizer
else:
for position in positions: for position in positions:
optimizer = RAdam( optimizer = RAdam(
learner_model.parameters(position), learner_model.parameters(position),
@ -116,6 +124,9 @@ def create_optimizers(flags, learner_model):
def infer_logic(i, device, infer_queues, model, flags, onnx_frame): def infer_logic(i, device, infer_queues, model, flags, onnx_frame):
positions = ['landlord', 'landlord_up', 'landlord_front', 'landlord_down'] positions = ['landlord', 'landlord_up', 'landlord_front', 'landlord_down']
if not flags.enable_onnx: if not flags.enable_onnx:
if flags.unified_model:
model.model.to(torch.device(device if device == "cpu" else ("cuda:"+str(device))))
else:
for pos in positions: for pos in positions:
model.models[pos].to(torch.device(device if device == "cpu" else ("cuda:"+str(device)))) model.models[pos].to(torch.device(device if device == "cpu" else ("cuda:"+str(device))))
last_onnx_frame = -1 last_onnx_frame = -1

211
douzero/env/env.py vendored
View File

@ -49,7 +49,47 @@ NumOnesJoker2ArrayCompressed = {0: np.array([0, 0, 0, 0, 0]),
12: np.array([0, 0, 1, 1, 0]), 12: np.array([0, 0, 1, 1, 0]),
13: np.array([1, 0, 1, 1, 0]), 13: np.array([1, 0, 1, 1, 0]),
15: np.array([1, 1, 1, 1, 0])} 15: np.array([1, 1, 1, 1, 0])}
PositionInfoArray = {
'landlord': np.array([1, 0, 0, 0]),
'landlord_down': np.array([0, 1, 0, 0]),
'landlord_front': np.array([0, 0, 1, 0]),
'landlord_up': np.array([0, 0, 0, 1]),
}
FaceUpLevelArray = {
0x00: np.array([0, 0, 0, 0, 0, 0, 0, 0, 0]),
0x01: np.array([1, 0, 0, 0, 0, 0, 0, 0, 0]),
0x02: np.array([0, 1, 0, 0, 0, 0, 0, 0, 0]),
0x03: np.array([1, 1, 0, 0, 0, 0, 0, 0, 0]),
0x04: np.array([0, 0, 1, 0, 0, 0, 0, 0, 0]),
0x05: np.array([1, 0, 1, 0, 0, 0, 0, 0, 0]),
0x06: np.array([0, 1, 1, 0, 0, 0, 0, 0, 0]),
0x07: np.array([1, 1, 1, 0, 0, 0, 0, 0, 0]),
0x08: np.array([0, 0, 0, 1, 0, 0, 0, 0, 0]),
0x09: np.array([1, 0, 0, 1, 0, 0, 0, 0, 0]),
0x0A: np.array([0, 1, 0, 1, 0, 0, 0, 0, 0]),
0x0B: np.array([1, 1, 0, 1, 0, 0, 0, 0, 0]),
0x0C: np.array([0, 0, 1, 1, 0, 0, 0, 0, 0]),
0x0D: np.array([1, 0, 1, 1, 0, 0, 0, 0, 0]),
0x0E: np.array([0, 1, 1, 1, 0, 0, 0, 0, 0]),
0x0F: np.array([1, 1, 1, 1, 0, 0, 0, 0, 0]),
0x10: np.array([0, 0, 0, 0, 1, 0, 0, 0, 0]),
0x11: np.array([1, 0, 0, 0, 1, 0, 0, 0, 0]),
0x12: np.array([0, 1, 0, 0, 1, 0, 0, 0, 0]),
0x13: np.array([1, 1, 0, 0, 1, 0, 0, 0, 0]),
0x14: np.array([0, 0, 1, 0, 1, 0, 0, 0, 0]),
0x15: np.array([1, 0, 1, 0, 1, 0, 0, 0, 0]),
0x16: np.array([0, 1, 1, 0, 1, 0, 0, 0, 0]),
0x17: np.array([1, 1, 1, 0, 1, 0, 0, 0, 0]),
0x18: np.array([0, 0, 0, 1, 1, 0, 0, 0, 0]),
0x19: np.array([1, 0, 0, 1, 1, 0, 0, 0, 0]),
0x1A: np.array([0, 1, 0, 1, 1, 0, 0, 0, 0]),
0x1B: np.array([1, 1, 0, 1, 1, 0, 0, 0, 0]),
0x1C: np.array([0, 0, 1, 1, 1, 0, 0, 0, 0]),
0x1D: np.array([1, 0, 1, 1, 1, 0, 0, 0, 0]),
0x1E: np.array([0, 1, 1, 1, 1, 0, 0, 0, 0]),
0x1F: np.array([1, 1, 1, 1, 1, 0, 0, 0, 0]),
}
deck = [] deck = []
for i in range(3, 15): for i in range(3, 15):
@ -63,7 +103,7 @@ class Env:
Doudizhu multi-agent wrapper Doudizhu multi-agent wrapper
""" """
def __init__(self, objective, old_model, legacy_model=False, lite_model = False): def __init__(self, objective, old_model, legacy_model=False, lite_model = False, unified_model = False):
""" """
Objective is wp/adp/logadp. It indicates whether considers Objective is wp/adp/logadp. It indicates whether considers
bomb in reward calculation. Here, we use dummy agents. bomb in reward calculation. Here, we use dummy agents.
@ -77,7 +117,8 @@ class Env:
""" """
self.objective = objective self.objective = objective
self.use_legacy = legacy_model self.use_legacy = legacy_model
self.use_general = not old_model self.use_unified = unified_model
self.use_general = not old_model and not unified_model
self.lite_model = lite_model self.lite_model = lite_model
# Initialize players # Initialize players
@ -107,6 +148,8 @@ class Env:
'landlord_up': _deck[33:58], 'landlord_up': _deck[33:58],
'landlord_front': _deck[58:83], 'landlord_front': _deck[58:83],
'landlord_down': _deck[83:108], 'landlord_down': _deck[83:108],
'three_landlord_cards': _deck[25:33],
'three_landlord_cards_all': _deck[25:33],
} }
for key in card_play_data: for key in card_play_data:
card_play_data[key].sort() card_play_data[key].sort()
@ -124,7 +167,7 @@ class Env:
self._env.info_sets[pos].player_id = pid self._env.info_sets[pos].player_id = pid
self.infoset = self._game_infoset self.infoset = self._game_infoset
return get_obs(self.infoset, self.use_general, self.use_legacy, self.lite_model) return get_obs(self.infoset, self.use_general, self.use_legacy, self.lite_model, self.use_unified)
def step(self, action): def step(self, action):
""" """
@ -152,7 +195,7 @@ class Env:
} }
obs = None obs = None
else: else:
obs = get_obs(self.infoset, self.use_general, self.use_legacy, self.lite_model) obs = get_obs(self.infoset, self.use_general, self.use_legacy, self.lite_model, self.use_unified)
return obs, reward, done, {} return obs, reward, done, {}
def _get_reward(self, pos): def _get_reward(self, pos):
@ -250,7 +293,7 @@ class DummyAgent(object):
self.action = action self.action = action
def get_obs(infoset, use_general=True, use_legacy = False, lite_model = False): def get_obs(infoset, use_general=True, use_legacy = False, lite_model = False, use_unified=False):
""" """
This function obtains observations with imperfect information This function obtains observations with imperfect information
from the infoset. It has three branches since we encode from the infoset. It has three branches since we encode
@ -278,6 +321,8 @@ def get_obs(infoset, use_general=True, use_legacy = False, lite_model = False):
if infoset.player_position not in ["landlord", "landlord_up", "landlord_front", "landlord_down"]: if infoset.player_position not in ["landlord", "landlord_up", "landlord_front", "landlord_down"]:
raise ValueError('') raise ValueError('')
return _get_obs_general(infoset, infoset.player_position, lite_model) return _get_obs_general(infoset, infoset.player_position, lite_model)
elif use_unified:
return _get_obs_unified(infoset, infoset.player_position, lite_model)
else: else:
if infoset.player_position == 'landlord': if infoset.player_position == 'landlord':
return _get_obs_landlord(infoset, use_legacy, lite_model) return _get_obs_landlord(infoset, use_legacy, lite_model)
@ -312,6 +357,12 @@ def _get_one_hot_array(num_left_cards, max_num_cards, compress_size = 0):
return one_hot return one_hot
def _cards2noise(list_cards, compressed_form = False):
if compressed_form:
return np.random.randint(0, 2, 69, dtype=np.int8)
else:
return np.random.randint(0, 2, 108, dtype=np.int8)
def _cards2array(list_cards, compressed_form = False): def _cards2array(list_cards, compressed_form = False):
""" """
A utility function that transforms the actions, i.e., A utility function that transforms the actions, i.e.,
@ -381,7 +432,7 @@ def _cards2array(list_cards, compressed_form = False):
# # action_seq_array = action_seq_array.reshape(5, 162) # # action_seq_array = action_seq_array.reshape(5, 162)
# return action_seq_array # return action_seq_array
def _action_seq_list2array(action_seq_list, new_model=True, compressed_form = False): def _action_seq_list2array(action_seq_list, new_model=True, compressed_form = False, use_unified = False):
""" """
A utility function to encode the historical moves. A utility function to encode the historical moves.
We encode the historical 20 actions. If there is We encode the historical 20 actions. If there is
@ -404,6 +455,19 @@ def _action_seq_list2array(action_seq_list, new_model=True, compressed_form = Fa
for row, list_cards in enumerate(action_seq_list): for row, list_cards in enumerate(action_seq_list):
if list_cards != []: if list_cards != []:
action_seq_array[row, :108] = _cards2array(list_cards[1], compressed_form) action_seq_array[row, :108] = _cards2array(list_cards[1], compressed_form)
elif use_unified:
if compressed_form:
action_seq_array = np.zeros((len(action_seq_list), 69))
for row, list_cards in enumerate(action_seq_list):
if list_cards != []:
action_seq_array[row, :] = _cards2array(list_cards[1], compressed_form)
action_seq_array = action_seq_array.reshape(24, 276)
else:
action_seq_array = np.zeros((len(action_seq_list), 108))
for row, list_cards in enumerate(action_seq_list):
if list_cards != []:
action_seq_array[row, :] = _cards2array(list_cards[1], compressed_form)
action_seq_array = action_seq_array.reshape(24, 432)
else: else:
if compressed_form: if compressed_form:
action_seq_array = np.zeros((len(action_seq_list), 69)) action_seq_array = np.zeros((len(action_seq_list), 69))
@ -442,7 +506,7 @@ def _process_action_seq(sequence, length=20, new_model=True):
return sequence return sequence
def _get_one_hot_bomb(bomb_num, use_legacy = False): def _get_one_hot_bomb(bomb_num, use_legacy = False, compressed_form = False):
""" """
A utility function to encode the number of bombs A utility function to encode the number of bombs
into one-hot representation. into one-hot representation.
@ -451,7 +515,7 @@ def _get_one_hot_bomb(bomb_num, use_legacy = False):
one_hot = np.zeros(29) one_hot = np.zeros(29)
one_hot[bomb_num[0] + bomb_num[1]] = 1 one_hot[bomb_num[0] + bomb_num[1]] = 1
else: else:
one_hot = np.zeros(56) # 14 + 15 + 27 one_hot = np.zeros(56 if compressed_form else 95) # 14 + 15 + 27
one_hot[bomb_num[0]] = 1 one_hot[bomb_num[0]] = 1
one_hot[14 + bomb_num[1]] = 1 one_hot[14 + bomb_num[1]] = 1
one_hot[29 + bomb_num[2]] = 1 one_hot[29 + bomb_num[2]] = 1
@ -1000,3 +1064,134 @@ def _get_obs_general(infoset, position, compressed_form = False):
'z': z.astype(np.int8), 'z': z.astype(np.int8),
} }
return obs return obs
'''
face_up_level 0x01: three_landlord_cards, 0x02: landlord, 0x04: landlord_up, 0x08: landlord_front, 0x10: landlord_down
'''
def _get_obs_unified(infoset, position, compressed_form = True, face_up_level = 0):
num_legal_actions = len(infoset.legal_actions)
my_handcards = _cards2array(infoset.player_hand_cards, compressed_form)
my_handcards_batch = np.repeat(my_handcards[np.newaxis, :],
num_legal_actions, axis=0)
other_handcards = _cards2array(infoset.other_hand_cards, compressed_form)
my_action_batch = np.zeros(my_handcards_batch.shape)
for j, action in enumerate(infoset.legal_actions):
my_action_batch[j, :] = _cards2array(action, compressed_form)
landlord_num_cards_left = _get_one_hot_array(
infoset.num_cards_left_dict['landlord'], 33, 15 if compressed_form else 0)
landlord_up_num_cards_left = _get_one_hot_array(
infoset.num_cards_left_dict['landlord_up'], 25, 8 if compressed_form else 0)
landlord_front_num_cards_left = _get_one_hot_array(
infoset.num_cards_left_dict['landlord_front'], 25, 8 if compressed_form else 0)
landlord_down_num_cards_left = _get_one_hot_array(
infoset.num_cards_left_dict['landlord_down'], 25, 8 if compressed_form else 0)
landlord_played_cards = _cards2array(
infoset.played_cards['landlord'], compressed_form)
landlord_up_played_cards = _cards2array(
infoset.played_cards['landlord_up'], compressed_form)
landlord_front_played_cards = _cards2array(
infoset.played_cards['landlord_front'], compressed_form)
landlord_down_played_cards = _cards2array(
infoset.played_cards['landlord_down'], compressed_form)
if (face_up_level & 0x01) > 0:
three_landlord_cards = _cards2array(
infoset.three_landlord_cards, compressed_form)
three_landlord_cards_all = _cards2array(
infoset.three_landlord_cards_all, compressed_form)
else:
three_landlord_cards = _cards2noise(
infoset.three_landlord_cards, compressed_form)
three_landlord_cards_all = _cards2noise(
infoset.three_landlord_cards_all, compressed_form)
if (face_up_level & 0x02) > 0:
landlord_cards = _cards2array(
infoset.all_handcards['landlord'], compressed_form)
else:
landlord_cards = _cards2noise(
infoset.all_handcards['landlord'], compressed_form)
if (face_up_level & 0x04) > 0:
landlord_up_cards = _cards2array(
infoset.all_handcards['landlord_up'], compressed_form)
else:
landlord_up_cards = _cards2noise(
infoset.all_handcards['landlord_up'], compressed_form)
if (face_up_level & 0x08) > 0:
landlord_front_cards = _cards2array(
infoset.all_handcards['landlord_front'], compressed_form)
else:
landlord_front_cards = _cards2noise(
infoset.all_handcards['landlord_front'], compressed_form)
if (face_up_level & 0x10) > 0:
landlord_down_cards = _cards2array(
infoset.all_handcards['landlord_down'], compressed_form)
else:
landlord_down_cards = _cards2noise(
infoset.all_handcards['landlord_down'], compressed_form)
bomb_num = _get_one_hot_bomb(
infoset.bomb_num, compressed_form=compressed_form) # 56/95
base_info = np.hstack((
PositionInfoArray[position], # 4
FaceUpLevelArray[face_up_level], # 9
bomb_num, #56
))
num_cards_left = np.hstack((
landlord_num_cards_left, # 33/18
landlord_up_num_cards_left, # 25/17
landlord_front_num_cards_left, # 25/17
landlord_down_num_cards_left)) # 25/17
x_no_action = _action_seq_list2array(_process_action_seq(infoset.card_play_action_seq, 96), False, compressed_form, True) # 24*276 / 24*432
x_batch = np.repeat(
x_no_action[np.newaxis, :],
num_legal_actions, axis=0)
z =np.vstack((
base_info, # 69
num_cards_left, # 108 / 18+17*3=69
my_handcards, # 108/69
other_handcards, # 108/69
landlord_played_cards, # 108/69
landlord_up_played_cards, # 108/69
landlord_front_played_cards, # 108/69
landlord_down_played_cards, # 108/69
landlord_cards, # 108/69
landlord_up_cards, # 108/69
landlord_front_cards, # 108/69
landlord_down_cards, # 108/69
three_landlord_cards, # 108/69
three_landlord_cards_all, # 108/69
))
_z_batch = np.repeat(
z[np.newaxis, :, :],
num_legal_actions, axis=0)
my_action_batch = my_action_batch[:,np.newaxis,:]
z_batch = np.concatenate((my_action_batch, _z_batch), axis=1)
obs = {
'position': position,
'x_batch': x_batch.astype(np.float32),
'z_batch': z_batch.astype(np.float32),
'legal_actions': infoset.legal_actions,
'x_no_action': x_no_action.astype(np.int8),
'z': z.astype(np.int8),
}
return obs

40
douzero/env/game.py vendored
View File

@ -127,7 +127,8 @@ class GameEnv(object):
self.card_play_action_seq = [] self.card_play_action_seq = []
# self.three_landlord_cards = None self.three_landlord_cards = None
self.three_landlord_cards_all = None
self.game_over = False self.game_over = False
self.acting_player_position = None self.acting_player_position = None
@ -185,7 +186,12 @@ class GameEnv(object):
card_play_data['landlord_front'] card_play_data['landlord_front']
self.info_sets['landlord_down'].player_hand_cards = \ self.info_sets['landlord_down'].player_hand_cards = \
card_play_data['landlord_down'] card_play_data['landlord_down']
# self.three_landlord_cards = card_play_data['three_landlord_cards'] if 'three_landlord_cards' not in card_play_data.keys():
self.three_landlord_cards = card_play_data['landlord'][25:33]
self.three_landlord_cards_all = card_play_data['landlord'][25:33]
else:
self.three_landlord_cards = card_play_data['three_landlord_cards'][:]
self.three_landlord_cards_all = card_play_data['three_landlord_cards'][:]
self.get_acting_player_position() self.get_acting_player_position()
self.game_infoset = self.get_infoset() self.game_infoset = self.get_infoset()
@ -253,15 +259,15 @@ class GameEnv(object):
self.played_cards[self.acting_player_position] += action self.played_cards[self.acting_player_position] += action
# if self.acting_player_position == 'landlord' and \ if self.acting_player_position == 'landlord' and \
# len(action) > 0 and \ len(action) > 0 and \
# len(self.three_landlord_cards) > 0: len(self.three_landlord_cards) > 0:
# for card in action: for card in action:
# if len(self.three_landlord_cards) > 0: if len(self.three_landlord_cards) > 0:
# if card in self.three_landlord_cards: if card in self.three_landlord_cards:
# self.three_landlord_cards.remove(card) self.three_landlord_cards.remove(card)
# else: else:
# break break
self.game_done() self.game_done()
if not self.game_over: if not self.game_over:
@ -333,7 +339,8 @@ class GameEnv(object):
def reset(self): def reset(self):
self.card_play_action_seq = [] self.card_play_action_seq = []
# self.three_landlord_cards = None self.three_landlord_cards = None
self.three_landlord_cards_all = None
self.game_over = False self.game_over = False
self.acting_player_position = None self.acting_player_position = None
@ -397,8 +404,10 @@ class GameEnv(object):
self.info_sets[self.acting_player_position].played_cards = \ self.info_sets[self.acting_player_position].played_cards = \
self.played_cards self.played_cards
# self.info_sets[self.acting_player_position].three_landlord_cards = \ self.info_sets[self.acting_player_position].three_landlord_cards = \
# self.three_landlord_cards self.three_landlord_cards
self.info_sets[self.acting_player_position].three_landlord_cards_all = \
self.three_landlord_cards_all
self.info_sets[self.acting_player_position].card_play_action_seq = \ self.info_sets[self.acting_player_position].card_play_action_seq = \
self.card_play_action_seq self.card_play_action_seq
@ -424,7 +433,8 @@ class InfoSet(object):
# The number of cards left for each player. It is a dict with str-->int # The number of cards left for each player. It is a dict with str-->int
self.num_cards_left_dict = None self.num_cards_left_dict = None
# The three landload cards. A list. # The three landload cards. A list.
# self.three_landlord_cards = None self.three_landlord_cards = None
self.three_landlord_cards_all = None
# The historical moves. It is a list of list # The historical moves. It is a list of list
self.card_play_action_seq = None self.card_play_action_seq = None
# The union of the hand cards of the other two players for the current player # The union of the hand cards of the other two players for the current player

View File

@ -6,8 +6,8 @@ from onnxruntime.datasets import get_example
from douzero.env.env import get_obs from douzero.env.env import get_obs
def _load_model(position, model_path, model_type, use_legacy, use_lite, use_onnx=False): def _load_model(position, model_path, model_type, use_legacy, use_lite, use_unified, use_onnx=False):
from douzero.dmc.models import model_dict_new, model_dict_new_lite, model_dict, model_dict_legacy, model_dict_lite from douzero.dmc.models import model_dict_new, model_dict_new_lite, model_dict, model_dict_legacy, model_dict_lite, model_dict_uni_lite
model_path_onnx = model_path + '.onnx' model_path_onnx = model_path + '.onnx'
if use_onnx and os.path.exists(model_path_onnx): if use_onnx and os.path.exists(model_path_onnx):
return None return None
@ -20,6 +20,11 @@ def _load_model(position, model_path, model_type, use_legacy, use_lite, use_onnx
else: else:
if use_legacy: if use_legacy:
model = model_dict_legacy[position]() model = model_dict_legacy[position]()
elif use_unified:
if use_lite:
model = model_dict_uni_lite[position]()
else:
model = model_dict[position]()
else: else:
if use_lite: if use_lite:
model = model_dict_lite[position]() model = model_dict_lite[position]()
@ -52,9 +57,10 @@ class DeepAgent:
def __init__(self, position, model_path, use_onnx=False): def __init__(self, position, model_path, use_onnx=False):
self.use_legacy = True if "legacy" in model_path else False self.use_legacy = True if "legacy" in model_path else False
self.use_unified = True if "uni" in model_path else False
self.lite_model = True if "lite" in model_path else False self.lite_model = True if "lite" in model_path else False
self.model_type = "general" if "resnet" in model_path else "old" self.model_type = "general" if "resnet" in model_path else "old"
self.model = _load_model(position, model_path, self.model_type, self.use_legacy, self.lite_model, use_onnx=use_onnx) self.model = _load_model(position, model_path, self.model_type, self.use_legacy, self.lite_model, self.use_unified, use_onnx=use_onnx)
self.onnx_model_path = os.path.abspath(model_path + '.onnx') self.onnx_model_path = os.path.abspath(model_path + '.onnx')
self.use_onnx = use_onnx self.use_onnx = use_onnx
self.onnx_model = None self.onnx_model = None
@ -71,7 +77,7 @@ class DeepAgent:
if not with_confidence and len(infoset.legal_actions) == 1: if not with_confidence and len(infoset.legal_actions) == 1:
return infoset.legal_actions[0] return infoset.legal_actions[0]
obs = get_obs(infoset, self.model_type == "general", self.use_legacy, self.lite_model) obs = get_obs(infoset, self.model_type == "general", self.use_legacy, self.lite_model, self.use_unified)
if self.onnx_model is None: if self.onnx_model is None:
z_batch = torch.from_numpy(obs['z_batch']).float() z_batch = torch.from_numpy(obs['z_batch']).float()

View File

@ -58,15 +58,15 @@ if __name__ == '__main__':
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
'Dou Dizhu Evaluation') 'Dou Dizhu Evaluation')
parser.add_argument('--landlord', type=str, parser.add_argument('--landlord', type=str,
default='baselines/douzero_12/landlord_weights_39762328900.ckpt') default='baselines/lite_uni_5001600.ckpt')
parser.add_argument('--landlord_up', type=str, parser.add_argument('--landlord_up', type=str,
default='baselines/douzero_12/landlord_up_weights_39762328900.ckpt') default='baselines/lite_uni_5001600.ckpt')
parser.add_argument('--landlord_front', type=str, parser.add_argument('--landlord_front', type=str,
default='baselines/douzero_12/landlord_front_weights_39762328900.ckpt') default='baselines/lite_uni_5001600.ckpt')
parser.add_argument('--landlord_down', type=str, parser.add_argument('--landlord_down', type=str,
default='baselines/douzero_12/landlord_down_weights_39762328900.ckpt') default='baselines/lite_uni_5001600.ckpt')
parser.add_argument('--eval_data', type=str, parser.add_argument('--eval_data', type=str,
default='eval_data_200.pkl') default='eval_data_200_r.pkl')
parser.add_argument('--num_workers', type=int, default=3) parser.add_argument('--num_workers', type=int, default=3)
parser.add_argument('--gpu_device', type=str, default='0') parser.add_argument('--gpu_device', type=str, default='0')
parser.add_argument('--output', type=bool, default=True) parser.add_argument('--output', type=bool, default=True)
@ -80,6 +80,17 @@ if __name__ == '__main__':
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True' os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_device os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_device
landlord_wp, farmer_wp, landlord_adp, farmer_adp = evaluate(args.landlord,
args.landlord_up,
args.landlord_front,
args.landlord_down,
args.eval_data,
args.num_workers,
args.output,
args.title)
print(landlord_wp, farmer_wp, landlord_adp, farmer_adp)
os._exit(0)
baselines = [ baselines = [
{'folder': 'baselines', 'prefix': 'legacy_general', 'frame': 736107200}, {'folder': 'baselines', 'prefix': 'legacy_general', 'frame': 736107200},
{'folder': 'baselines', 'prefix': 'legacy_general', 'frame': 479412800}, {'folder': 'baselines', 'prefix': 'legacy_general', 'frame': 479412800},

View File

@ -28,7 +28,7 @@ RealCard2EnvCard = {'3': 3, '4': 4, '5': 5, '6': 6, '7': 7,
@app.route('/upload', methods=['POST']) @app.route('/upload', methods=['POST'])
def upload(): def upload():
type = request.form.get('type') type = request.form.get('type')
if type not in ['lite_resnet', 'lite_vanilla', 'legacy_vanilla']: if type not in ['lite_resnet', 'lite_vanilla', 'legacy_vanilla', 'lite_unified']:
return jsonify({'status': -1, 'message': 'illegal type'}) return jsonify({'status': -1, 'message': 'illegal type'})
position = request.form.get("position") position = request.form.get("position")
if position not in positions: if position not in positions:

View File

@ -21,7 +21,8 @@ def generate():
'landlord_up': _deck[33:58], 'landlord_up': _deck[33:58],
'landlord_front': _deck[58:83], 'landlord_front': _deck[58:83],
'landlord_down': _deck[83:108], 'landlord_down': _deck[83:108],
# 'three_landlord_cards': _deck[25:33], 'three_landlord_cards': _deck[25:33],
'three_landlord_cards_all': _deck[25:33],
} }
for key in card_play_data: for key in card_play_data:
card_play_data[key].sort() card_play_data[key].sort()