改造为4人斗地主
This commit is contained in:
parent
9c1c56d91d
commit
aab93d66c6
|
@ -16,7 +16,7 @@ from .file_writer import FileWriter
|
||||||
from .models import Model, OldModel
|
from .models import Model, OldModel
|
||||||
from .utils import get_batch, log, create_env, create_optimizers, act
|
from .utils import get_batch, log, create_env, create_optimizers, act
|
||||||
|
|
||||||
mean_episode_return_buf = {p:deque(maxlen=100) for p in ['landlord', 'landlord_up', 'landlord_down', 'bidding']}
|
mean_episode_return_buf = {p:deque(maxlen=100) for p in ['landlord', 'landlord_up', 'landlord_front', 'landlord_down', 'bidding']}
|
||||||
|
|
||||||
def compute_loss(logits, targets):
|
def compute_loss(logits, targets):
|
||||||
loss = ((logits.squeeze(-1) - targets)**2).mean()
|
loss = ((logits.squeeze(-1) - targets)**2).mean()
|
||||||
|
@ -27,7 +27,7 @@ def compute_loss_for_bid(outputs, reward):
|
||||||
|
|
||||||
def learn(position, actor_models, model, batch, optimizer, flags, lock):
|
def learn(position, actor_models, model, batch, optimizer, flags, lock):
|
||||||
"""Performs a learning (optimization) step."""
|
"""Performs a learning (optimization) step."""
|
||||||
position_index = {"landlord": 31, "landlord_up": 32, "landlord_down": 33}
|
position_index = {"landlord": 31, "landlord_up": 32, 'landlord_front': 33, "landlord_down": 34}
|
||||||
print("Learn", position)
|
print("Learn", position)
|
||||||
if flags.training_device != "cpu":
|
if flags.training_device != "cpu":
|
||||||
device = torch.device('cuda:'+str(flags.training_device))
|
device = torch.device('cuda:'+str(flags.training_device))
|
||||||
|
@ -46,7 +46,8 @@ def learn(position, actor_models, model, batch, optimizer, flags, lock):
|
||||||
with lock:
|
with lock:
|
||||||
learner_outputs = model(obs_z, obs_x, return_value=True)
|
learner_outputs = model(obs_z, obs_x, return_value=True)
|
||||||
if position == "bidding":
|
if position == "bidding":
|
||||||
pass
|
loss = compute_loss(learner_outputs['values'], target)
|
||||||
|
# pass
|
||||||
else:
|
else:
|
||||||
loss = compute_loss(learner_outputs['values'], target)
|
loss = compute_loss(learner_outputs['values'], target)
|
||||||
stats = {
|
stats = {
|
||||||
|
@ -101,7 +102,7 @@ def train(flags):
|
||||||
# Initialize queues
|
# Initialize queues
|
||||||
actor_processes = []
|
actor_processes = []
|
||||||
ctx = mp.get_context('spawn')
|
ctx = mp.get_context('spawn')
|
||||||
batch_queues = {"landlord": ctx.SimpleQueue(), "landlord_up": ctx.SimpleQueue(), "landlord_down": ctx.SimpleQueue(), "bidding": ctx.SimpleQueue()}
|
batch_queues = {"landlord": ctx.SimpleQueue(), "landlord_up": ctx.SimpleQueue(), 'landlord_front': ctx.SimpleQueue(), "landlord_down": ctx.SimpleQueue(), "bidding": ctx.SimpleQueue()}
|
||||||
|
|
||||||
# Learner model for training
|
# Learner model for training
|
||||||
learner_model = Model(device=flags.training_device)
|
learner_model = Model(device=flags.training_device)
|
||||||
|
@ -115,20 +116,22 @@ def train(flags):
|
||||||
'loss_landlord',
|
'loss_landlord',
|
||||||
'mean_episode_return_landlord_up',
|
'mean_episode_return_landlord_up',
|
||||||
'loss_landlord_up',
|
'loss_landlord_up',
|
||||||
|
'mean_episode_return_landlord_front',
|
||||||
|
'loss_landlord_front',
|
||||||
'mean_episode_return_landlord_down',
|
'mean_episode_return_landlord_down',
|
||||||
'loss_landlord_down',
|
'loss_landlord_down',
|
||||||
'mean_episode_return_bidding',
|
'mean_episode_return_bidding',
|
||||||
'loss_bidding',
|
'loss_bidding',
|
||||||
]
|
]
|
||||||
frames, stats = 0, {k: 0 for k in stat_keys}
|
frames, stats = 0, {k: 0 for k in stat_keys}
|
||||||
position_frames = {'landlord':0, 'landlord_up':0, 'landlord_down':0, 'bidding': 0}
|
position_frames = {'landlord':0, 'landlord_up':0, 'landlord_front':0, 'landlord_down':0, 'bidding': 0}
|
||||||
|
|
||||||
# Load models if any
|
# Load models if any
|
||||||
if flags.load_model and os.path.exists(checkpointpath):
|
if flags.load_model and os.path.exists(checkpointpath):
|
||||||
checkpoint_states = torch.load(
|
checkpoint_states = torch.load(
|
||||||
checkpointpath, map_location=("cuda:"+str(flags.training_device) if flags.training_device != "cpu" else "cpu")
|
checkpointpath, map_location=("cuda:"+str(flags.training_device) if flags.training_device != "cpu" else "cpu")
|
||||||
)
|
)
|
||||||
for k in ['landlord', 'landlord_up', 'landlord_down', 'bidding']: # ['landlord', 'landlord_up', 'landlord_down']
|
for k in ['landlord', 'landlord_up', 'landlord_front', 'landlord_down', 'bidding']: # ['landlord', 'landlord_up', 'landlord_down']
|
||||||
learner_model.get_model(k).load_state_dict(checkpoint_states["model_state_dict"][k])
|
learner_model.get_model(k).load_state_dict(checkpoint_states["model_state_dict"][k])
|
||||||
optimizers[k].load_state_dict(checkpoint_states["optimizer_state_dict"][k])
|
optimizers[k].load_state_dict(checkpoint_states["optimizer_state_dict"][k])
|
||||||
for device in device_iterator:
|
for device in device_iterator:
|
||||||
|
@ -176,12 +179,12 @@ def train(flags):
|
||||||
threads = []
|
threads = []
|
||||||
locks = {}
|
locks = {}
|
||||||
for device in device_iterator:
|
for device in device_iterator:
|
||||||
locks[device] = {'landlord': threading.Lock(), 'landlord_up': threading.Lock(), 'landlord_down': threading.Lock(), 'bidding': threading.Lock()}
|
locks[device] = {'landlord': threading.Lock(), 'landlord_up': threading.Lock(), 'landlord_front': threading.Lock(), 'landlord_down': threading.Lock(), 'bidding': threading.Lock()}
|
||||||
position_locks = {'landlord': threading.Lock(), 'landlord_up': threading.Lock(), 'landlord_down': threading.Lock(), 'bidding': threading.Lock()}
|
position_locks = {'landlord': threading.Lock(), 'landlord_up': threading.Lock(), 'landlord_front': threading.Lock(), 'landlord_down': threading.Lock(), 'bidding': threading.Lock()}
|
||||||
|
|
||||||
for device in device_iterator:
|
for device in device_iterator:
|
||||||
for i in range(flags.num_threads):
|
for i in range(flags.num_threads):
|
||||||
for position in ['landlord', 'landlord_up', 'landlord_down', 'bidding']:
|
for position in ['landlord', 'landlord_up', 'landlord_front', 'landlord_down', 'bidding']:
|
||||||
thread = threading.Thread(
|
thread = threading.Thread(
|
||||||
target=batch_and_learn, name='batch-and-learn-%d' % i, args=(i,device,position,locks[device][position],position_locks[position]))
|
target=batch_and_learn, name='batch-and-learn-%d' % i, args=(i,device,position,locks[device][position],position_locks[position]))
|
||||||
thread.start()
|
thread.start()
|
||||||
|
@ -202,7 +205,7 @@ def train(flags):
|
||||||
}, checkpointpath)
|
}, checkpointpath)
|
||||||
|
|
||||||
# Save the weights for evaluation purpose
|
# Save the weights for evaluation purpose
|
||||||
for position in ['landlord', 'landlord_up', 'landlord_down', 'bidding']: # ['landlord', 'landlord_up', 'landlord_down']
|
for position in ['landlord', 'landlord_up', 'landlord_front', 'landlord_down', 'bidding']: # ['landlord', 'landlord_up', 'landlord_front', 'landlord_down']
|
||||||
model_weights_dir = os.path.expandvars(os.path.expanduser(
|
model_weights_dir = os.path.expandvars(os.path.expanduser(
|
||||||
'%s/%s/%s' % (flags.savedir, flags.xpid, "general_"+position+'_'+str(frames)+'.ckpt')))
|
'%s/%s/%s' % (flags.savedir, flags.xpid, "general_"+position+'_'+str(frames)+'.ckpt')))
|
||||||
torch.save(learner_model.get_model(position).state_dict(), model_weights_dir)
|
torch.save(learner_model.get_model(position).state_dict(), model_weights_dir)
|
||||||
|
@ -229,15 +232,17 @@ def train(flags):
|
||||||
fps_avg = np.mean(fps_log)
|
fps_avg = np.mean(fps_log)
|
||||||
|
|
||||||
position_fps = {k:(position_frames[k]-position_start_frames[k])/(end_time-start_time) for k in position_frames}
|
position_fps = {k:(position_frames[k]-position_start_frames[k])/(end_time-start_time) for k in position_frames}
|
||||||
log.info('After %i (L:%i U:%i D:%i) frames: @ %.1f fps (avg@ %.1f fps) (L:%.1f U:%.1f D:%.1f) Stats:\n%s',
|
log.info('After %i (L:%i U:%i F:%i D:%i) frames: @ %.1f fps (avg@ %.1f fps) (L:%.1f U:%.1f F:%.1f D:%.1f) Stats:\n%s',
|
||||||
frames,
|
frames,
|
||||||
position_frames['landlord'],
|
position_frames['landlord'],
|
||||||
position_frames['landlord_up'],
|
position_frames['landlord_up'],
|
||||||
|
position_frames['landlord_front'],
|
||||||
position_frames['landlord_down'],
|
position_frames['landlord_down'],
|
||||||
fps,
|
fps,
|
||||||
fps_avg,
|
fps_avg,
|
||||||
position_fps['landlord'],
|
position_fps['landlord'],
|
||||||
position_fps['landlord_up'],
|
position_fps['landlord_up'],
|
||||||
|
position_fps['landlord_front'],
|
||||||
position_fps['landlord_down'],
|
position_fps['landlord_down'],
|
||||||
pprint.pformat(stats))
|
pprint.pformat(stats))
|
||||||
|
|
||||||
|
|
|
@ -12,13 +12,13 @@ import torch.nn.functional as F
|
||||||
class LandlordLstmModel(nn.Module):
|
class LandlordLstmModel(nn.Module):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.lstm = nn.LSTM(162, 128, batch_first=True)
|
self.lstm = nn.LSTM(432, 128, batch_first=True)
|
||||||
self.dense1 = nn.Linear(373 + 128, 512)
|
self.dense1 = nn.Linear(846 + 128, 1024)
|
||||||
self.dense2 = nn.Linear(512, 512)
|
self.dense2 = nn.Linear(1024, 1024)
|
||||||
self.dense3 = nn.Linear(512, 512)
|
self.dense3 = nn.Linear(1024, 768)
|
||||||
self.dense4 = nn.Linear(512, 512)
|
self.dense4 = nn.Linear(768, 512)
|
||||||
self.dense5 = nn.Linear(512, 512)
|
self.dense5 = nn.Linear(512, 256)
|
||||||
self.dense6 = nn.Linear(512, 1)
|
self.dense6 = nn.Linear(256, 1)
|
||||||
|
|
||||||
def forward(self, z, x, return_value=False, flags=None):
|
def forward(self, z, x, return_value=False, flags=None):
|
||||||
lstm_out, (h_n, _) = self.lstm(z)
|
lstm_out, (h_n, _) = self.lstm(z)
|
||||||
|
@ -47,13 +47,13 @@ class LandlordLstmModel(nn.Module):
|
||||||
class FarmerLstmModel(nn.Module):
|
class FarmerLstmModel(nn.Module):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.lstm = nn.LSTM(162, 128, batch_first=True)
|
self.lstm = nn.LSTM(432, 128, batch_first=True)
|
||||||
self.dense1 = nn.Linear(484 + 128, 512)
|
self.dense1 = nn.Linear(1178 + 128, 1024)
|
||||||
self.dense2 = nn.Linear(512, 512)
|
self.dense2 = nn.Linear(1024, 1024)
|
||||||
self.dense3 = nn.Linear(512, 512)
|
self.dense3 = nn.Linear(1024, 768)
|
||||||
self.dense4 = nn.Linear(512, 512)
|
self.dense4 = nn.Linear(768, 512)
|
||||||
self.dense5 = nn.Linear(512, 512)
|
self.dense5 = nn.Linear(512, 256)
|
||||||
self.dense6 = nn.Linear(512, 1)
|
self.dense6 = nn.Linear(256, 1)
|
||||||
|
|
||||||
def forward(self, z, x, return_value=False, flags=None):
|
def forward(self, z, x, return_value=False, flags=None):
|
||||||
lstm_out, (h_n, _) = self.lstm(z)
|
lstm_out, (h_n, _) = self.lstm(z)
|
||||||
|
@ -82,8 +82,8 @@ class FarmerLstmModel(nn.Module):
|
||||||
class LandlordLstmNewModel(nn.Module):
|
class LandlordLstmNewModel(nn.Module):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.lstm = nn.LSTM(162, 128, batch_first=True)
|
self.lstm = nn.LSTM(432, 128, batch_first=True)
|
||||||
self.dense1 = nn.Linear(373 + 128, 512)
|
self.dense1 = nn.Linear(846 + 128, 512)
|
||||||
self.dense2 = nn.Linear(512, 512)
|
self.dense2 = nn.Linear(512, 512)
|
||||||
self.dense3 = nn.Linear(512, 512)
|
self.dense3 = nn.Linear(512, 512)
|
||||||
self.dense4 = nn.Linear(512, 512)
|
self.dense4 = nn.Linear(512, 512)
|
||||||
|
@ -117,8 +117,8 @@ class LandlordLstmNewModel(nn.Module):
|
||||||
class FarmerLstmNewModel(nn.Module):
|
class FarmerLstmNewModel(nn.Module):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.lstm = nn.LSTM(162, 128, batch_first=True)
|
self.lstm = nn.LSTM(432, 128, batch_first=True)
|
||||||
self.dense1 = nn.Linear(484 + 128, 512)
|
self.dense1 = nn.Linear(1178 + 128, 512)
|
||||||
self.dense2 = nn.Linear(512, 512)
|
self.dense2 = nn.Linear(512, 512)
|
||||||
self.dense3 = nn.Linear(512, 512)
|
self.dense3 = nn.Linear(512, 512)
|
||||||
self.dense4 = nn.Linear(512, 512)
|
self.dense4 = nn.Linear(512, 512)
|
||||||
|
@ -253,20 +253,22 @@ class GeneralModel(nn.Module):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.in_planes = 80
|
self.in_planes = 80
|
||||||
#input 1*54*41
|
#input 1*108*41
|
||||||
self.conv1 = nn.Conv1d(40, 80, kernel_size=(3,),
|
self.conv1 = nn.Conv1d(40, 80, kernel_size=(3,),
|
||||||
stride=(2,), padding=1, bias=False) #1*27*80
|
stride=(2,), padding=1, bias=False) #1*54*80
|
||||||
|
|
||||||
self.bn1 = nn.BatchNorm1d(80)
|
self.bn1 = nn.BatchNorm1d(80)
|
||||||
|
|
||||||
self.layer1 = self._make_layer(BasicBlock, 80, 2, stride=2)#1*14*80
|
self.layer1 = self._make_layer(BasicBlock, 80, 2, stride=2)#1*27*80
|
||||||
self.layer2 = self._make_layer(BasicBlock, 160, 2, stride=2)#1*7*160
|
self.layer2 = self._make_layer(BasicBlock, 160, 2, stride=2)#1*14*160
|
||||||
self.layer3 = self._make_layer(BasicBlock, 320, 2, stride=2)#1*4*320
|
self.layer3 = self._make_layer(BasicBlock, 320, 2, stride=2)#1*7*320
|
||||||
|
self.layer4 = self._make_layer(BasicBlock, 640, 2, stride=2)#1*4*640
|
||||||
# self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
|
# self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
|
||||||
self.linear1 = nn.Linear(320 * BasicBlock.expansion * 4 + 15 * 4, 1024)
|
self.linear1 = nn.Linear(640 * BasicBlock.expansion * 4 + 24 * 4, 2048)
|
||||||
self.linear2 = nn.Linear(1024, 512)
|
self.linear2 = nn.Linear(2048, 1024)
|
||||||
self.linear3 = nn.Linear(512, 256)
|
self.linear3 = nn.Linear(1024, 512)
|
||||||
self.linear4 = nn.Linear(256, 1)
|
self.linear4 = nn.Linear(512, 256)
|
||||||
|
self.linear5 = nn.Linear(256, 1)
|
||||||
|
|
||||||
def _make_layer(self, block, planes, num_blocks, stride):
|
def _make_layer(self, block, planes, num_blocks, stride):
|
||||||
strides = [stride] + [1] * (num_blocks - 1)
|
strides = [stride] + [1] * (num_blocks - 1)
|
||||||
|
@ -281,12 +283,14 @@ class GeneralModel(nn.Module):
|
||||||
out = self.layer1(out)
|
out = self.layer1(out)
|
||||||
out = self.layer2(out)
|
out = self.layer2(out)
|
||||||
out = self.layer3(out)
|
out = self.layer3(out)
|
||||||
|
out = self.layer4(out)
|
||||||
out = out.flatten(1,2)
|
out = out.flatten(1,2)
|
||||||
out = torch.cat([x,x,x,x,out], dim=-1)
|
out = torch.cat([x,x,x,x,out], dim=-1)
|
||||||
out = F.leaky_relu_(self.linear1(out))
|
out = F.leaky_relu_(self.linear1(out))
|
||||||
out = F.leaky_relu_(self.linear2(out))
|
out = F.leaky_relu_(self.linear2(out))
|
||||||
out = F.leaky_relu_(self.linear3(out))
|
out = F.leaky_relu_(self.linear3(out))
|
||||||
out = F.leaky_relu_(self.linear4(out))
|
out = F.leaky_relu_(self.linear4(out))
|
||||||
|
out = F.leaky_relu_(self.linear5(out))
|
||||||
if return_value:
|
if return_value:
|
||||||
return dict(values=out)
|
return dict(values=out)
|
||||||
else:
|
else:
|
||||||
|
@ -304,7 +308,7 @@ class BidModel(nn.Module):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
|
|
||||||
self.dense1 = nn.Linear(114, 512)
|
self.dense1 = nn.Linear(208, 512)
|
||||||
self.dense2 = nn.Linear(512, 512)
|
self.dense2 = nn.Linear(512, 512)
|
||||||
self.dense3 = nn.Linear(512, 512)
|
self.dense3 = nn.Linear(512, 512)
|
||||||
self.dense4 = nn.Linear(512, 512)
|
self.dense4 = nn.Linear(512, 512)
|
||||||
|
@ -342,15 +346,18 @@ class BidModel(nn.Module):
|
||||||
model_dict = {}
|
model_dict = {}
|
||||||
model_dict['landlord'] = LandlordLstmModel
|
model_dict['landlord'] = LandlordLstmModel
|
||||||
model_dict['landlord_up'] = FarmerLstmModel
|
model_dict['landlord_up'] = FarmerLstmModel
|
||||||
|
model_dict['landlord_front'] = FarmerLstmModel
|
||||||
model_dict['landlord_down'] = FarmerLstmModel
|
model_dict['landlord_down'] = FarmerLstmModel
|
||||||
model_dict_new = {}
|
model_dict_new = {}
|
||||||
model_dict_new['landlord'] = GeneralModel
|
model_dict_new['landlord'] = GeneralModel
|
||||||
model_dict_new['landlord_up'] = GeneralModel
|
model_dict_new['landlord_up'] = GeneralModel
|
||||||
|
model_dict_new['landlord_front'] = GeneralModel
|
||||||
model_dict_new['landlord_down'] = GeneralModel
|
model_dict_new['landlord_down'] = GeneralModel
|
||||||
model_dict_new['bidding'] = BidModel
|
model_dict_new['bidding'] = BidModel
|
||||||
model_dict_lstm = {}
|
model_dict_lstm = {}
|
||||||
model_dict_lstm['landlord'] = GeneralModel
|
model_dict_lstm['landlord'] = GeneralModel
|
||||||
model_dict_lstm['landlord_up'] = GeneralModel
|
model_dict_lstm['landlord_up'] = GeneralModel
|
||||||
|
model_dict_lstm['landlord_front'] = GeneralModel
|
||||||
model_dict_lstm['landlord_down'] = GeneralModel
|
model_dict_lstm['landlord_down'] = GeneralModel
|
||||||
|
|
||||||
class General_Model:
|
class General_Model:
|
||||||
|
@ -365,6 +372,7 @@ class General_Model:
|
||||||
# model = GeneralModel().to(torch.device(device))
|
# model = GeneralModel().to(torch.device(device))
|
||||||
self.models['landlord'] = GeneralModel1().to(torch.device(device))
|
self.models['landlord'] = GeneralModel1().to(torch.device(device))
|
||||||
self.models['landlord_up'] = GeneralModel1().to(torch.device(device))
|
self.models['landlord_up'] = GeneralModel1().to(torch.device(device))
|
||||||
|
self.models['landlord_front'] = GeneralModel1().to(torch.device(device))
|
||||||
self.models['landlord_down'] = GeneralModel1().to(torch.device(device))
|
self.models['landlord_down'] = GeneralModel1().to(torch.device(device))
|
||||||
self.models['bidding'] = BidModel().to(torch.device(device))
|
self.models['bidding'] = BidModel().to(torch.device(device))
|
||||||
|
|
||||||
|
@ -375,12 +383,14 @@ class General_Model:
|
||||||
def share_memory(self):
|
def share_memory(self):
|
||||||
self.models['landlord'].share_memory()
|
self.models['landlord'].share_memory()
|
||||||
self.models['landlord_up'].share_memory()
|
self.models['landlord_up'].share_memory()
|
||||||
|
self.models['landlord_front'].share_memory()
|
||||||
self.models['landlord_down'].share_memory()
|
self.models['landlord_down'].share_memory()
|
||||||
self.models['bidding'].share_memory()
|
self.models['bidding'].share_memory()
|
||||||
|
|
||||||
def eval(self):
|
def eval(self):
|
||||||
self.models['landlord'].eval()
|
self.models['landlord'].eval()
|
||||||
self.models['landlord_up'].eval()
|
self.models['landlord_up'].eval()
|
||||||
|
self.models['landlord_front'].eval()
|
||||||
self.models['landlord_down'].eval()
|
self.models['landlord_down'].eval()
|
||||||
self.models['bidding'].eval()
|
self.models['bidding'].eval()
|
||||||
|
|
||||||
|
@ -404,6 +414,7 @@ class OldModel:
|
||||||
device = 'cuda:' + str(device)
|
device = 'cuda:' + str(device)
|
||||||
self.models['landlord'] = LandlordLstmModel().to(torch.device(device))
|
self.models['landlord'] = LandlordLstmModel().to(torch.device(device))
|
||||||
self.models['landlord_up'] = FarmerLstmModel().to(torch.device(device))
|
self.models['landlord_up'] = FarmerLstmModel().to(torch.device(device))
|
||||||
|
self.models['landlord_front'] = FarmerLstmModel().to(torch.device(device))
|
||||||
self.models['landlord_down'] = FarmerLstmModel().to(torch.device(device))
|
self.models['landlord_down'] = FarmerLstmModel().to(torch.device(device))
|
||||||
|
|
||||||
def forward(self, position, z, x, training=False, flags=None):
|
def forward(self, position, z, x, training=False, flags=None):
|
||||||
|
@ -413,11 +424,13 @@ class OldModel:
|
||||||
def share_memory(self):
|
def share_memory(self):
|
||||||
self.models['landlord'].share_memory()
|
self.models['landlord'].share_memory()
|
||||||
self.models['landlord_up'].share_memory()
|
self.models['landlord_up'].share_memory()
|
||||||
|
self.models['landlord_front'].share_memory()
|
||||||
self.models['landlord_down'].share_memory()
|
self.models['landlord_down'].share_memory()
|
||||||
|
|
||||||
def eval(self):
|
def eval(self):
|
||||||
self.models['landlord'].eval()
|
self.models['landlord'].eval()
|
||||||
self.models['landlord_up'].eval()
|
self.models['landlord_up'].eval()
|
||||||
|
self.models['landlord_front'].eval()
|
||||||
self.models['landlord_down'].eval()
|
self.models['landlord_down'].eval()
|
||||||
|
|
||||||
def parameters(self, position):
|
def parameters(self, position):
|
||||||
|
@ -442,6 +455,7 @@ class Model:
|
||||||
# model = GeneralModel().to(torch.device(device))
|
# model = GeneralModel().to(torch.device(device))
|
||||||
self.models['landlord'] = GeneralModel().to(torch.device(device))
|
self.models['landlord'] = GeneralModel().to(torch.device(device))
|
||||||
self.models['landlord_up'] = GeneralModel().to(torch.device(device))
|
self.models['landlord_up'] = GeneralModel().to(torch.device(device))
|
||||||
|
self.models['landlord_front'] = GeneralModel().to(torch.device(device))
|
||||||
self.models['landlord_down'] = GeneralModel().to(torch.device(device))
|
self.models['landlord_down'] = GeneralModel().to(torch.device(device))
|
||||||
self.models['bidding'] = BidModel().to(torch.device(device))
|
self.models['bidding'] = BidModel().to(torch.device(device))
|
||||||
|
|
||||||
|
@ -452,12 +466,14 @@ class Model:
|
||||||
def share_memory(self):
|
def share_memory(self):
|
||||||
self.models['landlord'].share_memory()
|
self.models['landlord'].share_memory()
|
||||||
self.models['landlord_up'].share_memory()
|
self.models['landlord_up'].share_memory()
|
||||||
|
self.models['landlord_front'].share_memory()
|
||||||
self.models['landlord_down'].share_memory()
|
self.models['landlord_down'].share_memory()
|
||||||
self.models['bidding'].share_memory()
|
self.models['bidding'].share_memory()
|
||||||
|
|
||||||
def eval(self):
|
def eval(self):
|
||||||
self.models['landlord'].eval()
|
self.models['landlord'].eval()
|
||||||
self.models['landlord_up'].eval()
|
self.models['landlord_up'].eval()
|
||||||
|
self.models['landlord_front'].eval()
|
||||||
self.models['landlord_down'].eval()
|
self.models['landlord_down'].eval()
|
||||||
self.models['bidding'].eval()
|
self.models['bidding'].eval()
|
||||||
|
|
||||||
|
@ -470,11 +486,3 @@ class Model:
|
||||||
def get_models(self):
|
def get_models(self):
|
||||||
return self.models
|
return self.models
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -16,11 +16,15 @@ from douzero.env import Env
|
||||||
Card2Column = {3: 0, 4: 1, 5: 2, 6: 3, 7: 4, 8: 5, 9: 6, 10: 7,
|
Card2Column = {3: 0, 4: 1, 5: 2, 6: 3, 7: 4, 8: 5, 9: 6, 10: 7,
|
||||||
11: 8, 12: 9, 13: 10, 14: 11, 17: 12}
|
11: 8, 12: 9, 13: 10, 14: 11, 17: 12}
|
||||||
|
|
||||||
NumOnes2Array = {0: np.array([0, 0, 0, 0]),
|
NumOnes2Array = {0: np.array([0, 0, 0, 0, 0, 0, 0, 0]),
|
||||||
1: np.array([1, 0, 0, 0]),
|
1: np.array([1, 0, 0, 0, 0, 0, 0, 0]),
|
||||||
2: np.array([1, 1, 0, 0]),
|
2: np.array([1, 1, 0, 0, 0, 0, 0, 0]),
|
||||||
3: np.array([1, 1, 1, 0]),
|
3: np.array([1, 1, 1, 0, 0, 0, 0, 0]),
|
||||||
4: np.array([1, 1, 1, 1])}
|
4: np.array([1, 1, 1, 1, 0, 0, 0, 0]),
|
||||||
|
5: np.array([1, 1, 1, 1, 1, 0, 0, 0]),
|
||||||
|
6: np.array([1, 1, 1, 1, 1, 1, 0, 0]),
|
||||||
|
7: np.array([1, 1, 1, 1, 1, 1, 1, 0]),
|
||||||
|
8: np.array([1, 1, 1, 1, 1, 1, 1, 1])}
|
||||||
|
|
||||||
shandle = logging.StreamHandler()
|
shandle = logging.StreamHandler()
|
||||||
shandle.setFormatter(
|
shandle.setFormatter(
|
||||||
|
@ -60,7 +64,7 @@ def create_optimizers(flags, learner_model):
|
||||||
"""
|
"""
|
||||||
Create three optimizers for the three positions
|
Create one optimizer for each position (the four seats plus bidding)
|
||||||
"""
|
"""
|
||||||
positions = ['landlord', 'landlord_up', 'landlord_down', 'bidding']
|
positions = ['landlord', 'landlord_up', 'landlord_front', 'landlord_down', 'bidding']
|
||||||
optimizers = {}
|
optimizers = {}
|
||||||
for position in positions:
|
for position in positions:
|
||||||
optimizer = RAdam(
|
optimizer = RAdam(
|
||||||
|
@ -72,7 +76,7 @@ def create_optimizers(flags, learner_model):
|
||||||
|
|
||||||
|
|
||||||
def act(i, device, batch_queues, model, flags):
|
def act(i, device, batch_queues, model, flags):
|
||||||
positions = ['landlord', 'landlord_up', 'landlord_down', 'bidding']
|
positions = ['landlord', 'landlord_up', 'landlord_front', 'landlord_down', 'bidding']
|
||||||
for pos in positions:
|
for pos in positions:
|
||||||
model.models[pos].to(torch.device(device if device == "cpu" else ("cuda:"+str(device))))
|
model.models[pos].to(torch.device(device if device == "cpu" else ("cuda:"+str(device))))
|
||||||
try:
|
try:
|
||||||
|
@ -90,9 +94,9 @@ def act(i, device, batch_queues, model, flags):
|
||||||
type_buf = {p: [] for p in positions}
|
type_buf = {p: [] for p in positions}
|
||||||
obs_x_batch_buf = {p: [] for p in positions}
|
obs_x_batch_buf = {p: [] for p in positions}
|
||||||
|
|
||||||
position_index = {"landlord": 31, "landlord_up": 32, "landlord_down": 33}
|
position_index = {"landlord": 31, "landlord_up": 32, "landlord_front": 33, "landlord_down": 34}
|
||||||
bid_type_index = {"landlord": 41, "landlord_up": 42, "landlord_down": 43}
|
# Bid-type code for each seat. Codes must be unique per seat and mirror
# bid_type_map (41-44); landlord_down previously reused landlord_front's 43.
bid_type_index = {"landlord": 41, "landlord_up": 42, "landlord_front": 43, "landlord_down": 44}
|
||||||
bid_type_map = {41: "landlord", 42: "landlord_up", 43: "landlord_down"}
|
bid_type_map = {41: "landlord", 42: "landlord_up", 43: "landlord_front", 44: "landlord_down"}
|
||||||
|
|
||||||
position, obs, env_output = env.initial(model, device, flags=flags)
|
position, obs, env_output = env.initial(model, device, flags=flags)
|
||||||
bid_obs_buffer = env_output["begin_buf"]["bid_obs_buffer"]
|
bid_obs_buffer = env_output["begin_buf"]["bid_obs_buffer"]
|
||||||
|
@ -149,7 +153,7 @@ def act(i, device, batch_queues, model, flags):
|
||||||
target_buf[p].append(episode_return)
|
target_buf[p].append(episode_return)
|
||||||
break
|
break
|
||||||
for p in positions:
|
for p in positions:
|
||||||
if size[p] > T:
|
while size[p] > T:
|
||||||
# print(p, "epr", torch.stack([torch.tensor(ndarr, device="cpu") for ndarr in episode_return_buf[p][:T]]),)
|
# print(p, "epr", torch.stack([torch.tensor(ndarr, device="cpu") for ndarr in episode_return_buf[p][:T]]),)
|
||||||
batch_queues[p].put({
|
batch_queues[p].put({
|
||||||
"done": torch.stack([torch.tensor(ndarr, device="cpu") for ndarr in done_buf[p][:T]]),
|
"done": torch.stack([torch.tensor(ndarr, device="cpu") for ndarr in done_buf[p][:T]]),
|
||||||
|
@ -182,18 +186,22 @@ def _cards2tensor(list_cards):
|
||||||
See Figure 2 in https://arxiv.org/pdf/2106.06135.pdf
|
See Figure 2 in https://arxiv.org/pdf/2106.06135.pdf
|
||||||
"""
|
"""
|
||||||
if len(list_cards) == 0:
|
if len(list_cards) == 0:
|
||||||
return torch.zeros(54, dtype=torch.int8)
|
return torch.zeros(108, dtype=torch.int8)
|
||||||
|
|
||||||
matrix = np.zeros([4, 13], dtype=np.int8)
|
matrix = np.zeros([8, 13], dtype=np.int8)
|
||||||
jokers = np.zeros(2, dtype=np.int8)
|
jokers = np.zeros(4, dtype=np.int8)
|
||||||
counter = Counter(list_cards)
|
counter = Counter(list_cards)
|
||||||
for card, num_times in counter.items():
|
for card, num_times in counter.items():
|
||||||
if card < 20:
|
if card < 20:
|
||||||
matrix[:, Card2Column[card]] = NumOnes2Array[num_times]
|
matrix[:, Card2Column[card]] = NumOnes2Array[num_times]
|
||||||
elif card == 20:
|
elif card == 20:
|
||||||
jokers[0] = 1
|
jokers[0] = 1
|
||||||
|
if num_times == 2:
|
||||||
|
jokers[1] = 1
|
||||||
elif card == 30:
|
elif card == 30:
|
||||||
jokers[1] = 1
|
jokers[2] = 1
|
||||||
|
if num_times == 2:
|
||||||
|
jokers[3] = 1
|
||||||
matrix = np.concatenate((matrix.flatten('F'), jokers))
|
matrix = np.concatenate((matrix.flatten('F'), jokers))
|
||||||
matrix = torch.from_numpy(matrix)
|
matrix = torch.from_numpy(matrix)
|
||||||
return matrix
|
return matrix
|
||||||
|
|
|
@ -11,17 +11,22 @@ env_url = "http://od.vcccz.com/hechuan/env.py"
|
||||||
Card2Column = {3: 0, 4: 1, 5: 2, 6: 3, 7: 4, 8: 5, 9: 6, 10: 7,
|
Card2Column = {3: 0, 4: 1, 5: 2, 6: 3, 7: 4, 8: 5, 9: 6, 10: 7,
|
||||||
11: 8, 12: 9, 13: 10, 14: 11, 17: 12}
|
11: 8, 12: 9, 13: 10, 14: 11, 17: 12}
|
||||||
|
|
||||||
NumOnes2Array = {0: np.array([0, 0, 0, 0]),
|
NumOnes2Array = {0: np.array([0, 0, 0, 0, 0, 0, 0, 0]),
|
||||||
1: np.array([1, 0, 0, 0]),
|
1: np.array([1, 0, 0, 0, 0, 0, 0, 0]),
|
||||||
2: np.array([1, 1, 0, 0]),
|
2: np.array([1, 1, 0, 0, 0, 0, 0, 0]),
|
||||||
3: np.array([1, 1, 1, 0]),
|
3: np.array([1, 1, 1, 0, 0, 0, 0, 0]),
|
||||||
4: np.array([1, 1, 1, 1])}
|
4: np.array([1, 1, 1, 1, 0, 0, 0, 0]),
|
||||||
|
5: np.array([1, 1, 1, 1, 1, 0, 0, 0]),
|
||||||
|
6: np.array([1, 1, 1, 1, 1, 1, 0, 0]),
|
||||||
|
7: np.array([1, 1, 1, 1, 1, 1, 1, 0]),
|
||||||
|
8: np.array([1, 1, 1, 1, 1, 1, 1, 1])}
|
||||||
|
|
||||||
|
|
||||||
deck = []
|
deck = []
|
||||||
for i in range(3, 15):
|
for i in range(3, 15):
|
||||||
deck.extend([i for _ in range(4)])
|
deck.extend([i for _ in range(8)])
|
||||||
deck.extend([17 for _ in range(4)])
|
deck.extend([17 for _ in range(8)])
|
||||||
deck.extend([20, 30])
|
deck.extend([20, 20, 30, 30])
|
||||||
|
|
||||||
|
|
||||||
class Env:
|
class Env:
|
||||||
|
@ -46,7 +51,7 @@ class Env:
|
||||||
# Initialize players
|
# Initialize players
|
||||||
# We use three dummy player for the target position
|
# We use four dummy players, one for each target position
|
||||||
self.players = {}
|
self.players = {}
|
||||||
for position in ['landlord', 'landlord_up', 'landlord_down']:
|
for position in ['landlord', 'landlord_up', 'landlord_front', 'landlord_down']:
|
||||||
self.players[position] = DummyAgent(position)
|
self.players[position] = DummyAgent(position)
|
||||||
|
|
||||||
# Initialize the internal environment
|
# Initialize the internal environment
|
||||||
|
@ -67,10 +72,11 @@ class Env:
|
||||||
if model is None:
|
if model is None:
|
||||||
_deck = deck.copy()
|
_deck = deck.copy()
|
||||||
np.random.shuffle(_deck)
|
np.random.shuffle(_deck)
|
||||||
card_play_data = {'landlord': _deck[:20],
|
card_play_data = {'landlord': _deck[:33],
|
||||||
'landlord_up': _deck[20:37],
|
'landlord_up': _deck[33:58],
|
||||||
'landlord_down': _deck[37:54],
|
'landlord_front': _deck[58:83],
|
||||||
'three_landlord_cards': _deck[17:20],
|
'landlord_down': _deck[83:108],
|
||||||
|
# 'three_landlord_cards': _deck[17:20],
|
||||||
}
|
}
|
||||||
for key in card_play_data:
|
for key in card_play_data:
|
||||||
card_play_data[key].sort()
|
card_play_data[key].sort()
|
||||||
|
@ -97,18 +103,20 @@ class Env:
|
||||||
_deck = deck.copy()
|
_deck = deck.copy()
|
||||||
np.random.shuffle(_deck)
|
np.random.shuffle(_deck)
|
||||||
card_play_data = [
|
card_play_data = [
|
||||||
_deck[:17],
|
_deck[:25],
|
||||||
_deck[17:34],
|
_deck[25:50],
|
||||||
_deck[34:51],
|
_deck[50:75],
|
||||||
|
_deck[75:100],
|
||||||
]
|
]
|
||||||
for i in range(3):
|
for i in range(4):
|
||||||
card_play_data[i].sort()
|
card_play_data[i].sort()
|
||||||
landlord_cards = _deck[51:54]
|
landlord_cards = _deck[100:108]
|
||||||
landlord_cards.sort()
|
landlord_cards.sort()
|
||||||
bid_info = np.array([[-1, -1, -1],
|
bid_info = np.array([[-1, -1, -1, -1],
|
||||||
[-1, -1, -1],
|
[-1, -1, -1, -1],
|
||||||
[-1, -1, -1],
|
[-1, -1, -1, -1],
|
||||||
[-1, -1, -1]])
|
[-1, -1, -1, -1],
|
||||||
|
[-1, -1, -1, -1]])
|
||||||
bidding_player = random.randint(0, 2)
|
# Choose the opening bidder among all four seats (0..3). random.randint is
# inclusive on both ends, so the old upper bound of 2 never let seat 3 open.
bidding_player = random.randint(0, 3)
|
||||||
# bidding_player = 0 # debug
|
# bidding_player = 0 # debug
|
||||||
first_bid = -1
|
first_bid = -1
|
||||||
|
@ -116,7 +124,7 @@ class Env:
|
||||||
bid_count = 0
|
bid_count = 0
|
||||||
if bid_limit <= 0:
|
if bid_limit <= 0:
|
||||||
force_bid = True
|
force_bid = True
|
||||||
for r in range(3):
|
for r in range(4):
|
||||||
bidding_obs = _get_obs_for_bid(bidding_player, bid_info, card_play_data[bidding_player])
|
bidding_obs = _get_obs_for_bid(bidding_player, bid_info, card_play_data[bidding_player])
|
||||||
with torch.no_grad():
|
with torch.no_grad():
|
||||||
action = model.forward("bidding", torch.tensor(bidding_obs["z_batch"], device=device),
|
action = model.forward("bidding", torch.tensor(bidding_obs["z_batch"], device=device),
|
||||||
|
@ -137,19 +145,19 @@ class Env:
|
||||||
bid_count += 1
|
bid_count += 1
|
||||||
if first_bid == -1:
|
if first_bid == -1:
|
||||||
first_bid = bidding_player
|
first_bid = bidding_player
|
||||||
for p in range(3):
|
for p in range(4):
|
||||||
if p == bidding_player:
|
if p == bidding_player:
|
||||||
bid_info[r][p] = 1
|
bid_info[r][p] = 1
|
||||||
else:
|
else:
|
||||||
bid_info[r][p] = 0
|
bid_info[r][p] = 0
|
||||||
else:
|
else:
|
||||||
bid_info[r] = [0, 0, 0]
|
bid_info[r] = [0, 0, 0, 0]
|
||||||
bidding_player = (bidding_player + 1) % 3
|
bidding_player = (bidding_player + 1) % 4
|
||||||
one_count = np.count_nonzero(bid_info == 1)
|
one_count = np.count_nonzero(bid_info == 1)
|
||||||
if one_count == 0:
|
if one_count == 0:
|
||||||
continue
|
continue
|
||||||
elif one_count > 1:
|
elif one_count > 1:
|
||||||
r = 3
|
r = 4
|
||||||
bidding_player = first_bid
|
bidding_player = first_bid
|
||||||
bidding_obs = _get_obs_for_bid(bidding_player, bid_info, card_play_data[bidding_player])
|
bidding_obs = _get_obs_for_bid(bidding_player, bid_info, card_play_data[bidding_player])
|
||||||
with torch.no_grad():
|
with torch.no_grad():
|
||||||
|
@ -163,7 +171,7 @@ class Env:
|
||||||
if action["action"] == 1:
|
if action["action"] == 1:
|
||||||
last_bid = bidding_player
|
last_bid = bidding_player
|
||||||
bid_count += 1
|
bid_count += 1
|
||||||
for p in range(3):
|
for p in range(4):
|
||||||
if p == bidding_player:
|
if p == bidding_player:
|
||||||
bid_info[r][p] = 1
|
bid_info[r][p] = 1
|
||||||
else:
|
else:
|
||||||
|
@ -171,20 +179,23 @@ class Env:
|
||||||
break
|
break
|
||||||
card_play_data[last_bid].extend(landlord_cards)
|
card_play_data[last_bid].extend(landlord_cards)
|
||||||
card_play_data = {'landlord': card_play_data[last_bid],
|
card_play_data = {'landlord': card_play_data[last_bid],
|
||||||
'landlord_up': card_play_data[(last_bid - 1) % 3],
|
'landlord_up': card_play_data[(last_bid - 1) % 4],
|
||||||
'landlord_down': card_play_data[(last_bid + 1) % 3],
|
'landlord_down': card_play_data[(last_bid + 1) % 4],
|
||||||
'three_landlord_cards': landlord_cards,
|
'landlord_front': card_play_data[(last_bid + 2) % 4],
|
||||||
|
# 'three_landlord_cards': landlord_cards,
|
||||||
}
|
}
|
||||||
card_play_data["landlord"].sort()
|
card_play_data["landlord"].sort()
|
||||||
player_ids = {
|
player_ids = {
|
||||||
'landlord': last_bid,
|
'landlord': last_bid,
|
||||||
'landlord_up': (last_bid - 1) % 3,
|
'landlord_up': (last_bid - 1) % 4,
|
||||||
'landlord_down': (last_bid + 1) % 3,
|
'landlord_down': (last_bid + 1) % 4,
|
||||||
|
'landlord_front': (last_bid + 2) % 4,
|
||||||
}
|
}
|
||||||
player_positions = {
|
player_positions = {
|
||||||
last_bid: 'landlord',
|
last_bid: 'landlord',
|
||||||
(last_bid - 1) % 3: 'landlord_up',
|
(last_bid - 1) % 4: 'landlord_up',
|
||||||
(last_bid + 1) % 3: 'landlord_down'
|
(last_bid + 1) % 4: 'landlord_down',
|
||||||
|
(last_bid + 2) % 4: 'landlord_front',
|
||||||
}
|
}
|
||||||
for bid_obs in bid_obs_buffer:
|
for bid_obs in bid_obs_buffer:
|
||||||
bid_obs.update({"position": player_positions[bid_obs["pid"]]})
|
bid_obs.update({"position": player_positions[bid_obs["pid"]]})
|
||||||
|
@ -192,14 +203,15 @@ class Env:
|
||||||
# Initialize the cards
|
# Initialize the cards
|
||||||
self._env.card_play_init(card_play_data)
|
self._env.card_play_init(card_play_data)
|
||||||
multiply_map = [
|
multiply_map = [
|
||||||
np.array([1, 0, 0]),
|
np.array([1, 0, 0, 0]),
|
||||||
np.array([0, 1, 0]),
|
np.array([0, 1, 0, 0]),
|
||||||
np.array([0, 0, 1])
|
np.array([0, 0, 1, 0]),
|
||||||
|
np.array([0, 0, 0, 1])
|
||||||
]
|
]
|
||||||
for pos in ["landlord", "landlord_up", "landlord_down"]:
|
for pos in ["landlord", "landlord_up", "landlord_front", "landlord_down"]:
|
||||||
pid = player_ids[pos]
|
pid = player_ids[pos]
|
||||||
self._env.info_sets[pos].player_id = pid
|
self._env.info_sets[pos].player_id = pid
|
||||||
self._env.info_sets[pos].bid_info = bid_info[:, [(pid - 1) % 3, pid, (pid + 1) % 3]]
|
self._env.info_sets[pos].bid_info = bid_info[:, [(pid - 1) % 4, pid, (pid + 1) % 4, (pid + 2) % 4]]
|
||||||
self._env.bid_count = bid_count
|
self._env.bid_count = bid_count
|
||||||
# multiply_obs = _get_obs_for_multiply(pos, self._env.info_sets[pos].bid_info, card_play_data[pos],
|
# multiply_obs = _get_obs_for_multiply(pos, self._env.info_sets[pos].bid_info, card_play_data[pos],
|
||||||
# landlord_cards)
|
# landlord_cards)
|
||||||
|
@ -245,11 +257,13 @@ class Env:
|
||||||
"play": {
|
"play": {
|
||||||
"landlord": self._get_reward("landlord"),
|
"landlord": self._get_reward("landlord"),
|
||||||
"landlord_up": self._get_reward("landlord_up"),
|
"landlord_up": self._get_reward("landlord_up"),
|
||||||
|
"landlord_front": self._get_reward("landlord_front"),
|
||||||
"landlord_down": self._get_reward("landlord_down")
|
"landlord_down": self._get_reward("landlord_down")
|
||||||
},
|
},
|
||||||
"bid": {
|
"bid": {
|
||||||
"landlord": self._get_reward_bidding("landlord")*2,
|
"landlord": self._get_reward_bidding("landlord")*3,
|
||||||
"landlord_up": self._get_reward_bidding("landlord_up"),
|
"landlord_up": self._get_reward_bidding("landlord_up"),
|
||||||
|
"landlord_front": self._get_reward_bidding("landlord_front"),
|
||||||
"landlord_down": self._get_reward_bidding("landlord_down")
|
"landlord_down": self._get_reward_bidding("landlord_down")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -269,16 +283,19 @@ class Env:
|
||||||
self_bomb_num = self._env.pos_bomb_num[pos]
|
self_bomb_num = self._env.pos_bomb_num[pos]
|
||||||
if winner == 'landlord':
|
if winner == 'landlord':
|
||||||
if self.objective == 'adp':
|
if self.objective == 'adp':
|
||||||
return (1.1 - self._env.step_count * 0.0033) * 1.3 ** (bomb_num +self._env.multiply_count[pos]) /8
|
return (1.1 - self._env.step_count * 0.0033) * 1.3 ** (bomb_num[0] + bomb_num[1] + self._env.multiply_count[pos]) /8
|
||||||
|
return (2.0 ** bomb_num[0]) * (3.0 ** bomb_num[1])
|
||||||
elif self.objective == 'logadp':
|
elif self.objective == 'logadp':
|
||||||
return (1.0 - self._env.step_count * 0.0033) * 1.3**self_bomb_num * 2**self._env.multiply_count[pos] / 4
|
return (1.0 - self._env.step_count * 0.0033) * 1.3**self_bomb_num * 2**self._env.multiply_count[pos] / 4
|
||||||
|
return bomb_num[0] + bomb_num[1] + 1.0
|
||||||
else:
|
else:
|
||||||
return 1.0 - self._env.step_count * 0.0033
|
return 1.0 - self._env.step_count * 0.0033
|
||||||
|
return 1.0
|
||||||
else:
|
else:
|
||||||
if self.objective == 'adp':
|
if self.objective == 'adp':
|
||||||
return (-1.1 - self._env.step_count * 0.0033) * 1.3 ** (bomb_num +self._env.multiply_count[pos]) /8
|
return (-1.1 - self._env.step_count * 0.0033) * 1.3 ** (bomb_num[0] + bomb_num[1] +self._env.multiply_count[pos]) /8
|
||||||
elif self.objective == 'logadp':
|
elif self.objective == 'logadp':
|
||||||
return (-1.0 + self._env.step_count * 0.0033) * 1.3**self_bomb_num * 2**self._env.multiply_count[pos] / 4
|
return (-1.0 + self._env.step_count * 0.0033) * 1.3**(self_bomb_num) * 2**self._env.multiply_count[pos] / 4
|
||||||
else:
|
else:
|
||||||
return -1.0 + self._env.step_count * 0.0033
|
return -1.0 + self._env.step_count * 0.0033
|
||||||
|
|
||||||
|
@ -371,12 +388,12 @@ def get_obs(infoset, use_general=True):
|
||||||
This function obtains observations with imperfect information
|
This function obtains observations with imperfect information
|
||||||
from the infoset. It has three branches since we encode
|
from the infoset. It has three branches since we encode
|
||||||
different features for different positions.
|
different features for different positions.
|
||||||
|
|
||||||
This function will return dictionary named `obs`. It contains
|
This function will return dictionary named `obs`. It contains
|
||||||
several fields. These fields will be used to train the model.
|
several fields. These fields will be used to train the model.
|
||||||
One can play with those features to improve the performance.
|
One can play with those features to improve the performance.
|
||||||
|
|
||||||
`position` is a string that can be landlord/landlord_down/landlord_up
|
`position` is a string that can be landlord/landlord_down/landlord_front/landlord_up
|
||||||
|
|
||||||
`x_batch` is a batch of features (excluding the hisorical moves).
|
`x_batch` is a batch of features (excluding the hisorical moves).
|
||||||
It also encodes the action feature
|
It also encodes the action feature
|
||||||
|
@ -391,7 +408,7 @@ def get_obs(infoset, use_general=True):
|
||||||
`z`: same as z_batch but not a batch.
|
`z`: same as z_batch but not a batch.
|
||||||
"""
|
"""
|
||||||
if use_general:
|
if use_general:
|
||||||
if infoset.player_position not in ["landlord", "landlord_up", "landlord_down"]:
|
if infoset.player_position not in ["landlord", "landlord_up", "landlord_front", "landlord_down"]:
|
||||||
raise ValueError('')
|
raise ValueError('')
|
||||||
return _get_obs_general(infoset, infoset.player_position)
|
return _get_obs_general(infoset, infoset.player_position)
|
||||||
else:
|
else:
|
||||||
|
@ -399,6 +416,8 @@ def get_obs(infoset, use_general=True):
|
||||||
return _get_obs_landlord(infoset)
|
return _get_obs_landlord(infoset)
|
||||||
elif infoset.player_position == 'landlord_up':
|
elif infoset.player_position == 'landlord_up':
|
||||||
return _get_obs_landlord_up(infoset)
|
return _get_obs_landlord_up(infoset)
|
||||||
|
elif infoset.player_position == 'landlord_front':
|
||||||
|
return _get_obs_landlord_front(infoset)
|
||||||
elif infoset.player_position == 'landlord_down':
|
elif infoset.player_position == 'landlord_down':
|
||||||
return _get_obs_landlord_down(infoset)
|
return _get_obs_landlord_down(infoset)
|
||||||
else:
|
else:
|
||||||
|
@ -424,18 +443,22 @@ def _cards2array(list_cards):
|
||||||
the representations.
|
the representations.
|
||||||
"""
|
"""
|
||||||
if len(list_cards) == 0:
|
if len(list_cards) == 0:
|
||||||
return np.zeros(54, dtype=np.int8)
|
return np.zeros(108, dtype=np.int8)
|
||||||
|
|
||||||
matrix = np.zeros([4, 13], dtype=np.int8)
|
matrix = np.zeros([8, 13], dtype=np.int8)
|
||||||
jokers = np.zeros(2, dtype=np.int8)
|
jokers = np.zeros(4, dtype=np.int8)
|
||||||
counter = Counter(list_cards)
|
counter = Counter(list_cards)
|
||||||
for card, num_times in counter.items():
|
for card, num_times in counter.items():
|
||||||
if card < 20:
|
if card < 20:
|
||||||
matrix[:, Card2Column[card]] = NumOnes2Array[num_times]
|
matrix[:, Card2Column[card]] = NumOnes2Array[num_times]
|
||||||
elif card == 20:
|
elif card == 20:
|
||||||
jokers[0] = 1
|
jokers[0] = 1
|
||||||
|
if num_times == 2:
|
||||||
|
jokers[1] = 1
|
||||||
elif card == 30:
|
elif card == 30:
|
||||||
jokers[1] = 1
|
jokers[2] = 1
|
||||||
|
if num_times == 2:
|
||||||
|
jokers[3] = 1
|
||||||
return np.concatenate((matrix.flatten('F'), jokers))
|
return np.concatenate((matrix.flatten('F'), jokers))
|
||||||
|
|
||||||
|
|
||||||
|
@ -449,7 +472,7 @@ def _cards2array(list_cards):
|
||||||
# Finally, we obtain a 5x162 matrix, which will be fed
|
# Finally, we obtain a 5x162 matrix, which will be fed
|
||||||
# into LSTM for encoding.
|
# into LSTM for encoding.
|
||||||
# """
|
# """
|
||||||
# action_seq_array = np.zeros((len(action_seq_list), 54))
|
# action_seq_array = np.zeros((len(action_seq_list), 108))
|
||||||
# for row, list_cards in enumerate(action_seq_list):
|
# for row, list_cards in enumerate(action_seq_list):
|
||||||
# action_seq_array[row, :] = _cards2array(list_cards)
|
# action_seq_array[row, :] = _cards2array(list_cards)
|
||||||
# # action_seq_array = action_seq_array.reshape(5, 162)
|
# # action_seq_array = action_seq_array.reshape(5, 162)
|
||||||
|
@ -458,26 +481,26 @@ def _cards2array(list_cards):
|
||||||
def _action_seq_list2array(action_seq_list, new_model=True):
|
def _action_seq_list2array(action_seq_list, new_model=True):
|
||||||
"""
|
"""
|
||||||
A utility function to encode the historical moves.
|
A utility function to encode the historical moves.
|
||||||
We encode the historical 15 actions. If there is
|
We encode the historical 20 actions. If there is
|
||||||
no 15 actions, we pad the features with 0. Since
|
no 20 actions, we pad the features with 0. Since
|
||||||
three moves is a round in DouDizhu, we concatenate
|
three moves is a round in DouDizhu, we concatenate
|
||||||
the representations for each consecutive three moves.
|
the representations for each consecutive three moves.
|
||||||
Finally, we obtain a 5x162 matrix, which will be fed
|
Finally, we obtain a 5x432 matrix, which will be fed
|
||||||
into LSTM for encoding.
|
into LSTM for encoding.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if new_model:
|
if new_model:
|
||||||
position_map = {"landlord": 0, "landlord_up": 1, "landlord_down": 2}
|
# position_map = {"landlord": 0, "landlord_up": 1, "landlord_front": 2, "landlord_down": 3}
|
||||||
action_seq_array = np.ones((len(action_seq_list), 54)) * -1 # Default Value -1 for not using area
|
action_seq_array = np.ones((len(action_seq_list), 108)) * -1 # Default Value -1 for not using area
|
||||||
for row, list_cards in enumerate(action_seq_list):
|
for row, list_cards in enumerate(action_seq_list):
|
||||||
if list_cards != []:
|
if list_cards != []:
|
||||||
action_seq_array[row, :54] = _cards2array(list_cards[1])
|
action_seq_array[row, :108] = _cards2array(list_cards[1])
|
||||||
else:
|
else:
|
||||||
action_seq_array = np.zeros((len(action_seq_list), 54))
|
action_seq_array = np.zeros((len(action_seq_list), 108))
|
||||||
for row, list_cards in enumerate(action_seq_list):
|
for row, list_cards in enumerate(action_seq_list):
|
||||||
if list_cards != []:
|
if list_cards != []:
|
||||||
action_seq_array[row, :] = _cards2array(list_cards[1])
|
action_seq_array[row, :] = _cards2array(list_cards[1])
|
||||||
action_seq_array = action_seq_array.reshape(5, 162)
|
action_seq_array = action_seq_array.reshape(5, 432)
|
||||||
return action_seq_array
|
return action_seq_array
|
||||||
|
|
||||||
# action_seq_array = np.zeros((len(action_seq_list), 54))
|
# action_seq_array = np.zeros((len(action_seq_list), 54))
|
||||||
|
@ -487,10 +510,10 @@ def _action_seq_list2array(action_seq_list, new_model=True):
|
||||||
# return action_seq_array
|
# return action_seq_array
|
||||||
|
|
||||||
|
|
||||||
def _process_action_seq(sequence, length=15, new_model=True):
|
def _process_action_seq(sequence, length=20, new_model=True):
|
||||||
"""
|
"""
|
||||||
A utility function encoding historical moves. We
|
A utility function encoding historical moves. We
|
||||||
encode 15 moves. If there is no 15 moves, we pad
|
encode 20 moves. If there is no 20 moves, we pad
|
||||||
with zeros.
|
with zeros.
|
||||||
"""
|
"""
|
||||||
sequence = sequence[-length:].copy()
|
sequence = sequence[-length:].copy()
|
||||||
|
@ -508,8 +531,8 @@ def _get_one_hot_bomb(bomb_num):
|
||||||
A utility function to encode the number of bombs
|
A utility function to encode the number of bombs
|
||||||
into one-hot representation.
|
into one-hot representation.
|
||||||
"""
|
"""
|
||||||
one_hot = np.zeros(15)
|
one_hot = np.zeros(29)
|
||||||
one_hot[bomb_num] = 1
|
one_hot[bomb_num[0] + bomb_num[1]] = 1
|
||||||
return one_hot
|
return one_hot
|
||||||
|
|
||||||
|
|
||||||
|
@ -536,13 +559,19 @@ def _get_obs_landlord(infoset):
|
||||||
my_action_batch[j, :] = _cards2array(action)
|
my_action_batch[j, :] = _cards2array(action)
|
||||||
|
|
||||||
landlord_up_num_cards_left = _get_one_hot_array(
|
landlord_up_num_cards_left = _get_one_hot_array(
|
||||||
infoset.num_cards_left_dict['landlord_up'], 17)
|
infoset.num_cards_left_dict['landlord_up'], 25)
|
||||||
landlord_up_num_cards_left_batch = np.repeat(
|
landlord_up_num_cards_left_batch = np.repeat(
|
||||||
landlord_up_num_cards_left[np.newaxis, :],
|
landlord_up_num_cards_left[np.newaxis, :],
|
||||||
num_legal_actions, axis=0)
|
num_legal_actions, axis=0)
|
||||||
|
|
||||||
|
landlord_front_num_cards_left = _get_one_hot_array(
|
||||||
|
infoset.num_cards_left_dict['landlord_front'], 25)
|
||||||
|
landlord_front_num_cards_left_batch = np.repeat(
|
||||||
|
landlord_front_num_cards_left[np.newaxis, :],
|
||||||
|
num_legal_actions, axis=0)
|
||||||
|
|
||||||
landlord_down_num_cards_left = _get_one_hot_array(
|
landlord_down_num_cards_left = _get_one_hot_array(
|
||||||
infoset.num_cards_left_dict['landlord_down'], 17)
|
infoset.num_cards_left_dict['landlord_down'], 25)
|
||||||
landlord_down_num_cards_left_batch = np.repeat(
|
landlord_down_num_cards_left_batch = np.repeat(
|
||||||
landlord_down_num_cards_left[np.newaxis, :],
|
landlord_down_num_cards_left[np.newaxis, :],
|
||||||
num_legal_actions, axis=0)
|
num_legal_actions, axis=0)
|
||||||
|
@ -553,6 +582,12 @@ def _get_obs_landlord(infoset):
|
||||||
landlord_up_played_cards[np.newaxis, :],
|
landlord_up_played_cards[np.newaxis, :],
|
||||||
num_legal_actions, axis=0)
|
num_legal_actions, axis=0)
|
||||||
|
|
||||||
|
landlord_front_played_cards = _cards2array(
|
||||||
|
infoset.played_cards['landlord_front'])
|
||||||
|
landlord_front_played_cards_batch = np.repeat(
|
||||||
|
landlord_front_played_cards[np.newaxis, :],
|
||||||
|
num_legal_actions, axis=0)
|
||||||
|
|
||||||
landlord_down_played_cards = _cards2array(
|
landlord_down_played_cards = _cards2array(
|
||||||
infoset.played_cards['landlord_down'])
|
infoset.played_cards['landlord_down'])
|
||||||
landlord_down_played_cards_batch = np.repeat(
|
landlord_down_played_cards_batch = np.repeat(
|
||||||
|
@ -569,8 +604,10 @@ def _get_obs_landlord(infoset):
|
||||||
other_handcards_batch,
|
other_handcards_batch,
|
||||||
last_action_batch,
|
last_action_batch,
|
||||||
landlord_up_played_cards_batch,
|
landlord_up_played_cards_batch,
|
||||||
|
landlord_front_played_cards_batch,
|
||||||
landlord_down_played_cards_batch,
|
landlord_down_played_cards_batch,
|
||||||
landlord_up_num_cards_left_batch,
|
landlord_up_num_cards_left_batch,
|
||||||
|
landlord_front_num_cards_left_batch,
|
||||||
landlord_down_num_cards_left_batch,
|
landlord_down_num_cards_left_batch,
|
||||||
bomb_num_batch,
|
bomb_num_batch,
|
||||||
my_action_batch))
|
my_action_batch))
|
||||||
|
@ -578,23 +615,25 @@ def _get_obs_landlord(infoset):
|
||||||
other_handcards,
|
other_handcards,
|
||||||
last_action,
|
last_action,
|
||||||
landlord_up_played_cards,
|
landlord_up_played_cards,
|
||||||
|
landlord_front_played_cards,
|
||||||
landlord_down_played_cards,
|
landlord_down_played_cards,
|
||||||
landlord_up_num_cards_left,
|
landlord_up_num_cards_left,
|
||||||
|
landlord_front_num_cards_left,
|
||||||
landlord_down_num_cards_left,
|
landlord_down_num_cards_left,
|
||||||
bomb_num))
|
bomb_num))
|
||||||
z = _action_seq_list2array(_process_action_seq(
|
z = _action_seq_list2array(_process_action_seq(
|
||||||
infoset.card_play_action_seq, 15, False), False)
|
infoset.card_play_action_seq, 20, False), False)
|
||||||
z_batch = np.repeat(
|
z_batch = np.repeat(
|
||||||
z[np.newaxis, :, :],
|
z[np.newaxis, :, :],
|
||||||
num_legal_actions, axis=0)
|
num_legal_actions, axis=0)
|
||||||
obs = {
|
obs = {
|
||||||
'position': 'landlord',
|
'position': 'landlord',
|
||||||
'x_batch': x_batch.astype(np.float32),
|
'x_batch': x_batch.astype(np.float32),
|
||||||
'z_batch': z_batch.astype(np.float32),
|
'z_batch': z_batch.astype(np.float32),
|
||||||
'legal_actions': infoset.legal_actions,
|
'legal_actions': infoset.legal_actions,
|
||||||
'x_no_action': x_no_action.astype(np.int8),
|
'x_no_action': x_no_action.astype(np.int8),
|
||||||
'z': z.astype(np.int8),
|
'z': z.astype(np.int8),
|
||||||
}
|
}
|
||||||
return obs
|
return obs
|
||||||
|
|
||||||
def _get_obs_landlord_up(infoset):
|
def _get_obs_landlord_up(infoset):
|
||||||
|
@ -625,7 +664,7 @@ def _get_obs_landlord_up(infoset):
|
||||||
last_landlord_action[np.newaxis, :],
|
last_landlord_action[np.newaxis, :],
|
||||||
num_legal_actions, axis=0)
|
num_legal_actions, axis=0)
|
||||||
landlord_num_cards_left = _get_one_hot_array(
|
landlord_num_cards_left = _get_one_hot_array(
|
||||||
infoset.num_cards_left_dict['landlord'], 20)
|
infoset.num_cards_left_dict['landlord'], 33)
|
||||||
landlord_num_cards_left_batch = np.repeat(
|
landlord_num_cards_left_batch = np.repeat(
|
||||||
landlord_num_cards_left[np.newaxis, :],
|
landlord_num_cards_left[np.newaxis, :],
|
||||||
num_legal_actions, axis=0)
|
num_legal_actions, axis=0)
|
||||||
|
@ -642,7 +681,7 @@ def _get_obs_landlord_up(infoset):
|
||||||
last_teammate_action[np.newaxis, :],
|
last_teammate_action[np.newaxis, :],
|
||||||
num_legal_actions, axis=0)
|
num_legal_actions, axis=0)
|
||||||
teammate_num_cards_left = _get_one_hot_array(
|
teammate_num_cards_left = _get_one_hot_array(
|
||||||
infoset.num_cards_left_dict['landlord_down'], 17)
|
infoset.num_cards_left_dict['landlord_down'], 25)
|
||||||
teammate_num_cards_left_batch = np.repeat(
|
teammate_num_cards_left_batch = np.repeat(
|
||||||
teammate_num_cards_left[np.newaxis, :],
|
teammate_num_cards_left[np.newaxis, :],
|
||||||
num_legal_actions, axis=0)
|
num_legal_actions, axis=0)
|
||||||
|
@ -653,6 +692,144 @@ def _get_obs_landlord_up(infoset):
|
||||||
teammate_played_cards[np.newaxis, :],
|
teammate_played_cards[np.newaxis, :],
|
||||||
num_legal_actions, axis=0)
|
num_legal_actions, axis=0)
|
||||||
|
|
||||||
|
last_teammate_front_action = _cards2array(
|
||||||
|
infoset.last_move_dict['landlord_front'])
|
||||||
|
last_teammate_front_action_batch = np.repeat(
|
||||||
|
last_teammate_front_action[np.newaxis, :],
|
||||||
|
num_legal_actions, axis=0)
|
||||||
|
teammate_front_num_cards_left = _get_one_hot_array(
|
||||||
|
infoset.num_cards_left_dict['landlord_front'], 25)
|
||||||
|
teammate_front_num_cards_left_batch = np.repeat(
|
||||||
|
teammate_front_num_cards_left[np.newaxis, :],
|
||||||
|
num_legal_actions, axis=0)
|
||||||
|
|
||||||
|
teammate_front_played_cards = _cards2array(
|
||||||
|
infoset.played_cards['landlord_front'])
|
||||||
|
teammate_front_played_cards_batch = np.repeat(
|
||||||
|
teammate_front_played_cards[np.newaxis, :],
|
||||||
|
num_legal_actions, axis=0)
|
||||||
|
|
||||||
|
bomb_num = _get_one_hot_bomb(
|
||||||
|
infoset.bomb_num)
|
||||||
|
bomb_num_batch = np.repeat(
|
||||||
|
bomb_num[np.newaxis, :],
|
||||||
|
num_legal_actions, axis=0)
|
||||||
|
|
||||||
|
x_batch = np.hstack((my_handcards_batch,
|
||||||
|
other_handcards_batch,
|
||||||
|
landlord_played_cards_batch,
|
||||||
|
teammate_played_cards_batch,
|
||||||
|
teammate_front_played_cards_batch,
|
||||||
|
last_action_batch,
|
||||||
|
last_landlord_action_batch,
|
||||||
|
last_teammate_action_batch,
|
||||||
|
last_teammate_front_action_batch,
|
||||||
|
landlord_num_cards_left_batch,
|
||||||
|
teammate_num_cards_left_batch,
|
||||||
|
teammate_front_num_cards_left_batch,
|
||||||
|
bomb_num_batch,
|
||||||
|
my_action_batch))
|
||||||
|
x_no_action = np.hstack((my_handcards,
|
||||||
|
other_handcards,
|
||||||
|
landlord_played_cards,
|
||||||
|
teammate_played_cards,
|
||||||
|
teammate_front_played_cards,
|
||||||
|
last_action,
|
||||||
|
last_landlord_action,
|
||||||
|
last_teammate_action,
|
||||||
|
last_teammate_front_action,
|
||||||
|
landlord_num_cards_left,
|
||||||
|
teammate_num_cards_left,
|
||||||
|
teammate_front_num_cards_left,
|
||||||
|
bomb_num))
|
||||||
|
z = _action_seq_list2array(_process_action_seq(
|
||||||
|
infoset.card_play_action_seq, 20, False), False)
|
||||||
|
z_batch = np.repeat(
|
||||||
|
z[np.newaxis, :, :],
|
||||||
|
num_legal_actions, axis=0)
|
||||||
|
obs = {
|
||||||
|
'position': 'landlord_up',
|
||||||
|
'x_batch': x_batch.astype(np.float32),
|
||||||
|
'z_batch': z_batch.astype(np.float32),
|
||||||
|
'legal_actions': infoset.legal_actions,
|
||||||
|
'x_no_action': x_no_action.astype(np.int8),
|
||||||
|
'z': z.astype(np.int8),
|
||||||
|
}
|
||||||
|
return obs
|
||||||
|
|
||||||
|
def _get_obs_landlord_front(infoset):
|
||||||
|
"""
|
||||||
|
Obttain the landlord_front features. See Table 5 in
|
||||||
|
https://arxiv.org/pdf/2106.06135.pdf
|
||||||
|
"""
|
||||||
|
num_legal_actions = len(infoset.legal_actions)
|
||||||
|
my_handcards = _cards2array(infoset.player_hand_cards)
|
||||||
|
my_handcards_batch = np.repeat(my_handcards[np.newaxis, :],
|
||||||
|
num_legal_actions, axis=0)
|
||||||
|
|
||||||
|
other_handcards = _cards2array(infoset.other_hand_cards)
|
||||||
|
other_handcards_batch = np.repeat(other_handcards[np.newaxis, :],
|
||||||
|
num_legal_actions, axis=0)
|
||||||
|
|
||||||
|
last_action = _cards2array(infoset.last_move)
|
||||||
|
last_action_batch = np.repeat(last_action[np.newaxis, :],
|
||||||
|
num_legal_actions, axis=0)
|
||||||
|
|
||||||
|
my_action_batch = np.zeros(my_handcards_batch.shape)
|
||||||
|
for j, action in enumerate(infoset.legal_actions):
|
||||||
|
my_action_batch[j, :] = _cards2array(action)
|
||||||
|
|
||||||
|
last_landlord_action = _cards2array(
|
||||||
|
infoset.last_move_dict['landlord'])
|
||||||
|
last_landlord_action_batch = np.repeat(
|
||||||
|
last_landlord_action[np.newaxis, :],
|
||||||
|
num_legal_actions, axis=0)
|
||||||
|
landlord_num_cards_left = _get_one_hot_array(
|
||||||
|
infoset.num_cards_left_dict['landlord'], 33)
|
||||||
|
landlord_num_cards_left_batch = np.repeat(
|
||||||
|
landlord_num_cards_left[np.newaxis, :],
|
||||||
|
num_legal_actions, axis=0)
|
||||||
|
|
||||||
|
landlord_played_cards = _cards2array(
|
||||||
|
infoset.played_cards['landlord'])
|
||||||
|
landlord_played_cards_batch = np.repeat(
|
||||||
|
landlord_played_cards[np.newaxis, :],
|
||||||
|
num_legal_actions, axis=0)
|
||||||
|
|
||||||
|
last_teammate_action = _cards2array(
|
||||||
|
infoset.last_move_dict['landlord_down'])
|
||||||
|
last_teammate_action_batch = np.repeat(
|
||||||
|
last_teammate_action[np.newaxis, :],
|
||||||
|
num_legal_actions, axis=0)
|
||||||
|
teammate_num_cards_left = _get_one_hot_array(
|
||||||
|
infoset.num_cards_left_dict['landlord_down'], 25)
|
||||||
|
teammate_num_cards_left_batch = np.repeat(
|
||||||
|
teammate_num_cards_left[np.newaxis, :],
|
||||||
|
num_legal_actions, axis=0)
|
||||||
|
|
||||||
|
teammate_played_cards = _cards2array(
|
||||||
|
infoset.played_cards['landlord_down'])
|
||||||
|
teammate_played_cards_batch = np.repeat(
|
||||||
|
teammate_played_cards[np.newaxis, :],
|
||||||
|
num_legal_actions, axis=0)
|
||||||
|
|
||||||
|
last_teammate_front_action = _cards2array(
|
||||||
|
infoset.last_move_dict['landlord_front'])
|
||||||
|
last_teammate_front_action_batch = np.repeat(
|
||||||
|
last_teammate_front_action[np.newaxis, :],
|
||||||
|
num_legal_actions, axis=0)
|
||||||
|
teammate_front_num_cards_left = _get_one_hot_array(
|
||||||
|
infoset.num_cards_left_dict['landlord_front'], 25)
|
||||||
|
teammate_front_num_cards_left_batch = np.repeat(
|
||||||
|
teammate_front_num_cards_left[np.newaxis, :],
|
||||||
|
num_legal_actions, axis=0)
|
||||||
|
|
||||||
|
teammate_front_played_cards = _cards2array(
|
||||||
|
infoset.played_cards['landlord_front'])
|
||||||
|
teammate_front_played_cards_batch = np.repeat(
|
||||||
|
teammate_played_cards[np.newaxis, :],
|
||||||
|
num_legal_actions, axis=0)
|
||||||
|
|
||||||
bomb_num = _get_one_hot_bomb(
|
bomb_num = _get_one_hot_bomb(
|
||||||
infoset.bomb_num)
|
infoset.bomb_num)
|
||||||
bomb_num_batch = np.repeat(
|
bomb_num_batch = np.repeat(
|
||||||
|
@ -663,36 +840,42 @@ def _get_obs_landlord_up(infoset):
|
||||||
other_handcards_batch,
|
other_handcards_batch,
|
||||||
landlord_played_cards_batch,
|
landlord_played_cards_batch,
|
||||||
teammate_played_cards_batch,
|
teammate_played_cards_batch,
|
||||||
|
teammate_front_played_cards_batch,
|
||||||
last_action_batch,
|
last_action_batch,
|
||||||
last_landlord_action_batch,
|
last_landlord_action_batch,
|
||||||
last_teammate_action_batch,
|
last_teammate_action_batch,
|
||||||
|
last_teammate_front_action_batch,
|
||||||
landlord_num_cards_left_batch,
|
landlord_num_cards_left_batch,
|
||||||
teammate_num_cards_left_batch,
|
teammate_num_cards_left_batch,
|
||||||
|
teammate_front_num_cards_left_batch,
|
||||||
bomb_num_batch,
|
bomb_num_batch,
|
||||||
my_action_batch))
|
my_action_batch))
|
||||||
x_no_action = np.hstack((my_handcards,
|
x_no_action = np.hstack((my_handcards,
|
||||||
other_handcards,
|
other_handcards,
|
||||||
landlord_played_cards,
|
landlord_played_cards,
|
||||||
teammate_played_cards,
|
teammate_played_cards,
|
||||||
|
teammate_front_played_cards,
|
||||||
last_action,
|
last_action,
|
||||||
last_landlord_action,
|
last_landlord_action,
|
||||||
last_teammate_action,
|
last_teammate_action,
|
||||||
|
last_teammate_front_action,
|
||||||
landlord_num_cards_left,
|
landlord_num_cards_left,
|
||||||
teammate_num_cards_left,
|
teammate_num_cards_left,
|
||||||
|
teammate_front_num_cards_left,
|
||||||
bomb_num))
|
bomb_num))
|
||||||
z = _action_seq_list2array(_process_action_seq(
|
z = _action_seq_list2array(_process_action_seq(
|
||||||
infoset.card_play_action_seq, 15, False), False)
|
infoset.card_play_action_seq, 20, False), False)
|
||||||
z_batch = np.repeat(
|
z_batch = np.repeat(
|
||||||
z[np.newaxis, :, :],
|
z[np.newaxis, :, :],
|
||||||
num_legal_actions, axis=0)
|
num_legal_actions, axis=0)
|
||||||
obs = {
|
obs = {
|
||||||
'position': 'landlord_up',
|
'position': 'landlord_front',
|
||||||
'x_batch': x_batch.astype(np.float32),
|
'x_batch': x_batch.astype(np.float32),
|
||||||
'z_batch': z_batch.astype(np.float32),
|
'z_batch': z_batch.astype(np.float32),
|
||||||
'legal_actions': infoset.legal_actions,
|
'legal_actions': infoset.legal_actions,
|
||||||
'x_no_action': x_no_action.astype(np.int8),
|
'x_no_action': x_no_action.astype(np.int8),
|
||||||
'z': z.astype(np.int8),
|
'z': z.astype(np.int8),
|
||||||
}
|
}
|
||||||
return obs
|
return obs
|
||||||
|
|
||||||
def _get_obs_landlord_down(infoset):
|
def _get_obs_landlord_down(infoset):
|
||||||
|
@ -723,7 +906,7 @@ def _get_obs_landlord_down(infoset):
|
||||||
last_landlord_action[np.newaxis, :],
|
last_landlord_action[np.newaxis, :],
|
||||||
num_legal_actions, axis=0)
|
num_legal_actions, axis=0)
|
||||||
landlord_num_cards_left = _get_one_hot_array(
|
landlord_num_cards_left = _get_one_hot_array(
|
||||||
infoset.num_cards_left_dict['landlord'], 20)
|
infoset.num_cards_left_dict['landlord'], 33)
|
||||||
landlord_num_cards_left_batch = np.repeat(
|
landlord_num_cards_left_batch = np.repeat(
|
||||||
landlord_num_cards_left[np.newaxis, :],
|
landlord_num_cards_left[np.newaxis, :],
|
||||||
num_legal_actions, axis=0)
|
num_legal_actions, axis=0)
|
||||||
|
@ -740,7 +923,7 @@ def _get_obs_landlord_down(infoset):
|
||||||
last_teammate_action[np.newaxis, :],
|
last_teammate_action[np.newaxis, :],
|
||||||
num_legal_actions, axis=0)
|
num_legal_actions, axis=0)
|
||||||
teammate_num_cards_left = _get_one_hot_array(
|
teammate_num_cards_left = _get_one_hot_array(
|
||||||
infoset.num_cards_left_dict['landlord_up'], 17)
|
infoset.num_cards_left_dict['landlord_up'], 25)
|
||||||
teammate_num_cards_left_batch = np.repeat(
|
teammate_num_cards_left_batch = np.repeat(
|
||||||
teammate_num_cards_left[np.newaxis, :],
|
teammate_num_cards_left[np.newaxis, :],
|
||||||
num_legal_actions, axis=0)
|
num_legal_actions, axis=0)
|
||||||
|
@ -751,10 +934,21 @@ def _get_obs_landlord_down(infoset):
|
||||||
teammate_played_cards[np.newaxis, :],
|
teammate_played_cards[np.newaxis, :],
|
||||||
num_legal_actions, axis=0)
|
num_legal_actions, axis=0)
|
||||||
|
|
||||||
landlord_played_cards = _cards2array(
|
last_teammate_front_action = _cards2array(
|
||||||
infoset.played_cards['landlord'])
|
infoset.last_move_dict['landlord_front'])
|
||||||
landlord_played_cards_batch = np.repeat(
|
last_teammate_front_action_batch = np.repeat(
|
||||||
landlord_played_cards[np.newaxis, :],
|
last_teammate_front_action[np.newaxis, :],
|
||||||
|
num_legal_actions, axis=0)
|
||||||
|
teammate_front_num_cards_left = _get_one_hot_array(
|
||||||
|
infoset.num_cards_left_dict['landlord_front'], 25)
|
||||||
|
teammate_front_num_cards_left_batch = np.repeat(
|
||||||
|
teammate_front_num_cards_left[np.newaxis, :],
|
||||||
|
num_legal_actions, axis=0)
|
||||||
|
|
||||||
|
teammate_front_played_cards = _cards2array(
|
||||||
|
infoset.played_cards['landlord_front'])
|
||||||
|
teammate_front_played_cards_batch = np.repeat(
|
||||||
|
teammate_front_played_cards[np.newaxis, :],
|
||||||
num_legal_actions, axis=0)
|
num_legal_actions, axis=0)
|
||||||
|
|
||||||
bomb_num = _get_one_hot_bomb(
|
bomb_num = _get_one_hot_bomb(
|
||||||
|
@ -767,36 +961,42 @@ def _get_obs_landlord_down(infoset):
|
||||||
other_handcards_batch,
|
other_handcards_batch,
|
||||||
landlord_played_cards_batch,
|
landlord_played_cards_batch,
|
||||||
teammate_played_cards_batch,
|
teammate_played_cards_batch,
|
||||||
|
teammate_front_played_cards_batch,
|
||||||
last_action_batch,
|
last_action_batch,
|
||||||
last_landlord_action_batch,
|
last_landlord_action_batch,
|
||||||
last_teammate_action_batch,
|
last_teammate_action_batch,
|
||||||
|
last_teammate_front_action_batch,
|
||||||
landlord_num_cards_left_batch,
|
landlord_num_cards_left_batch,
|
||||||
teammate_num_cards_left_batch,
|
teammate_num_cards_left_batch,
|
||||||
|
teammate_front_num_cards_left_batch,
|
||||||
bomb_num_batch,
|
bomb_num_batch,
|
||||||
my_action_batch))
|
my_action_batch))
|
||||||
x_no_action = np.hstack((my_handcards,
|
x_no_action = np.hstack((my_handcards,
|
||||||
other_handcards,
|
other_handcards,
|
||||||
landlord_played_cards,
|
landlord_played_cards,
|
||||||
teammate_played_cards,
|
teammate_played_cards,
|
||||||
|
teammate_front_played_cards,
|
||||||
last_action,
|
last_action,
|
||||||
last_landlord_action,
|
last_landlord_action,
|
||||||
last_teammate_action,
|
last_teammate_action,
|
||||||
|
last_teammate_front_action,
|
||||||
landlord_num_cards_left,
|
landlord_num_cards_left,
|
||||||
teammate_num_cards_left,
|
teammate_num_cards_left,
|
||||||
|
teammate_front_num_cards_left,
|
||||||
bomb_num))
|
bomb_num))
|
||||||
z = _action_seq_list2array(_process_action_seq(
|
z = _action_seq_list2array(_process_action_seq(
|
||||||
infoset.card_play_action_seq, 15, False), False)
|
infoset.card_play_action_seq, 20, False), False)
|
||||||
z_batch = np.repeat(
|
z_batch = np.repeat(
|
||||||
z[np.newaxis, :, :],
|
z[np.newaxis, :, :],
|
||||||
num_legal_actions, axis=0)
|
num_legal_actions, axis=0)
|
||||||
obs = {
|
obs = {
|
||||||
'position': 'landlord_down',
|
'position': 'landlord_down',
|
||||||
'x_batch': x_batch.astype(np.float32),
|
'x_batch': x_batch.astype(np.float32),
|
||||||
'z_batch': z_batch.astype(np.float32),
|
'z_batch': z_batch.astype(np.float32),
|
||||||
'legal_actions': infoset.legal_actions,
|
'legal_actions': infoset.legal_actions,
|
||||||
'x_no_action': x_no_action.astype(np.int8),
|
'x_no_action': x_no_action.astype(np.int8),
|
||||||
'z': z.astype(np.int8),
|
'z': z.astype(np.int8),
|
||||||
}
|
}
|
||||||
return obs
|
return obs
|
||||||
|
|
||||||
def _get_obs_landlord_withbid(infoset):
|
def _get_obs_landlord_withbid(infoset):
|
||||||
|
@ -869,7 +1069,7 @@ def _get_obs_landlord_withbid(infoset):
|
||||||
landlord_down_num_cards_left,
|
landlord_down_num_cards_left,
|
||||||
bomb_num))
|
bomb_num))
|
||||||
z = _action_seq_list2array(_process_action_seq(
|
z = _action_seq_list2array(_process_action_seq(
|
||||||
infoset.card_play_action_seq, 15, False), False)
|
infoset.card_play_action_seq, 20, False), False)
|
||||||
z_batch = np.repeat(
|
z_batch = np.repeat(
|
||||||
z[np.newaxis, :, :],
|
z[np.newaxis, :, :],
|
||||||
num_legal_actions, axis=0)
|
num_legal_actions, axis=0)
|
||||||
|
@ -970,21 +1170,21 @@ def _get_obs_general1(infoset, position):
|
||||||
bomb_num[np.newaxis, :],
|
bomb_num[np.newaxis, :],
|
||||||
num_legal_actions, axis=0)
|
num_legal_actions, axis=0)
|
||||||
|
|
||||||
x_batch = np.hstack((position_info_batch, # 3
|
x_batch = np.hstack((position_info_batch, # 4
|
||||||
my_handcards_batch, # 54
|
my_handcards_batch, # 108
|
||||||
other_handcards_batch, # 54
|
other_handcards_batch, # 108
|
||||||
three_landlord_cards_batch, # 54
|
three_landlord_cards_batch, # 108
|
||||||
last_action_batch, # 54
|
last_action_batch, # 108
|
||||||
landlord_played_cards_batch, # 54
|
landlord_played_cards_batch, # 108
|
||||||
landlord_up_played_cards_batch, # 54
|
landlord_up_played_cards_batch, # 108
|
||||||
landlord_down_played_cards_batch, # 54
|
landlord_down_played_cards_batch, # 108
|
||||||
landlord_num_cards_left_batch, # 20
|
landlord_num_cards_left_batch, # 20
|
||||||
landlord_up_num_cards_left_batch, # 17
|
landlord_up_num_cards_left_batch, # 17
|
||||||
landlord_down_num_cards_left_batch, # 17
|
landlord_down_num_cards_left_batch, # 17
|
||||||
bomb_num_batch, # 15
|
bomb_num_batch, # 15
|
||||||
bid_info_batch, # 12
|
bid_info_batch, # 12
|
||||||
multiply_info_batch, # 3
|
multiply_info_batch, # 3
|
||||||
my_action_batch)) # 54
|
my_action_batch)) # 108
|
||||||
x_no_action = np.hstack((position_info,
|
x_no_action = np.hstack((position_info,
|
||||||
my_handcards,
|
my_handcards,
|
||||||
other_handcards,
|
other_handcards,
|
||||||
|
@ -1025,9 +1225,10 @@ def _get_obs_general(infoset, position):
|
||||||
num_legal_actions, axis=0)
|
num_legal_actions, axis=0)
|
||||||
|
|
||||||
position_map = {
|
position_map = {
|
||||||
"landlord": [1, 0, 0],
|
"landlord": [1, 0, 0, 0],
|
||||||
"landlord_up": [0, 1, 0],
|
"landlord_up": [0, 1, 0, 0],
|
||||||
"landlord_down": [0, 0, 1]
|
"landlord_front": [0, 0, 1, 0],
|
||||||
|
"landlord_down": [0, 0, 0, 1]
|
||||||
}
|
}
|
||||||
position_info = np.array(position_map[position])
|
position_info = np.array(position_map[position])
|
||||||
position_info_batch = np.repeat(position_info[np.newaxis, :],
|
position_info_batch = np.repeat(position_info[np.newaxis, :],
|
||||||
|
@ -1041,9 +1242,9 @@ def _get_obs_general(infoset, position):
|
||||||
multiply_info_batch = np.repeat(multiply_info[np.newaxis, :],
|
multiply_info_batch = np.repeat(multiply_info[np.newaxis, :],
|
||||||
num_legal_actions, axis=0)
|
num_legal_actions, axis=0)
|
||||||
|
|
||||||
three_landlord_cards = _cards2array(infoset.three_landlord_cards)
|
# three_landlord_cards = _cards2array(infoset.three_landlord_cards)
|
||||||
three_landlord_cards_batch = np.repeat(three_landlord_cards[np.newaxis, :],
|
# three_landlord_cards_batch = np.repeat(three_landlord_cards[np.newaxis, :],
|
||||||
num_legal_actions, axis=0)
|
# num_legal_actions, axis=0)
|
||||||
|
|
||||||
last_action = _cards2array(infoset.last_move)
|
last_action = _cards2array(infoset.last_move)
|
||||||
last_action_batch = np.repeat(last_action[np.newaxis, :],
|
last_action_batch = np.repeat(last_action[np.newaxis, :],
|
||||||
|
@ -1054,25 +1255,31 @@ def _get_obs_general(infoset, position):
|
||||||
my_action_batch[j, :] = _cards2array(action)
|
my_action_batch[j, :] = _cards2array(action)
|
||||||
|
|
||||||
landlord_num_cards_left = _get_one_hot_array(
|
landlord_num_cards_left = _get_one_hot_array(
|
||||||
infoset.num_cards_left_dict['landlord'], 20)
|
infoset.num_cards_left_dict['landlord'], 33)
|
||||||
landlord_num_cards_left_batch = np.repeat(
|
landlord_num_cards_left_batch = np.repeat(
|
||||||
landlord_num_cards_left[np.newaxis, :],
|
landlord_num_cards_left[np.newaxis, :],
|
||||||
num_legal_actions, axis=0)
|
num_legal_actions, axis=0)
|
||||||
|
|
||||||
landlord_up_num_cards_left = _get_one_hot_array(
|
landlord_up_num_cards_left = _get_one_hot_array(
|
||||||
infoset.num_cards_left_dict['landlord_up'], 17)
|
infoset.num_cards_left_dict['landlord_up'], 25)
|
||||||
landlord_up_num_cards_left_batch = np.repeat(
|
landlord_up_num_cards_left_batch = np.repeat(
|
||||||
landlord_up_num_cards_left[np.newaxis, :],
|
landlord_up_num_cards_left[np.newaxis, :],
|
||||||
num_legal_actions, axis=0)
|
num_legal_actions, axis=0)
|
||||||
|
|
||||||
|
landlord_front_num_cards_left = _get_one_hot_array(
|
||||||
|
infoset.num_cards_left_dict['landlord_front'], 25)
|
||||||
|
landlord_front_num_cards_left_batch = np.repeat(
|
||||||
|
landlord_front_num_cards_left[np.newaxis, :],
|
||||||
|
num_legal_actions, axis=0)
|
||||||
|
|
||||||
landlord_down_num_cards_left = _get_one_hot_array(
|
landlord_down_num_cards_left = _get_one_hot_array(
|
||||||
infoset.num_cards_left_dict['landlord_down'], 17)
|
infoset.num_cards_left_dict['landlord_down'], 25)
|
||||||
landlord_down_num_cards_left_batch = np.repeat(
|
landlord_down_num_cards_left_batch = np.repeat(
|
||||||
landlord_down_num_cards_left[np.newaxis, :],
|
landlord_down_num_cards_left[np.newaxis, :],
|
||||||
num_legal_actions, axis=0)
|
num_legal_actions, axis=0)
|
||||||
|
|
||||||
other_handcards_left_list = []
|
other_handcards_left_list = []
|
||||||
for pos in ["landlord", "landlord_up", "landlord_up"]:
|
for pos in ["landlord", "landlord_up", "landlord_front", "landlord_down"]:
|
||||||
if pos != position:
|
if pos != position:
|
||||||
other_handcards_left_list.extend(infoset.all_handcards[pos])
|
other_handcards_left_list.extend(infoset.all_handcards[pos])
|
||||||
|
|
||||||
|
@ -1088,6 +1295,12 @@ def _get_obs_general(infoset, position):
|
||||||
landlord_up_played_cards[np.newaxis, :],
|
landlord_up_played_cards[np.newaxis, :],
|
||||||
num_legal_actions, axis=0)
|
num_legal_actions, axis=0)
|
||||||
|
|
||||||
|
landlord_front_played_cards = _cards2array(
|
||||||
|
infoset.played_cards['landlord_front'])
|
||||||
|
landlord_front_played_cards_batch = np.repeat(
|
||||||
|
landlord_front_played_cards[np.newaxis, :],
|
||||||
|
num_legal_actions, axis=0)
|
||||||
|
|
||||||
landlord_down_played_cards = _cards2array(
|
landlord_down_played_cards = _cards2array(
|
||||||
infoset.played_cards['landlord_down'])
|
infoset.played_cards['landlord_down'])
|
||||||
landlord_down_played_cards_batch = np.repeat(
|
landlord_down_played_cards_batch = np.repeat(
|
||||||
|
@ -1100,24 +1313,26 @@ def _get_obs_general(infoset, position):
|
||||||
bomb_num[np.newaxis, :],
|
bomb_num[np.newaxis, :],
|
||||||
num_legal_actions, axis=0)
|
num_legal_actions, axis=0)
|
||||||
num_cards_left = np.hstack((
|
num_cards_left = np.hstack((
|
||||||
landlord_num_cards_left, # 20
|
landlord_num_cards_left, # 33
|
||||||
landlord_up_num_cards_left, # 17
|
landlord_up_num_cards_left, # 25
|
||||||
|
landlord_front_num_cards_left, # 25
|
||||||
landlord_down_num_cards_left))
|
landlord_down_num_cards_left))
|
||||||
|
|
||||||
x_batch = np.hstack((
|
x_batch = np.hstack((
|
||||||
bid_info_batch, # 12
|
bid_info_batch, # 16
|
||||||
multiply_info_batch)) # 3
|
multiply_info_batch)) # 4
|
||||||
x_no_action = np.hstack((
|
x_no_action = np.hstack((
|
||||||
bid_info,
|
bid_info,
|
||||||
multiply_info))
|
multiply_info))
|
||||||
z =np.vstack((
|
z =np.vstack((
|
||||||
num_cards_left,
|
num_cards_left,
|
||||||
my_handcards, # 54
|
my_handcards, # 108
|
||||||
other_handcards, # 54
|
other_handcards, # 108
|
||||||
three_landlord_cards, # 54
|
# three_landlord_cards, # 108
|
||||||
landlord_played_cards, # 54
|
landlord_played_cards, # 108
|
||||||
landlord_up_played_cards, # 54
|
landlord_up_played_cards, # 108
|
||||||
landlord_down_played_cards, # 54
|
landlord_front_played_cards, # 108
|
||||||
|
landlord_down_played_cards, # 108
|
||||||
_action_seq_list2array(_process_action_seq(infoset.card_play_action_seq, 32))
|
_action_seq_list2array(_process_action_seq(infoset.card_play_action_seq, 32))
|
||||||
))
|
))
|
||||||
|
|
||||||
|
@ -1125,7 +1340,7 @@ def _get_obs_general(infoset, position):
|
||||||
z[np.newaxis, :, :],
|
z[np.newaxis, :, :],
|
||||||
num_legal_actions, axis=0)
|
num_legal_actions, axis=0)
|
||||||
my_action_batch = my_action_batch[:,np.newaxis,:]
|
my_action_batch = my_action_batch[:,np.newaxis,:]
|
||||||
z_batch = np.zeros([len(_z_batch),40,54],int)
|
z_batch = np.zeros([len(_z_batch),40,108],int)
|
||||||
for i in range(0,len(_z_batch)):
|
for i in range(0,len(_z_batch)):
|
||||||
z_batch[i] = np.vstack((my_action_batch[i],_z_batch[i]))
|
z_batch[i] = np.vstack((my_action_batch[i],_z_batch[i]))
|
||||||
obs = {
|
obs = {
|
||||||
|
@ -1139,17 +1354,17 @@ def _get_obs_general(infoset, position):
|
||||||
return obs
|
return obs
|
||||||
|
|
||||||
def gen_bid_legal_actions(player_id, bid_info):
|
def gen_bid_legal_actions(player_id, bid_info):
|
||||||
self_bid_info = bid_info[:, [(player_id - 1) % 3, player_id, (player_id + 1) % 3]]
|
self_bid_info = bid_info[:, [(player_id - 1) % 4, player_id, (player_id + 1) % 4, (player_id + 2) % 4]]
|
||||||
curr_round = -1
|
curr_round = -1
|
||||||
for r in range(4):
|
for r in range(5):
|
||||||
if -1 in self_bid_info[r]:
|
if -1 in self_bid_info[r]:
|
||||||
curr_round = r
|
curr_round = r
|
||||||
break
|
break
|
||||||
bid_actions = []
|
bid_actions = []
|
||||||
if curr_round != -1:
|
if curr_round != -1:
|
||||||
self_bid_info[curr_round] = [0, 0, 0]
|
self_bid_info[curr_round] = [0, 0, 0, 0]
|
||||||
bid_actions.append(np.array(self_bid_info).flatten())
|
bid_actions.append(np.array(self_bid_info).flatten())
|
||||||
self_bid_info[curr_round] = [0, 1, 0]
|
self_bid_info[curr_round] = [0, 1, 0, 0]
|
||||||
bid_actions.append(np.array(self_bid_info).flatten())
|
bid_actions.append(np.array(self_bid_info).flatten())
|
||||||
return np.array(bid_actions)
|
return np.array(bid_actions)
|
||||||
|
|
||||||
|
@ -1273,9 +1488,9 @@ def _get_obs_for_bid_legacy(player_id, bid_info, hand_cards):
|
||||||
return obs
|
return obs
|
||||||
|
|
||||||
def _get_obs_for_bid(player_id, bid_info, hand_cards):
|
def _get_obs_for_bid(player_id, bid_info, hand_cards):
|
||||||
all_cards = [3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
|
# all_cards = [3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
|
||||||
8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, 12,
|
# 8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, 12,
|
||||||
12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 17, 17, 17, 17, 20, 30]
|
# 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 17, 17, 17, 17, 20, 30]
|
||||||
num_legal_actions = 2
|
num_legal_actions = 2
|
||||||
my_handcards = _cards2array(hand_cards)
|
my_handcards = _cards2array(hand_cards)
|
||||||
my_handcards_batch = np.repeat(my_handcards[np.newaxis, :],
|
my_handcards_batch = np.repeat(my_handcards[np.newaxis, :],
|
||||||
|
|
|
@ -11,10 +11,19 @@ RealCard2EnvCard = {'3': 3, '4': 4, '5': 5, '6': 6, '7': 7,
|
||||||
'8': 8, '9': 9, '10': 10, 'J': 11, 'Q': 12,
|
'8': 8, '9': 9, '10': 10, 'J': 11, 'Q': 12,
|
||||||
'K': 13, 'A': 14, '2': 17, 'X': 20, 'D': 30}
|
'K': 13, 'A': 14, '2': 17, 'X': 20, 'D': 30}
|
||||||
|
|
||||||
bombs = [[3, 3, 3, 3], [4, 4, 4, 4], [5, 5, 5, 5], [6, 6, 6, 6],
|
bombs = [
|
||||||
[7, 7, 7, 7], [8, 8, 8, 8], [9, 9, 9, 9], [10, 10, 10, 10],
|
[[3, 3, 3, 3, 3, 3], [4, 4, 4, 4, 4, 4], [5, 5, 5, 5, 5, 5], [6, 6, 6, 6, 6, 6], [7, 7, 7, 7, 7, 7],
|
||||||
[11, 11, 11, 11], [12, 12, 12, 12], [13, 13, 13, 13], [14, 14, 14, 14],
|
[8, 8, 8, 8, 8, 8], [9, 9, 9, 9, 9, 9], [10, 10, 10, 10, 10, 10], [11, 11, 11, 11, 11, 11],
|
||||||
[17, 17, 17, 17], [20, 30]]
|
[12, 12, 12, 12, 12, 12], [13, 13, 13, 13, 13, 13], [14, 14, 14, 14, 14, 14], [17, 17, 17, 17, 17, 17],
|
||||||
|
[3, 3, 3, 3, 3, 3, 3], [4, 4, 4, 4, 4, 4, 4], [5, 5, 5, 5, 5, 5, 5], [6, 6, 6, 6, 6, 6, 6], [7, 7, 7, 7, 7, 7, 7],
|
||||||
|
[8, 8, 8, 8, 8, 8, 8], [9, 9, 9, 9, 9, 9, 9], [10, 10, 10, 10, 10, 10, 10], [11, 11, 11, 11, 11, 11, 11],
|
||||||
|
[12, 12, 12, 12, 12, 12, 12], [13, 13, 13, 13, 13, 13, 13], [14, 14, 14, 14, 14, 14, 14],
|
||||||
|
[17, 17, 17, 17, 17, 17, 17]],
|
||||||
|
[[3, 3, 3, 3, 3, 3, 3, 3], [4, 4, 4, 4, 4, 4, 4, 4], [5, 5, 5, 5, 5, 5, 5, 5], [6, 6, 6, 6, 6, 6, 6, 6],
|
||||||
|
[7, 7, 7, 7, 7, 7, 7, 7], [8, 8, 8, 8, 8, 8, 8, 8], [9, 9, 9, 9, 9, 9, 9, 9], [10, 10, 10, 10, 10, 10, 10, 10],
|
||||||
|
[11, 11, 11, 11, 11, 11, 11, 11], [12, 12, 12, 12, 12, 12, 12, 12], [13, 13, 13, 13, 13, 13, 13, 13],
|
||||||
|
[14, 14, 14, 14, 14, 14, 14, 14], [17, 17, 17, 17, 17, 17, 17, 17],
|
||||||
|
[20, 20, 30, 30]]]
|
||||||
|
|
||||||
class GameEnv(object):
|
class GameEnv(object):
|
||||||
|
|
||||||
|
@ -22,7 +31,7 @@ class GameEnv(object):
|
||||||
|
|
||||||
self.card_play_action_seq = []
|
self.card_play_action_seq = []
|
||||||
|
|
||||||
self.three_landlord_cards = None
|
# self.three_landlord_cards = None
|
||||||
self.game_over = False
|
self.game_over = False
|
||||||
|
|
||||||
self.acting_player_position = None
|
self.acting_player_position = None
|
||||||
|
@ -32,10 +41,12 @@ class GameEnv(object):
|
||||||
|
|
||||||
self.last_move_dict = {'landlord': [],
|
self.last_move_dict = {'landlord': [],
|
||||||
'landlord_up': [],
|
'landlord_up': [],
|
||||||
|
'landlord_front': [],
|
||||||
'landlord_down': []}
|
'landlord_down': []}
|
||||||
|
|
||||||
self.played_cards = {'landlord': [],
|
self.played_cards = {'landlord': [],
|
||||||
'landlord_up': [],
|
'landlord_up': [],
|
||||||
|
'landlord_front': [],
|
||||||
'landlord_down': []}
|
'landlord_down': []}
|
||||||
|
|
||||||
self.last_move = []
|
self.last_move = []
|
||||||
|
@ -48,24 +59,28 @@ class GameEnv(object):
|
||||||
'farmer': 0}
|
'farmer': 0}
|
||||||
|
|
||||||
self.info_sets = {'landlord': InfoSet('landlord'),
|
self.info_sets = {'landlord': InfoSet('landlord'),
|
||||||
'landlord_up': InfoSet('landlord_up'),
|
'landlord_up': InfoSet('landlord_up'),
|
||||||
'landlord_down': InfoSet('landlord_down')}
|
'landlord_front': InfoSet('landlord_front'),
|
||||||
|
'landlord_down': InfoSet('landlord_down')}
|
||||||
|
|
||||||
self.bomb_num = 0
|
self.bomb_num = [0, 0]
|
||||||
self.pos_bomb_num = {
|
self.pos_bomb_num = {
|
||||||
"landlord": 0,
|
"landlord": 0,
|
||||||
"landlord_up": 0,
|
"landlord_up": 0,
|
||||||
|
"landlord_front": 0,
|
||||||
"landlord_down": 0
|
"landlord_down": 0
|
||||||
}
|
}
|
||||||
self.last_pid = 'landlord'
|
self.last_pid = 'landlord'
|
||||||
|
|
||||||
self.bid_info = [[-1, -1, -1],
|
self.bid_info = [[-1, -1, -1, -1],
|
||||||
[-1, -1, -1],
|
[-1, -1, -1, -1],
|
||||||
[-1, -1, -1],
|
[-1, -1, -1, -1],
|
||||||
[-1, -1, -1]]
|
[-1, -1, -1, -1],
|
||||||
|
[-1, -1, -1, -1]]
|
||||||
self.bid_count = 0
|
self.bid_count = 0
|
||||||
self.multiply_count = {'landlord': 0,
|
self.multiply_count = {'landlord': 0,
|
||||||
'landlord_up': 0,
|
'landlord_up': 0,
|
||||||
|
'landlord_front': 0,
|
||||||
'landlord_down': 0}
|
'landlord_down': 0}
|
||||||
self.step_count = 0
|
self.step_count = 0
|
||||||
|
|
||||||
|
@ -75,9 +90,11 @@ class GameEnv(object):
|
||||||
card_play_data['landlord']
|
card_play_data['landlord']
|
||||||
self.info_sets['landlord_up'].player_hand_cards = \
|
self.info_sets['landlord_up'].player_hand_cards = \
|
||||||
card_play_data['landlord_up']
|
card_play_data['landlord_up']
|
||||||
|
self.info_sets['landlord_front'].player_hand_cards = \
|
||||||
|
card_play_data['landlord_front']
|
||||||
self.info_sets['landlord_down'].player_hand_cards = \
|
self.info_sets['landlord_down'].player_hand_cards = \
|
||||||
card_play_data['landlord_down']
|
card_play_data['landlord_down']
|
||||||
self.three_landlord_cards = card_play_data['three_landlord_cards']
|
# self.three_landlord_cards = card_play_data['three_landlord_cards']
|
||||||
self.get_acting_player_position()
|
self.get_acting_player_position()
|
||||||
self.game_infoset = self.get_infoset()
|
self.game_infoset = self.get_infoset()
|
||||||
|
|
||||||
|
@ -85,6 +102,7 @@ class GameEnv(object):
|
||||||
def game_done(self):
|
def game_done(self):
|
||||||
if len(self.info_sets['landlord'].player_hand_cards) == 0 or \
|
if len(self.info_sets['landlord'].player_hand_cards) == 0 or \
|
||||||
len(self.info_sets['landlord_up'].player_hand_cards) == 0 or \
|
len(self.info_sets['landlord_up'].player_hand_cards) == 0 or \
|
||||||
|
len(self.info_sets['landlord_front'].player_hand_cards) == 0 or \
|
||||||
len(self.info_sets['landlord_down'].player_hand_cards) == 0:
|
len(self.info_sets['landlord_down'].player_hand_cards) == 0:
|
||||||
# if one of the three players discards his hand,
|
# if one of the three players discards his hand,
|
||||||
# then game is over.
|
# then game is over.
|
||||||
|
@ -96,21 +114,21 @@ class GameEnv(object):
|
||||||
def compute_player_utility(self):
|
def compute_player_utility(self):
|
||||||
|
|
||||||
if len(self.info_sets['landlord'].player_hand_cards) == 0:
|
if len(self.info_sets['landlord'].player_hand_cards) == 0:
|
||||||
self.player_utility_dict = {'landlord': 2,
|
self.player_utility_dict = {'landlord': 3,
|
||||||
'farmer': -1}
|
'farmer': -1}
|
||||||
else:
|
else:
|
||||||
self.player_utility_dict = {'landlord': -2,
|
self.player_utility_dict = {'landlord': -3,
|
||||||
'farmer': 1}
|
'farmer': 1}
|
||||||
|
|
||||||
def update_num_wins_scores(self):
|
def update_num_wins_scores(self):
|
||||||
for pos, utility in self.player_utility_dict.items():
|
for pos, utility in self.player_utility_dict.items():
|
||||||
base_score = 2 if pos == 'landlord' else 1
|
base_score = 3 if pos == 'landlord' else 1
|
||||||
if utility > 0:
|
if utility > 0:
|
||||||
self.num_wins[pos] += 1
|
self.num_wins[pos] += 1
|
||||||
self.winner = pos
|
self.winner = pos
|
||||||
self.num_scores[pos] += base_score * (2 ** self.bomb_num)
|
self.num_scores[pos] += base_score * (2 ** self.bomb_num[0]) * (3 ** self.bomb_num[1])
|
||||||
else:
|
else:
|
||||||
self.num_scores[pos] -= base_score * (2 ** self.bomb_num)
|
self.num_scores[pos] -= base_score * (2 ** self.bomb_num[0]) * (3 ** self.bomb_num[1])
|
||||||
|
|
||||||
def get_winner(self):
|
def get_winner(self):
|
||||||
return self.winner
|
return self.winner
|
||||||
|
@ -121,12 +139,17 @@ class GameEnv(object):
|
||||||
def step(self):
|
def step(self):
|
||||||
action = self.players[self.acting_player_position].act(
|
action = self.players[self.acting_player_position].act(
|
||||||
self.game_infoset)
|
self.game_infoset)
|
||||||
self.step_count += 1
|
assert action in self.game_infoset.legal_actions
|
||||||
|
|
||||||
if len(action) > 0:
|
if len(action) > 0:
|
||||||
self.last_pid = self.acting_player_position
|
self.last_pid = self.acting_player_position
|
||||||
|
|
||||||
if action in bombs:
|
if action in bombs[0]:
|
||||||
self.bomb_num += 1
|
self.bomb_num[0] += 1
|
||||||
|
self.pos_bomb_num[self.acting_player_position] += 1
|
||||||
|
|
||||||
|
if action in bombs[1]:
|
||||||
|
self.bomb_num[1] += 1
|
||||||
self.pos_bomb_num[self.acting_player_position] += 1
|
self.pos_bomb_num[self.acting_player_position] += 1
|
||||||
|
|
||||||
self.last_move_dict[
|
self.last_move_dict[
|
||||||
|
@ -137,15 +160,15 @@ class GameEnv(object):
|
||||||
|
|
||||||
self.played_cards[self.acting_player_position] += action
|
self.played_cards[self.acting_player_position] += action
|
||||||
|
|
||||||
if self.acting_player_position == 'landlord' and \
|
# if self.acting_player_position == 'landlord' and \
|
||||||
len(action) > 0 and \
|
# len(action) > 0 and \
|
||||||
len(self.three_landlord_cards) > 0:
|
# len(self.three_landlord_cards) > 0:
|
||||||
for card in action:
|
# for card in action:
|
||||||
if len(self.three_landlord_cards) > 0:
|
# if len(self.three_landlord_cards) > 0:
|
||||||
if card in self.three_landlord_cards:
|
# if card in self.three_landlord_cards:
|
||||||
self.three_landlord_cards.remove(card)
|
# self.three_landlord_cards.remove(card)
|
||||||
else:
|
# else:
|
||||||
break
|
# break
|
||||||
|
|
||||||
self.game_done()
|
self.game_done()
|
||||||
if not self.game_over:
|
if not self.game_over:
|
||||||
|
@ -156,7 +179,7 @@ class GameEnv(object):
|
||||||
def get_last_move(self):
|
def get_last_move(self):
|
||||||
last_move = []
|
last_move = []
|
||||||
if len(self.card_play_action_seq) != 0:
|
if len(self.card_play_action_seq) != 0:
|
||||||
if len(self.card_play_action_seq[-1][1]) == 0:
|
if len(self.card_play_action_seq[-1]) == 0:
|
||||||
last_move = self.card_play_action_seq[-2][1]
|
last_move = self.card_play_action_seq[-2][1]
|
||||||
else:
|
else:
|
||||||
last_move = self.card_play_action_seq[-1][1]
|
last_move = self.card_play_action_seq[-1][1]
|
||||||
|
@ -166,7 +189,7 @@ class GameEnv(object):
|
||||||
def get_last_two_moves(self):
|
def get_last_two_moves(self):
|
||||||
last_two_moves = [[], []]
|
last_two_moves = [[], []]
|
||||||
for card in self.card_play_action_seq[-2:]:
|
for card in self.card_play_action_seq[-2:]:
|
||||||
last_two_moves.insert(0, card[1])
|
last_two_moves.insert(0, card)
|
||||||
last_two_moves = last_two_moves[:2]
|
last_two_moves = last_two_moves[:2]
|
||||||
return last_two_moves
|
return last_two_moves
|
||||||
|
|
||||||
|
@ -179,6 +202,9 @@ class GameEnv(object):
|
||||||
self.acting_player_position = 'landlord_down'
|
self.acting_player_position = 'landlord_down'
|
||||||
|
|
||||||
elif self.acting_player_position == 'landlord_down':
|
elif self.acting_player_position == 'landlord_down':
|
||||||
|
self.acting_player_position = 'landlord_front'
|
||||||
|
|
||||||
|
elif self.acting_player_position == 'landlord_front':
|
||||||
self.acting_player_position = 'landlord_up'
|
self.acting_player_position = 'landlord_up'
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
@ -202,7 +228,10 @@ class GameEnv(object):
|
||||||
rival_move = []
|
rival_move = []
|
||||||
if len(action_sequence) != 0:
|
if len(action_sequence) != 0:
|
||||||
if len(action_sequence[-1][1]) == 0:
|
if len(action_sequence[-1][1]) == 0:
|
||||||
rival_move = action_sequence[-2][1]
|
if len(action_sequence[-2][1]) == 0:
|
||||||
|
rival_move = action_sequence[-3][1]
|
||||||
|
else:
|
||||||
|
rival_move = action_sequence[-2][1]
|
||||||
else:
|
else:
|
||||||
rival_move = action_sequence[-1][1]
|
rival_move = action_sequence[-1][1]
|
||||||
|
|
||||||
|
@ -227,15 +256,36 @@ class GameEnv(object):
|
||||||
moves = ms.filter_type_3_triple(all_moves, rival_move)
|
moves = ms.filter_type_3_triple(all_moves, rival_move)
|
||||||
|
|
||||||
elif rival_move_type == md.TYPE_4_BOMB:
|
elif rival_move_type == md.TYPE_4_BOMB:
|
||||||
all_moves = mg.gen_type_4_bomb() + mg.gen_type_5_king_bomb()
|
all_moves = mg.gen_type_4_bomb(4)
|
||||||
moves = ms.filter_type_4_bomb(all_moves, rival_move)
|
moves = ms.filter_type_4_bomb(all_moves, rival_move)
|
||||||
|
all_moves += mg.gen_type_4_bomb(5) + mg.gen_type_4_bomb(6) + mg.gen_type_4_bomb(7) + mg.gen_type_4_bomb(8) + mg.gen_type_5_king_bomb()
|
||||||
|
|
||||||
|
elif rival_move_type == md.TYPE_4_BOMB5:
|
||||||
|
all_moves = mg.gen_type_4_bomb(5)
|
||||||
|
moves = ms.filter_type_4_bomb(all_moves, rival_move)
|
||||||
|
all_moves += mg.gen_type_4_bomb(6) + mg.gen_type_4_bomb(7) + mg.gen_type_4_bomb(8) + mg.gen_type_5_king_bomb()
|
||||||
|
|
||||||
|
elif rival_move_type == md.TYPE_4_BOMB6:
|
||||||
|
all_moves = mg.gen_type_4_bomb(6)
|
||||||
|
moves = ms.filter_type_4_bomb(all_moves, rival_move)
|
||||||
|
all_moves += mg.gen_type_4_bomb(7) + mg.gen_type_4_bomb(8) + mg.gen_type_5_king_bomb()
|
||||||
|
|
||||||
|
elif rival_move_type == md.TYPE_4_BOMB7:
|
||||||
|
all_moves = mg.gen_type_4_bomb(7)
|
||||||
|
moves = ms.filter_type_4_bomb(all_moves, rival_move)
|
||||||
|
all_moves += mg.gen_type_4_bomb(8) + mg.gen_type_5_king_bomb()
|
||||||
|
|
||||||
|
elif rival_move_type == md.TYPE_4_BOMB8:
|
||||||
|
all_moves = mg.gen_type_4_bomb(8)
|
||||||
|
moves = ms.filter_type_4_bomb(all_moves, rival_move)
|
||||||
|
all_moves += mg.gen_type_5_king_bomb()
|
||||||
|
|
||||||
elif rival_move_type == md.TYPE_5_KING_BOMB:
|
elif rival_move_type == md.TYPE_5_KING_BOMB:
|
||||||
moves = []
|
moves = []
|
||||||
|
|
||||||
elif rival_move_type == md.TYPE_6_3_1:
|
# elif rival_move_type == md.TYPE_6_3_1:
|
||||||
all_moves = mg.gen_type_6_3_1()
|
# all_moves = mg.gen_type_6_3_1()
|
||||||
moves = ms.filter_type_6_3_1(all_moves, rival_move)
|
# moves = ms.filter_type_6_3_1(all_moves, rival_move)
|
||||||
|
|
||||||
elif rival_move_type == md.TYPE_7_3_2:
|
elif rival_move_type == md.TYPE_7_3_2:
|
||||||
all_moves = mg.gen_type_7_3_2()
|
all_moves = mg.gen_type_7_3_2()
|
||||||
|
@ -253,25 +303,24 @@ class GameEnv(object):
|
||||||
all_moves = mg.gen_type_10_serial_triple(repeat_num=rival_move_len)
|
all_moves = mg.gen_type_10_serial_triple(repeat_num=rival_move_len)
|
||||||
moves = ms.filter_type_10_serial_triple(all_moves, rival_move)
|
moves = ms.filter_type_10_serial_triple(all_moves, rival_move)
|
||||||
|
|
||||||
elif rival_move_type == md.TYPE_11_SERIAL_3_1:
|
# elif rival_move_type == md.TYPE_11_SERIAL_3_1:
|
||||||
all_moves = mg.gen_type_11_serial_3_1(repeat_num=rival_move_len)
|
# all_moves = mg.gen_type_11_serial_3_1(repeat_num=rival_move_len)
|
||||||
moves = ms.filter_type_11_serial_3_1(all_moves, rival_move)
|
# moves = ms.filter_type_11_serial_3_1(all_moves, rival_move)
|
||||||
|
|
||||||
elif rival_move_type == md.TYPE_12_SERIAL_3_2:
|
elif rival_move_type == md.TYPE_12_SERIAL_3_2:
|
||||||
all_moves = mg.gen_type_12_serial_3_2(repeat_num=rival_move_len)
|
all_moves = mg.gen_type_12_serial_3_2(repeat_num=rival_move_len)
|
||||||
moves = ms.filter_type_12_serial_3_2(all_moves, rival_move)
|
moves = ms.filter_type_12_serial_3_2(all_moves, rival_move)
|
||||||
|
|
||||||
elif rival_move_type == md.TYPE_13_4_2:
|
# elif rival_move_type == md.TYPE_13_4_2:
|
||||||
all_moves = mg.gen_type_13_4_2()
|
# all_moves = mg.gen_type_13_4_2()
|
||||||
moves = ms.filter_type_13_4_2(all_moves, rival_move)
|
# moves = ms.filter_type_13_4_2(all_moves, rival_move)
|
||||||
|
|
||||||
elif rival_move_type == md.TYPE_14_4_22:
|
# elif rival_move_type == md.TYPE_14_4_22:
|
||||||
all_moves = mg.gen_type_14_4_22()
|
# all_moves = mg.gen_type_14_4_22()
|
||||||
moves = ms.filter_type_14_4_22(all_moves, rival_move)
|
# moves = ms.filter_type_14_4_22(all_moves, rival_move)
|
||||||
|
|
||||||
if rival_move_type not in [md.TYPE_0_PASS,
|
if rival_move_type != md.TYPE_0_PASS and rival_move_type < md.TYPE_4_BOMB:
|
||||||
md.TYPE_4_BOMB, md.TYPE_5_KING_BOMB]:
|
moves = moves + mg.gen_type_4_bomb(4) + mg.gen_type_4_bomb(5) + mg.gen_type_4_bomb(6) + mg.gen_type_4_bomb(7) + mg.gen_type_4_bomb(8) + mg.gen_type_5_king_bomb()
|
||||||
moves = moves + mg.gen_type_4_bomb() + mg.gen_type_5_king_bomb()
|
|
||||||
|
|
||||||
if len(rival_move) != 0: # rival_move is not 'pass'
|
if len(rival_move) != 0: # rival_move is not 'pass'
|
||||||
moves = moves + [[]]
|
moves = moves + [[]]
|
||||||
|
@ -284,7 +333,7 @@ class GameEnv(object):
|
||||||
def reset(self):
|
def reset(self):
|
||||||
self.card_play_action_seq = []
|
self.card_play_action_seq = []
|
||||||
|
|
||||||
self.three_landlord_cards = None
|
# self.three_landlord_cards = None
|
||||||
self.game_over = False
|
self.game_over = False
|
||||||
|
|
||||||
self.acting_player_position = None
|
self.acting_player_position = None
|
||||||
|
@ -292,33 +341,40 @@ class GameEnv(object):
|
||||||
|
|
||||||
self.last_move_dict = {'landlord': [],
|
self.last_move_dict = {'landlord': [],
|
||||||
'landlord_up': [],
|
'landlord_up': [],
|
||||||
|
'landlord_front': [],
|
||||||
'landlord_down': []}
|
'landlord_down': []}
|
||||||
|
|
||||||
self.played_cards = {'landlord': [],
|
self.played_cards = {'landlord': [],
|
||||||
'landlord_up': [],
|
'landlord_up': [],
|
||||||
|
'landlord_front': [],
|
||||||
'landlord_down': []}
|
'landlord_down': []}
|
||||||
|
|
||||||
self.last_move = []
|
self.last_move = []
|
||||||
self.last_two_moves = []
|
self.last_two_moves = []
|
||||||
|
|
||||||
self.info_sets = {'landlord': InfoSet('landlord'),
|
self.info_sets = {'landlord': InfoSet('landlord'),
|
||||||
'landlord_up': InfoSet('landlord_up'),
|
'landlord_up': InfoSet('landlord_up'),
|
||||||
'landlord_down': InfoSet('landlord_down')}
|
'landlord_front': InfoSet('landlord_front'),
|
||||||
|
'landlord_down': InfoSet('landlord_down')}
|
||||||
|
|
||||||
self.bomb_num = 0
|
self.bomb_num = [0, 0]
|
||||||
self.pos_bomb_num = {
|
self.pos_bomb_num = {
|
||||||
"landlord": 0,
|
"landlord": 0,
|
||||||
"landlord_up": 0,
|
"landlord_up": 0,
|
||||||
|
"landlord_front": 0,
|
||||||
"landlord_down": 0
|
"landlord_down": 0
|
||||||
}
|
}
|
||||||
self.last_pid = 'landlord'
|
self.last_pid = 'landlord'
|
||||||
self.bid_info = [[-1, -1, -1],
|
|
||||||
[-1, -1, -1],
|
self.bid_info = [[-1, -1, -1, -1],
|
||||||
[-1, -1, -1],
|
[-1, -1, -1, -1],
|
||||||
[-1, -1, -1]]
|
[-1, -1, -1, -1],
|
||||||
|
[-1, -1, -1, -1],
|
||||||
|
[-1, -1, -1, -1]]
|
||||||
self.bid_count = 0
|
self.bid_count = 0
|
||||||
self.multiply_count = {'landlord': 0,
|
self.multiply_count = {'landlord': 0,
|
||||||
'landlord_up': 0,
|
'landlord_up': 0,
|
||||||
|
'landlord_front': 0,
|
||||||
'landlord_down': 0}
|
'landlord_down': 0}
|
||||||
self.step_count = 0
|
self.step_count = 0
|
||||||
|
|
||||||
|
@ -344,10 +400,10 @@ class GameEnv(object):
|
||||||
|
|
||||||
self.info_sets[self.acting_player_position].num_cards_left_dict = \
|
self.info_sets[self.acting_player_position].num_cards_left_dict = \
|
||||||
{pos: len(self.info_sets[pos].player_hand_cards)
|
{pos: len(self.info_sets[pos].player_hand_cards)
|
||||||
for pos in ['landlord', 'landlord_up', 'landlord_down']}
|
for pos in ['landlord', 'landlord_up', 'landlord_front', 'landlord_down']}
|
||||||
|
|
||||||
self.info_sets[self.acting_player_position].other_hand_cards = []
|
self.info_sets[self.acting_player_position].other_hand_cards = []
|
||||||
for pos in ['landlord', 'landlord_up', 'landlord_down']:
|
for pos in ['landlord', 'landlord_up', 'landlord_front', 'landlord_down']:
|
||||||
if pos != self.acting_player_position:
|
if pos != self.acting_player_position:
|
||||||
self.info_sets[
|
self.info_sets[
|
||||||
self.acting_player_position].other_hand_cards += \
|
self.acting_player_position].other_hand_cards += \
|
||||||
|
@ -355,15 +411,15 @@ class GameEnv(object):
|
||||||
|
|
||||||
self.info_sets[self.acting_player_position].played_cards = \
|
self.info_sets[self.acting_player_position].played_cards = \
|
||||||
self.played_cards
|
self.played_cards
|
||||||
self.info_sets[self.acting_player_position].three_landlord_cards = \
|
# self.info_sets[self.acting_player_position].three_landlord_cards = \
|
||||||
self.three_landlord_cards
|
# self.three_landlord_cards
|
||||||
self.info_sets[self.acting_player_position].card_play_action_seq = \
|
self.info_sets[self.acting_player_position].card_play_action_seq = \
|
||||||
self.card_play_action_seq
|
self.card_play_action_seq
|
||||||
|
|
||||||
self.info_sets[
|
self.info_sets[
|
||||||
self.acting_player_position].all_handcards = \
|
self.acting_player_position].all_handcards = \
|
||||||
{pos: self.info_sets[pos].player_hand_cards
|
{pos: self.info_sets[pos].player_hand_cards
|
||||||
for pos in ['landlord', 'landlord_up', 'landlord_down']}
|
for pos in ['landlord', 'landlord_up', 'landlord_front', 'landlord_down']}
|
||||||
|
|
||||||
return deepcopy(self.info_sets[self.acting_player_position])
|
return deepcopy(self.info_sets[self.acting_player_position])
|
||||||
|
|
||||||
|
@ -379,13 +435,13 @@ class InfoSet(object):
|
||||||
self.player_position = player_position
|
self.player_position = player_position
|
||||||
# The hand cards of the current player. A list.
|
# The hand cards of the current player. A list.
|
||||||
self.player_hand_cards = None
|
self.player_hand_cards = None
|
||||||
# The number of cards left for each player. It is a dict with str-->int
|
# The number of cards left for each player. It is a dict with str-->int
|
||||||
self.num_cards_left_dict = None
|
self.num_cards_left_dict = None
|
||||||
# The three landlord cards. A list.
|
# The three landlord cards. A list.
|
||||||
self.three_landlord_cards = None
|
# self.three_landlord_cards = None
|
||||||
# The historical moves. It is a list of list
|
# The historical moves. It is a list of list
|
||||||
self.card_play_action_seq = None
|
self.card_play_action_seq = None
|
||||||
# The union of the hand cards of the other two players for the current player
|
# The union of the hand cards of the other two players for the current player
|
||||||
self.other_hand_cards = None
|
self.other_hand_cards = None
|
||||||
# The legal actions for the current move. It is a list of list
|
# The legal actions for the current move. It is a list of list
|
||||||
self.legal_actions = None
|
self.legal_actions = None
|
||||||
|
@ -397,18 +453,19 @@ class InfoSet(object):
|
||||||
self.last_move_dict = None
|
self.last_move_dict = None
|
||||||
# The played cards so far. It is a list.
|
# The played cards so far. It is a list.
|
||||||
self.played_cards = None
|
self.played_cards = None
|
||||||
# The hand cards of all the players. It is a dict.
|
# The hand cards of all the players. It is a dict.
|
||||||
self.all_handcards = None
|
self.all_handcards = None
|
||||||
# Last player position that plays a valid move, i.e., not `pass`
|
# Last player position that plays a valid move, i.e., not `pass`
|
||||||
self.last_pid = None
|
self.last_pid = None
|
||||||
# The number of bombs played so far
|
# The number of bombs played so far
|
||||||
self.bomb_num = None
|
self.bomb_num = None
|
||||||
|
|
||||||
self.bid_info = [[-1, -1, -1],
|
self.bid_info = [[-1, -1, -1, -1],
|
||||||
[-1, -1, -1],
|
[-1, -1, -1, -1],
|
||||||
[-1, -1, -1],
|
[-1, -1, -1, -1],
|
||||||
[-1, -1, -1]]
|
[-1, -1, -1, -1],
|
||||||
|
[-1, -1, -1, -1]]
|
||||||
|
|
||||||
self.multiply_info = [1, 0, 0]
|
self.multiply_info = [1, 0, 0, 0]
|
||||||
|
|
||||||
self.player_id = None
|
self.player_id = None
|
||||||
|
|
|
@ -91,17 +91,17 @@ class MovesGener(object):
|
||||||
self.triple_cards_moves.append([k, k, k])
|
self.triple_cards_moves.append([k, k, k])
|
||||||
return self.triple_cards_moves
|
return self.triple_cards_moves
|
||||||
|
|
||||||
def gen_type_4_bomb(self):
|
def gen_type_4_bomb(self, num = 4):
|
||||||
self.bomb_moves = []
|
self.bomb_moves = []
|
||||||
for k, v in self.cards_dict.items():
|
for k, v in self.cards_dict.items():
|
||||||
if v == 4:
|
if v == num:
|
||||||
self.bomb_moves.append([k, k, k, k])
|
self.bomb_moves.append([k] * num)
|
||||||
return self.bomb_moves
|
return self.bomb_moves
|
||||||
|
|
||||||
def gen_type_5_king_bomb(self):
|
def gen_type_5_king_bomb(self):
|
||||||
self.final_bomb_moves = []
|
self.final_bomb_moves = []
|
||||||
if 20 in self.cards_list and 30 in self.cards_list:
|
if 20 in self.cards_list and self.cards_dict[20] == 2 and 30 in self.cards_list and self.cards_dict[30] == 2:
|
||||||
self.final_bomb_moves.append([20, 30])
|
self.final_bomb_moves.append([20, 20, 30, 30])
|
||||||
return self.final_bomb_moves
|
return self.final_bomb_moves
|
||||||
|
|
||||||
def gen_type_6_3_1(self):
|
def gen_type_6_3_1(self):
|
||||||
|
@ -205,15 +205,19 @@ class MovesGener(object):
|
||||||
moves.extend(self.gen_type_1_single())
|
moves.extend(self.gen_type_1_single())
|
||||||
moves.extend(self.gen_type_2_pair())
|
moves.extend(self.gen_type_2_pair())
|
||||||
moves.extend(self.gen_type_3_triple())
|
moves.extend(self.gen_type_3_triple())
|
||||||
moves.extend(self.gen_type_4_bomb())
|
moves.extend(self.gen_type_4_bomb(4))
|
||||||
|
moves.extend(self.gen_type_4_bomb(5))
|
||||||
|
moves.extend(self.gen_type_4_bomb(6))
|
||||||
|
moves.extend(self.gen_type_4_bomb(7))
|
||||||
|
moves.extend(self.gen_type_4_bomb(8))
|
||||||
moves.extend(self.gen_type_5_king_bomb())
|
moves.extend(self.gen_type_5_king_bomb())
|
||||||
moves.extend(self.gen_type_6_3_1())
|
# moves.extend(self.gen_type_6_3_1())
|
||||||
moves.extend(self.gen_type_7_3_2())
|
moves.extend(self.gen_type_7_3_2())
|
||||||
moves.extend(self.gen_type_8_serial_single())
|
moves.extend(self.gen_type_8_serial_single())
|
||||||
moves.extend(self.gen_type_9_serial_pair())
|
moves.extend(self.gen_type_9_serial_pair())
|
||||||
moves.extend(self.gen_type_10_serial_triple())
|
moves.extend(self.gen_type_10_serial_triple())
|
||||||
moves.extend(self.gen_type_11_serial_3_1())
|
# moves.extend(self.gen_type_11_serial_3_1())
|
||||||
moves.extend(self.gen_type_12_serial_3_2())
|
moves.extend(self.gen_type_12_serial_3_2())
|
||||||
moves.extend(self.gen_type_13_4_2())
|
# moves.extend(self.gen_type_13_4_2())
|
||||||
moves.extend(self.gen_type_14_4_22())
|
# moves.extend(self.gen_type_14_4_22())
|
||||||
return moves
|
return moves
|
||||||
|
|
|
@ -10,17 +10,22 @@ TYPE_0_PASS = 0
|
||||||
TYPE_1_SINGLE = 1
|
TYPE_1_SINGLE = 1
|
||||||
TYPE_2_PAIR = 2
|
TYPE_2_PAIR = 2
|
||||||
TYPE_3_TRIPLE = 3
|
TYPE_3_TRIPLE = 3
|
||||||
TYPE_4_BOMB = 4
|
TYPE_4_BOMB = 44
|
||||||
TYPE_5_KING_BOMB = 5
|
TYPE_4_BOMB5 = 45
|
||||||
|
TYPE_4_BOMB6 = 46
|
||||||
|
TYPE_4_BOMB7 = 47
|
||||||
|
TYPE_4_BOMB8 = 48
|
||||||
|
TYPE_5_KING_BOMB = 50
|
||||||
|
#TYPE_6_3_1 = 6
|
||||||
TYPE_6_3_1 = 6
|
TYPE_6_3_1 = 6
|
||||||
TYPE_7_3_2 = 7
|
TYPE_7_3_2 = 7
|
||||||
TYPE_8_SERIAL_SINGLE = 8
|
TYPE_8_SERIAL_SINGLE = 8
|
||||||
TYPE_9_SERIAL_PAIR = 9
|
TYPE_9_SERIAL_PAIR = 9
|
||||||
TYPE_10_SERIAL_TRIPLE = 10
|
TYPE_10_SERIAL_TRIPLE = 10
|
||||||
TYPE_11_SERIAL_3_1 = 11
|
# TYPE_11_SERIAL_3_1 = 11
|
||||||
TYPE_12_SERIAL_3_2 = 12
|
TYPE_12_SERIAL_3_2 = 12
|
||||||
TYPE_13_4_2 = 13
|
# TYPE_13_4_2 = 13
|
||||||
TYPE_14_4_22 = 14
|
# TYPE_14_4_22 = 14
|
||||||
TYPE_15_WRONG = 15
|
TYPE_15_WRONG = 15
|
||||||
|
|
||||||
# betting round action
|
# betting round action
|
||||||
|
|
|
@ -10,7 +10,7 @@ import BidModel
|
||||||
def load_card_play_models(card_play_model_path_dict):
|
def load_card_play_models(card_play_model_path_dict):
|
||||||
players = {}
|
players = {}
|
||||||
|
|
||||||
for position in ['landlord', 'landlord_up', 'landlord_down']:
|
for position in ['landlord', 'landlord_up', 'landlord_front', 'landlord_down']:
|
||||||
if card_play_model_path_dict[position] == 'rlcard':
|
if card_play_model_path_dict[position] == 'rlcard':
|
||||||
from .rlcard_agent import RLCardAgent
|
from .rlcard_agent import RLCardAgent
|
||||||
players[position] = RLCardAgent(position)
|
players[position] = RLCardAgent(position)
|
||||||
|
@ -43,30 +43,34 @@ def mp_simulate(card_play_data_list, card_play_model_path_dict, q, output, bid_o
|
||||||
bid_results = []
|
bid_results = []
|
||||||
bid_values = []
|
bid_values = []
|
||||||
bid_info_list = [
|
bid_info_list = [
|
||||||
np.array([[-1,-1,-1],
|
np.array([[-1,-1,-1,-1],
|
||||||
[-1,-1,-1],
|
[-1,-1,-1,-1],
|
||||||
[-1,-1,-1],
|
[-1,-1,-1,-1],
|
||||||
[-1,-1,-1]]),
|
[-1,-1,-1,-1]]),
|
||||||
np.array([[0,0,0],
|
np.array([[0,0,0,0],
|
||||||
[-1,-1,-1],
|
[-1,-1,-1,-1],
|
||||||
[-1,-1,-1],
|
[-1,-1,-1,-1],
|
||||||
[-1,-1,-1]]),
|
[-1,-1,-1,-1]]),
|
||||||
np.array([[1,0,0],
|
np.array([[1,0,0,0],
|
||||||
[-1,-1,-1],
|
[-1,-1,-1,-1],
|
||||||
[-1,-1,-1],
|
[-1,-1,-1,-1],
|
||||||
[-1,-1,-1]]),
|
[-1,-1,-1,-1]]),
|
||||||
np.array([[0,0,0],
|
np.array([[0,0,0,0],
|
||||||
[0,0,0],
|
[0,0,0,0],
|
||||||
[-1,-1,-1],
|
[-1,-1,-1,-1],
|
||||||
[-1,-1,-1]]),
|
[-1,-1,-1,-1]]),
|
||||||
np.array([[0,0,1],
|
np.array([[0,0,1,0],
|
||||||
[1,0,0],
|
[0,0,0,1],
|
||||||
[-1,-1,-1],
|
[-1,-1,-1,-1],
|
||||||
[-1,-1,-1]]),
|
[-1,-1,-1,-1]]),
|
||||||
np.array([[0,1,0],
|
np.array([[0,1,0,0],
|
||||||
[0,0,1],
|
[0,0,1,0],
|
||||||
[1,0,0],
|
[0,0,0,1],
|
||||||
[-1,-1,-1]]),
|
[-1,-1,-1,-1]]),
|
||||||
|
np.array([[0,1,0,0],
|
||||||
|
[0,0,1,0],
|
||||||
|
[1,0,0,0],
|
||||||
|
[-1,-1,-1,-1]]),
|
||||||
]
|
]
|
||||||
for bid_info in bid_info_list:
|
for bid_info in bid_info_list:
|
||||||
bid_obs = douzero.env.env._get_obs_for_bid(1, bid_info, card_play_data["landlord"])
|
bid_obs = douzero.env.env._get_obs_for_bid(1, bid_info, card_play_data["landlord"])
|
||||||
|
@ -82,6 +86,7 @@ def mp_simulate(card_play_data_list, card_play_model_path_dict, q, output, bid_o
|
||||||
print("\nStart ------- " + title)
|
print("\nStart ------- " + title)
|
||||||
print ("".join([EnvCard2RealCard[c] for c in card_play_data["landlord"]]))
|
print ("".join([EnvCard2RealCard[c] for c in card_play_data["landlord"]]))
|
||||||
print ("".join([EnvCard2RealCard[c] for c in card_play_data["landlord_down"]]))
|
print ("".join([EnvCard2RealCard[c] for c in card_play_data["landlord_down"]]))
|
||||||
|
print ("".join([EnvCard2RealCard[c] for c in card_play_data["landlord_front"]]))
|
||||||
print ("".join([EnvCard2RealCard[c] for c in card_play_data["landlord_up"]]))
|
print ("".join([EnvCard2RealCard[c] for c in card_play_data["landlord_up"]]))
|
||||||
# print(card_play_data)
|
# print(card_play_data)
|
||||||
count = 0
|
count = 0
|
||||||
|
@ -127,6 +132,7 @@ def evaluate(landlord, landlord_up, landlord_down, eval_data, num_workers, outpu
|
||||||
card_play_model_path_dict = {
|
card_play_model_path_dict = {
|
||||||
'landlord': landlord,
|
'landlord': landlord,
|
||||||
'landlord_up': landlord_up,
|
'landlord_up': landlord_up,
|
||||||
|
'landlord_front': landlord_up,
|
||||||
'landlord_down': landlord_down}
|
'landlord_down': landlord_down}
|
||||||
|
|
||||||
num_landlord_wins = 0
|
num_landlord_wins = 0
|
||||||
|
|
|
@ -10,6 +10,8 @@ if __name__ == '__main__':
|
||||||
default='baselines/douzero_12/landlord_weights_39762328900.ckpt')
|
default='baselines/douzero_12/landlord_weights_39762328900.ckpt')
|
||||||
parser.add_argument('--landlord_up', type=str,
|
parser.add_argument('--landlord_up', type=str,
|
||||||
default='baselines/douzero_12/landlord_up_weights_39762328900.ckpt')
|
default='baselines/douzero_12/landlord_up_weights_39762328900.ckpt')
|
||||||
|
parser.add_argument('--landlord_front', type=str,
|
||||||
|
default='baselines/douzero_12/landlord_front_weights_39762328900.ckpt')
|
||||||
parser.add_argument('--landlord_down', type=str,
|
parser.add_argument('--landlord_down', type=str,
|
||||||
default='baselines/douzero_12/landlord_down_weights_39762328900.ckpt')
|
default='baselines/douzero_12/landlord_down_weights_39762328900.ckpt')
|
||||||
parser.add_argument('--eval_data', type=str,
|
parser.add_argument('--eval_data', type=str,
|
||||||
|
@ -25,7 +27,7 @@ if __name__ == '__main__':
|
||||||
if args.output or args.bid:
|
if args.output or args.bid:
|
||||||
args.num_workers = 1
|
args.num_workers = 1
|
||||||
t = 3
|
t = 3
|
||||||
frame = 3085177900
|
frame = 64000
|
||||||
adp_frame = 2511184300
|
adp_frame = 2511184300
|
||||||
# args.landlord = 'baselines/resnet_landlord_%i.ckpt' % frame
|
# args.landlord = 'baselines/resnet_landlord_%i.ckpt' % frame
|
||||||
args.landlord_up = 'baselines/resnet_landlord_up_%i.ckpt' % frame
|
args.landlord_up = 'baselines/resnet_landlord_up_%i.ckpt' % frame
|
||||||
|
@ -44,6 +46,7 @@ if __name__ == '__main__':
|
||||||
elif t == 3:
|
elif t == 3:
|
||||||
args.landlord = 'baselines/resnet_landlord_%i.ckpt' % frame
|
args.landlord = 'baselines/resnet_landlord_%i.ckpt' % frame
|
||||||
args.landlord_up = 'baselines/resnet_landlord_up_%i.ckpt' % frame
|
args.landlord_up = 'baselines/resnet_landlord_up_%i.ckpt' % frame
|
||||||
|
args.landlord_front = 'baselines/resnet_landlord_front_%i.ckpt' % frame
|
||||||
args.landlord_down = 'baselines/resnet_landlord_down_%i.ckpt' % frame
|
args.landlord_down = 'baselines/resnet_landlord_down_%i.ckpt' % frame
|
||||||
elif t == 4:
|
elif t == 4:
|
||||||
args.landlord = 'baselines/douzero_ADP/landlord.ckpt'
|
args.landlord = 'baselines/douzero_ADP/landlord.ckpt'
|
||||||
|
|
|
@ -4,9 +4,9 @@ import numpy as np
|
||||||
|
|
||||||
deck = []
|
deck = []
|
||||||
for i in range(3, 15):
|
for i in range(3, 15):
|
||||||
deck.extend([i for _ in range(4)])
|
deck.extend([i for _ in range(8)])
|
||||||
deck.extend([17 for _ in range(4)])
|
deck.extend([17 for _ in range(8)])
|
||||||
deck.extend([20, 30])
|
deck.extend([20, 20, 30, 30])
|
||||||
|
|
||||||
def get_parser():
|
def get_parser():
|
||||||
parser = argparse.ArgumentParser(description='DouZero: random data generator')
|
parser = argparse.ArgumentParser(description='DouZero: random data generator')
|
||||||
|
@ -17,10 +17,11 @@ def get_parser():
|
||||||
def generate():
|
def generate():
|
||||||
_deck = deck.copy()
|
_deck = deck.copy()
|
||||||
np.random.shuffle(_deck)
|
np.random.shuffle(_deck)
|
||||||
card_play_data = {'landlord': _deck[:20],
|
card_play_data = {'landlord': _deck[:33],
|
||||||
'landlord_up': _deck[20:37],
|
'landlord_up': _deck[33:58],
|
||||||
'landlord_down': _deck[37:54],
|
'landlord_front': _deck[58:83],
|
||||||
'three_landlord_cards': _deck[17:20],
|
'landlord_down': _deck[83:108],
|
||||||
|
# 'three_landlord_cards': _deck[25:33],
|
||||||
}
|
}
|
||||||
for key in card_play_data:
|
for key in card_play_data:
|
||||||
card_play_data[key].sort()
|
card_play_data[key].sort()
|
||||||
|
|
Loading…
Reference in New Issue