Compare commits
No commits in common. "main" and "unified" have entirely different histories.
|
@ -220,12 +220,8 @@ def train(flags):
|
|||
nonlocal frames, position_frames, stats
|
||||
while frames < flags.total_frames:
|
||||
batch = get_batch(batch_queues, position, flags, local_lock)
|
||||
if 'uni' in optimizers.keys():
|
||||
_stats = learn(position, actor_model, learner_model.get_model(position), batch,
|
||||
optimizers['uni'], flags, position_lock)
|
||||
else:
|
||||
_stats = learn(position, actor_model, learner_model.get_model(position), batch,
|
||||
optimizers[position], flags, position_lock)
|
||||
_stats = learn(position, actor_model, learner_model.get_model(position), batch,
|
||||
optimizers['uni'], flags, position_lock)
|
||||
with lock:
|
||||
for k in _stats:
|
||||
stats[k] = _stats[k]
|
||||
|
@ -275,7 +271,7 @@ def train(flags):
|
|||
if flags.old_model:
|
||||
type += 'vanilla'
|
||||
elif flags.unified_model:
|
||||
type += 'unified_v2'
|
||||
type += 'unified'
|
||||
else:
|
||||
type += 'resnet'
|
||||
try:
|
||||
|
|
|
@ -415,13 +415,13 @@ class UnifiedModelLite(nn.Module):
|
|||
self.layer2 = self._make_layer(BasicBlock, 60, 2, stride=2)#1*9*60
|
||||
self.layer3 = self._make_layer(BasicBlock, 120, 2, stride=2)#1*5*120
|
||||
# self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
|
||||
self.lstm = nn.LSTM(276, 224, batch_first=True)
|
||||
self.lstm = nn.LSTM(276, 128, batch_first=True)
|
||||
|
||||
self.linear1 = nn.Linear((120 * BasicBlock.expansion * 5 + 224) * 2, 2048)
|
||||
self.linear1 = nn.Linear(120 * BasicBlock.expansion * 5 + 128, 2048)
|
||||
self.linear2 = nn.Linear(2048, 1024)
|
||||
self.linear3 = nn.Linear(1024, 1024)
|
||||
self.linear4 = nn.Linear(1024, 512)
|
||||
self.linear5 = nn.Linear(512, 1)
|
||||
self.linear3 = nn.Linear(1024, 512)
|
||||
self.linear4 = nn.Linear(512, 256)
|
||||
self.linear5 = nn.Linear(256, 1)
|
||||
|
||||
def _make_layer(self, block, planes, num_blocks, stride):
|
||||
strides = [stride] + [1] * (num_blocks - 1)
|
||||
|
@ -458,11 +458,9 @@ class UnifiedModelLite(nn.Module):
|
|||
out = self.layer2(out)
|
||||
out = self.layer3(out)
|
||||
out = out.flatten(1,2)
|
||||
is_landlord = z[0][0][0]
|
||||
lstm_out, (h_n, _) = self.lstm(x)
|
||||
lstm_out = lstm_out[:,-1,:]
|
||||
out = torch.cat([lstm_out,out], dim=1)
|
||||
out = torch.cat([out * is_landlord, out * (1 - is_landlord)], dim=1)
|
||||
out = F.leaky_relu_(self.linear1(out))
|
||||
out = F.leaky_relu_(self.linear2(out))
|
||||
out = F.leaky_relu_(self.linear3(out))
|
||||
|
|
|
@ -123,13 +123,12 @@ def create_optimizers(flags, learner_model):
|
|||
|
||||
def infer_logic(i, device, infer_queues, model, flags, onnx_frame):
|
||||
positions = ['landlord', 'landlord_up', 'landlord_front', 'landlord_down']
|
||||
device = device if device == "cpu" else ("cuda:" + str(device))
|
||||
if not flags.enable_onnx:
|
||||
if flags.unified_model:
|
||||
model.model.to(torch.device(device))
|
||||
model.model.to(torch.device(device if device == "cpu" else ("cuda:"+str(device))))
|
||||
else:
|
||||
for pos in positions:
|
||||
model.models[pos].to(torch.device(device))
|
||||
model.models[pos].to(torch.device(device if device == "cpu" else ("cuda:"+str(device))))
|
||||
last_onnx_frame = -1
|
||||
log.info('Infer %i started.', i)
|
||||
|
||||
|
|
|
@ -130,7 +130,6 @@ class Env:
|
|||
# Initialize the internal environment
|
||||
self._env = GameEnv(self.players)
|
||||
self.total_round = 0
|
||||
self.face_up_level = 0
|
||||
self.infoset = None
|
||||
|
||||
def reset(self, flags=None):
|
||||
|
@ -154,13 +153,6 @@ class Env:
|
|||
}
|
||||
for key in card_play_data:
|
||||
card_play_data[key].sort()
|
||||
rint = np.random.randint(0, 100)
|
||||
if rint < 45:
|
||||
face_up_level = 0
|
||||
elif rint < 90:
|
||||
face_up_level = 0x01
|
||||
else:
|
||||
face_up_level = 0x02
|
||||
player_ids = {
|
||||
'landlord': 0,
|
||||
'landlord_down': 1,
|
||||
|
@ -173,7 +165,6 @@ class Env:
|
|||
for pos in ["landlord", "landlord_up", "landlord_front", "landlord_down"]:
|
||||
pid = player_ids[pos]
|
||||
self._env.info_sets[pos].player_id = pid
|
||||
self._env.info_sets[pos].face_up_level = face_up_level
|
||||
self.infoset = self._game_infoset
|
||||
|
||||
return get_obs(self.infoset, self.use_general, self.use_legacy, self.lite_model, self.use_unified)
|
||||
|
@ -590,7 +581,7 @@ def _get_obs_landlord(infoset, use_legacy = False, compressed_form = False):
|
|||
num_legal_actions, axis=0)
|
||||
|
||||
bomb_num = _get_one_hot_bomb(
|
||||
infoset.bomb_num, use_legacy, compressed_form=compressed_form)
|
||||
infoset.bomb_num, use_legacy)
|
||||
bomb_num_batch = np.repeat(
|
||||
bomb_num[np.newaxis, :],
|
||||
num_legal_actions, axis=0)
|
||||
|
@ -705,7 +696,7 @@ def _get_obs_landlord_up(infoset, use_legacy = False, compressed_form = False):
|
|||
num_legal_actions, axis=0)
|
||||
|
||||
bomb_num = _get_one_hot_bomb(
|
||||
infoset.bomb_num, use_legacy, compressed_form=compressed_form)
|
||||
infoset.bomb_num, use_legacy)
|
||||
bomb_num_batch = np.repeat(
|
||||
bomb_num[np.newaxis, :],
|
||||
num_legal_actions, axis=0)
|
||||
|
@ -826,7 +817,7 @@ def _get_obs_landlord_front(infoset, use_legacy = False, compressed_form = False
|
|||
num_legal_actions, axis=0)
|
||||
|
||||
bomb_num = _get_one_hot_bomb(
|
||||
infoset.bomb_num, use_legacy, compressed_form=compressed_form)
|
||||
infoset.bomb_num, use_legacy)
|
||||
bomb_num_batch = np.repeat(
|
||||
bomb_num[np.newaxis, :],
|
||||
num_legal_actions, axis=0)
|
||||
|
@ -947,7 +938,7 @@ def _get_obs_landlord_down(infoset, use_legacy = False, compressed_form = False)
|
|||
num_legal_actions, axis=0)
|
||||
|
||||
bomb_num = _get_one_hot_bomb(
|
||||
infoset.bomb_num, use_legacy, compressed_form=compressed_form)
|
||||
infoset.bomb_num, use_legacy)
|
||||
bomb_num_batch = np.repeat(
|
||||
bomb_num[np.newaxis, :],
|
||||
num_legal_actions, axis=0)
|
||||
|
@ -1031,7 +1022,7 @@ def _get_obs_general(infoset, position, compressed_form = False):
|
|||
infoset.played_cards['landlord_down'], compressed_form)
|
||||
|
||||
bomb_num = _get_one_hot_bomb(
|
||||
infoset.bomb_num, compressed_form=compressed_form)
|
||||
infoset.bomb_num)
|
||||
bomb_num_batch = np.repeat(
|
||||
bomb_num[np.newaxis, :],
|
||||
num_legal_actions, axis=0)
|
||||
|
@ -1074,7 +1065,10 @@ def _get_obs_general(infoset, position, compressed_form = False):
|
|||
}
|
||||
return obs
|
||||
|
||||
def _get_obs_unified(infoset, position, compressed_form = True):
|
||||
'''
|
||||
face_up_level 0x01: three_landlord_cards, 0x02: landlord, 0x04: landlord_up, 0x08: landlord_front, 0x10: landlord_down
|
||||
'''
|
||||
def _get_obs_unified(infoset, position, compressed_form = True, face_up_level = 0):
|
||||
num_legal_actions = len(infoset.legal_actions)
|
||||
my_handcards = _cards2array(infoset.player_hand_cards, compressed_form)
|
||||
my_handcards_batch = np.repeat(my_handcards[np.newaxis, :],
|
||||
|
@ -1110,7 +1104,7 @@ def _get_obs_unified(infoset, position, compressed_form = True):
|
|||
landlord_down_played_cards = _cards2array(
|
||||
infoset.played_cards['landlord_down'], compressed_form)
|
||||
|
||||
if (infoset.face_up_level & 0x01) > 0:
|
||||
if (face_up_level & 0x01) > 0:
|
||||
three_landlord_cards = _cards2array(
|
||||
infoset.three_landlord_cards, compressed_form)
|
||||
|
||||
|
@ -1123,28 +1117,28 @@ def _get_obs_unified(infoset, position, compressed_form = True):
|
|||
three_landlord_cards_all = _cards2noise(
|
||||
infoset.three_landlord_cards_all, compressed_form)
|
||||
|
||||
if (infoset.face_up_level & 0x02) > 0:
|
||||
if (face_up_level & 0x02) > 0:
|
||||
landlord_cards = _cards2array(
|
||||
infoset.all_handcards['landlord'], compressed_form)
|
||||
else:
|
||||
landlord_cards = _cards2noise(
|
||||
infoset.all_handcards['landlord'], compressed_form)
|
||||
|
||||
if (infoset.face_up_level & 0x04) > 0:
|
||||
if (face_up_level & 0x04) > 0:
|
||||
landlord_up_cards = _cards2array(
|
||||
infoset.all_handcards['landlord_up'], compressed_form)
|
||||
else:
|
||||
landlord_up_cards = _cards2noise(
|
||||
infoset.all_handcards['landlord_up'], compressed_form)
|
||||
|
||||
if (infoset.face_up_level & 0x08) > 0:
|
||||
if (face_up_level & 0x08) > 0:
|
||||
landlord_front_cards = _cards2array(
|
||||
infoset.all_handcards['landlord_front'], compressed_form)
|
||||
else:
|
||||
landlord_front_cards = _cards2noise(
|
||||
infoset.all_handcards['landlord_front'], compressed_form)
|
||||
|
||||
if (infoset.face_up_level & 0x10) > 0:
|
||||
if (face_up_level & 0x10) > 0:
|
||||
landlord_down_cards = _cards2array(
|
||||
infoset.all_handcards['landlord_down'], compressed_form)
|
||||
else:
|
||||
|
@ -1155,7 +1149,7 @@ def _get_obs_unified(infoset, position, compressed_form = True):
|
|||
infoset.bomb_num, compressed_form=compressed_form) # 56/95
|
||||
base_info = np.hstack((
|
||||
PositionInfoArray[position], # 4
|
||||
FaceUpLevelArray[infoset.face_up_level], # 9
|
||||
FaceUpLevelArray[face_up_level], # 9
|
||||
bomb_num, #56
|
||||
))
|
||||
num_cards_left = np.hstack((
|
||||
|
|
|
@ -168,7 +168,6 @@ class GameEnv(object):
|
|||
"landlord_down": 0
|
||||
}
|
||||
self.last_pid = 'landlord'
|
||||
self.face_up_level = 0
|
||||
self.step_count = 0
|
||||
|
||||
|
||||
|
@ -187,22 +186,12 @@ class GameEnv(object):
|
|||
card_play_data['landlord_front']
|
||||
self.info_sets['landlord_down'].player_hand_cards = \
|
||||
card_play_data['landlord_down']
|
||||
if 'three_landlord_cards' not in card_play_data.keys():
|
||||
self.three_landlord_cards = card_play_data['landlord'][25:33]
|
||||
self.three_landlord_cards_all = card_play_data['landlord'][25:33]
|
||||
else:
|
||||
self.three_landlord_cards = card_play_data['three_landlord_cards'][:]
|
||||
self.three_landlord_cards_all = card_play_data['three_landlord_cards'][:]
|
||||
if 'face_up_level' in card_play_data.keys():
|
||||
self.info_sets['landlord'].face_up_level = card_play_data['face_up_level']
|
||||
self.info_sets['landlord_up'].face_up_level = card_play_data['face_up_level']
|
||||
self.info_sets['landlord_front'].face_up_level = card_play_data['face_up_level']
|
||||
self.info_sets['landlord_down'].face_up_level = card_play_data['face_up_level']
|
||||
else:
|
||||
self.info_sets['landlord'].face_up_level = 0
|
||||
self.info_sets['landlord_up'].face_up_level = 0
|
||||
self.info_sets['landlord_front'].face_up_level = 0
|
||||
self.info_sets['landlord_down'].face_up_level = 0
|
||||
if 'three_landlord_cards' not in card_play_data.keys():
|
||||
self.three_landlord_cards = card_play_data['landlord'][25:33]
|
||||
self.three_landlord_cards_all = card_play_data['landlord'][25:33]
|
||||
else:
|
||||
self.three_landlord_cards = card_play_data['three_landlord_cards'][:]
|
||||
self.three_landlord_cards_all = card_play_data['three_landlord_cards'][:]
|
||||
self.get_acting_player_position()
|
||||
self.game_infoset = self.get_infoset()
|
||||
|
||||
|
@ -375,11 +364,6 @@ class GameEnv(object):
|
|||
'landlord_front': InfoSet('landlord_front'),
|
||||
'landlord_down': InfoSet('landlord_down')}
|
||||
|
||||
self.info_sets['landlord'].face_up_level = self.face_up_level
|
||||
self.info_sets['landlord_up'].face_up_level = self.face_up_level
|
||||
self.info_sets['landlord_front'].face_up_level = self.face_up_level
|
||||
self.info_sets['landlord_down'].face_up_level = self.face_up_level
|
||||
|
||||
self.bomb_num = [0, 0, 0]
|
||||
self.pos_bomb_num = {
|
||||
"landlord": 0,
|
||||
|
@ -472,5 +456,3 @@ class InfoSet(object):
|
|||
# The number of bombs played so far
|
||||
self.bomb_num = None
|
||||
self.player_id = None
|
||||
# face_up_level 0x01: three_landlord_cards, 0x02: landlord, 0x04: landlord_up, 0x08: landlord_front, 0x10: landlord_down
|
||||
self.face_up_level = 0
|
||||
|
|
|
@ -47,7 +47,7 @@ def battle_logic(flags, baseline : Baseline, battle : Battle):
|
|||
challenger_baseline['landlord_front_path'],
|
||||
challenger_baseline['landlord_down_path'],
|
||||
eval_data_first,
|
||||
4,
|
||||
2,
|
||||
False,
|
||||
'New')
|
||||
def _second_eval(landlord_wp, farmer_wp, landlord_adp, farmer_adp):
|
||||
|
@ -57,7 +57,7 @@ def battle_logic(flags, baseline : Baseline, battle : Battle):
|
|||
challenger_baseline['landlord_front_path'],
|
||||
challenger_baseline['landlord_down_path'],
|
||||
eval_data_second,
|
||||
4,
|
||||
2,
|
||||
False,
|
||||
'New')
|
||||
return (landlord_wp + landlord_wp_2 * 4.0) / 5, \
|
||||
|
@ -94,12 +94,10 @@ def battle_logic(flags, baseline : Baseline, battle : Battle):
|
|||
battle.status = 1 if challenge_success else 2
|
||||
battle.save()
|
||||
if not challenge_success:
|
||||
cnt = Battle.select().where(Battle.challenger_path == battle.challenger_path,Battle.status == 0).count()
|
||||
if cnt == 0:
|
||||
onnx_path = str(battle.challenger_path) + '.onnx'
|
||||
if os.path.exists(onnx_path):
|
||||
os.remove(onnx_path)
|
||||
os.remove(str(battle.challenger_path))
|
||||
onnx_path = str(battle.challenger_path) + '.onnx'
|
||||
if os.path.exists(onnx_path):
|
||||
os.remove(onnx_path)
|
||||
os.remove(str(battle.challenger_path))
|
||||
else:
|
||||
baseline_players[position_idx[battle.challenger_position]] = DeepAgent(battle.challenger_position, str(battle.challenger_path), use_onnx=True)
|
||||
|
||||
|
|
|
@ -28,10 +28,10 @@ RealCard2EnvCard = {'3': 3, '4': 4, '5': 5, '6': 6, '7': 7,
|
|||
@app.route('/upload', methods=['POST'])
|
||||
def upload():
|
||||
type = request.form.get('type')
|
||||
if type not in ['lite_resnet', 'lite_vanilla', 'legacy_vanilla', 'lite_unified', 'lite_unified_v2']:
|
||||
if type not in ['lite_resnet', 'lite_vanilla', 'legacy_vanilla', 'lite_unified']:
|
||||
return jsonify({'status': -1, 'message': 'illegal type'})
|
||||
position = request.form.get("position")
|
||||
if position != 'uni' and position not in positions:
|
||||
if position not in positions:
|
||||
return jsonify({'status': -2, 'message': 'illegal position'})
|
||||
frame = int(request.form.get("frame"))
|
||||
model_file = request.files.get('model_file')
|
||||
|
@ -42,11 +42,7 @@ def upload():
|
|||
if model is None:
|
||||
model_file.save(path)
|
||||
Model.create(path=path, position=position,type=type,frame=frame,create_time=datetime.now())
|
||||
if position == 'uni':
|
||||
for position in positions:
|
||||
Battle.create(challenger_path=path, challenger_position=position, status=0)
|
||||
else:
|
||||
Battle.create(challenger_path=path, challenger_position=position, status=0)
|
||||
Battle.create(challenger_path=path, challenger_position=position, status=0)
|
||||
return jsonify({'status': 0, 'message': 'success', 'result': ''})
|
||||
|
||||
def start_runner(flags):
|
||||
|
|
|
@ -9,15 +9,13 @@
|
|||
</head>
|
||||
<body>
|
||||
<select id='model_type'>
|
||||
<option value ="lite_unified">lite_unified</option>
|
||||
<option value ="lite_vanilla">lite_vanilla</option>
|
||||
<option value ="lite_resnet">lite_resnet</option>
|
||||
<option value ="lite_vanilla">lite_vanilla</option>
|
||||
<option value ="legacy_vanilla">legacy_vanilla</option>
|
||||
<option value ="lite_unified_v2">lite_unified_v2</option>
|
||||
</select>
|
||||
<div id="container_2" style="width: 80%; height: 400px; margin: 0 auto"></div>
|
||||
<div id="container_1" style="width: 80%; height: 400px; margin: 0 auto"></div>
|
||||
<div id="container_0" style="width: 80%; height: 400px; margin: 0 auto"></div>
|
||||
<div id="container_1" style="width: 80%; height: 400px; margin: 0 auto"></div>
|
||||
<div id="container_2" style="width: 80%; height: 400px; margin: 0 auto"></div>
|
||||
<script>
|
||||
position_map = {
|
||||
'landlord': '地主',
|
||||
|
@ -157,7 +155,7 @@
|
|||
});
|
||||
}
|
||||
$(document).ready(function () {
|
||||
load_charts("lite_unified");
|
||||
load_charts("lite_resnet");
|
||||
$("#model_type").on('change', function(){
|
||||
load_charts($("#model_type").val())
|
||||
})
|
||||
|
|
Loading…
Reference in New Issue