import os
import traceback

from .orm import Baseline, Battle, Model
from ..evaluation.simulation import evaluate
from ..evaluation.deep_agent import DeepAgent
from datetime import datetime

positions = ['landlord', 'landlord_up', 'landlord_front', 'landlord_down']
idx_position = {0: 'landlord', 1: 'landlord_down', 2: 'landlord_front', 3: 'landlord_up'}
position_idx = {'landlord': 0, 'landlord_down': 1, 'landlord_front': 2, 'landlord_up': 3}

# Agents for the current top-ranked baseline, indexed by ``position_idx``.
# The list is mutated in place (never rebound) by the functions below.
baseline_players = [None, None, None, None]

# Battle.status values written or read in this module:
#    0  selected by tick() as the next battle to run
#   -1  set by tick() as soon as it picks the battle up
#   -2  challenger model file does not exist on disk
#   -3  no baseline exists and one could not be assembled from Model rows
#    1  challenger succeeded and a new Baseline row was created
#    2  challenger did not succeed; its model files were removed


def _latest_baseline_query():
    """Return the query selecting the single highest-ranked Baseline row."""
    return Baseline.select().order_by(Baseline.rank.desc()).limit(1)


def init_battlefield(flags):
    """Load the agents of the highest-ranked baseline into ``baseline_players``.

    Loading is best-effort: if constructing an agent raises, the traceback is
    printed and the remaining slots keep their previous value. Slots filled
    before the failure stay filled.

    Args:
        flags: Unused here; kept so the signature matches the other entry
            points of this module.
    """
    baselines = _latest_baseline_query()
    if len(baselines) < 1:
        return

    baseline = baselines[0]
    try:
        # position_idx iterates in slot order 0..3, so a failure part-way
        # through leaves the lower-numbered slots populated.
        for position, slot in position_idx.items():
            model_path = str(getattr(baseline, position + '_path'))
            baseline_players[slot] = DeepAgent(position, model_path, use_onnx=True)
    except Exception:
        # Report instead of silently swallowing; still do not crash start-up.
        traceback.print_exc()


def battle_logic(flags, baseline: Baseline, battle: Battle):
    """Evaluate a challenger model against a baseline and record the outcome.

    The challenger replaces the baseline model of its own position; the other
    three positions keep the baseline models. A first evaluation runs on
    ``eval_data_1000.pkl``. Only if the challenger's side beats the baseline
    win value there (or the baseline value is 0) is a second evaluation run on
    ``eval_data_4000.pkl``; the two results are then combined with weights
    1:4 and compared against the update threshold.

    On success a new Baseline row with ``rank + 1`` is created and the
    challenger agent is installed in ``baseline_players``. On failure the
    challenger file and its ``.onnx`` sibling (if present) are deleted.

    Args:
        flags: Object providing ``update_threshold``. The landlord must reach
            ``1 + update_threshold`` times the baseline value, any other
            position ``1 + update_threshold / 3``.
        baseline: The Baseline row to challenge.
        battle: The Battle row describing the challenger; it is updated and
            saved in place.
    """
    eval_data_first = 'eval_data_1000.pkl'
    eval_data_second = 'eval_data_4000.pkl'
    challenger_path = str(battle.challenger_path)

    challenger_baseline = {
        'landlord_path': str(baseline.landlord_path),
        'landlord_up_path': str(baseline.landlord_up_path),
        'landlord_front_path': str(baseline.landlord_front_path),
        'landlord_down_path': str(baseline.landlord_down_path),
    }

    if not os.path.exists(challenger_path):
        battle.status = -2
        battle.save()
        return

    challenger_baseline[battle.challenger_position + "_path"] = challenger_path
    print(challenger_path)

    def _run_eval(eval_data):
        # NOTE(review): the trailing arguments (2, False, 'New') are passed
        # through unchanged; their meaning is defined by evaluate() - confirm
        # there before altering them.
        return evaluate(challenger_baseline['landlord_path'],
                        challenger_baseline['landlord_up_path'],
                        challenger_baseline['landlord_front_path'],
                        challenger_baseline['landlord_down_path'],
                        eval_data,
                        2,
                        False,
                        'New')

    def _second_eval(landlord_wp, farmer_wp, landlord_adp, farmer_adp):
        """Run the second evaluation and blend it 4:1 with the first one."""
        landlord_wp_2, farmer_wp_2, landlord_adp_2, farmer_adp_2 = \
            _run_eval(eval_data_second)
        return (landlord_wp + landlord_wp_2 * 4.0) / 5, \
               (farmer_wp + farmer_wp_2 * 4.0) / 5, \
               (landlord_adp + landlord_adp_2 * 4.0) / 5, \
               (farmer_adp + farmer_adp_2 * 4.0) / 5

    landlord_wp, farmer_wp, landlord_adp, farmer_adp = _run_eval(eval_data_first)

    update_threshold_landlord = 1 + flags.update_threshold
    update_threshold_farmer = 1 + flags.update_threshold / 3

    # Every non-landlord position is judged on the farmer-side numbers.
    is_landlord = battle.challenger_position == 'landlord'
    if is_landlord:
        baseline_wp = baseline.landlord_wp
        update_threshold = update_threshold_landlord
    else:
        baseline_wp = baseline.farmer_wp
        update_threshold = update_threshold_farmer

    challenge_success = False
    first_stage_wp = landlord_wp if is_landlord else farmer_wp
    # A baseline value of 0 means there is nothing to compare against, so the
    # challenger passes both stages unconditionally (and division is avoided).
    if baseline_wp == 0 or first_stage_wp / float(baseline_wp) > 1:
        landlord_wp, farmer_wp, landlord_adp, farmer_adp = \
            _second_eval(landlord_wp, farmer_wp, landlord_adp, farmer_adp)
        combined_wp = landlord_wp if is_landlord else farmer_wp
        if baseline_wp == 0 or combined_wp / float(baseline_wp) >= update_threshold:
            challenge_success = True

    if challenge_success:
        challenger_baseline['rank'] = baseline.rank + 1
        challenger_baseline['landlord_wp'] = landlord_wp
        challenger_baseline['landlord_adp'] = landlord_adp
        challenger_baseline['farmer_wp'] = farmer_wp
        challenger_baseline['farmer_adp'] = farmer_adp
        challenger_baseline['create_time'] = datetime.now()
        Baseline.create(**challenger_baseline)

    battle.challenger_wp = landlord_wp if is_landlord else farmer_wp
    battle.challenger_adp = landlord_adp if is_landlord else farmer_adp
    battle.opponent_rank = baseline.rank
    battle.status = 1 if challenge_success else 2
    battle.save()

    if not challenge_success:
        # The losing challenger is discarded together with its ONNX export.
        onnx_path = challenger_path + '.onnx'
        if os.path.exists(onnx_path):
            os.remove(onnx_path)
        os.remove(challenger_path)
    else:
        baseline_players[position_idx[battle.challenger_position]] = DeepAgent(
            battle.challenger_position, challenger_path, use_onnx=True)


def _bootstrap_baseline():
    """Assemble the initial baseline dict from the newest Model per position.

    Each position that has at least one Model row contributes a
    ``<position>_path`` entry and gets its agent loaded into
    ``baseline_players``. The caller decides whether the result is complete.
    """
    baseline = {}
    for position in positions:
        models = Model.select() \
            .where(Model.position == position) \
            .order_by(Model.create_time.desc()) \
            .limit(1)
        if len(models) > 0:
            baseline['%s_path' % position] = models[0].path
            baseline_players[position_idx[position]] = DeepAgent(
                position, str(models[0].path), use_onnx=True)
    return baseline


def tick(flags):
    """Process at most one battle with status 0 (lowest id first).

    The battle is marked -1 before any work is done. If no Baseline row
    exists yet, an initial rank-0 baseline is built from the newest Model of
    each position; if fewer than four positions have a model the battle is
    marked -3 instead. Any exception is printed and swallowed so that the
    caller can keep ticking; KeyboardInterrupt and SystemExit propagate.

    Args:
        flags: Passed through to ``battle_logic``.
    """
    try:
        battles = Battle.select() \
            .where(Battle.status == 0) \
            .order_by(Battle.id.asc()) \
            .limit(1)
        for battle in battles:
            battle.status = -1
            battle.save()

            baselines = _latest_baseline_query()
            if len(baselines) != 0:
                battle_logic(flags, baselines[0], battle)
                continue

            baseline = _bootstrap_baseline()
            if len(baseline) == 4:
                baseline['rank'] = 0
                baseline['landlord_wp'] = 0
                baseline['farmer_wp'] = 0
                baseline['landlord_adp'] = 0
                baseline['farmer_adp'] = 0
                baseline['create_time'] = datetime.now()
                Baseline.create(**baseline)
                # Re-query so battle_logic receives the stored row.
                baselines = _latest_baseline_query()
                battle_logic(flags, baselines[0], battle)
            else:
                battle.status = -3
                battle.save()
    except Exception:
        traceback.print_exc()