diff --git a/docs/api.md b/docs/api.md index a1a49bb..1719322 100644 --- a/docs/api.md +++ b/docs/api.md @@ -17,31 +17,31 @@ The definitions of the fields are as follows: * `index`: Integer. The index of the game of the same environent and same agent. It is in the range \[0, eval_num-1\] | type | Resource | Parameters | Description | -|------|-------------------------------------|-------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------| -| GET | tournament/launch | `eval_num`, `name` | Launch tournment on the game. Each pair of models will play `eval_num` times. Results will be saved in database. | -| GET | tournament/query\_game | `name`, `index`, `agent0`, `agent1`, `win`, `payoff`, `elements_every_page`, `page_index` | Query the games with the given parameters | -| GET | tournament/query\_payoff | `name`, `agent0`, `agent1`, `payoff` | Query the payoffs with the given parameters | -| GET | tournament/query\_agent\_payoff | `name`, `elements_every_page`, `page_index`, | Query the payoffs of all the agents | -| GET | tournament/replay | `name`, `agent0`, `agent1`, `index` | Return the replay data | -| POST | tournament/upload\_agent | `model`(Python file), `name`, `game`, | Upload a model file. 
`name` is model ID | -| GET | tournament/delete\_agent | `name` | Delete the agent of the given name | -| GET | tournament/list\_uploaded\_agents | `game` | list all the uploaded agents | -| GET | tournament/list\_baseline\_agents | `game` | list all the baseline agents | -| GET | download\_examples | `name` | download the example agents | +|------|-------------------------------------|-------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------| +| GET | tournament/launch | `num_eval_games`, `name` | Launch tournament on the game. Each pair of models will play `num_eval_games` times. Results will be saved in database. | +| GET | tournament/query\_game | `name`, `index`, `agent0`, `agent1`, `win`, `payoff`, `elements_every_page`, `page_index` | Query the games with the given parameters | +| GET | tournament/query\_payoff | `name`, `agent0`, `agent1`, `payoff` | Query the payoffs with the given parameters | +| GET | tournament/query\_agent\_payoff | `name`, `elements_every_page`, `page_index`, | Query the payoffs of all the agents | +| GET | tournament/replay | `name`, `agent0`, `agent1`, `index` | Return the replay data | +| POST | tournament/upload\_agent | `model`(Python file), `name`, `game`, | Upload a model file. 
`name` is model ID | +| GET | tournament/delete\_agent | `name` | Delete the agent of the given name | +| GET | tournament/list\_uploaded\_agents | `game` | list all the uploaded agents | +| GET | tournament/list\_baseline\_agents | `game` | list all the baseline agents | +| GET | download\_examples | `name` | download the example agents | ### Example API | API | Description | |------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------| -| http://127.0.0.1:8000/tournament/launch?eval\_num=200&name=leduc-holdem | Evaluate on Leduc Holdem with 200 games for each pair of models | +| http://127.0.0.1:8000/tournament/launch?num_eval_games=200&name=leduc-holdem | Evaluate on Leduc Holdem with 200 games for each pair of models | | http://127.0.0.1:8000/tournament/replay?name=leduc-holdem&agent0=leduc-holdem-rule-v1&agent1=leduc-holdem-cfr&index=3 | Obtain the replay data between rule model and CFR model. 
Obtain the data of the 3rd game | -| http://127.0.0.1:8000/tournament/query\_game&elements\_every\_page=10&page\_index=0 | Get all the game data | -| http://127.0.0.1:8000/tournament/query\_game?name=leduc-holdem&elements\_every\_page=10&page\_index=0 | Get all the game data of Leduc Holdem | -| http://127.0.0.1:8000/tournament/query\_payoff | Get all the payoffs | -| http://127.0.0.1:8000/tournament/query\_payoff?agent0=leduc-holdem-cfr&agent1=leduc-holdem-rule-v1 | Get all the payoffs between rule and CFR models | -| http://127.0.0.1:8000/tournament/query\_agent\_payoff?name=leduc-holdem&elements\_every\_page=1&page\_index=1 | Get the payoffs of all the agents of leduc-holdem | -| http://127.0.0.1:8000/tournament/list\_uploaded\_agents?game=leduc-holdem | List the uploaded agents of leduc-holdem | -| http://127.0.0.1:8000/tournament/list\_baseline\_agents?game=leduc-holdem | List the baseline agents of leduc-holdem | -| http://127.0.0.1:8000/tournament/download\_examples?name=example\_luduc\_nfsp\_model | Download the NFSP example model for Leduc Hold'em | +| http://127.0.0.1:8000/tournament/query_game?elements_every_page=10&page_index=0 | Get all the game data | +| http://127.0.0.1:8000/tournament/query_game?name=leduc-holdem&elements_every_page=10&page_index=0 | Get all the game data of Leduc Holdem | +| http://127.0.0.1:8000/tournament/query_payoff | Get all the payoffs | +| http://127.0.0.1:8000/tournament/query_payoff?agent0=leduc-holdem-cfr&agent1=leduc-holdem-rule-v1 | Get all the payoffs between rule and CFR models | +| http://127.0.0.1:8000/tournament/query_agent_payoff?name=leduc-holdem&elements_every_page=1&page_index=1 | Get the payoffs of all the agents of leduc-holdem | +| http://127.0.0.1:8000/tournament/list_uploaded_agents?game=leduc-holdem | List the uploaded agents of leduc-holdem | +| http://127.0.0.1:8000/tournament/list_baseline_agents?game=leduc-holdem | List the baseline agents of leduc-holdem | +| 
http://127.0.0.1:8000/tournament/download_examples?name=example_luduc_nfsp_model | Download the NFSP example model for Leduc Hold'em | ## Registered Models Some models have been pre-registered as baselines @@ -64,7 +64,7 @@ curl -F 'model=@example_luduc_nfsp_model.zip' -F "name=leduc-nfsp" -F "game=ledu ``` Launch the tounament with: ``` -curl 'http://127.0.0.1:8000/tournament/launch?eval_num=200&name=leduc-holdem' +curl 'http://127.0.0.1:8000/tournament/launch?num_eval_games=200&name=leduc-holdem' ``` We list the uploaded agent with ``` diff --git a/requirements.txt b/requirements.txt index 2faf57e..820562b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,10 @@ rlcard Django tqdm -tensorflow==1.14 django-cors-headers +torch==1.6.0 +flask==1.1 +flask-cors +onnx +onnxruntime + diff --git a/server/tournament/rlcard_wrap/doudizhu_random_model.py b/server/tournament/rlcard_wrap/doudizhu_random_model.py index 70ab55c..6c20f27 100644 --- a/server/tournament/rlcard_wrap/doudizhu_random_model.py +++ b/server/tournament/rlcard_wrap/doudizhu_random_model.py @@ -21,8 +21,8 @@ class DoudizhuRandomModel(Model): ''' Load random model ''' env = rlcard.make('doudizhu') - self.agent = RandomAgent(action_num=env.action_num) - self.player_num = env.player_num + self.agent = RandomAgent(num_actions=env.num_actions) + self.num_players = env.num_players @property def agents(self): @@ -34,7 +34,7 @@ class DoudizhuRandomModel(Model): Note: Each agent should be just like RL agent with step and eval_step functioning well. 
''' - return [self.agent for _ in range(self.player_num)] + return [self.agent for _ in range(self.num_players)] @property def use_raw(self): diff --git a/server/tournament/rlcard_wrap/leduc_holdem_random_model.py b/server/tournament/rlcard_wrap/leduc_holdem_random_model.py index a478222..320088a 100644 --- a/server/tournament/rlcard_wrap/leduc_holdem_random_model.py +++ b/server/tournament/rlcard_wrap/leduc_holdem_random_model.py @@ -21,8 +21,8 @@ class LeducHoldemRandomModel(Model): ''' Load random model ''' env = rlcard.make('leduc-holdem') - self.agent = RandomAgent(action_num=env.action_num) - self.player_num = env.player_num + self.agent = RandomAgent(num_actions=env.num_actions) + self.num_players = env.num_players @property def agents(self): @@ -34,7 +34,7 @@ class LeducHoldemRandomModel(Model): Note: Each agent should be just like RL agent with step and eval_step functioning well. ''' - return [self.agent for _ in range(self.player_num)] + return [self.agent for _ in range(self.num_players)] @property def use_raw(self): diff --git a/server/tournament/tournament.py b/server/tournament/tournament.py index dee6fe1..83c561b 100644 --- a/server/tournament/tournament.py +++ b/server/tournament/tournament.py @@ -7,34 +7,34 @@ from .rlcard_wrap import rlcard class Tournament(object): - def __init__(self, game, model_ids, evaluate_num=100): + def __init__(self, game, model_ids, num_eval_games=100): """ Default for two player games For Dou Dizhu, the two peasants use the same model """ self.game = game self.model_ids = model_ids - self.evaluate_num = evaluate_num + self.num_eval_games = num_eval_games # Load the models self.models = [rlcard.models.load(model_id) for model_id in model_ids] def launch(self): """ Currently for two-player game only """ - model_num = len(self.model_ids) + num_models = len(self.model_ids) games_data = [] payoffs_data = [] - for i in range(model_num): - for j in range(model_num): + for i in range(num_models): + for j in range(num_models): 
if j == i: continue print(self.game, '-', self.model_ids[i], 'VS', self.model_ids[j]) if self.game == 'doudizhu': agents = [self.models[i].agents[0], self.models[j].agents[1], self.models[j].agents[2]] names = [self.model_ids[i], self.model_ids[j], self.model_ids[j]] - data, payoffs, wins = doudizhu_tournament(self.game, agents, names, self.evaluate_num) + data, payoffs, wins = doudizhu_tournament(self.game, agents, names, self.num_eval_games) elif self.game == 'leduc-holdem': agents = [self.models[i].agents[0], self.models[j].agents[1]] - data, payoffs, wins = leduc_holdem_tournament(self.game, agents, self.evaluate_num) + data, payoffs, wins = leduc_holdem_tournament(self.game, agents, self.num_eval_games) mean_payoff = np.mean(payoffs) print('Average payoff:', mean_payoff) print() @@ -59,16 +59,16 @@ class Tournament(object): payoffs_data.append(payoff_data) return games_data, payoffs_data -def doudizhu_tournament(game, agents, names, num): +def doudizhu_tournament(game, agents, names, num_eval_games): env = rlcard.make(game, config={'allow_raw_data': True}) env.set_agents(agents) payoffs = [] json_data = [] wins = [] - for _ in tqdm(range(num)): + for _ in tqdm(range(num_eval_games)): data = {} roles = ['landlord', 'peasant', 'peasant'] - data['playerInfo'] = [{'id': i, 'index': i, 'role': roles[i], 'agentInfo': {'name': names[i]}} for i in range(env.player_num)] + data['playerInfo'] = [{'id': i, 'index': i, 'role': roles[i], 'agentInfo': {'name': names[i]}} for i in range(env.num_players)] state, player_id = env.reset() perfect = env.get_perfect_information() data['initHands'] = perfect['hand_cards_with_suit'] @@ -77,7 +77,7 @@ def doudizhu_tournament(game, agents, names, num): current_hand_cards[i] = current_hand_cards[i].split() data['moveHistory'] = [] while not env.is_over(): - action, probs = env.agents[player_id].eval_step(state) + action, info = env.agents[player_id].eval_step(state) history = {} history['playerIdx'] = player_id if 
env.agents[player_id].use_raw: @@ -85,6 +85,7 @@ def doudizhu_tournament(game, agents, names, num): else: _action = env._decode_action(action) history['move'] = _calculate_doudizhu_move(_action, player_id, current_hand_cards) + history['info'] = info data['moveHistory'].append(history) state, player_id = env.step(action, env.agents[player_id].use_raw) @@ -115,15 +116,15 @@ def _calculate_doudizhu_move(action, player_id, current_hand_cards): break return ' '.join(cards_with_suit) -def leduc_holdem_tournament(game, agents, num): +def leduc_holdem_tournament(game, agents, num_eval_games): env = rlcard.make(game, config={'allow_raw_data': True}) env.set_agents(agents) payoffs = [] json_data = [] wins = [] - for _ in tqdm(range(num)): + for _ in tqdm(range(num_eval_games)): data = {} - data['playerInfo'] = [{'id': i, 'index': i} for i in range(env.player_num)] + data['playerInfo'] = [{'id': i, 'index': i} for i in range(env.num_players)] state, player_id = env.reset() perfect = env.get_perfect_information() data['initHands'] = perfect['hand_cards'] @@ -131,7 +132,7 @@ def leduc_holdem_tournament(game, agents, num): round_history = [] round_id = 0 while not env.is_over(): - action, probs = env.agents[player_id].eval_step(state) + action, info = env.agents[player_id].eval_step(state) history = {} history['playerIdx'] = player_id if env.agents[player_id].use_raw: @@ -139,16 +140,7 @@ def leduc_holdem_tournament(game, agents, num): else: history['move'] = env._decode_action(action) - probabilities = [] - for i, a in enumerate(env.actions): - if len(probs) == 0: - p = -2 - elif a in state['raw_legal_actions']: - p = probs[i] - else: - p = -1 - probabilities.append({'move':a, 'probability': p}) - history['probabilities'] = probabilities + history['info'] = info round_history.append(history) state, player_id = env.step(action, env.agents[player_id].use_raw) perfect = env.get_perfect_information() diff --git a/server/tournament/views.py b/server/tournament/views.py index 
90f20ff..abed524 100644 --- a/server/tournament/views.py +++ b/server/tournament/views.py @@ -102,13 +102,13 @@ def query_agent_payoff(request): def launch(request): if request.method == 'GET': try: - eval_num = int(request.GET['eval_num']) + num_eval_games = int(request.GET['num_eval_games']) game = request.GET['name'] except: return HttpResponse(json.dumps({'value': -1, 'info': 'parameters error'})) MODEL_IDS_ALL = _get_model_ids_all() - games_data, payoffs_data = Tournament(game, MODEL_IDS_ALL[game], eval_num).launch() + games_data, payoffs_data = Tournament(game, MODEL_IDS_ALL[game], num_eval_games).launch() Game.objects.filter(name=game).delete() Payoff.objects.filter(name=game).delete() for game_data in games_data: