Update server

This commit is contained in:
Daochen Zha 2021-05-27 10:18:30 -05:00
parent 3d9f7cebd9
commit 21e3ead424
6 changed files with 52 additions and 55 deletions

View File

@ -17,31 +17,31 @@ The definitions of the fields are as follows:
* `index`: Integer. The index of the game of the same environment and same agent. It is in the range \[0, eval_num-1\] * `index`: Integer. The index of the game of the same environment and same agent. It is in the range \[0, eval_num-1\]
| type | Resource | Parameters | Description | | type | Resource | Parameters | Description |
|------|-------------------------------------|-------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------| |------|-------------------------------------|-------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------|
| GET | tournament/launch | `eval_num`, `name` | Launch a tournament on the game. Each pair of models will play `eval_num` times. Results will be saved in the database. | | GET | tournament/launch | `num_eval_games`, `name` | Launch a tournament on the game. Each pair of models will play `num_eval_games` times. Results will be saved in the database. |
| GET | tournament/query\_game | `name`, `index`, `agent0`, `agent1`, `win`, `payoff`, `elements_every_page`, `page_index` | Query the games with the given parameters | | GET | tournament/query\_game | `name`, `index`, `agent0`, `agent1`, `win`, `payoff`, `elements_every_page`, `page_index` | Query the games with the given parameters |
| GET | tournament/query\_payoff | `name`, `agent0`, `agent1`, `payoff` | Query the payoffs with the given parameters | | GET | tournament/query\_payoff | `name`, `agent0`, `agent1`, `payoff` | Query the payoffs with the given parameters |
| GET | tournament/query\_agent\_payoff | `name`, `elements_every_page`, `page_index`, | Query the payoffs of all the agents | | GET | tournament/query\_agent\_payoff | `name`, `elements_every_page`, `page_index`, | Query the payoffs of all the agents |
| GET | tournament/replay | `name`, `agent0`, `agent1`, `index` | Return the replay data | | GET | tournament/replay | `name`, `agent0`, `agent1`, `index` | Return the replay data |
| POST | tournament/upload\_agent | `model`(Python file), `name`, `game`, | Upload a model file. `name` is model ID | | POST | tournament/upload\_agent | `model`(Python file), `name`, `game`, | Upload a model file. `name` is model ID |
| GET | tournament/delete\_agent | `name` | Delete the agent of the given name | | GET | tournament/delete\_agent | `name` | Delete the agent of the given name |
| GET | tournament/list\_uploaded\_agents | `game` | list all the uploaded agents | | GET | tournament/list\_uploaded\_agents | `game` | list all the uploaded agents |
| GET | tournament/list\_baseline\_agents | `game` | list all the baseline agents | | GET | tournament/list\_baseline\_agents | `game` | list all the baseline agents |
| GET | download\_examples | `name` | download the example agents | | GET | download\_examples | `name` | download the example agents |
### Example API ### Example API
| API | Description | | API | Description |
|------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------| |------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------|
| http://127.0.0.1:8000/tournament/launch?eval\_num=200&name=leduc-holdem | Evaluate on Leduc Holdem with 200 games for each pair of models | | http://127.0.0.1:8000/tournament/launch?num_eval_games=200&name=leduc-holdem | Evaluate on Leduc Holdem with 200 games for each pair of models |
| http://127.0.0.1:8000/tournament/replay?name=leduc-holdem&agent0=leduc-holdem-rule-v1&agent1=leduc-holdem-cfr&index=3 | Obtain the replay data between rule model and CFR model. Obtain the data of the 3rd game | | http://127.0.0.1:8000/tournament/replay?name=leduc-holdem&agent0=leduc-holdem-rule-v1&agent1=leduc-holdem-cfr&index=3 | Obtain the replay data between rule model and CFR model. Obtain the data of the 3rd game |
| http://127.0.0.1:8000/tournament/query\_game?elements\_every\_page=10&page\_index=0 | Get all the game data | | http://127.0.0.1:8000/tournament/query_game?elements_every_page=10&page_index=0 | Get all the game data |
| http://127.0.0.1:8000/tournament/query\_game?name=leduc-holdem&elements\_every\_page=10&page\_index=0 | Get all the game data of Leduc Holdem | | http://127.0.0.1:8000/tournament/query_game?name=leduc-holdem&elements_every_page=10&page_index=0 | Get all the game data of Leduc Holdem |
| http://127.0.0.1:8000/tournament/query\_payoff | Get all the payoffs | | http://127.0.0.1:8000/tournament/query_payoff | Get all the payoffs |
| http://127.0.0.1:8000/tournament/query\_payoff?agent0=leduc-holdem-cfr&agent1=leduc-holdem-rule-v1 | Get all the payoffs between rule and CFR models | | http://127.0.0.1:8000/tournament/query_payoff?agent0=leduc-holdem-cfr&agent1=leduc-holdem-rule-v1 | Get all the payoffs between rule and CFR models |
| http://127.0.0.1:8000/tournament/query\_agent\_payoff?name=leduc-holdem&elements\_every\_page=1&page\_index=1 | Get the payoffs of all the agents of leduc-holdem | | http://127.0.0.1:8000/tournament/query_agent_payoff?name=leduc-holdem&elements_every_page=1&page_index=1 | Get the payoffs of all the agents of leduc-holdem |
| http://127.0.0.1:8000/tournament/list\_uploaded\_agents?game=leduc-holdem | List the uploaded agents of leduc-holdem | | http://127.0.0.1:8000/tournament/list_uploaded_agents?game=leduc-holdem | List the uploaded agents of leduc-holdem |
| http://127.0.0.1:8000/tournament/list\_baseline\_agents?game=leduc-holdem | List the baseline agents of leduc-holdem | | http://127.0.0.1:8000/tournament/list_baseline_agents?game=leduc-holdem | List the baseline agents of leduc-holdem |
| http://127.0.0.1:8000/tournament/download\_examples?name=example\_luduc\_nfsp\_model | Download the NFSP example model for Leduc Hold'em | | http://127.0.0.1:8000/tournament/download_examples?name=example_luduc_nfsp_model | Download the NFSP example model for Leduc Hold'em |
## Registered Models ## Registered Models
Some models have been pre-registered as baselines Some models have been pre-registered as baselines
@ -64,7 +64,7 @@ curl -F 'model=@example_luduc_nfsp_model.zip' -F "name=leduc-nfsp" -F "game=ledu
``` ```
Launch the tournament with: Launch the tournament with:
``` ```
curl 'http://127.0.0.1:8000/tournament/launch?eval_num=200&name=leduc-holdem' curl 'http://127.0.0.1:8000/tournament/launch?num_eval_games=200&name=leduc-holdem'
``` ```
We list the uploaded agent with We list the uploaded agent with
``` ```

View File

@ -1,5 +1,10 @@
rlcard rlcard
Django Django
tqdm tqdm
tensorflow==1.14
django-cors-headers django-cors-headers
torch==1.6.0
flask==1.1
flask-cors
onnx
onnxruntime

View File

@ -21,8 +21,8 @@ class DoudizhuRandomModel(Model):
''' Load random model ''' Load random model
''' '''
env = rlcard.make('doudizhu') env = rlcard.make('doudizhu')
self.agent = RandomAgent(action_num=env.action_num) self.agent = RandomAgent(num_actions=env.num_actions)
self.player_num = env.player_num self.num_players = env.num_players
@property @property
def agents(self): def agents(self):
@ -34,7 +34,7 @@ class DoudizhuRandomModel(Model):
Note: Each agent should be just like RL agent with step and eval_step Note: Each agent should be just like RL agent with step and eval_step
functioning well. functioning well.
''' '''
return [self.agent for _ in range(self.player_num)] return [self.agent for _ in range(self.num_players)]
@property @property
def use_raw(self): def use_raw(self):

View File

@ -21,8 +21,8 @@ class LeducHoldemRandomModel(Model):
''' Load random model ''' Load random model
''' '''
env = rlcard.make('leduc-holdem') env = rlcard.make('leduc-holdem')
self.agent = RandomAgent(action_num=env.action_num) self.agent = RandomAgent(num_actions=env.num_actions)
self.player_num = env.player_num self.num_players = env.num_players
@property @property
def agents(self): def agents(self):
@ -34,7 +34,7 @@ class LeducHoldemRandomModel(Model):
Note: Each agent should be just like RL agent with step and eval_step Note: Each agent should be just like RL agent with step and eval_step
functioning well. functioning well.
''' '''
return [self.agent for _ in range(self.player_num)] return [self.agent for _ in range(self.num_players)]
@property @property
def use_raw(self): def use_raw(self):

View File

@ -7,34 +7,34 @@ from .rlcard_wrap import rlcard
class Tournament(object): class Tournament(object):
def __init__(self, game, model_ids, evaluate_num=100): def __init__(self, game, model_ids, num_eval_games=100):
""" Default for two player games """ Default for two player games
For Dou Dizhu, the two peasants use the same model For Dou Dizhu, the two peasants use the same model
""" """
self.game = game self.game = game
self.model_ids = model_ids self.model_ids = model_ids
self.evaluate_num = evaluate_num self.num_eval_games = num_eval_games
# Load the models # Load the models
self.models = [rlcard.models.load(model_id) for model_id in model_ids] self.models = [rlcard.models.load(model_id) for model_id in model_ids]
def launch(self): def launch(self):
""" Currently for two-player game only """ Currently for two-player game only
""" """
model_num = len(self.model_ids) num_models = len(self.model_ids)
games_data = [] games_data = []
payoffs_data = [] payoffs_data = []
for i in range(model_num): for i in range(num_models):
for j in range(model_num): for j in range(num_models):
if j == i: if j == i:
continue continue
print(self.game, '-', self.model_ids[i], 'VS', self.model_ids[j]) print(self.game, '-', self.model_ids[i], 'VS', self.model_ids[j])
if self.game == 'doudizhu': if self.game == 'doudizhu':
agents = [self.models[i].agents[0], self.models[j].agents[1], self.models[j].agents[2]] agents = [self.models[i].agents[0], self.models[j].agents[1], self.models[j].agents[2]]
names = [self.model_ids[i], self.model_ids[j], self.model_ids[j]] names = [self.model_ids[i], self.model_ids[j], self.model_ids[j]]
data, payoffs, wins = doudizhu_tournament(self.game, agents, names, self.evaluate_num) data, payoffs, wins = doudizhu_tournament(self.game, agents, names, self.num_eval_games)
elif self.game == 'leduc-holdem': elif self.game == 'leduc-holdem':
agents = [self.models[i].agents[0], self.models[j].agents[1]] agents = [self.models[i].agents[0], self.models[j].agents[1]]
data, payoffs, wins = leduc_holdem_tournament(self.game, agents, self.evaluate_num) data, payoffs, wins = leduc_holdem_tournament(self.game, agents, self.num_eval_games)
mean_payoff = np.mean(payoffs) mean_payoff = np.mean(payoffs)
print('Average payoff:', mean_payoff) print('Average payoff:', mean_payoff)
print() print()
@ -59,16 +59,16 @@ class Tournament(object):
payoffs_data.append(payoff_data) payoffs_data.append(payoff_data)
return games_data, payoffs_data return games_data, payoffs_data
def doudizhu_tournament(game, agents, names, num): def doudizhu_tournament(game, agents, names, num_eval_games):
env = rlcard.make(game, config={'allow_raw_data': True}) env = rlcard.make(game, config={'allow_raw_data': True})
env.set_agents(agents) env.set_agents(agents)
payoffs = [] payoffs = []
json_data = [] json_data = []
wins = [] wins = []
for _ in tqdm(range(num)): for _ in tqdm(range(num_eval_games)):
data = {} data = {}
roles = ['landlord', 'peasant', 'peasant'] roles = ['landlord', 'peasant', 'peasant']
data['playerInfo'] = [{'id': i, 'index': i, 'role': roles[i], 'agentInfo': {'name': names[i]}} for i in range(env.player_num)] data['playerInfo'] = [{'id': i, 'index': i, 'role': roles[i], 'agentInfo': {'name': names[i]}} for i in range(env.num_players)]
state, player_id = env.reset() state, player_id = env.reset()
perfect = env.get_perfect_information() perfect = env.get_perfect_information()
data['initHands'] = perfect['hand_cards_with_suit'] data['initHands'] = perfect['hand_cards_with_suit']
@ -77,7 +77,7 @@ def doudizhu_tournament(game, agents, names, num):
current_hand_cards[i] = current_hand_cards[i].split() current_hand_cards[i] = current_hand_cards[i].split()
data['moveHistory'] = [] data['moveHistory'] = []
while not env.is_over(): while not env.is_over():
action, probs = env.agents[player_id].eval_step(state) action, info = env.agents[player_id].eval_step(state)
history = {} history = {}
history['playerIdx'] = player_id history['playerIdx'] = player_id
if env.agents[player_id].use_raw: if env.agents[player_id].use_raw:
@ -85,6 +85,7 @@ def doudizhu_tournament(game, agents, names, num):
else: else:
_action = env._decode_action(action) _action = env._decode_action(action)
history['move'] = _calculate_doudizhu_move(_action, player_id, current_hand_cards) history['move'] = _calculate_doudizhu_move(_action, player_id, current_hand_cards)
history['info'] = info
data['moveHistory'].append(history) data['moveHistory'].append(history)
state, player_id = env.step(action, env.agents[player_id].use_raw) state, player_id = env.step(action, env.agents[player_id].use_raw)
@ -115,15 +116,15 @@ def _calculate_doudizhu_move(action, player_id, current_hand_cards):
break break
return ' '.join(cards_with_suit) return ' '.join(cards_with_suit)
def leduc_holdem_tournament(game, agents, num): def leduc_holdem_tournament(game, agents, num_eval_games):
env = rlcard.make(game, config={'allow_raw_data': True}) env = rlcard.make(game, config={'allow_raw_data': True})
env.set_agents(agents) env.set_agents(agents)
payoffs = [] payoffs = []
json_data = [] json_data = []
wins = [] wins = []
for _ in tqdm(range(num)): for _ in tqdm(range(num_eval_games)):
data = {} data = {}
data['playerInfo'] = [{'id': i, 'index': i} for i in range(env.player_num)] data['playerInfo'] = [{'id': i, 'index': i} for i in range(env.num_players)]
state, player_id = env.reset() state, player_id = env.reset()
perfect = env.get_perfect_information() perfect = env.get_perfect_information()
data['initHands'] = perfect['hand_cards'] data['initHands'] = perfect['hand_cards']
@ -131,7 +132,7 @@ def leduc_holdem_tournament(game, agents, num):
round_history = [] round_history = []
round_id = 0 round_id = 0
while not env.is_over(): while not env.is_over():
action, probs = env.agents[player_id].eval_step(state) action, info = env.agents[player_id].eval_step(state)
history = {} history = {}
history['playerIdx'] = player_id history['playerIdx'] = player_id
if env.agents[player_id].use_raw: if env.agents[player_id].use_raw:
@ -139,16 +140,7 @@ def leduc_holdem_tournament(game, agents, num):
else: else:
history['move'] = env._decode_action(action) history['move'] = env._decode_action(action)
probabilities = [] history['info'] = info
for i, a in enumerate(env.actions):
if len(probs) == 0:
p = -2
elif a in state['raw_legal_actions']:
p = probs[i]
else:
p = -1
probabilities.append({'move':a, 'probability': p})
history['probabilities'] = probabilities
round_history.append(history) round_history.append(history)
state, player_id = env.step(action, env.agents[player_id].use_raw) state, player_id = env.step(action, env.agents[player_id].use_raw)
perfect = env.get_perfect_information() perfect = env.get_perfect_information()

View File

@ -102,13 +102,13 @@ def query_agent_payoff(request):
def launch(request): def launch(request):
if request.method == 'GET': if request.method == 'GET':
try: try:
eval_num = int(request.GET['eval_num']) num_eval_games = int(request.GET['num_eval_games'])
game = request.GET['name'] game = request.GET['name']
except: except:
return HttpResponse(json.dumps({'value': -1, 'info': 'parameters error'})) return HttpResponse(json.dumps({'value': -1, 'info': 'parameters error'}))
MODEL_IDS_ALL = _get_model_ids_all() MODEL_IDS_ALL = _get_model_ids_all()
games_data, payoffs_data = Tournament(game, MODEL_IDS_ALL[game], eval_num).launch() games_data, payoffs_data = Tournament(game, MODEL_IDS_ALL[game], num_eval_games).launch()
Game.objects.filter(name=game).delete() Game.objects.filter(name=game).delete()
Payoff.objects.filter(name=game).delete() Payoff.objects.filter(name=game).delete()
for game_data in games_data: for game_data in games_data: