Update server

This commit is contained in:
Daochen Zha 2021-05-27 10:18:30 -05:00
parent 3d9f7cebd9
commit 21e3ead424
6 changed files with 52 additions and 55 deletions

View File

@ -17,8 +17,8 @@ The definitions of the fields are as follows:
* `index`: Integer. The index of the game of the same environment and same agent. It is in the range \[0, num_eval_games-1\]
| type | Resource | Parameters | Description |
|------|-------------------------------------|-------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------|
| GET | tournament/launch | `eval_num`, `name` | Launch tournment on the game. Each pair of models will play `eval_num` times. Results will be saved in database. |
|------|-------------------------------------|-------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------|
| GET | tournament/launch | `num_eval_games`, `name` | Launch tournament on the game. Each pair of models will play `num_eval_games` times. Results will be saved in the database. |
| GET | tournament/query\_game | `name`, `index`, `agent0`, `agent1`, `win`, `payoff`, `elements_every_page`, `page_index` | Query the games with the given parameters |
| GET | tournament/query\_payoff | `name`, `agent0`, `agent1`, `payoff` | Query the payoffs with the given parameters |
| GET | tournament/query\_agent\_payoff | `name`, `elements_every_page`, `page_index`, | Query the payoffs of all the agents |
@ -32,16 +32,16 @@ The definitions of the fields are as follows:
### Example API
| API | Description |
|------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------|
| http://127.0.0.1:8000/tournament/launch?eval\_num=200&name=leduc-holdem | Evaluate on Leduc Holdem with 200 games for each pair of models |
| http://127.0.0.1:8000/tournament/launch?num_eval_games=200&name=leduc-holdem | Evaluate on Leduc Holdem with 200 games for each pair of models |
| http://127.0.0.1:8000/tournament/replay?name=leduc-holdem&agent0=leduc-holdem-rule-v1&agent1=leduc-holdem-cfr&index=3 | Obtain the replay data between rule model and CFR model. Obtain the data of the 3rd game |
| http://127.0.0.1:8000/tournament/query\_game&elements\_every\_page=10&page\_index=0 | Get all the game data |
| http://127.0.0.1:8000/tournament/query\_game?name=leduc-holdem&elements\_every\_page=10&page\_index=0 | Get all the game data of Leduc Holdem |
| http://127.0.0.1:8000/tournament/query\_payoff | Get all the payoffs |
| http://127.0.0.1:8000/tournament/query\_payoff?agent0=leduc-holdem-cfr&agent1=leduc-holdem-rule-v1 | Get all the payoffs between rule and CFR models |
| http://127.0.0.1:8000/tournament/query\_agent\_payoff?name=leduc-holdem&elements\_every\_page=1&page\_index=1 | Get the payoffs of all the agents of leduc-holdem |
| http://127.0.0.1:8000/tournament/list\_uploaded\_agents?game=leduc-holdem | List the uploaded agents of leduc-holdem |
| http://127.0.0.1:8000/tournament/list\_baseline\_agents?game=leduc-holdem | List the baseline agents of leduc-holdem |
| http://127.0.0.1:8000/tournament/download\_examples?name=example\_luduc\_nfsp\_model | Download the NFSP example model for Leduc Hold'em |
| http://127.0.0.1:8000/tournament/query_game?elements_every_page=10&page_index=0 | Get all the game data |
| http://127.0.0.1:8000/tournament/query_game?name=leduc-holdem&elements_every_page=10&page_index=0 | Get all the game data of Leduc Holdem |
| http://127.0.0.1:8000/tournament/query_payoff | Get all the payoffs |
| http://127.0.0.1:8000/tournament/query_payoff?agent0=leduc-holdem-cfr&agent1=leduc-holdem-rule-v1 | Get all the payoffs between rule and CFR models |
| http://127.0.0.1:8000/tournament/query_agent_payoff?name=leduc-holdem&elements_every_page=1&page_index=1 | Get the payoffs of all the agents of leduc-holdem |
| http://127.0.0.1:8000/tournament/list_uploaded_agents?game=leduc-holdem | List the uploaded agents of leduc-holdem |
| http://127.0.0.1:8000/tournament/list_baseline_agents?game=leduc-holdem | List the baseline agents of leduc-holdem |
| http://127.0.0.1:8000/tournament/download_examples?name=example_luduc_nfsp_model | Download the NFSP example model for Leduc Hold'em |
## Registered Models
Some models have been pre-registered as baselines
@ -64,7 +64,7 @@ curl -F 'model=@example_luduc_nfsp_model.zip' -F "name=leduc-nfsp" -F "game=ledu
```
Launch the tournament with:
```
curl 'http://127.0.0.1:8000/tournament/launch?eval_num=200&name=leduc-holdem'
curl 'http://127.0.0.1:8000/tournament/launch?num_eval_games=200&name=leduc-holdem'
```
We list the uploaded agent with
```

View File

@ -1,5 +1,10 @@
rlcard
Django
tqdm
tensorflow==1.14
django-cors-headers
torch==1.6.0
flask==1.1
flask-cors
onnx
onnxruntime

View File

@ -21,8 +21,8 @@ class DoudizhuRandomModel(Model):
''' Load random model
'''
env = rlcard.make('doudizhu')
self.agent = RandomAgent(action_num=env.action_num)
self.player_num = env.player_num
self.agent = RandomAgent(num_actions=env.num_actions)
self.num_players = env.num_players
@property
def agents(self):
@ -34,7 +34,7 @@ class DoudizhuRandomModel(Model):
Note: Each agent should be just like RL agent with step and eval_step
functioning well.
'''
return [self.agent for _ in range(self.player_num)]
return [self.agent for _ in range(self.num_players)]
@property
def use_raw(self):

View File

@ -21,8 +21,8 @@ class LeducHoldemRandomModel(Model):
''' Load random model
'''
env = rlcard.make('leduc-holdem')
self.agent = RandomAgent(action_num=env.action_num)
self.player_num = env.player_num
self.agent = RandomAgent(num_actions=env.num_actions)
self.num_players = env.num_players
@property
def agents(self):
@ -34,7 +34,7 @@ class LeducHoldemRandomModel(Model):
Note: Each agent should be just like RL agent with step and eval_step
functioning well.
'''
return [self.agent for _ in range(self.player_num)]
return [self.agent for _ in range(self.num_players)]
@property
def use_raw(self):

View File

@ -7,34 +7,34 @@ from .rlcard_wrap import rlcard
class Tournament(object):
def __init__(self, game, model_ids, evaluate_num=100):
def __init__(self, game, model_ids, num_eval_games=100):
""" Default for two player games
For Dou Dizhu, the two peasants use the same model
"""
self.game = game
self.model_ids = model_ids
self.evaluate_num = evaluate_num
self.num_eval_games = num_eval_games
# Load the models
self.models = [rlcard.models.load(model_id) for model_id in model_ids]
def launch(self):
""" Currently for two-player game only
"""
model_num = len(self.model_ids)
num_models = len(self.model_ids)
games_data = []
payoffs_data = []
for i in range(model_num):
for j in range(model_num):
for i in range(num_models):
for j in range(num_models):
if j == i:
continue
print(self.game, '-', self.model_ids[i], 'VS', self.model_ids[j])
if self.game == 'doudizhu':
agents = [self.models[i].agents[0], self.models[j].agents[1], self.models[j].agents[2]]
names = [self.model_ids[i], self.model_ids[j], self.model_ids[j]]
data, payoffs, wins = doudizhu_tournament(self.game, agents, names, self.evaluate_num)
data, payoffs, wins = doudizhu_tournament(self.game, agents, names, self.num_eval_games)
elif self.game == 'leduc-holdem':
agents = [self.models[i].agents[0], self.models[j].agents[1]]
data, payoffs, wins = leduc_holdem_tournament(self.game, agents, self.evaluate_num)
data, payoffs, wins = leduc_holdem_tournament(self.game, agents, self.num_eval_games)
mean_payoff = np.mean(payoffs)
print('Average payoff:', mean_payoff)
print()
@ -59,16 +59,16 @@ class Tournament(object):
payoffs_data.append(payoff_data)
return games_data, payoffs_data
def doudizhu_tournament(game, agents, names, num):
def doudizhu_tournament(game, agents, names, num_eval_games):
env = rlcard.make(game, config={'allow_raw_data': True})
env.set_agents(agents)
payoffs = []
json_data = []
wins = []
for _ in tqdm(range(num)):
for _ in tqdm(range(num_eval_games)):
data = {}
roles = ['landlord', 'peasant', 'peasant']
data['playerInfo'] = [{'id': i, 'index': i, 'role': roles[i], 'agentInfo': {'name': names[i]}} for i in range(env.player_num)]
data['playerInfo'] = [{'id': i, 'index': i, 'role': roles[i], 'agentInfo': {'name': names[i]}} for i in range(env.num_players)]
state, player_id = env.reset()
perfect = env.get_perfect_information()
data['initHands'] = perfect['hand_cards_with_suit']
@ -77,7 +77,7 @@ def doudizhu_tournament(game, agents, names, num):
current_hand_cards[i] = current_hand_cards[i].split()
data['moveHistory'] = []
while not env.is_over():
action, probs = env.agents[player_id].eval_step(state)
action, info = env.agents[player_id].eval_step(state)
history = {}
history['playerIdx'] = player_id
if env.agents[player_id].use_raw:
@ -85,6 +85,7 @@ def doudizhu_tournament(game, agents, names, num):
else:
_action = env._decode_action(action)
history['move'] = _calculate_doudizhu_move(_action, player_id, current_hand_cards)
history['info'] = info
data['moveHistory'].append(history)
state, player_id = env.step(action, env.agents[player_id].use_raw)
@ -115,15 +116,15 @@ def _calculate_doudizhu_move(action, player_id, current_hand_cards):
break
return ' '.join(cards_with_suit)
def leduc_holdem_tournament(game, agents, num):
def leduc_holdem_tournament(game, agents, num_eval_games):
env = rlcard.make(game, config={'allow_raw_data': True})
env.set_agents(agents)
payoffs = []
json_data = []
wins = []
for _ in tqdm(range(num)):
for _ in tqdm(range(num_eval_games)):
data = {}
data['playerInfo'] = [{'id': i, 'index': i} for i in range(env.player_num)]
data['playerInfo'] = [{'id': i, 'index': i} for i in range(env.num_players)]
state, player_id = env.reset()
perfect = env.get_perfect_information()
data['initHands'] = perfect['hand_cards']
@ -131,7 +132,7 @@ def leduc_holdem_tournament(game, agents, num):
round_history = []
round_id = 0
while not env.is_over():
action, probs = env.agents[player_id].eval_step(state)
action, info = env.agents[player_id].eval_step(state)
history = {}
history['playerIdx'] = player_id
if env.agents[player_id].use_raw:
@ -139,16 +140,7 @@ def leduc_holdem_tournament(game, agents, num):
else:
history['move'] = env._decode_action(action)
probabilities = []
for i, a in enumerate(env.actions):
if len(probs) == 0:
p = -2
elif a in state['raw_legal_actions']:
p = probs[i]
else:
p = -1
probabilities.append({'move':a, 'probability': p})
history['probabilities'] = probabilities
history['info'] = info
round_history.append(history)
state, player_id = env.step(action, env.agents[player_id].use_raw)
perfect = env.get_perfect_information()

View File

@ -102,13 +102,13 @@ def query_agent_payoff(request):
def launch(request):
if request.method == 'GET':
try:
eval_num = int(request.GET['eval_num'])
num_eval_games = int(request.GET['num_eval_games'])
game = request.GET['name']
except:
return HttpResponse(json.dumps({'value': -1, 'info': 'parameters error'}))
MODEL_IDS_ALL = _get_model_ids_all()
games_data, payoffs_data = Tournament(game, MODEL_IDS_ALL[game], eval_num).launch()
games_data, payoffs_data = Tournament(game, MODEL_IDS_ALL[game], num_eval_games).launch()
Game.objects.filter(name=game).delete()
Payoff.objects.filter(name=game).delete()
for game_data in games_data: