Update server
This commit is contained in:
parent
3d9f7cebd9
commit
21e3ead424
42
docs/api.md
42
docs/api.md
|
@ -17,31 +17,31 @@ The definitions of the fields are as follows:
|
||||||
* `index`: Integer. The index of the game of the same environment and same agent. It is in the range \[0, eval_num-1\]
|
* `index`: Integer. The index of the game of the same environment and same agent. It is in the range \[0, eval_num-1\]
|
||||||
|
|
||||||
| type | Resource | Parameters | Description |
|
| type | Resource | Parameters | Description |
|
||||||
|------|-------------------------------------|-------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------|
|
|------|-------------------------------------|-------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------|
|
||||||
| GET | tournament/launch | `eval_num`, `name` | Launch tournament on the game. Each pair of models will play `eval_num` times. Results will be saved in database. |
|
| GET | tournament/launch | `num_eval_games`, `name` | Launch tournament on the game. Each pair of models will play `num_eval_games` times. Results will be saved in database. |
|
||||||
| GET | tournament/query\_game | `name`, `index`, `agent0`, `agent1`, `win`, `payoff`, `elements_every_page`, `page_index` | Query the games with the given parameters |
|
| GET | tournament/query\_game | `name`, `index`, `agent0`, `agent1`, `win`, `payoff`, `elements_every_page`, `page_index` | Query the games with the given parameters |
|
||||||
| GET | tournament/query\_payoff | `name`, `agent0`, `agent1`, `payoff` | Query the payoffs with the given parameters |
|
| GET | tournament/query\_payoff | `name`, `agent0`, `agent1`, `payoff` | Query the payoffs with the given parameters |
|
||||||
| GET | tournament/query\_agent\_payoff | `name`, `elements_every_page`, `page_index`, | Query the payoffs of all the agents |
|
| GET | tournament/query\_agent\_payoff | `name`, `elements_every_page`, `page_index`, | Query the payoffs of all the agents |
|
||||||
| GET | tournament/replay | `name`, `agent0`, `agent1`, `index` | Return the replay data |
|
| GET | tournament/replay | `name`, `agent0`, `agent1`, `index` | Return the replay data |
|
||||||
| POST | tournament/upload\_agent | `model`(Python file), `name`, `game`, | Upload a model file. `name` is model ID |
|
| POST | tournament/upload\_agent | `model`(Python file), `name`, `game`, | Upload a model file. `name` is model ID |
|
||||||
| GET | tournament/delete\_agent | `name` | Delete the agent of the given name |
|
| GET | tournament/delete\_agent | `name` | Delete the agent of the given name |
|
||||||
| GET | tournament/list\_uploaded\_agents | `game` | list all the uploaded agents |
|
| GET | tournament/list\_uploaded\_agents | `game` | list all the uploaded agents |
|
||||||
| GET | tournament/list\_baseline\_agents | `game` | list all the baseline agents |
|
| GET | tournament/list\_baseline\_agents | `game` | list all the baseline agents |
|
||||||
| GET | download\_examples | `name` | download the example agents |
|
| GET | download\_examples | `name` | download the example agents |
|
||||||
|
|
||||||
### Example API
|
### Example API
|
||||||
| API | Description |
|
| API | Description |
|
||||||
|------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------|
|
|------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------|
|
||||||
| http://127.0.0.1:8000/tournament/launch?eval\_num=200&name=leduc-holdem | Evaluate on Leduc Holdem with 200 games for each pair of models |
|
| http://127.0.0.1:8000/tournament/launch?num_eval_games=200&name=leduc-holdem | Evaluate on Leduc Holdem with 200 games for each pair of models |
|
||||||
| http://127.0.0.1:8000/tournament/replay?name=leduc-holdem&agent0=leduc-holdem-rule-v1&agent1=leduc-holdem-cfr&index=3 | Obtain the replay data between rule model and CFR model. Obtain the data of the 3rd game |
|
| http://127.0.0.1:8000/tournament/replay?name=leduc-holdem&agent0=leduc-holdem-rule-v1&agent1=leduc-holdem-cfr&index=3 | Obtain the replay data between rule model and CFR model. Obtain the data of the 3rd game |
|
||||||
| http://127.0.0.1:8000/tournament/query\_game?elements\_every\_page=10&page\_index=0 | Get all the game data |
|
| http://127.0.0.1:8000/tournament/query_game?elements_every_page=10&page_index=0 | Get all the game data |
|
||||||
| http://127.0.0.1:8000/tournament/query\_game?name=leduc-holdem&elements\_every\_page=10&page\_index=0 | Get all the game data of Leduc Holdem |
|
| http://127.0.0.1:8000/tournament/query_game?name=leduc-holdem&elements_every_page=10&page_index=0 | Get all the game data of Leduc Holdem |
|
||||||
| http://127.0.0.1:8000/tournament/query\_payoff | Get all the payoffs |
|
| http://127.0.0.1:8000/tournament/query_payoff | Get all the payoffs |
|
||||||
| http://127.0.0.1:8000/tournament/query\_payoff?agent0=leduc-holdem-cfr&agent1=leduc-holdem-rule-v1 | Get all the payoffs between rule and CFR models |
|
| http://127.0.0.1:8000/tournament/query_payoff?agent0=leduc-holdem-cfr&agent1=leduc-holdem-rule-v1 | Get all the payoffs between rule and CFR models |
|
||||||
| http://127.0.0.1:8000/tournament/query\_agent\_payoff?name=leduc-holdem&elements\_every\_page=1&page\_index=1 | Get the payoffs of all the agents of leduc-holdem |
|
| http://127.0.0.1:8000/tournament/query_agent_payoff?name=leduc-holdem&elements_every_page=1&page_index=1 | Get the payoffs of all the agents of leduc-holdem |
|
||||||
| http://127.0.0.1:8000/tournament/list\_uploaded\_agents?game=leduc-holdem | List the uploaded agents of leduc-holdem |
|
| http://127.0.0.1:8000/tournament/list_uploaded_agents?game=leduc-holdem | List the uploaded agents of leduc-holdem |
|
||||||
| http://127.0.0.1:8000/tournament/list\_baseline\_agents?game=leduc-holdem | List the baseline agents of leduc-holdem |
|
| http://127.0.0.1:8000/tournament/list_baseline_agents?game=leduc-holdem | List the baseline agents of leduc-holdem |
|
||||||
| http://127.0.0.1:8000/tournament/download\_examples?name=example\_luduc\_nfsp\_model | Download the NFSP example model for Leduc Hold'em |
|
| http://127.0.0.1:8000/tournament/download_examples?name=example_luduc_nfsp_model | Download the NFSP example model for Leduc Hold'em |
|
||||||
|
|
||||||
## Registered Models
|
## Registered Models
|
||||||
Some models have been pre-registered as baselines
|
Some models have been pre-registered as baselines
|
||||||
|
@ -64,7 +64,7 @@ curl -F 'model=@example_luduc_nfsp_model.zip' -F "name=leduc-nfsp" -F "game=ledu
|
||||||
```
|
```
|
||||||
Launch the tournament with:
|
Launch the tournament with:
|
||||||
```
|
```
|
||||||
curl 'http://127.0.0.1:8000/tournament/launch?eval_num=200&name=leduc-holdem'
|
curl 'http://127.0.0.1:8000/tournament/launch?num_eval_games=200&name=leduc-holdem'
|
||||||
```
|
```
|
||||||
We list the uploaded agent with
|
We list the uploaded agent with
|
||||||
```
|
```
|
||||||
|
|
|
@ -1,5 +1,10 @@
|
||||||
rlcard
|
rlcard
|
||||||
Django
|
Django
|
||||||
tqdm
|
tqdm
|
||||||
tensorflow==1.14
|
|
||||||
django-cors-headers
|
django-cors-headers
|
||||||
|
torch==1.6.0
|
||||||
|
flask==1.1
|
||||||
|
flask-cors
|
||||||
|
onnx
|
||||||
|
onnxruntime
|
||||||
|
|
||||||
|
|
|
@ -21,8 +21,8 @@ class DoudizhuRandomModel(Model):
|
||||||
''' Load random model
|
''' Load random model
|
||||||
'''
|
'''
|
||||||
env = rlcard.make('doudizhu')
|
env = rlcard.make('doudizhu')
|
||||||
self.agent = RandomAgent(action_num=env.action_num)
|
self.agent = RandomAgent(num_actions=env.num_actions)
|
||||||
self.player_num = env.player_num
|
self.num_players = env.num_players
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def agents(self):
|
def agents(self):
|
||||||
|
@ -34,7 +34,7 @@ class DoudizhuRandomModel(Model):
|
||||||
Note: Each agent should be just like RL agent with step and eval_step
|
Note: Each agent should be just like RL agent with step and eval_step
|
||||||
functioning well.
|
functioning well.
|
||||||
'''
|
'''
|
||||||
return [self.agent for _ in range(self.player_num)]
|
return [self.agent for _ in range(self.num_players)]
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def use_raw(self):
|
def use_raw(self):
|
||||||
|
|
|
@ -21,8 +21,8 @@ class LeducHoldemRandomModel(Model):
|
||||||
''' Load random model
|
''' Load random model
|
||||||
'''
|
'''
|
||||||
env = rlcard.make('leduc-holdem')
|
env = rlcard.make('leduc-holdem')
|
||||||
self.agent = RandomAgent(action_num=env.action_num)
|
self.agent = RandomAgent(num_actions=env.num_actions)
|
||||||
self.player_num = env.player_num
|
self.num_players = env.num_players
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def agents(self):
|
def agents(self):
|
||||||
|
@ -34,7 +34,7 @@ class LeducHoldemRandomModel(Model):
|
||||||
Note: Each agent should be just like RL agent with step and eval_step
|
Note: Each agent should be just like RL agent with step and eval_step
|
||||||
functioning well.
|
functioning well.
|
||||||
'''
|
'''
|
||||||
return [self.agent for _ in range(self.player_num)]
|
return [self.agent for _ in range(self.num_players)]
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def use_raw(self):
|
def use_raw(self):
|
||||||
|
|
|
@ -7,34 +7,34 @@ from .rlcard_wrap import rlcard
|
||||||
|
|
||||||
class Tournament(object):
|
class Tournament(object):
|
||||||
|
|
||||||
def __init__(self, game, model_ids, evaluate_num=100):
|
def __init__(self, game, model_ids, num_eval_games=100):
|
||||||
""" Default for two player games
|
""" Default for two player games
|
||||||
For Dou Dizhu, the two peasants use the same model
|
For Dou Dizhu, the two peasants use the same model
|
||||||
"""
|
"""
|
||||||
self.game = game
|
self.game = game
|
||||||
self.model_ids = model_ids
|
self.model_ids = model_ids
|
||||||
self.evaluate_num = evaluate_num
|
self.num_eval_games = num_eval_games
|
||||||
# Load the models
|
# Load the models
|
||||||
self.models = [rlcard.models.load(model_id) for model_id in model_ids]
|
self.models = [rlcard.models.load(model_id) for model_id in model_ids]
|
||||||
|
|
||||||
def launch(self):
|
def launch(self):
|
||||||
""" Currently for two-player game only
|
""" Currently for two-player game only
|
||||||
"""
|
"""
|
||||||
model_num = len(self.model_ids)
|
num_models = len(self.model_ids)
|
||||||
games_data = []
|
games_data = []
|
||||||
payoffs_data = []
|
payoffs_data = []
|
||||||
for i in range(model_num):
|
for i in range(num_models):
|
||||||
for j in range(model_num):
|
for j in range(num_models):
|
||||||
if j == i:
|
if j == i:
|
||||||
continue
|
continue
|
||||||
print(self.game, '-', self.model_ids[i], 'VS', self.model_ids[j])
|
print(self.game, '-', self.model_ids[i], 'VS', self.model_ids[j])
|
||||||
if self.game == 'doudizhu':
|
if self.game == 'doudizhu':
|
||||||
agents = [self.models[i].agents[0], self.models[j].agents[1], self.models[j].agents[2]]
|
agents = [self.models[i].agents[0], self.models[j].agents[1], self.models[j].agents[2]]
|
||||||
names = [self.model_ids[i], self.model_ids[j], self.model_ids[j]]
|
names = [self.model_ids[i], self.model_ids[j], self.model_ids[j]]
|
||||||
data, payoffs, wins = doudizhu_tournament(self.game, agents, names, self.evaluate_num)
|
data, payoffs, wins = doudizhu_tournament(self.game, agents, names, self.num_eval_games)
|
||||||
elif self.game == 'leduc-holdem':
|
elif self.game == 'leduc-holdem':
|
||||||
agents = [self.models[i].agents[0], self.models[j].agents[1]]
|
agents = [self.models[i].agents[0], self.models[j].agents[1]]
|
||||||
data, payoffs, wins = leduc_holdem_tournament(self.game, agents, self.evaluate_num)
|
data, payoffs, wins = leduc_holdem_tournament(self.game, agents, self.num_eval_games)
|
||||||
mean_payoff = np.mean(payoffs)
|
mean_payoff = np.mean(payoffs)
|
||||||
print('Average payoff:', mean_payoff)
|
print('Average payoff:', mean_payoff)
|
||||||
print()
|
print()
|
||||||
|
@ -59,16 +59,16 @@ class Tournament(object):
|
||||||
payoffs_data.append(payoff_data)
|
payoffs_data.append(payoff_data)
|
||||||
return games_data, payoffs_data
|
return games_data, payoffs_data
|
||||||
|
|
||||||
def doudizhu_tournament(game, agents, names, num):
|
def doudizhu_tournament(game, agents, names, num_eval_games):
|
||||||
env = rlcard.make(game, config={'allow_raw_data': True})
|
env = rlcard.make(game, config={'allow_raw_data': True})
|
||||||
env.set_agents(agents)
|
env.set_agents(agents)
|
||||||
payoffs = []
|
payoffs = []
|
||||||
json_data = []
|
json_data = []
|
||||||
wins = []
|
wins = []
|
||||||
for _ in tqdm(range(num)):
|
for _ in tqdm(range(num_eval_games)):
|
||||||
data = {}
|
data = {}
|
||||||
roles = ['landlord', 'peasant', 'peasant']
|
roles = ['landlord', 'peasant', 'peasant']
|
||||||
data['playerInfo'] = [{'id': i, 'index': i, 'role': roles[i], 'agentInfo': {'name': names[i]}} for i in range(env.player_num)]
|
data['playerInfo'] = [{'id': i, 'index': i, 'role': roles[i], 'agentInfo': {'name': names[i]}} for i in range(env.num_players)]
|
||||||
state, player_id = env.reset()
|
state, player_id = env.reset()
|
||||||
perfect = env.get_perfect_information()
|
perfect = env.get_perfect_information()
|
||||||
data['initHands'] = perfect['hand_cards_with_suit']
|
data['initHands'] = perfect['hand_cards_with_suit']
|
||||||
|
@ -77,7 +77,7 @@ def doudizhu_tournament(game, agents, names, num):
|
||||||
current_hand_cards[i] = current_hand_cards[i].split()
|
current_hand_cards[i] = current_hand_cards[i].split()
|
||||||
data['moveHistory'] = []
|
data['moveHistory'] = []
|
||||||
while not env.is_over():
|
while not env.is_over():
|
||||||
action, probs = env.agents[player_id].eval_step(state)
|
action, info = env.agents[player_id].eval_step(state)
|
||||||
history = {}
|
history = {}
|
||||||
history['playerIdx'] = player_id
|
history['playerIdx'] = player_id
|
||||||
if env.agents[player_id].use_raw:
|
if env.agents[player_id].use_raw:
|
||||||
|
@ -85,6 +85,7 @@ def doudizhu_tournament(game, agents, names, num):
|
||||||
else:
|
else:
|
||||||
_action = env._decode_action(action)
|
_action = env._decode_action(action)
|
||||||
history['move'] = _calculate_doudizhu_move(_action, player_id, current_hand_cards)
|
history['move'] = _calculate_doudizhu_move(_action, player_id, current_hand_cards)
|
||||||
|
history['info'] = info
|
||||||
|
|
||||||
data['moveHistory'].append(history)
|
data['moveHistory'].append(history)
|
||||||
state, player_id = env.step(action, env.agents[player_id].use_raw)
|
state, player_id = env.step(action, env.agents[player_id].use_raw)
|
||||||
|
@ -115,15 +116,15 @@ def _calculate_doudizhu_move(action, player_id, current_hand_cards):
|
||||||
break
|
break
|
||||||
return ' '.join(cards_with_suit)
|
return ' '.join(cards_with_suit)
|
||||||
|
|
||||||
def leduc_holdem_tournament(game, agents, num):
|
def leduc_holdem_tournament(game, agents, num_eval_games):
|
||||||
env = rlcard.make(game, config={'allow_raw_data': True})
|
env = rlcard.make(game, config={'allow_raw_data': True})
|
||||||
env.set_agents(agents)
|
env.set_agents(agents)
|
||||||
payoffs = []
|
payoffs = []
|
||||||
json_data = []
|
json_data = []
|
||||||
wins = []
|
wins = []
|
||||||
for _ in tqdm(range(num)):
|
for _ in tqdm(range(num_eval_games)):
|
||||||
data = {}
|
data = {}
|
||||||
data['playerInfo'] = [{'id': i, 'index': i} for i in range(env.player_num)]
|
data['playerInfo'] = [{'id': i, 'index': i} for i in range(env.num_players)]
|
||||||
state, player_id = env.reset()
|
state, player_id = env.reset()
|
||||||
perfect = env.get_perfect_information()
|
perfect = env.get_perfect_information()
|
||||||
data['initHands'] = perfect['hand_cards']
|
data['initHands'] = perfect['hand_cards']
|
||||||
|
@ -131,7 +132,7 @@ def leduc_holdem_tournament(game, agents, num):
|
||||||
round_history = []
|
round_history = []
|
||||||
round_id = 0
|
round_id = 0
|
||||||
while not env.is_over():
|
while not env.is_over():
|
||||||
action, probs = env.agents[player_id].eval_step(state)
|
action, info = env.agents[player_id].eval_step(state)
|
||||||
history = {}
|
history = {}
|
||||||
history['playerIdx'] = player_id
|
history['playerIdx'] = player_id
|
||||||
if env.agents[player_id].use_raw:
|
if env.agents[player_id].use_raw:
|
||||||
|
@ -139,16 +140,7 @@ def leduc_holdem_tournament(game, agents, num):
|
||||||
else:
|
else:
|
||||||
history['move'] = env._decode_action(action)
|
history['move'] = env._decode_action(action)
|
||||||
|
|
||||||
probabilities = []
|
history['info'] = info
|
||||||
for i, a in enumerate(env.actions):
|
|
||||||
if len(probs) == 0:
|
|
||||||
p = -2
|
|
||||||
elif a in state['raw_legal_actions']:
|
|
||||||
p = probs[i]
|
|
||||||
else:
|
|
||||||
p = -1
|
|
||||||
probabilities.append({'move':a, 'probability': p})
|
|
||||||
history['probabilities'] = probabilities
|
|
||||||
round_history.append(history)
|
round_history.append(history)
|
||||||
state, player_id = env.step(action, env.agents[player_id].use_raw)
|
state, player_id = env.step(action, env.agents[player_id].use_raw)
|
||||||
perfect = env.get_perfect_information()
|
perfect = env.get_perfect_information()
|
||||||
|
|
|
@ -102,13 +102,13 @@ def query_agent_payoff(request):
|
||||||
def launch(request):
|
def launch(request):
|
||||||
if request.method == 'GET':
|
if request.method == 'GET':
|
||||||
try:
|
try:
|
||||||
eval_num = int(request.GET['eval_num'])
|
num_eval_games = int(request.GET['num_eval_games'])
|
||||||
game = request.GET['name']
|
game = request.GET['name']
|
||||||
except:
|
except:
|
||||||
return HttpResponse(json.dumps({'value': -1, 'info': 'parameters error'}))
|
return HttpResponse(json.dumps({'value': -1, 'info': 'parameters error'}))
|
||||||
|
|
||||||
MODEL_IDS_ALL = _get_model_ids_all()
|
MODEL_IDS_ALL = _get_model_ids_all()
|
||||||
games_data, payoffs_data = Tournament(game, MODEL_IDS_ALL[game], eval_num).launch()
|
games_data, payoffs_data = Tournament(game, MODEL_IDS_ALL[game], num_eval_games).launch()
|
||||||
Game.objects.filter(name=game).delete()
|
Game.objects.filter(name=game).delete()
|
||||||
Payoff.objects.filter(name=game).delete()
|
Payoff.objects.filter(name=game).delete()
|
||||||
for game_data in games_data:
|
for game_data in games_data:
|
||||||
|
|
Loading…
Reference in New Issue