Update server
This commit is contained in:
parent
3d9f7cebd9
commit
21e3ead424
24
docs/api.md
24
docs/api.md
|
@ -17,8 +17,8 @@ The definitions of the fields are as follows:
|
|||
* `index`: Integer. The index of the game of the same environment and same agent. It is in the range \[0, eval_num-1\]
|
||||
|
||||
| type | Resource | Parameters | Description |
|
||||
|------|-------------------------------------|-------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------|
|
||||
| GET | tournament/launch | `eval_num`, `name` | Launch tournment on the game. Each pair of models will play `eval_num` times. Results will be saved in database. |
|
||||
|------|-------------------------------------|-------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------|
|
||||
| GET | tournament/launch | `num_eval_games`, `name` | Launch tournament on the game. Each pair of models will play `num_eval_games` times. Results will be saved in database. |
|
||||
| GET | tournament/query\_game | `name`, `index`, `agent0`, `agent1`, `win`, `payoff`, `elements_every_page`, `page_index` | Query the games with the given parameters |
|
||||
| GET | tournament/query\_payoff | `name`, `agent0`, `agent1`, `payoff` | Query the payoffs with the given parameters |
|
||||
| GET | tournament/query\_agent\_payoff | `name`, `elements_every_page`, `page_index` | Query the payoffs of all the agents |
|
||||
|
@ -32,16 +32,16 @@ The definitions of the fields are as follows:
|
|||
### Example API
|
||||
| API | Description |
|
||||
|------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------|
|
||||
| http://127.0.0.1:8000/tournament/launch?eval\_num=200&name=leduc-holdem | Evaluate on Leduc Holdem with 200 games for each pair of models |
|
||||
| http://127.0.0.1:8000/tournament/launch?num_eval_games=200&name=leduc-holdem | Evaluate on Leduc Holdem with 200 games for each pair of models |
|
||||
| http://127.0.0.1:8000/tournament/replay?name=leduc-holdem&agent0=leduc-holdem-rule-v1&agent1=leduc-holdem-cfr&index=3 | Obtain the replay data between rule model and CFR model. Obtain the data of the 3rd game |
|
||||
| http://127.0.0.1:8000/tournament/query\_game&elements\_every\_page=10&page\_index=0 | Get all the game data |
|
||||
| http://127.0.0.1:8000/tournament/query\_game?name=leduc-holdem&elements\_every\_page=10&page\_index=0 | Get all the game data of Leduc Holdem |
|
||||
| http://127.0.0.1:8000/tournament/query\_payoff | Get all the payoffs |
|
||||
| http://127.0.0.1:8000/tournament/query\_payoff?agent0=leduc-holdem-cfr&agent1=leduc-holdem-rule-v1 | Get all the payoffs between rule and CFR models |
|
||||
| http://127.0.0.1:8000/tournament/query\_agent\_payoff?name=leduc-holdem&elements\_every\_page=1&page\_index=1 | Get the payoffs of all the agents of leduc-holdem |
|
||||
| http://127.0.0.1:8000/tournament/list\_uploaded\_agents?game=leduc-holdem | List the uploaded agents of leduc-holdem |
|
||||
| http://127.0.0.1:8000/tournament/list\_baseline\_agents?game=leduc-holdem | List the baseline agents of leduc-holdem |
|
||||
| http://127.0.0.1:8000/tournament/download\_examples?name=example\_luduc\_nfsp\_model | Download the NFSP example model for Leduc Hold'em |
|
||||
| http://127.0.0.1:8000/tournament/query_game?elements_every_page=10&page_index=0 | Get all the game data |
|
||||
| http://127.0.0.1:8000/tournament/query_game?name=leduc-holdem&elements_every_page=10&page_index=0 | Get all the game data of Leduc Holdem |
|
||||
| http://127.0.0.1:8000/tournament/query_payoff | Get all the payoffs |
|
||||
| http://127.0.0.1:8000/tournament/query_payoff?agent0=leduc-holdem-cfr&agent1=leduc-holdem-rule-v1 | Get all the payoffs between rule and CFR models |
|
||||
| http://127.0.0.1:8000/tournament/query_agent_payoff?name=leduc-holdem&elements_every_page=1&page_index=1 | Get the payoffs of all the agents of leduc-holdem |
|
||||
| http://127.0.0.1:8000/tournament/list_uploaded_agents?game=leduc-holdem | List the uploaded agents of leduc-holdem |
|
||||
| http://127.0.0.1:8000/tournament/list_baseline_agents?game=leduc-holdem | List the baseline agents of leduc-holdem |
|
||||
| http://127.0.0.1:8000/tournament/download_examples?name=example_luduc_nfsp_model | Download the NFSP example model for Leduc Hold'em |
|
||||
|
||||
## Registered Models
|
||||
Some models have been pre-registered as baselines
|
||||
|
@ -64,7 +64,7 @@ curl -F 'model=@example_luduc_nfsp_model.zip' -F "name=leduc-nfsp" -F "game=ledu
|
|||
```
|
||||
Launch the tournament with:
|
||||
```
|
||||
curl 'http://127.0.0.1:8000/tournament/launch?eval_num=200&name=leduc-holdem'
|
||||
curl 'http://127.0.0.1:8000/tournament/launch?num_eval_games=200&name=leduc-holdem'
|
||||
```
|
||||
We list the uploaded agent with
|
||||
```
|
||||
|
|
|
@ -1,5 +1,10 @@
|
|||
rlcard
|
||||
Django
|
||||
tqdm
|
||||
tensorflow==1.14
|
||||
django-cors-headers
|
||||
torch==1.6.0
|
||||
flask==1.1
|
||||
flask-cors
|
||||
onnx
|
||||
onnxruntime
|
||||
|
||||
|
|
|
@ -21,8 +21,8 @@ class DoudizhuRandomModel(Model):
|
|||
''' Load random model
|
||||
'''
|
||||
env = rlcard.make('doudizhu')
|
||||
self.agent = RandomAgent(action_num=env.action_num)
|
||||
self.player_num = env.player_num
|
||||
self.agent = RandomAgent(num_actions=env.num_actions)
|
||||
self.num_players = env.num_players
|
||||
|
||||
@property
|
||||
def agents(self):
|
||||
|
@ -34,7 +34,7 @@ class DoudizhuRandomModel(Model):
|
|||
Note: Each agent should be just like RL agent with step and eval_step
|
||||
functioning well.
|
||||
'''
|
||||
return [self.agent for _ in range(self.player_num)]
|
||||
return [self.agent for _ in range(self.num_players)]
|
||||
|
||||
@property
|
||||
def use_raw(self):
|
||||
|
|
|
@ -21,8 +21,8 @@ class LeducHoldemRandomModel(Model):
|
|||
''' Load random model
|
||||
'''
|
||||
env = rlcard.make('leduc-holdem')
|
||||
self.agent = RandomAgent(action_num=env.action_num)
|
||||
self.player_num = env.player_num
|
||||
self.agent = RandomAgent(num_actions=env.num_actions)
|
||||
self.num_players = env.num_players
|
||||
|
||||
@property
|
||||
def agents(self):
|
||||
|
@ -34,7 +34,7 @@ class LeducHoldemRandomModel(Model):
|
|||
Note: Each agent should be just like RL agent with step and eval_step
|
||||
functioning well.
|
||||
'''
|
||||
return [self.agent for _ in range(self.player_num)]
|
||||
return [self.agent for _ in range(self.num_players)]
|
||||
|
||||
@property
|
||||
def use_raw(self):
|
||||
|
|
|
@ -7,34 +7,34 @@ from .rlcard_wrap import rlcard
|
|||
|
||||
class Tournament(object):
|
||||
|
||||
def __init__(self, game, model_ids, evaluate_num=100):
|
||||
def __init__(self, game, model_ids, num_eval_games=100):
|
||||
""" Default for two player games
|
||||
For Dou Dizhu, the two peasants use the same model
|
||||
"""
|
||||
self.game = game
|
||||
self.model_ids = model_ids
|
||||
self.evaluate_num = evaluate_num
|
||||
self.num_eval_games = num_eval_games
|
||||
# Load the models
|
||||
self.models = [rlcard.models.load(model_id) for model_id in model_ids]
|
||||
|
||||
def launch(self):
|
||||
""" Currently for two-player game only
|
||||
"""
|
||||
model_num = len(self.model_ids)
|
||||
num_models = len(self.model_ids)
|
||||
games_data = []
|
||||
payoffs_data = []
|
||||
for i in range(model_num):
|
||||
for j in range(model_num):
|
||||
for i in range(num_models):
|
||||
for j in range(num_models):
|
||||
if j == i:
|
||||
continue
|
||||
print(self.game, '-', self.model_ids[i], 'VS', self.model_ids[j])
|
||||
if self.game == 'doudizhu':
|
||||
agents = [self.models[i].agents[0], self.models[j].agents[1], self.models[j].agents[2]]
|
||||
names = [self.model_ids[i], self.model_ids[j], self.model_ids[j]]
|
||||
data, payoffs, wins = doudizhu_tournament(self.game, agents, names, self.evaluate_num)
|
||||
data, payoffs, wins = doudizhu_tournament(self.game, agents, names, self.num_eval_games)
|
||||
elif self.game == 'leduc-holdem':
|
||||
agents = [self.models[i].agents[0], self.models[j].agents[1]]
|
||||
data, payoffs, wins = leduc_holdem_tournament(self.game, agents, self.evaluate_num)
|
||||
data, payoffs, wins = leduc_holdem_tournament(self.game, agents, self.num_eval_games)
|
||||
mean_payoff = np.mean(payoffs)
|
||||
print('Average payoff:', mean_payoff)
|
||||
print()
|
||||
|
@ -59,16 +59,16 @@ class Tournament(object):
|
|||
payoffs_data.append(payoff_data)
|
||||
return games_data, payoffs_data
|
||||
|
||||
def doudizhu_tournament(game, agents, names, num):
|
||||
def doudizhu_tournament(game, agents, names, num_eval_games):
|
||||
env = rlcard.make(game, config={'allow_raw_data': True})
|
||||
env.set_agents(agents)
|
||||
payoffs = []
|
||||
json_data = []
|
||||
wins = []
|
||||
for _ in tqdm(range(num)):
|
||||
for _ in tqdm(range(num_eval_games)):
|
||||
data = {}
|
||||
roles = ['landlord', 'peasant', 'peasant']
|
||||
data['playerInfo'] = [{'id': i, 'index': i, 'role': roles[i], 'agentInfo': {'name': names[i]}} for i in range(env.player_num)]
|
||||
data['playerInfo'] = [{'id': i, 'index': i, 'role': roles[i], 'agentInfo': {'name': names[i]}} for i in range(env.num_players)]
|
||||
state, player_id = env.reset()
|
||||
perfect = env.get_perfect_information()
|
||||
data['initHands'] = perfect['hand_cards_with_suit']
|
||||
|
@ -77,7 +77,7 @@ def doudizhu_tournament(game, agents, names, num):
|
|||
current_hand_cards[i] = current_hand_cards[i].split()
|
||||
data['moveHistory'] = []
|
||||
while not env.is_over():
|
||||
action, probs = env.agents[player_id].eval_step(state)
|
||||
action, info = env.agents[player_id].eval_step(state)
|
||||
history = {}
|
||||
history['playerIdx'] = player_id
|
||||
if env.agents[player_id].use_raw:
|
||||
|
@ -85,6 +85,7 @@ def doudizhu_tournament(game, agents, names, num):
|
|||
else:
|
||||
_action = env._decode_action(action)
|
||||
history['move'] = _calculate_doudizhu_move(_action, player_id, current_hand_cards)
|
||||
history['info'] = info
|
||||
|
||||
data['moveHistory'].append(history)
|
||||
state, player_id = env.step(action, env.agents[player_id].use_raw)
|
||||
|
@ -115,15 +116,15 @@ def _calculate_doudizhu_move(action, player_id, current_hand_cards):
|
|||
break
|
||||
return ' '.join(cards_with_suit)
|
||||
|
||||
def leduc_holdem_tournament(game, agents, num):
|
||||
def leduc_holdem_tournament(game, agents, num_eval_games):
|
||||
env = rlcard.make(game, config={'allow_raw_data': True})
|
||||
env.set_agents(agents)
|
||||
payoffs = []
|
||||
json_data = []
|
||||
wins = []
|
||||
for _ in tqdm(range(num)):
|
||||
for _ in tqdm(range(num_eval_games)):
|
||||
data = {}
|
||||
data['playerInfo'] = [{'id': i, 'index': i} for i in range(env.player_num)]
|
||||
data['playerInfo'] = [{'id': i, 'index': i} for i in range(env.num_players)]
|
||||
state, player_id = env.reset()
|
||||
perfect = env.get_perfect_information()
|
||||
data['initHands'] = perfect['hand_cards']
|
||||
|
@ -131,7 +132,7 @@ def leduc_holdem_tournament(game, agents, num):
|
|||
round_history = []
|
||||
round_id = 0
|
||||
while not env.is_over():
|
||||
action, probs = env.agents[player_id].eval_step(state)
|
||||
action, info = env.agents[player_id].eval_step(state)
|
||||
history = {}
|
||||
history['playerIdx'] = player_id
|
||||
if env.agents[player_id].use_raw:
|
||||
|
@ -139,16 +140,7 @@ def leduc_holdem_tournament(game, agents, num):
|
|||
else:
|
||||
history['move'] = env._decode_action(action)
|
||||
|
||||
probabilities = []
|
||||
for i, a in enumerate(env.actions):
|
||||
if len(probs) == 0:
|
||||
p = -2
|
||||
elif a in state['raw_legal_actions']:
|
||||
p = probs[i]
|
||||
else:
|
||||
p = -1
|
||||
probabilities.append({'move':a, 'probability': p})
|
||||
history['probabilities'] = probabilities
|
||||
history['info'] = info
|
||||
round_history.append(history)
|
||||
state, player_id = env.step(action, env.agents[player_id].use_raw)
|
||||
perfect = env.get_perfect_information()
|
||||
|
|
|
@ -102,13 +102,13 @@ def query_agent_payoff(request):
|
|||
def launch(request):
|
||||
if request.method == 'GET':
|
||||
try:
|
||||
eval_num = int(request.GET['eval_num'])
|
||||
num_eval_games = int(request.GET['num_eval_games'])
|
||||
game = request.GET['name']
|
||||
except:
|
||||
return HttpResponse(json.dumps({'value': -1, 'info': 'parameters error'}))
|
||||
|
||||
MODEL_IDS_ALL = _get_model_ids_all()
|
||||
games_data, payoffs_data = Tournament(game, MODEL_IDS_ALL[game], eval_num).launch()
|
||||
games_data, payoffs_data = Tournament(game, MODEL_IDS_ALL[game], num_eval_games).launch()
|
||||
Game.objects.filter(name=game).delete()
|
||||
Payoff.objects.filter(name=game).delete()
|
||||
for game_data in games_data:
|
||||
|
|
Loading…
Reference in New Issue