This commit is contained in:
Daochen 2020-05-11 16:42:08 -05:00
parent c92edef65c
commit 796d6f46e3
21 changed files with 144 additions and 7 deletions

1
.gitignore vendored
View File

@ -23,3 +23,4 @@ yarn-debug.log*
yarn-error.log*
db.sqlite3
__pycache__

View File

@ -1,6 +1,7 @@
**NOTE: This project is under final testing. The version in the repo only supports the visualization of some sampled data. The full version will be available soon!**
# Server Setup
# Django Server
## Server Setup
Install dependencies:
```
pip install -r requirements.txt
@ -17,7 +18,7 @@ python manage.py runserver
```
The default URL is [http://127.0.0.1:8000/](http://127.0.0.1:8000/)
# REST API
## REST API
The definitions of the fields are as follows:
* `eval_num`: Integer. The number of evaluation times.
* `name`: String. The name of the environment.
@ -34,6 +35,19 @@ The definitions of the fields are as follows:
| GET | tournament/query\_payoff | `name`, `agent0`, `agent1`, `payoff` | Query the payoffs with the given parameters |
| GET | tournament/replay | `name`, `agent0`, `agent1`, `index` | Return the replay data (only supports Leduc Holdem for now) |
## Example API
| API | Description |
|-----------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------|
| http://127.0.0.1:8000/tournament/launch?eval_num=200&name=leduc-holdem                                                | Evaluate on Leduc Holdem with 200 games for each pair of models                          |
| http://127.0.0.1:8000/tournament/replay?name=leduc-holdem&agent0=leduc-holdem-rule-v1&agent1=leduc-holdem-cfr&index=3 | Obtain the replay data between rule model and CFR model. Obtain the data of the 3rd game |
| http://127.0.0.1:8000/tournament/query_game | Get all the game data |
| http://127.0.0.1:8000/tournament/query_game?name=leduc-holdem | Get all the game data of Leduc Holdem |
| http://127.0.0.1:8000/tournament/query_payoff | Get all the payoffs |
| http://127.0.0.1:8000/tournament/query_payoff?agent0=leduc-holdem-cfr&agent1=leduc-holdem-rule-v1 | Get all the payoffs between rule and CFR models |
# Others
This project was bootstrapped with [Create React App](https://github.com/facebook/create-react-app).

Binary file not shown.

View File

@ -1,10 +1,14 @@
import rlcard
from .leduc_holdem_random_model import LeducHoldemRandomModelSpec
from .doudizhu_random_model import DoudizhuRandomModelSpec
# Register the Leduc Holdem random model spec under the id 'leduc-holdem-random'
rlcard.models.registration.model_registry.model_specs['leduc-holdem-random'] = LeducHoldemRandomModelSpec()
# Register the Dou Dizhu random model spec under the id 'doudizhu-random'
rlcard.models.registration.model_registry.model_specs['doudizhu-random'] = DoudizhuRandomModelSpec()
# The models we are concerned with, keyed by game name
MODEL_IDS = {}
MODEL_IDS['leduc-holdem'] = [
@ -13,3 +17,8 @@ MODEL_IDS['leduc-holdem'] = [
'leduc-holdem-rule-v1',
]
# Model ids that take part in the Dou Dizhu tournament.
# NOTE(review): the same id is listed twice -- presumably so that the
# tournament, which skips pairing an entry with itself, still has one
# pair to play; confirm this duplication is intended.
MODEL_IDS['doudizhu'] = [
'doudizhu-random',
'doudizhu-random',
]

View File

@ -0,0 +1,46 @@
# A wrap for rlcard
# Here, we include a random model as the default baseline
import rlcard
from rlcard.agents import RandomAgent
from rlcard.models.model import Model
class DoudizhuRandomModelSpec(object):
    ''' Registry entry that knows how to build the Dou Dizhu random model
    '''

    def __init__(self):
        ''' Record the model id and the class used to instantiate the model
        '''
        self.model_id = 'doudizhu-random'
        self._entry_point = DoudizhuRandomModel

    def load(self):
        ''' Instantiate the model

        Returns:
            model: A new object created by calling the stored entry point
        '''
        return self._entry_point()
class DoudizhuRandomModel(Model):
    ''' Baseline Dou Dizhu model where every seat is played by one shared random agent
    '''

    def __init__(self):
        ''' Create a Dou Dizhu environment and build the random agent from it
        '''
        doudizhu_env = rlcard.make('doudizhu')
        self.agent = RandomAgent(action_num=doudizhu_env.action_num)
        self.player_num = doudizhu_env.player_num

    @property
    def agents(self):
        ''' Get the agents for each position in the game

        Returns:
            agents (list): A list with one entry per player; every entry
                           is the same random agent instance

        Note: Each agent should be just like RL agent with step and eval_step
              functioning well.
        '''
        return [self.agent] * self.player_num

    @property
    def use_raw(self):
        ''' Indicate whether raw state and action are used

        Returns:
            use_raw (boolean): Always False for this model
        '''
        return False

View File

@ -5,10 +5,19 @@ import numpy as np
from .rlcard_wrap import rlcard
def cards2str(cards):
    ''' Convert a sequence of cards into a compact string

    Each card contributes one token: its rank, or the first character of
    its suit when the rank is the empty string.
    NOTE(review): empty-rank cards are presumably the jokers -- confirm
    against the card class used by the environment.

    Args:
        cards (iterable): Objects exposing `rank` and `suit` attributes

    Returns:
        (str): The concatenated tokens in the order the cards were given;
               an empty string when there are no cards
    '''
    # Join once instead of growing the string with += on every card
    return ''.join(
        card.suit[0] if card.rank == '' else card.rank
        for card in cards
    )
class Tournament(object):
def __init__(self, game, model_ids, evaluate_num=100):
""" Defalt for two player games
""" Default for two player games
For Dou Dizhu, the two peasants use the same model
"""
self.game = game
@ -28,7 +37,13 @@ class Tournament(object):
if j == i:
continue
print(self.game, '-', self.model_ids[i], 'VS', self.model_ids[j])
data, payoffs, wins = tournament(self.game, [self.models[i].agents[0], self.models[j].agents[1]], self.evaluate_num)
if self.game == 'doudizhu':
agents = [self.models[i].agents[0], self.models[j].agents[1], self.models[j].agents[2]]
names = [self.model_ids[i], self.model_ids[j], self.model_ids[j]]
data, payoffs, wins = doudizhu_tournament(self.game, agents, names, self.evaluate_num)
elif self.game == 'leduc-holdem':
agents = [self.models[i].agents[0], self.models[j].agents[1]]
data, payoffs, wins = leduc_holdem_tournament(self.game, agents, self.evaluate_num)
mean_payoff = np.mean(payoffs)
print('Average payoff:', mean_payoff)
print()
@ -53,7 +68,50 @@ class Tournament(object):
payoffs_data.append(payoff_data)
return games_data, payoffs_data
def tournament(game, agents, num):
def doudizhu_tournament(game, agents, names, num):
    ''' Play `num` Dou Dizhu games and collect replay data, payoffs and wins

    Args:
        game (str): Environment id passed to rlcard.make
        agents (list): One agent per seat; seat 0 is recorded as the landlord
        names (list): Name of the model behind each seat, in seat order
        num (int): Number of games to play

    Returns:
        (tuple): Tuple containing:

            (list): One JSON string of replay data per game
            (list): Payoff of player 0 for each game
            (list): For each game, whether player 0's payoff was positive
    '''
    # Uses the module-level rlcard, as leduc_holdem_tournament does.
    # The leftover debug prints and exit() calls were removed: they stopped
    # the whole process before any game result could be returned.
    env = rlcard.make(game, config={'allow_raw_data': True})
    env.set_agents(agents)
    roles = ['landlord', 'peasant', 'peasant']

    payoffs = []
    json_data = []
    wins = []
    for _ in tqdm(range(num)):
        data = {}
        data['playerInfo'] = [{'id': i, 'index': i, 'role': roles[i], 'agentInfo': {'name': names[i]}} for i in range(env.player_num)]
        state, player_id = env.reset()
        # Hands are read straight after reset, before any move is played
        data['initHands'] = [cards2str(env.game.players[i].current_hand) for i in range(env.player_num)]
        data['moveHistory'] = []
        while not env.is_over():
            agent = env.agents[player_id]
            action, _ = agent.eval_step(state)
            # Raw agents already return the action in its readable form;
            # otherwise the environment decodes the action id
            move = action if agent.use_raw else env._decode_action(action)
            data['moveHistory'].append({'playerIdx': player_id, 'move': move})
            state, player_id = env.step(action, agent.use_raw)
        json_data.append(json.dumps(data))

        # Results are tracked from player 0's point of view
        first_payoff = env.get_payoffs()[0]
        wins.append(bool(first_payoff > 0))
        payoffs.append(first_payoff)
    return json_data, payoffs, wins
def leduc_holdem_tournament(game, agents, num):
env = rlcard.make(game, config={'allow_raw_data': True})
env.set_agents(agents)
payoffs = []
@ -65,7 +123,9 @@ def tournament(game, agents, num):
state, player_id = env.reset()
perfect = env.get_perfect_information()
data['initHands'] = perfect['hand_cards']
data['moveHistory'] = [[]]
data['moveHistory'] = []
round_history = []
round_id = 0
while not env.is_over():
action, probs = env.agents[player_id].eval_step(state)
history = {}
@ -85,9 +145,16 @@ def tournament(game, agents, num):
p = -1
probabilities.append({'move':a, 'probability': p})
history['probabilities'] = probabilities
data['moveHistory'][0].append(history)
round_history.append(history)
perfect = env.get_perfect_information()
if round_id < perfect['current_round']:
round_id = perfect['current_round']
data['moveHistory'].append(round_history)
round_history = []
state, player_id = env.step(action, env.agents[player_id].use_raw)
perfect = env.get_perfect_information()
if round_id < perfect['current_round']:
data['moveHistory'].append(round_history)
data['publicCard'] = perfect['public_card']
data = json.dumps(data)
#data = json.dumps(data, indent=2, sort_keys=True)