Leduc
This commit is contained in:
parent
c92edef65c
commit
796d6f46e3
|
@ -23,3 +23,4 @@ yarn-debug.log*
|
|||
yarn-error.log*
|
||||
|
||||
db.sqlite3
|
||||
__pycache__
|
||||
|
|
18
README.md
18
README.md
|
@ -1,6 +1,7 @@
|
|||
**NOTE: This project is under final testing. The one in the repo only supports the visualization of some sampled data. The full version will be available soon!**
|
||||
|
||||
# Server Setup
|
||||
# Django Server
|
||||
## Server Setup
|
||||
Install dependencies:
|
||||
```
|
||||
pip install -r requirements.txt
|
||||
|
@ -17,7 +18,7 @@ python manage.py runserver
|
|||
```
|
||||
The default URL is [http://127.0.0.1:8000/](http://127.0.0.1:8000/)
|
||||
|
||||
# REST API
|
||||
## REST API
|
||||
The definitions of the fields are as follows:
|
||||
* `eval_num`: Integer. The number of evaluation times.
|
||||
* `name`: String. The name of the environment.
|
||||
|
@ -34,6 +35,19 @@ The definitions of the fields are as follows:
|
|||
| GET | tournament/query\_payoff | `name`, `agent0`, `agent1`, `payoff` | Query the payoffs with the given parameters |
|
||||
| GET | tournament/replay | `name`, `agent0`, `agent1`, `index` | Return the replay data (only support Leduc Holdem for now) |
|
||||
|
||||
## Example API
|
||||
| API | Description |
|
||||
|-----------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------|
|
||||
| http://127.0.0.1:8000/tournament/launch?eval_num=200&name=leduc-holdem | Evaluate on Leduc Holdem with 200 games for each pair of models |
|
||||
| http://127.0.0.1:8000/tournament/replay?name=leduc-holdem&agent0=leduc-holdem-rule-v1&agent1=leduc-holdem-cfr&index=3 | Obtain the replay data between rule model and CFR model. Obtain the data of the 3rd game |
|
||||
| http://127.0.0.1:8000/tournament/query_game | Get all the game data |
|
||||
| http://127.0.0.1:8000/tournament/query_game?name=leduc-holdem | Get all the game data of Leduc Holdem |
|
||||
| http://127.0.0.1:8000/tournament/query_payoff | Get all the payoffs |
|
||||
| http://127.0.0.1:8000/tournament/query_payoff?agent0=leduc-holdem-cfr&agent1=leduc-holdem-rule-v1 | Get all the payoffs between rule and CFR models |
|
||||
|
||||
|
||||
|
||||
|
||||
# Others
|
||||
This project was bootstrapped with [Create React App](https://github.com/facebook/create-react-app).
|
||||
|
||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -1,10 +1,14 @@
|
|||
import rlcard
|
||||
from .leduc_holdem_random_model import LeducHoldemRandomModelSpec
|
||||
from .doudizhu_random_model import DoudizhuRandomModelSpec
|
||||
|
||||
|
||||
# Register Leduc Holdem Random Model
|
||||
rlcard.models.registration.model_registry.model_specs['leduc-holdem-random'] = LeducHoldemRandomModelSpec()
|
||||
|
||||
# Register Doudizhu Random Model
|
||||
rlcard.models.registration.model_registry.model_specs['doudizhu-random'] = DoudizhuRandomModelSpec()
|
||||
|
||||
# The models we are concerned
|
||||
MODEL_IDS = {}
|
||||
MODEL_IDS['leduc-holdem'] = [
|
||||
|
@ -13,3 +17,8 @@ MODEL_IDS['leduc-holdem'] = [
|
|||
'leduc-holdem-rule-v1',
|
||||
]
|
||||
|
||||
MODEL_IDS['doudizhu'] = [
|
||||
'doudizhu-random',
|
||||
'doudizhu-random',
|
||||
]
|
||||
|
||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,46 @@
|
|||
# A wrap for rlcard
|
||||
# Here, we include a random model as the default baseline
|
||||
import rlcard
|
||||
from rlcard.agents import RandomAgent
|
||||
from rlcard.models.model import Model
|
||||
|
||||
class DoudizhuRandomModelSpec(object):
    ''' Specification used to build the Dou Dizhu random model on demand
    '''

    def __init__(self):
        ''' Remember the model id and the class that instantiates the model
        '''
        self._entry_point = DoudizhuRandomModel
        self.model_id = 'doudizhu-random'

    def load(self):
        ''' Create the model from the stored entry point

        Returns:
            model: A new instance produced by calling the entry point
        '''
        return self._entry_point()
|
||||
|
||||
class DoudizhuRandomModel(Model):
    ''' Baseline Dou Dizhu model backed by a single RandomAgent
    '''

    def __init__(self):
        ''' Create the random agent

        A 'doudizhu' environment is made only to read its action number
        and player number.
        '''
        doudizhu_env = rlcard.make('doudizhu')
        self.player_num = doudizhu_env.player_num
        self.agent = RandomAgent(action_num=doudizhu_env.action_num)

    @property
    def agents(self):
        ''' Get the agents for every position in the game

        Returns:
            agents (list): A list with one entry per player; every entry is
                           the same shared agent object

        Note: Each agent should behave like an RL agent, with step and
              eval_step functioning well.
        '''
        return [self.agent] * self.player_num

    @property
    def use_raw(self):
        ''' Tell whether the model works on raw states and actions

        Returns:
            use_raw (boolean): Always False for this model
        '''
        return False
|
|
@ -5,10 +5,19 @@ import numpy as np
|
|||
|
||||
from .rlcard_wrap import rlcard
|
||||
|
||||
def cards2str(cards):
    ''' Encode cards as a compact string, one character group per card

    Args:
        cards (iterable): Objects exposing `rank` and `suit` attributes

    Returns:
        response (str): The concatenation, in order, of each card's `rank`.
                        A card whose `rank` is the empty string contributes
                        the first character of its `suit` instead
                        (presumably the jokers; verify against the card
                        definitions).
    '''
    # Build the result in one pass with join instead of repeated `+=`.
    return ''.join(
        card.suit[0] if card.rank == '' else card.rank
        for card in cards
    )
|
||||
|
||||
class Tournament(object):
|
||||
|
||||
def __init__(self, game, model_ids, evaluate_num=100):
|
||||
""" Defalt for two player games
|
||||
""" Default for two player games
|
||||
For Dou Dizhu, the two peasants use the same model
|
||||
"""
|
||||
self.game = game
|
||||
|
@ -28,7 +37,13 @@ class Tournament(object):
|
|||
if j == i:
|
||||
continue
|
||||
print(self.game, '-', self.model_ids[i], 'VS', self.model_ids[j])
|
||||
data, payoffs, wins = tournament(self.game, [self.models[i].agents[0], self.models[j].agents[1]], self.evaluate_num)
|
||||
if self.game == 'doudizhu':
|
||||
agents = [self.models[i].agents[0], self.models[j].agents[1], self.models[j].agents[2]]
|
||||
names = [self.model_ids[i], self.model_ids[j], self.model_ids[j]]
|
||||
data, payoffs, wins = doudizhu_tournament(self.game, agents, names, self.evaluate_num)
|
||||
elif self.game == 'leduc-holdem':
|
||||
agents = [self.models[i].agents[0], self.models[j].agents[1]]
|
||||
data, payoffs, wins = leduc_holdem_tournament(self.game, agents, self.evaluate_num)
|
||||
mean_payoff = np.mean(payoffs)
|
||||
print('Average payoff:', mean_payoff)
|
||||
print()
|
||||
|
@ -53,7 +68,50 @@ class Tournament(object):
|
|||
payoffs_data.append(payoff_data)
|
||||
return games_data, payoffs_data
|
||||
|
||||
def tournament(game, agents, num):
|
||||
def doudizhu_tournament(game, agents, names, num):
    ''' Play `num` Dou Dizhu games and collect replay data, payoffs and wins

    Args:
        game (str): Environment name handed to `rlcard.make`
        agents (list): One agent per player; each must provide `eval_step`
                       and a `use_raw` attribute
        names (list): Model name of each player, stored in the replay under
                      `playerInfo`
        num (int): Number of games to play

    Returns:
        json_data (list): One JSON string per game holding `playerInfo`,
                          `initHands` and `moveHistory`
        payoffs (list): Payoff of player 0 for each game
        wins (list): For each game, True if player 0's payoff is positive
    '''
    # Use the module-level `rlcard` (imported from the local wrapper) rather
    # than re-importing it here, as the sibling tournament function does.
    env = rlcard.make(game, config={'allow_raw_data': True})
    env.set_agents(agents)

    # Player 0 is recorded as the landlord, the remaining players as peasants.
    roles = ['landlord', 'peasant', 'peasant']

    payoffs = []
    json_data = []
    wins = []
    for _ in tqdm(range(num)):
        data = {}
        data['playerInfo'] = [
            {'id': i, 'index': i, 'role': roles[i], 'agentInfo': {'name': names[i]}}
            for i in range(env.player_num)
        ]
        state, player_id = env.reset()
        # Hands are captured right after reset, before any move is played.
        data['initHands'] = [
            cards2str(env.game.players[i].current_hand)
            for i in range(env.player_num)
        ]
        data['moveHistory'] = []
        while not env.is_over():
            agent = env.agents[player_id]
            action, _probs = agent.eval_step(state)
            history = {'playerIdx': player_id}
            # Non-raw agents return an encoded action; decode it so the
            # replay stores the move itself.
            if agent.use_raw:
                history['move'] = action
            else:
                history['move'] = env._decode_action(action)
            data['moveHistory'].append(history)
            state, player_id = env.step(action, agent.use_raw)
        json_data.append(json.dumps(data))

        # Results are reported from player 0's point of view.
        payoff = env.get_payoffs()[0]
        wins.append(payoff > 0)
        payoffs.append(payoff)
    return json_data, payoffs, wins
|
||||
|
||||
def leduc_holdem_tournament(game, agents, num):
|
||||
env = rlcard.make(game, config={'allow_raw_data': True})
|
||||
env.set_agents(agents)
|
||||
payoffs = []
|
||||
|
@ -65,7 +123,9 @@ def tournament(game, agents, num):
|
|||
state, player_id = env.reset()
|
||||
perfect = env.get_perfect_information()
|
||||
data['initHands'] = perfect['hand_cards']
|
||||
data['moveHistory'] = [[]]
|
||||
data['moveHistory'] = []
|
||||
round_history = []
|
||||
round_id = 0
|
||||
while not env.is_over():
|
||||
action, probs = env.agents[player_id].eval_step(state)
|
||||
history = {}
|
||||
|
@ -85,9 +145,16 @@ def tournament(game, agents, num):
|
|||
p = -1
|
||||
probabilities.append({'move':a, 'probability': p})
|
||||
history['probabilities'] = probabilities
|
||||
data['moveHistory'][0].append(history)
|
||||
round_history.append(history)
|
||||
perfect = env.get_perfect_information()
|
||||
if round_id < perfect['current_round']:
|
||||
round_id = perfect['current_round']
|
||||
data['moveHistory'].append(round_history)
|
||||
round_history = []
|
||||
state, player_id = env.step(action, env.agents[player_id].use_raw)
|
||||
perfect = env.get_perfect_information()
|
||||
if round_id < perfect['current_round']:
|
||||
data['moveHistory'].append(round_history)
|
||||
data['publicCard'] = perfect['public_card']
|
||||
data = json.dumps(data)
|
||||
#data = json.dumps(data, indent=2, sort_keys=True)
|
||||
|
|
Loading…
Reference in New Issue