Fix Dou Dizhu issues

This commit is contained in:
Daochen 2020-05-12 18:51:16 -05:00
parent 796d6f46e3
commit babb55b520
4 changed files with 13 additions and 21 deletions

1
.gitignore vendored
View File

@ -24,3 +24,4 @@ yarn-error.log*
db.sqlite3 db.sqlite3
__pycache__ __pycache__
*.swp

View File

@ -45,7 +45,15 @@ The definitions of the fields are as follows:
| http://127.0.0.1:8000/tournament/query_payoff | Get all the payoffs | | http://127.0.0.1:8000/tournament/query_payoff | Get all the payoffs |
| http://127.0.0.1:8000/tournament/query_payoff?agent0=leduc-holdem-cfr&agent1=leduc-holdem-rule-v1 | Get all the payoffs between rule and CFR models | | http://127.0.0.1:8000/tournament/query_payoff?agent0=leduc-holdem-cfr&agent1=leduc-holdem-rule-v1 | Get all the payoffs between rule and CFR models |
## Registered Models
Some models have been pre-registered as baselines
| Model | Game | Description |
|----------------------|--------------|---------------------------------------|
| leduc-holdem-random | leduc-holdem | A random model |
| leduc-holdem-cfr | leduc-holdem | Pre-trained CFR model on Leduc Holdem |
| leduc-holdem-rule-v1 | leduc-holdem | A rule model that plays greedily |
| doudizhu-random | doudizhu | A random model |
| doudizhu-rule-v1 | doudizhu | Dou Dizhu rule model |
# Others # Others

View File

@ -19,6 +19,6 @@ MODEL_IDS['leduc-holdem'] = [
MODEL_IDS['doudizhu'] = [ MODEL_IDS['doudizhu'] = [
'doudizhu-random', 'doudizhu-random',
'doudizhu-random', 'doudizhu-rule-v1',
] ]

View File

@ -5,15 +5,6 @@ import numpy as np
from .rlcard_wrap import rlcard from .rlcard_wrap import rlcard
def cards2str(cards):
    """Encode an iterable of card objects as a compact string.

    Each card contributes exactly one piece to the result: its ``rank``
    when that is non-empty, otherwise the first character of its ``suit``.

    Args:
        cards: Iterable of objects exposing ``rank`` and ``suit`` attributes.
            NOTE(review): cards with an empty rank are presumably jokers
            identified by their suit -- confirm against the card class.

    Returns:
        str: The concatenation of the per-card pieces, in input order.
            An empty iterable yields ``''``.
    """
    # Build the pieces first and join once, instead of growing the string
    # with ``+=`` on every iteration.
    return ''.join(
        card.suit[0] if card.rank == '' else card.rank
        for card in cards
    )
class Tournament(object): class Tournament(object):
def __init__(self, game, model_ids, evaluate_num=100): def __init__(self, game, model_ids, evaluate_num=100):
@ -69,11 +60,7 @@ class Tournament(object):
return games_data, payoffs_data return games_data, payoffs_data
def doudizhu_tournament(game, agents, names, num): def doudizhu_tournament(game, agents, names, num):
import rlcard
env = rlcard.make(game, config={'allow_raw_data': True}) env = rlcard.make(game, config={'allow_raw_data': True})
print(env.reset())
print(env.step(87, False))
exit()
env.set_agents(agents) env.set_agents(agents)
payoffs = [] payoffs = []
json_data = [] json_data = []
@ -83,9 +70,8 @@ def doudizhu_tournament(game, agents, names, num):
roles = ['landlord', 'peasant', 'peasant'] roles = ['landlord', 'peasant', 'peasant']
data['playerInfo'] = [{'id': i, 'index': i, 'role': roles[i], 'agentInfo': {'name': names[i]}} for i in range(env.player_num)] data['playerInfo'] = [{'id': i, 'index': i, 'role': roles[i], 'agentInfo': {'name': names[i]}} for i in range(env.player_num)]
state, player_id = env.reset() state, player_id = env.reset()
#perfect = env.get_perfect_information() perfect = env.get_perfect_information()
#data['initHands'] = perfect['hand_cards'] data['initHands'] = perfect['hand_cards_with_suit']
data['initHands'] =[cards2str(env.game.players[i].current_hand) for i in range(env.player_num)]
data['moveHistory'] = [] data['moveHistory'] = []
while not env.is_over(): while not env.is_over():
action, probs = env.agents[player_id].eval_step(state) action, probs = env.agents[player_id].eval_step(state)
@ -97,12 +83,9 @@ def doudizhu_tournament(game, agents, names, num):
history['move'] = env._decode_action(action) history['move'] = env._decode_action(action)
data['moveHistory'].append(history) data['moveHistory'].append(history)
print(action, player_id, env.agents[player_id].use_raw)
state, player_id = env.step(action, env.agents[player_id].use_raw) state, player_id = env.step(action, env.agents[player_id].use_raw)
data = json.dumps(data) data = json.dumps(data)
#data = json.dumps(data, indent=2, sort_keys=True) #data = json.dumps(data, indent=2, sort_keys=True)
print(data)
exit()
json_data.append(data) json_data.append(data)
if env.get_payoffs()[0] > 0: if env.get_payoffs()[0] > 0:
wins.append(True) wins.append(True)