Fix Dou Dizhu issues
This commit is contained in:
parent
796d6f46e3
commit
babb55b520
|
@ -24,3 +24,4 @@ yarn-error.log*
|
||||||
|
|
||||||
db.sqlite3
|
db.sqlite3
|
||||||
__pycache__
|
__pycache__
|
||||||
|
*.swp
|
||||||
|
|
10
README.md
10
README.md
|
@ -45,7 +45,15 @@ The definitions of the fields are as follows:
|
||||||
| http://127.0.0.1:8000/tournament/query_payoff | Get all the payoffs |
|
| http://127.0.0.1:8000/tournament/query_payoff | Get all the payoffs |
|
||||||
| http://127.0.0.1:8000/tournament/query_payoff?agent0=leduc-holdem-cfr&agent1=leduc-holdem-rule-v1 | Get all the payoffs between rule and CFR models |
|
| http://127.0.0.1:8000/tournament/query_payoff?agent0=leduc-holdem-cfr&agent1=leduc-holdem-rule-v1 | Get all the payoffs between rule and CFR models |
|
||||||
|
|
||||||
|
## Registered Models
|
||||||
|
Some models have been pre-registered as baselines
|
||||||
|
| Model | Game | Description |
|
||||||
|
|----------------------|--------------|---------------------------------------|
|
||||||
|
| leduc-holdem-random | leduc-holdem | A random model |
|
||||||
|
| leduc-holdem-cfr | leduc-holdem | Pre-trained CFR model on Leduc Holdem |
|
||||||
|
| leduc-holdem-rule-v1 | leduc-holdem | A rule model that plays greedily |
|
||||||
|
| doudizhu-random | doudizhu | A random model |
|
||||||
|
| doudizhu-rule-v1 | doudizhu | Dou Dizhu rule model |
|
||||||
|
|
||||||
|
|
||||||
# Others
|
# Others
|
||||||
|
|
|
@ -19,6 +19,6 @@ MODEL_IDS['leduc-holdem'] = [
|
||||||
|
|
||||||
MODEL_IDS['doudizhu'] = [
|
MODEL_IDS['doudizhu'] = [
|
||||||
'doudizhu-random',
|
'doudizhu-random',
|
||||||
'doudizhu-random',
|
'doudizhu-rule-v1',
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
|
@ -5,15 +5,6 @@ import numpy as np
|
||||||
|
|
||||||
from .rlcard_wrap import rlcard
|
from .rlcard_wrap import rlcard
|
||||||
|
|
||||||
def cards2str(cards):
|
|
||||||
response = ''
|
|
||||||
for card in cards:
|
|
||||||
if card.rank == '':
|
|
||||||
response += card.suit[0]
|
|
||||||
else:
|
|
||||||
response += card.rank
|
|
||||||
return response
|
|
||||||
|
|
||||||
class Tournament(object):
|
class Tournament(object):
|
||||||
|
|
||||||
def __init__(self, game, model_ids, evaluate_num=100):
|
def __init__(self, game, model_ids, evaluate_num=100):
|
||||||
|
@ -69,11 +60,7 @@ class Tournament(object):
|
||||||
return games_data, payoffs_data
|
return games_data, payoffs_data
|
||||||
|
|
||||||
def doudizhu_tournament(game, agents, names, num):
|
def doudizhu_tournament(game, agents, names, num):
|
||||||
import rlcard
|
|
||||||
env = rlcard.make(game, config={'allow_raw_data': True})
|
env = rlcard.make(game, config={'allow_raw_data': True})
|
||||||
print(env.reset())
|
|
||||||
print(env.step(87, False))
|
|
||||||
exit()
|
|
||||||
env.set_agents(agents)
|
env.set_agents(agents)
|
||||||
payoffs = []
|
payoffs = []
|
||||||
json_data = []
|
json_data = []
|
||||||
|
@ -83,9 +70,8 @@ def doudizhu_tournament(game, agents, names, num):
|
||||||
roles = ['landlord', 'peasant', 'peasant']
|
roles = ['landlord', 'peasant', 'peasant']
|
||||||
data['playerInfo'] = [{'id': i, 'index': i, 'role': roles[i], 'agentInfo': {'name': names[i]}} for i in range(env.player_num)]
|
data['playerInfo'] = [{'id': i, 'index': i, 'role': roles[i], 'agentInfo': {'name': names[i]}} for i in range(env.player_num)]
|
||||||
state, player_id = env.reset()
|
state, player_id = env.reset()
|
||||||
#perfect = env.get_perfect_information()
|
perfect = env.get_perfect_information()
|
||||||
#data['initHands'] = perfect['hand_cards']
|
data['initHands'] = perfect['hand_cards_with_suit']
|
||||||
data['initHands'] =[cards2str(env.game.players[i].current_hand) for i in range(env.player_num)]
|
|
||||||
data['moveHistory'] = []
|
data['moveHistory'] = []
|
||||||
while not env.is_over():
|
while not env.is_over():
|
||||||
action, probs = env.agents[player_id].eval_step(state)
|
action, probs = env.agents[player_id].eval_step(state)
|
||||||
|
@ -97,12 +83,9 @@ def doudizhu_tournament(game, agents, names, num):
|
||||||
history['move'] = env._decode_action(action)
|
history['move'] = env._decode_action(action)
|
||||||
|
|
||||||
data['moveHistory'].append(history)
|
data['moveHistory'].append(history)
|
||||||
print(action, player_id, env.agents[player_id].use_raw)
|
|
||||||
state, player_id = env.step(action, env.agents[player_id].use_raw)
|
state, player_id = env.step(action, env.agents[player_id].use_raw)
|
||||||
data = json.dumps(data)
|
data = json.dumps(data)
|
||||||
#data = json.dumps(data, indent=2, sort_keys=True)
|
#data = json.dumps(data, indent=2, sort_keys=True)
|
||||||
print(data)
|
|
||||||
exit()
|
|
||||||
json_data.append(data)
|
json_data.append(data)
|
||||||
if env.get_payoffs()[0] > 0:
|
if env.get_payoffs()[0] > 0:
|
||||||
wins.append(True)
|
wins.append(True)
|
||||||
|
|
Loading…
Reference in New Issue