add tournament
This commit is contained in:
parent fd0f4517ee
commit 26cf44d15a
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,2 @@
model_checkpoint_path: "model"
all_model_checkpoint_paths: "model"
@@ -0,0 +1,2 @@
model_checkpoint_path: "model"
all_model_checkpoint_paths: "model"
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,2 @@
model_checkpoint_path: "model"
all_model_checkpoint_paths: "model"
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,2 @@
model_checkpoint_path: "model"
all_model_checkpoint_paths: "model"
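The two-line 'checkpoint' files above are TensorFlow checkpoint index files; TensorFlow writes one next to the weight files whenever a model is saved with tf.train.Saver. A minimal TF 1.x sketch (hypothetical directory name, not part of this commit) of how such a file is produced:

import os
import tensorflow as tf

check_point_path = './models/leduc_holdem_dqn'  # assumed directory layout
os.makedirs(check_point_path, exist_ok=True)

v = tf.Variable(0, name='dummy')  # any variable, so the Saver has something to write
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    # Writes the weight files plus the 'checkpoint' index file that records
    # model_checkpoint_path / all_model_checkpoint_paths for the latest save.
    saver.save(sess, os.path.join(check_point_path, 'model'))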
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,240 @@
''' Wrappers of pretrained models. Designed for TensorFlow.
'''

import os
import tensorflow as tf

import rlcard
from rlcard.agents.nfsp_agent import NFSPAgent
from rlcard.agents.dqn_agent import DQNAgent
from rlcard.agents.cfr_agent import CFRAgent
from rlcard.agents.random_agent import RandomAgent
from rlcard.models.model import Model
from rlcard.models.leducholdem_rule_models import LeducholdemRuleAgentV1

class LeducHoldemRuleModel(Model):
    ''' Leduc Hold'em rule model, version 1
    '''

    def __init__(self):
        ''' Build the rule-based agents (no checkpoint is needed)
        '''
        env = rlcard.make('leduc-holdem', allow_raw_data=True)

        rule_agent = LeducholdemRuleAgentV1()
        self.rule_agents = [rule_agent for _ in range(env.player_num)]

    @property
    def agents(self):
        ''' Get a list of agents, one for each position in the game

        Returns:
            agents (list): A list of agents

        Note: Each agent should behave like an RL agent, with step and
        eval_step implemented.
        '''
        return self.rule_agents

    @property
    def use_raw(self):
        ''' Indicate whether the agents use raw state and action

        Returns:
            use_raw (boolean): True if raw state and action are used
        '''
        return True

class LeducHoldemRandomModel(Model):
    ''' A random model for Leduc Hold'em
    '''

    def __init__(self, root_path):
        ''' Build the random agent (no checkpoint is loaded)
        '''
        # root_path is kept only so every model wrapper shares the same constructor signature
        env = rlcard.make('leduc-holdem')
        self.agent = RandomAgent(action_num=env.action_num)

    @property
    def agents(self):
        ''' Get a list of agents, one for each position in the game

        Returns:
            agents (list): A list of agents

        Note: Each agent should behave like an RL agent, with step and
        eval_step implemented.
        '''
        return [self.agent, self.agent]

    @property
    def use_raw(self):
        ''' Indicate whether the agents use raw state and action

        Returns:
            use_raw (boolean): True if raw state and action are used
        '''
        return False

class LeducHoldemCFRModel(Model):
    ''' A pretrained CFR model on Leduc Hold'em
    '''

    def __init__(self, root_path):
        ''' Load pretrained model
        '''
        env = rlcard.make('leduc-holdem')
        model_path = os.path.join(root_path, 'leduc_holdem_cfr')
        self.agent = CFRAgent(env, model_path=model_path)
        self.agent.load()  # Load the saved policy from model_path

    @property
    def agents(self):
        ''' Get a list of agents, one for each position in the game

        Returns:
            agents (list): A list of agents

        Note: Each agent should behave like an RL agent, with step and
        eval_step implemented.
        '''
        return [self.agent, self.agent]

    @property
    def use_raw(self):
        ''' Indicate whether the agents use raw state and action

        Returns:
            use_raw (boolean): True if raw state and action are used
        '''
        return False

class LeducHoldemDQNModel2(Model):
    ''' A pretrained DQN model on Leduc Hold'em (128-128 MLP, loaded from leduc_holdem_dqn)
    '''

    def __init__(self, root_path):
        ''' Load pretrained model
        '''
        self.graph = tf.Graph()
        self.sess = tf.Session(graph=self.graph)
        self.root_path = root_path

        env = rlcard.make('leduc-holdem')
        with self.graph.as_default():
            agent = DQNAgent(self.sess,
                             scope='dqn',
                             action_num=env.action_num,
                             replay_memory_size=int(1e5),
                             replay_memory_init_size=1000,
                             state_shape=env.state_shape,
                             mlp_layers=[128, 128])
            self.dqn_agents = [agent, agent]
            self.sess.run(tf.global_variables_initializer())

        check_point_path = os.path.join(self.root_path, 'leduc_holdem_dqn')
        with self.sess.as_default():
            with self.graph.as_default():
                saver = tf.train.Saver(tf.model_variables())
                saver.restore(self.sess, tf.train.latest_checkpoint(check_point_path))

    @property
    def agents(self):
        ''' Get a list of agents, one for each position in the game

        Returns:
            agents (list): A list of agents

        Note: Each agent should behave like an RL agent, with step and
        eval_step implemented.
        '''
        return self.dqn_agents

class LeducHoldemDQNModel1(Model):
    ''' A weaker pretrained DQN model on Leduc Hold'em (8-8 MLP, loaded from leduc_holdem_dqn_bad)
    '''

    def __init__(self, root_path):
        ''' Load pretrained model
        '''
        self.graph = tf.Graph()
        self.sess = tf.Session(graph=self.graph)
        self.root_path = root_path

        env = rlcard.make('leduc-holdem')
        with self.graph.as_default():
            agent = DQNAgent(self.sess,
                             scope='dqn',
                             action_num=env.action_num,
                             replay_memory_size=int(1e5),
                             replay_memory_init_size=1000,
                             state_shape=env.state_shape,
                             mlp_layers=[8, 8])
            self.dqn_agents = [agent, agent]
            self.sess.run(tf.global_variables_initializer())

        check_point_path = os.path.join(self.root_path, 'leduc_holdem_dqn_bad')
        with self.sess.as_default():
            with self.graph.as_default():
                saver = tf.train.Saver(tf.model_variables())
                saver.restore(self.sess, tf.train.latest_checkpoint(check_point_path))

    @property
    def agents(self):
        ''' Get a list of agents, one for each position in the game

        Returns:
            agents (list): A list of agents

        Note: Each agent should behave like an RL agent, with step and
        eval_step implemented.
        '''
        return self.dqn_agents

class LeducHoldemNFSPModel(Model):
    ''' A pretrained NFSP model on Leduc Hold'em
    '''

    def __init__(self, root_path):
        ''' Load pretrained model
        '''
        self.graph = tf.Graph()
        self.sess = tf.Session(graph=self.graph)
        self.root_path = root_path

        env = rlcard.make('leduc-holdem')
        with self.graph.as_default():
            self.nfsp_agents = []
            for i in range(env.player_num):
                agent = NFSPAgent(self.sess,
                                  scope='nfsp' + str(i),
                                  action_num=env.action_num,
                                  state_shape=env.state_shape,
                                  hidden_layers_sizes=[128, 128],
                                  q_mlp_layers=[128, 128],
                                  evaluate_with='best_response')
                self.nfsp_agents.append(agent)
            self.sess.run(tf.global_variables_initializer())

        check_point_path = os.path.join(self.root_path, 'leduc_holdem_nfsp')
        with self.sess.as_default():
            with self.graph.as_default():
                saver = tf.train.Saver(tf.model_variables())
                saver.restore(self.sess, tf.train.latest_checkpoint(check_point_path))

    @property
    def agents(self):
        ''' Get a list of agents, one for each position in the game

        Returns:
            agents (list): A list of agents

        Note: Each agent should behave like an RL agent, with step and
        eval_step implemented.
        '''
        return self.nfsp_agents

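A minimal usage sketch (not part of this commit) of plugging one of these wrappers into an RLCard environment; it assumes the pretrained checkpoints live under ./models, as in the tournament script below:

import rlcard
from pretrained_models import LeducHoldemCFRModel

model = LeducHoldemCFRModel('./models')    # loads the CFR checkpoint from ./models/leduc_holdem_cfr
env = rlcard.make('leduc-holdem')
env.set_agents(model.agents)               # one agent per position
_, payoffs = env.run(is_training=False)    # play a single hand
print(payoffs)                             # per-player payoffs for that hand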
@@ -0,0 +1,120 @@
import os
import json
import tensorflow as tf
import sys
import rlcard
from tqdm import tqdm
from rlcard.agents.nfsp_agent import NFSPAgent
from rlcard.agents.dqn_agent import DQNAgent
from math import log10
# from rlcard.agents.random_agent import RandomAgent
from rlcard.utils.utils import set_global_seed
from rlcard.utils.logger import Logger
from pretrained_models import LeducHoldemDQNModel1, LeducHoldemNFSPModel, LeducHoldemCFRModel, LeducHoldemRandomModel, LeducHoldemRuleModel, LeducHoldemDQNModel2

class Tournament(object):

    def __init__(self,
                 agent1,
                 agent2,
                 agent3,
                 agent4,
                 agent5,
                 env_id,
                 evaluate_num=10000):

        set_global_seed(0)
        self.env_id = env_id
        self.env1 = rlcard.make(env_id, allow_raw_data=True)
        self.env2 = rlcard.make(env_id, allow_raw_data=True)
        self.env3 = rlcard.make(env_id, allow_raw_data=True)
        self.agent1 = agent1.agents[0]
        self.agent2 = agent2.agents[0]
        self.agent3 = agent3.agents[0]
        self.agent4 = agent4.agents[0]
        self.agent5 = agent5.agents[0]
        self.evaluate_num = evaluate_num
        # Fixed head-to-head matchups used by competition():
        # env1: agent1 vs. agent2, env2: agent1 vs. agent3, env3: agent1 vs. agent4
        self.env1.set_agents([self.agent1, self.agent2])
        self.env2.set_agents([self.agent1, self.agent3])
        self.env3.set_agents([self.agent1, self.agent4])

    def competition(self):

        agent1_wins = 0
        agent2_wins = 0
        print("########## Play Against Random Agent ##########")
        for eval_episode in tqdm(range(self.evaluate_num)):
            _, payoffs = self.env1.run(is_training=False)

            agent1_wins += payoffs[0]
            agent2_wins += payoffs[1]

        # Average payoff per episode, not a win rate
        agent1_rate = agent1_wins / self.evaluate_num
        agent2_rate = agent2_wins / self.evaluate_num
        print("DQN Agent average performance:", agent1_rate)
        print("Random Agent average performance:", agent2_rate)
        print("\n")

        print("########## Play Against Rule-based Agent ##########")
        agent1_wins = 0
        agent2_wins = 0
        for eval_episode in tqdm(range(self.evaluate_num)):
            _, payoffs = self.env2.run(is_training=False)

            agent1_wins += payoffs[0]
            agent2_wins += payoffs[1]

        agent1_rate = agent1_wins / self.evaluate_num
        agent2_rate = agent2_wins / self.evaluate_num
        print("DQN Agent average performance:", agent1_rate)
        print("Rule-based Agent average performance:", agent2_rate)
        print("\n")

        agent1_wins = 0
        agent2_wins = 0
        print("########## Play Against CFR Agent ##########")
        for eval_episode in tqdm(range(self.evaluate_num)):
            _, payoffs = self.env3.run(is_training=False)  # env3 holds the agent1 vs. CFR matchup

            agent1_wins += payoffs[0]
            agent2_wins += payoffs[1]

        agent1_rate = agent1_wins / self.evaluate_num
        agent2_rate = agent2_wins / self.evaluate_num
        print("DQN Agent average performance:", agent1_rate)
        print("CFR Agent average performance:", agent2_rate)

    def evaluate(self):
        agents = [self.agent1, self.agent2, self.agent3, self.agent4, self.agent5]
        for a1 in agents:
            avg_performance = 0.0
            print("########### Evaluating " + str(a1) + " #########")
            for a2 in agents:
                if a1 == a2:
                    continue
                agent1_wins = 0
                env = rlcard.make(self.env_id, allow_raw_data=True)
                env.set_agents([a1, a2])
                for eval_episode in range(self.evaluate_num):
                    _, payoffs = env.run(is_training=False)
                    agent1_wins += payoffs[0]
                agent1_rate = agent1_wins / self.evaluate_num
                if agent1_rate > 0:
                    # Count this matchup as a win if a1's average payoff is positive
                    avg_performance += 1.0
                print("Against " + str(a2) + ":", agent1_rate)
            # Fraction of opponents against which a1 has a positive average payoff
            avg_performance /= len(agents) - 1
            print("Average Performance:", avg_performance)
            print("\n")


if __name__ == '__main__':
    root_path = './models'
    agent1 = LeducHoldemDQNModel1(root_path)
    agent2 = LeducHoldemRandomModel(root_path)
    agent3 = LeducHoldemRuleModel()
    agent4 = LeducHoldemCFRModel(root_path)
    agent5 = LeducHoldemDQNModel2(root_path)
    t = Tournament(agent1, agent2, agent3, agent4, agent5, 'leduc-holdem')
    # t.competition()
    t.evaluate()
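For reference, a minimal sketch (not part of this commit, same imports as above) of running the fixed head-to-head matches via competition() instead of the round robin, with a smaller episode count for a quick check:

root_path = './models'
t = Tournament(LeducHoldemDQNModel1(root_path),
               LeducHoldemRandomModel(root_path),
               LeducHoldemRuleModel(),
               LeducHoldemCFRModel(root_path),
               LeducHoldemDQNModel2(root_path),
               'leduc-holdem',
               evaluate_num=1000)  # 1,000 hands per matchup instead of 10,000
t.competition()                    # DQN vs. Random / Rule-based / CFR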