rlcard-showdown/tournaments/pretrained_models.py


''' Wrappers of pretrained models. Designed for TensorFlow.
'''

import os
import tensorflow as tf
import rlcard
from rlcard.agents.nfsp_agent import NFSPAgent
from rlcard.agents.dqn_agent import DQNAgent
from rlcard.agents.cfr_agent import CFRAgent
from rlcard.agents.random_agent import RandomAgent
from rlcard.models.model import Model
from rlcard.models.leducholdem_rule_models import LeducholdemRuleAgentV1
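
# Note: this module relies on the TensorFlow 1.x Graph/Session API
# (tf.Graph, tf.Session, tf.train.Saver, tf.global_variables_initializer).
# Running it under TensorFlow 2.x would require the tf.compat.v1 shims;
# a TF 1.x installation is assumed here.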


class LeducHoldemRuleModel(Model):
    ''' Leduc Holdem rule model, version 1
    '''

    def __init__(self):
        ''' Load the rule-based agents
        '''
        env = rlcard.make('leduc-holdem', allow_raw_data=True)
        rule_agent = LeducholdemRuleAgentV1()
        self.rule_agents = [rule_agent for _ in range(env.player_num)]

    @property
    def agents(self):
        ''' Get a list of agents, one for each position in the game

        Returns:
            agents (list): A list of agents

        Note: Each agent should behave like an RL agent, with working
            step and eval_step methods.
        '''
        return self.rule_agents

    @property
    def use_raw(self):
        ''' Indicate whether raw states and actions are used

        Returns:
            use_raw (boolean): True if raw states and actions are used
        '''
        return True


class LeducHoldemRandomModel(Model):
    ''' A random model on Leduc Holdem
    '''

    def __init__(self, root_path):
        ''' Load the random agent

        root_path is unused here; it is kept so all model classes share
        the same constructor signature.
        '''
        env = rlcard.make('leduc-holdem')
        self.agent = RandomAgent(action_num=env.action_num)

    @property
    def agents(self):
        ''' Get a list of agents, one for each position in the game

        Returns:
            agents (list): A list of agents

        Note: Each agent should behave like an RL agent, with working
            step and eval_step methods.
        '''
        return [self.agent, self.agent]

    @property
    def use_raw(self):
        ''' Indicate whether raw states and actions are used

        Returns:
            use_raw (boolean): True if raw states and actions are used
        '''
        return False


class LeducHoldemCFRModel(Model):
    ''' A pretrained model on Leduc Holdem with CFR
    '''

    def __init__(self, root_path):
        ''' Load pretrained model
        '''
        env = rlcard.make('leduc-holdem')
        model_path = os.path.join(root_path, 'leduc_holdem_cfr')
        self.agent = CFRAgent(env, model_path=model_path)
        self.agent.load()  # Load the saved policy from model_path

    @property
    def agents(self):
        ''' Get a list of agents, one for each position in the game

        Returns:
            agents (list): A list of agents

        Note: Each agent should behave like an RL agent, with working
            step and eval_step methods.
        '''
        return [self.agent, self.agent]

    @property
    def use_raw(self):
        ''' Indicate whether raw states and actions are used

        Returns:
            use_raw (boolean): True if raw states and actions are used
        '''
        return False


class LeducHoldemDQNModel2(Model):
    ''' A pretrained model on Leduc Holdem with DQN
    '''

    def __init__(self, root_path):
        ''' Load pretrained model
        '''
        self.graph = tf.Graph()
        self.sess = tf.Session(graph=self.graph)
        self.root_path = root_path

        env = rlcard.make('leduc-holdem')
        with self.graph.as_default():
            agent = DQNAgent(self.sess,
                             scope='dqn',
                             action_num=env.action_num,
                             replay_memory_size=int(1e5),
                             replay_memory_init_size=1000,
                             state_shape=env.state_shape,
                             mlp_layers=[128, 128])
            self.dqn_agents = [agent, agent]
            self.sess.run(tf.global_variables_initializer())

        # Restore the trained weights from the checkpoint directory
        check_point_path = os.path.join(self.root_path, 'leduc_holdem_dqn')
        with self.sess.as_default():
            with self.graph.as_default():
                saver = tf.train.Saver(tf.model_variables())
                saver.restore(self.sess, tf.train.latest_checkpoint(check_point_path))

    @property
    def agents(self):
        ''' Get a list of agents, one for each position in the game

        Returns:
            agents (list): A list of agents

        Note: Each agent should behave like an RL agent, with working
            step and eval_step methods.
        '''
        return self.dqn_agents


class LeducHoldemDQNModel1(Model):
    ''' A weaker pretrained model on Leduc Holdem with DQN
        (small [8, 8] network, restored from the 'leduc_holdem_dqn_bad'
        checkpoint)
    '''

    def __init__(self, root_path):
        ''' Load pretrained model
        '''
        self.graph = tf.Graph()
        self.sess = tf.Session(graph=self.graph)
        self.root_path = root_path

        env = rlcard.make('leduc-holdem')
        with self.graph.as_default():
            agent = DQNAgent(self.sess,
                             scope='dqn',
                             action_num=env.action_num,
                             replay_memory_size=int(1e5),
                             replay_memory_init_size=1000,
                             state_shape=env.state_shape,
                             mlp_layers=[8, 8])
            self.dqn_agents = [agent, agent]
            self.sess.run(tf.global_variables_initializer())

        # Restore the trained weights from the checkpoint directory
        check_point_path = os.path.join(self.root_path, 'leduc_holdem_dqn_bad')
        with self.sess.as_default():
            with self.graph.as_default():
                saver = tf.train.Saver(tf.model_variables())
                saver.restore(self.sess, tf.train.latest_checkpoint(check_point_path))

    @property
    def agents(self):
        ''' Get a list of agents, one for each position in the game

        Returns:
            agents (list): A list of agents

        Note: Each agent should behave like an RL agent, with working
            step and eval_step methods.
        '''
        return self.dqn_agents


class LeducHoldemNFSPModel(Model):
    ''' A pretrained model on Leduc Holdem with NFSP
    '''

    def __init__(self, root_path):
        ''' Load pretrained model
        '''
        self.graph = tf.Graph()
        self.sess = tf.Session(graph=self.graph)
        self.root_path = root_path

        env = rlcard.make('leduc-holdem')
        with self.graph.as_default():
            # NFSP trains one agent per position, each under its own scope
            self.nfsp_agents = []
            for i in range(env.player_num):
                agent = NFSPAgent(self.sess,
                                  scope='nfsp' + str(i),
                                  action_num=env.action_num,
                                  state_shape=env.state_shape,
                                  hidden_layers_sizes=[128, 128],
                                  q_mlp_layers=[128, 128],
                                  evaluate_with='best_response')
                self.nfsp_agents.append(agent)
            self.sess.run(tf.global_variables_initializer())

        # Restore the trained weights from the checkpoint directory
        check_point_path = os.path.join(self.root_path, 'leduc_holdem_nfsp')
        with self.sess.as_default():
            with self.graph.as_default():
                saver = tf.train.Saver(tf.model_variables())
                saver.restore(self.sess, tf.train.latest_checkpoint(check_point_path))

    @property
    def agents(self):
        ''' Get a list of agents, one for each position in the game

        Returns:
            agents (list): A list of agents

        Note: Each agent should behave like an RL agent, with working
            step and eval_step methods.
        '''
        return self.nfsp_agents
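

# --- Usage sketch (illustrative, not part of the original module) ---
# A minimal example of how one of these wrappers might be evaluated with
# rlcard's standard env API. The checkpoint root './models/pretrained' is
# a hypothetical path; point it at the directory that actually contains
# the leduc_holdem_* checkpoint folders.
if __name__ == '__main__':
    root_path = './models/pretrained'  # hypothetical checkpoint root
    model = LeducHoldemCFRModel(root_path)

    env = rlcard.make('leduc-holdem')
    env.set_agents(model.agents)

    # Play one evaluation hand and print both players' payoffs
    trajectories, payoffs = env.run(is_training=False)
    print(payoffs)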