''' Wrappers of pretrained models. Designed for TensorFlow.
'''
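
# NOTE (usage sketch, not part of the original wrappers): these classes are
# usually not instantiated directly. In rlcard, pretrained models are
# registered under string identifiers and then loaded by name, e.g. something
# like rlcard.models.load('leduc-holdem-cfr'). The exact identifiers depend on
# how the classes below are registered in rlcard/models/__init__.py, so treat
# the name above as an illustration only.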
import os

import tensorflow as tf

import rlcard
from rlcard.agents.nfsp_agent import NFSPAgent
from rlcard.agents.dqn_agent import DQNAgent
from rlcard.agents.cfr_agent import CFRAgent
from rlcard.agents.random_agent import RandomAgent
from rlcard.models.model import Model
from rlcard.models.leducholdem_rule_models import LeducholdemRuleAgentV1


class LeducHoldemRuleModel(Model):
    ''' Leduc Hold'em rule model, version 1
    '''

    def __init__(self):
        ''' Load the rule-based agents
        '''
        env = rlcard.make('leduc-holdem', allow_raw_data=True)

        rule_agent = LeducholdemRuleAgentV1()
        self.rule_agents = [rule_agent for _ in range(env.player_num)]

    @property
    def agents(self):
        ''' Get a list of agents for each position in the game

        Returns:
            agents (list): A list of agents

        Note: Each agent should be just like an RL agent, with step and
        eval_step functioning well.
        '''
        return self.rule_agents

    @property
    def use_raw(self):
        ''' Indicate whether to use raw state and action

        Returns:
            use_raw (boolean): True if raw state and action are used
        '''
        return True


class LeducHoldemRandomModel(Model):
    ''' A model of random agents on Leduc Hold'em
    '''

    def __init__(self, root_path):
        ''' Build the random agents

        Note: root_path is not used by this model; it is accepted only to keep
        the constructor signature consistent with the other wrappers.
        '''
        env = rlcard.make('leduc-holdem')
        self.agent = RandomAgent(action_num=env.action_num)

    @property
    def agents(self):
        ''' Get a list of agents for each position in the game

        Returns:
            agents (list): A list of agents

        Note: Each agent should be just like an RL agent, with step and
        eval_step functioning well.
        '''
        return [self.agent, self.agent]

    @property
    def use_raw(self):
        ''' Indicate whether to use raw state and action

        Returns:
            use_raw (boolean): True if raw state and action are used
        '''
        return False


class LeducHoldemCFRModel(Model):
    ''' A pretrained CFR model on Leduc Hold'em
    '''

    def __init__(self, root_path):
        ''' Load the pretrained model
        '''
        env = rlcard.make('leduc-holdem')
        model_path = os.path.join(root_path, 'leduc_holdem_cfr')
        self.agent = CFRAgent(env, model_path=model_path)
        self.agent.load()  # Load the saved policy if a checkpoint exists

    @property
    def agents(self):
        ''' Get a list of agents for each position in the game

        Returns:
            agents (list): A list of agents

        Note: Each agent should be just like an RL agent, with step and
        eval_step functioning well.
        '''
        return [self.agent, self.agent]

    @property
    def use_raw(self):
        ''' Indicate whether to use raw state and action

        Returns:
            use_raw (boolean): True if raw state and action are used
        '''
        return False


class LeducHoldemDQNModel2(Model):
    ''' A pretrained DQN model on Leduc Hold'em (two 128-unit hidden layers)
    '''

    def __init__(self, root_path):
        ''' Load the pretrained model
        '''
        # Build the agent in its own graph/session so that loading it does not
        # interfere with any other TensorFlow graph in the process
        self.graph = tf.Graph()
        self.sess = tf.Session(graph=self.graph)
        self.root_path = root_path

        env = rlcard.make('leduc-holdem')
        with self.graph.as_default():
            agent = DQNAgent(self.sess,
                             scope='dqn',
                             action_num=env.action_num,
                             replay_memory_size=int(1e5),
                             replay_memory_init_size=1000,
                             state_shape=env.state_shape,
                             mlp_layers=[128, 128])
            self.dqn_agents = [agent, agent]
            self.sess.run(tf.global_variables_initializer())

        # Restore the pretrained weights from the latest checkpoint
        check_point_path = os.path.join(self.root_path, 'leduc_holdem_dqn')
        with self.sess.as_default():
            with self.graph.as_default():
                saver = tf.train.Saver(tf.model_variables())
                saver.restore(self.sess, tf.train.latest_checkpoint(check_point_path))

    @property
    def agents(self):
        ''' Get a list of agents for each position in the game

        Returns:
            agents (list): A list of agents

        Note: Each agent should be just like an RL agent, with step and
        eval_step functioning well.
        '''
        return self.dqn_agents


class LeducHoldemDQNModel1(Model):
    ''' A pretrained DQN model on Leduc Hold'em with a small network (two
    8-unit hidden layers)
    '''

    def __init__(self, root_path):
        ''' Load the pretrained model
        '''
        self.graph = tf.Graph()
        self.sess = tf.Session(graph=self.graph)
        self.root_path = root_path

        env = rlcard.make('leduc-holdem')
        with self.graph.as_default():
            agent = DQNAgent(self.sess,
                             scope='dqn',
                             action_num=env.action_num,
                             replay_memory_size=int(1e5),
                             replay_memory_init_size=1000,
                             state_shape=env.state_shape,
                             mlp_layers=[8, 8])
            self.dqn_agents = [agent, agent]
            self.sess.run(tf.global_variables_initializer())

        # Restore the pretrained weights from the latest checkpoint
        check_point_path = os.path.join(self.root_path, 'leduc_holdem_dqn_bad')
        with self.sess.as_default():
            with self.graph.as_default():
                saver = tf.train.Saver(tf.model_variables())
                saver.restore(self.sess, tf.train.latest_checkpoint(check_point_path))

    @property
    def agents(self):
        ''' Get a list of agents for each position in the game

        Returns:
            agents (list): A list of agents

        Note: Each agent should be just like an RL agent, with step and
        eval_step functioning well.
        '''
        return self.dqn_agents


class LeducHoldemNFSPModel(Model):
    ''' A pretrained NFSP model on Leduc Hold'em
    '''

    def __init__(self, root_path):
        ''' Load the pretrained model
        '''
        self.graph = tf.Graph()
        self.sess = tf.Session(graph=self.graph)
        self.root_path = root_path

        env = rlcard.make('leduc-holdem')
        with self.graph.as_default():
            # One NFSP agent per player, each with its own variable scope
            self.nfsp_agents = []
            for i in range(env.player_num):
                agent = NFSPAgent(self.sess,
                                  scope='nfsp' + str(i),
                                  action_num=env.action_num,
                                  state_shape=env.state_shape,
                                  hidden_layers_sizes=[128, 128],
                                  q_mlp_layers=[128, 128],
                                  evaluate_with='best_response')
                self.nfsp_agents.append(agent)
            self.sess.run(tf.global_variables_initializer())

        # Restore the pretrained weights from the latest checkpoint
        check_point_path = os.path.join(self.root_path, 'leduc_holdem_nfsp')
        with self.sess.as_default():
            with self.graph.as_default():
                saver = tf.train.Saver(tf.model_variables())
                saver.restore(self.sess, tf.train.latest_checkpoint(check_point_path))

    @property
    def agents(self):
        ''' Get a list of agents for each position in the game

        Returns:
            agents (list): A list of agents

        Note: Each agent should be just like an RL agent, with step and
        eval_step functioning well.
        '''
        return self.nfsp_agents
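

if __name__ == '__main__':
    # Minimal usage sketch (not part of the original wrappers). The rule-based
    # model needs no checkpoint on disk, so it is the safest one to exercise;
    # the DQN/NFSP/CFR wrappers above additionally require their pretrained
    # checkpoints to exist under the root_path passed to them.
    model = LeducHoldemRuleModel()
    print('Number of agents:', len(model.agents))
    print('Uses raw state/action:', model.use_raw)
    # The agents can then be plugged into an environment for evaluation,
    # e.g. env.set_agents(model.agents) on an env created with
    # rlcard.make('leduc-holdem', allow_raw_data=True).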