''' Wrappers of pretrained models. Designed for TensorFlow.
'''
import os
import tensorflow as tf
import rlcard
from rlcard.agents.nfsp_agent import NFSPAgent
from rlcard.agents.dqn_agent import DQNAgent
from rlcard.agents.cfr_agent import CFRAgent
from rlcard.agents.random_agent import RandomAgent
from rlcard.models.model import Model
from rlcard.models.leducholdem_rule_models import LeducholdemRuleAgentV1


class LeducHoldemRuleModel(Model):
    ''' Leduc Hold'em rule model, version 1
    '''
    def __init__(self):
        ''' Load the rule-based agents
        '''
        env = rlcard.make('leduc-holdem', allow_raw_data=True)
        rule_agent = LeducholdemRuleAgentV1()
        self.rule_agents = [rule_agent for _ in range(env.player_num)]

    @property
    def agents(self):
        ''' Get a list of agents for each position in the game

        Returns:
            agents (list): A list of agents

        Note: Each agent should behave like an RL agent, with step and
            eval_step functioning well.
        '''
        return self.rule_agents

    @property
    def use_raw(self):
        ''' Indicate whether to use raw state and action

        Returns:
            use_raw (boolean): True if using raw state and action
        '''
        return True
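

# Added usage sketch (not part of the original file): a minimal, hedged
# example of plugging a model's agents into an rlcard env for evaluation.
# It assumes rlcard's env.set_agents / env.run API; the _demo_* name is
# hypothetical and exists only for illustration.
def _demo_rule_model():
    ''' Play one evaluation hand with the rule model (illustrative only). '''
    model = LeducHoldemRuleModel()
    # use_raw is True for this model, so the env needs raw data enabled
    env = rlcard.make('leduc-holdem', allow_raw_data=True)
    env.set_agents(model.agents)
    trajectories, payoffs = env.run(is_training=False)
    return payoffs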


class LeducHoldemRandomModel(Model):
    ''' A random model on Leduc Hold'em
    '''
    def __init__(self, root_path):
        ''' Build the random agent (no checkpoint is loaded; root_path is
            kept only for a uniform constructor signature)
        '''
        env = rlcard.make('leduc-holdem')
        self.agent = RandomAgent(action_num=env.action_num)

    @property
    def agents(self):
        ''' Get a list of agents for each position in the game

        Returns:
            agents (list): A list of agents

        Note: Each agent should behave like an RL agent, with step and
            eval_step functioning well.
        '''
        return [self.agent, self.agent]

    @property
    def use_raw(self):
        ''' Indicate whether to use raw state and action

        Returns:
            use_raw (boolean): True if using raw state and action
        '''
        return False


class LeducHoldemCFRModel(Model):
    ''' A pretrained model on Leduc Hold'em with CFR
    '''
    def __init__(self, root_path):
        ''' Load pretrained model
        '''
        env = rlcard.make('leduc-holdem')
        model_path = os.path.join(root_path, 'leduc_holdem_cfr')
        self.agent = CFRAgent(env, model_path=model_path)
        self.agent.load()  # Load the saved policy from model_path

    @property
    def agents(self):
        ''' Get a list of agents for each position in the game

        Returns:
            agents (list): A list of agents

        Note: Each agent should behave like an RL agent, with step and
            eval_step functioning well.
        '''
        return [self.agent, self.agent]

    @property
    def use_raw(self):
        ''' Indicate whether to use raw state and action

        Returns:
            use_raw (boolean): True if using raw state and action
        '''
        return False
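

# Hedged sketch (added, hypothetical helper): loading the CFR model assumes a
# saved policy exists under <root_path>/leduc_holdem_cfr, which
# CFRAgent.load() reads back in the constructor above.
def _demo_cfr_model(root_path):
    ''' Instantiate the CFR model and return its per-seat agents
    (illustrative only). '''
    model = LeducHoldemCFRModel(root_path)
    # The same agent object is shared by both seats
    assert len(model.agents) == 2
    return model.agents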


class LeducHoldemDQNModel2(Model):
    ''' A pretrained model on Leduc Hold'em with DQN (128x128 MLP)
    '''
    def __init__(self, root_path):
        ''' Load pretrained model
        '''
        self.graph = tf.Graph()
        self.sess = tf.Session(graph=self.graph)
        self.root_path = root_path
        env = rlcard.make('leduc-holdem')
        with self.graph.as_default():
            agent = DQNAgent(self.sess,
                             scope='dqn',
                             action_num=env.action_num,
                             replay_memory_size=int(1e5),
                             replay_memory_init_size=1000,
                             state_shape=env.state_shape,
                             mlp_layers=[128, 128])
            self.dqn_agents = [agent, agent]
            self.sess.run(tf.global_variables_initializer())

        check_point_path = os.path.join(self.root_path, 'leduc_holdem_dqn')
        with self.sess.as_default():
            with self.graph.as_default():
                saver = tf.train.Saver(tf.model_variables())
                saver.restore(self.sess, tf.train.latest_checkpoint(check_point_path))

    @property
    def agents(self):
        ''' Get a list of agents for each position in the game

        Returns:
            agents (list): A list of agents

        Note: Each agent should behave like an RL agent, with step and
            eval_step functioning well.
        '''
        return self.dqn_agents


class LeducHoldemDQNModel1(Model):
    ''' A pretrained model on Leduc Hold'em with DQN (8x8 MLP, the weaker
        variant, loaded from the 'leduc_holdem_dqn_bad' checkpoint)
    '''
    def __init__(self, root_path):
        ''' Load pretrained model
        '''
        self.graph = tf.Graph()
        self.sess = tf.Session(graph=self.graph)
        self.root_path = root_path
        env = rlcard.make('leduc-holdem')
        with self.graph.as_default():
            agent = DQNAgent(self.sess,
                             scope='dqn',
                             action_num=env.action_num,
                             replay_memory_size=int(1e5),
                             replay_memory_init_size=1000,
                             state_shape=env.state_shape,
                             mlp_layers=[8, 8])
            self.dqn_agents = [agent, agent]
            self.sess.run(tf.global_variables_initializer())

        check_point_path = os.path.join(self.root_path, 'leduc_holdem_dqn_bad')
        with self.sess.as_default():
            with self.graph.as_default():
                saver = tf.train.Saver(tf.model_variables())
                saver.restore(self.sess, tf.train.latest_checkpoint(check_point_path))

    @property
    def agents(self):
        ''' Get a list of agents for each position in the game

        Returns:
            agents (list): A list of agents

        Note: Each agent should behave like an RL agent, with step and
            eval_step functioning well.
        '''
        return self.dqn_agents
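

# Design note (added commentary): each TF-based model owns a private tf.Graph
# and tf.Session, so several checkpoints can be restored side by side without
# variable-name collisions in TF1's default graph. A hedged sketch, assuming
# both checkpoints exist under root_path; the _demo_* name is hypothetical.
def _demo_load_both_dqn(root_path):
    ''' Illustrative only: the per-model graphs keep the two identical
    'dqn' variable scopes from clashing. '''
    strong = LeducHoldemDQNModel2(root_path)  # 128x128 MLP, 'leduc_holdem_dqn'
    weak = LeducHoldemDQNModel1(root_path)    # 8x8 MLP, 'leduc_holdem_dqn_bad'
    return strong, weak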


class LeducHoldemNFSPModel(Model):
    ''' A pretrained model on Leduc Hold'em with NFSP
    '''
    def __init__(self, root_path):
        ''' Load pretrained model
        '''
        self.graph = tf.Graph()
        self.sess = tf.Session(graph=self.graph)
        self.root_path = root_path
        env = rlcard.make('leduc-holdem')
        with self.graph.as_default():
            self.nfsp_agents = []
            for i in range(env.player_num):
                agent = NFSPAgent(self.sess,
                                  scope='nfsp' + str(i),
                                  action_num=env.action_num,
                                  state_shape=env.state_shape,
                                  hidden_layers_sizes=[128, 128],
                                  q_mlp_layers=[128, 128],
                                  evaluate_with='best_response')
                self.nfsp_agents.append(agent)
            self.sess.run(tf.global_variables_initializer())

        check_point_path = os.path.join(self.root_path, 'leduc_holdem_nfsp')
        with self.sess.as_default():
            with self.graph.as_default():
                saver = tf.train.Saver(tf.model_variables())
                saver.restore(self.sess, tf.train.latest_checkpoint(check_point_path))

    @property
    def agents(self):
        ''' Get a list of agents for each position in the game

        Returns:
            agents (list): A list of agents

        Note: Each agent should behave like an RL agent, with step and
            eval_step functioning well.
        '''
        return self.nfsp_agents
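

# Added demo (not part of the original file): a hedged end-to-end sketch that
# pits the NFSP model against the random model and averages NFSP's payoff.
# ROOT_PATH is a hypothetical directory that must contain the
# 'leduc_holdem_nfsp' checkpoint; the rlcard env API is assumed as above.
if __name__ == '__main__':
    ROOT_PATH = './models'  # hypothetical checkpoint directory
    nfsp_model = LeducHoldemNFSPModel(ROOT_PATH)
    random_model = LeducHoldemRandomModel(ROOT_PATH)
    env = rlcard.make('leduc-holdem')
    # Seat 0: pretrained NFSP agent; seat 1: uniform random agent
    env.set_agents([nfsp_model.agents[0], random_model.agents[1]])
    num_hands = 100
    total = 0.0
    for _ in range(num_hands):
        _, payoffs = env.run(is_training=False)
        total += payoffs[0]
    print('Average NFSP payoff vs. random over {} hands: {}'.format(
        num_hands, total / num_hands))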