# rlcard-showdown/tournaments/pretrained_models.py

''' Wrappers of pretrained models. Designed for Tensorflow.
'''

import os
import tensorflow as tf

import rlcard
from rlcard.agents.nfsp_agent import NFSPAgent
from rlcard.agents.dqn_agent import DQNAgent
from rlcard.agents.cfr_agent import CFRAgent
from rlcard.agents.random_agent import RandomAgent
from rlcard.models.model import Model
from rlcard.models.leducholdem_rule_models import LeducholdemRuleAgentV1

class LeducHoldemRuleModel(Model):
    ''' Rule-based model (version 1) for Leduc Holdem
    '''

    def __init__(self):
        ''' Build the rule agent and assign it to every position
        '''
        game_env = rlcard.make('leduc-holdem', allow_raw_data=True)

        # A single LeducholdemRuleAgentV1 instance is shared by all positions.
        shared_agent = LeducholdemRuleAgentV1()
        self.rule_agents = [shared_agent] * game_env.player_num

    @property
    def agents(self):
        ''' Agents playing each position of the game

        Returns:
            agents (list): One entry per player; every entry is the same
                           rule agent instance

        Note: Each agent should behave like an RL agent, i.e. provide
              working step and eval_step.
        '''
        return self.rule_agents

    @property
    def use_raw(self):
        ''' Tell the caller that this model works on raw states and actions

        Returns:
            use_raw (boolean): Always True for this model
        '''
        return True

class LeducHoldemRandomModel(Model):
    ''' A baseline model on Leduc Holdem backed by a RandomAgent

    Nothing is loaded from disk; the agent is created directly from the
    environment's action count.
    '''

    def __init__(self, root_path=None):
        ''' Create the random agent

        Args:
            root_path (str): Unused. Accepted so that this model can be
                             constructed with the same argument as the
                             models in this file that load files from a
                             root path.
        '''
        env = rlcard.make('leduc-holdem')
        self.agent = RandomAgent(action_num=env.action_num)

    @property
    def agents(self):
        ''' Get a list of agents for each position in the game

        Returns:
            agents (list): A two-element list holding the same RandomAgent
                           instance twice

        Note: Each agent should be just like RL agent with step and eval_step
              functioning well.
        '''
        return [self.agent, self.agent]

    @property
    def use_raw(self):
        ''' Indicate whether use raw state and action

        Returns:
            use_raw (boolean): True if using raw state and action
        '''
        return False

class LeducHoldemCFRModel(Model):
    ''' A pretrained model on Leduc Holdem with CFR
    '''

    def __init__(self, root_path):
        ''' Create the CFR agent and load its saved model

        Args:
            root_path (str): Directory that contains the 'leduc_holdem_cfr'
                             model directory
        '''
        game_env = rlcard.make('leduc-holdem')
        cfr_dir = os.path.join(root_path, 'leduc_holdem_cfr')
        self.agent = CFRAgent(game_env, model_path=cfr_dir)
        # Ask the agent to load whatever it has saved (presumably from
        # model_path -- the loading itself is implemented by CFRAgent).
        self.agent.load()

    @property
    def agents(self):
        ''' Agents playing each position of the game

        Returns:
            agents (list): A two-element list holding the same CFR agent
                           instance twice

        Note: Each agent should behave like an RL agent, i.e. provide
              working step and eval_step.
        '''
        return [self.agent] * 2

    @property
    def use_raw(self):
        ''' Tell the caller whether raw states and actions are used

        Returns:
            use_raw (boolean): Always False for this model
        '''
        return False

class LeducHoldemDQNModel2(Model):
    ''' A pretrained model on Leduc Holdem with DQN

    The agent uses MLP layers [128, 128] and its variables are restored
    from the latest checkpoint in the 'leduc_holdem_dqn' directory.
    '''

    def __init__(self, root_path):
        ''' Load pretrained model

        Args:
            root_path (str): Directory that contains the 'leduc_holdem_dqn'
                             checkpoint directory

        Raises:
            ValueError: If no checkpoint can be found in the checkpoint
                        directory
        '''
        # Each model owns a private graph and session.
        self.graph = tf.Graph()
        self.sess = tf.Session(graph=self.graph)
        self.root_path = root_path

        env = rlcard.make('leduc-holdem')
        with self.graph.as_default():
            agent = DQNAgent(self.sess,
                             scope='dqn',
                             action_num=env.action_num,
                             replay_memory_size=int(1e5),
                             replay_memory_init_size=1000,
                             state_shape=env.state_shape,
                             mlp_layers=[128, 128])
            # The same agent instance plays both positions.
            self.dqn_agents = [agent, agent]
            self.sess.run(tf.global_variables_initializer())

        check_point_path = os.path.join(self.root_path, 'leduc_holdem_dqn')
        with self.sess.as_default(), self.graph.as_default():
            saver = tf.train.Saver(tf.model_variables())
            latest_checkpoint = tf.train.latest_checkpoint(check_point_path)
            # latest_checkpoint is None when the directory holds no
            # checkpoint; fail with a message that names the directory
            # instead of handing None to saver.restore.
            if latest_checkpoint is None:
                raise ValueError(
                    'No checkpoint found in {}'.format(check_point_path))
            saver.restore(self.sess, latest_checkpoint)

    @property
    def agents(self):
        ''' Get a list of agents for each position in the game

        Returns:
            agents (list): A two-element list holding the same DQN agent
                           instance twice

        Note: Each agent should be just like RL agent with step and eval_step
              functioning well.
        '''
        return self.dqn_agents

class LeducHoldemDQNModel1(Model):
    ''' A pretrained model on Leduc Holdem with DQN

    The agent uses MLP layers [8, 8] and its variables are restored from
    the latest checkpoint in the 'leduc_holdem_dqn_bad' directory.
    '''

    def __init__(self, root_path):
        ''' Load pretrained model

        Args:
            root_path (str): Directory that contains the
                             'leduc_holdem_dqn_bad' checkpoint directory

        Raises:
            ValueError: If no checkpoint can be found in the checkpoint
                        directory
        '''
        # Each model owns a private graph and session.
        self.graph = tf.Graph()
        self.sess = tf.Session(graph=self.graph)
        self.root_path = root_path

        env = rlcard.make('leduc-holdem')
        with self.graph.as_default():
            agent = DQNAgent(self.sess,
                             scope='dqn',
                             action_num=env.action_num,
                             replay_memory_size=int(1e5),
                             replay_memory_init_size=1000,
                             state_shape=env.state_shape,
                             mlp_layers=[8, 8])
            # The same agent instance plays both positions.
            self.dqn_agents = [agent, agent]
            self.sess.run(tf.global_variables_initializer())

        check_point_path = os.path.join(self.root_path, 'leduc_holdem_dqn_bad')
        with self.sess.as_default(), self.graph.as_default():
            saver = tf.train.Saver(tf.model_variables())
            latest_checkpoint = tf.train.latest_checkpoint(check_point_path)
            # latest_checkpoint is None when the directory holds no
            # checkpoint; fail with a message that names the directory
            # instead of handing None to saver.restore.
            if latest_checkpoint is None:
                raise ValueError(
                    'No checkpoint found in {}'.format(check_point_path))
            saver.restore(self.sess, latest_checkpoint)

    @property
    def agents(self):
        ''' Get a list of agents for each position in the game

        Returns:
            agents (list): A two-element list holding the same DQN agent
                           instance twice

        Note: Each agent should be just like RL agent with step and eval_step
              functioning well.
        '''
        return self.dqn_agents


class LeducHoldemNFSPModel(Model):
    ''' A pretrained model on Leduc Holdem with NFSP

    One NFSP agent is built per player, each under its own variable scope
    ('nfsp0', 'nfsp1', ...), and the variables are restored from the latest
    checkpoint in the 'leduc_holdem_nfsp' directory.
    '''

    def __init__(self, root_path):
        ''' Load pretrained model

        Args:
            root_path (str): Directory that contains the 'leduc_holdem_nfsp'
                             checkpoint directory

        Raises:
            ValueError: If no checkpoint can be found in the checkpoint
                        directory
        '''
        # Each model owns a private graph and session.
        self.graph = tf.Graph()
        self.sess = tf.Session(graph=self.graph)
        self.root_path = root_path

        env = rlcard.make('leduc-holdem')
        with self.graph.as_default():
            # Unlike the DQN models, every position gets its own agent.
            self.nfsp_agents = [
                NFSPAgent(self.sess,
                          scope='nfsp' + str(i),
                          action_num=env.action_num,
                          state_shape=env.state_shape,
                          hidden_layers_sizes=[128, 128],
                          q_mlp_layers=[128, 128],
                          evaluate_with='best_response')
                for i in range(env.player_num)
            ]
            self.sess.run(tf.global_variables_initializer())

        check_point_path = os.path.join(self.root_path, 'leduc_holdem_nfsp')
        with self.sess.as_default(), self.graph.as_default():
            saver = tf.train.Saver(tf.model_variables())
            latest_checkpoint = tf.train.latest_checkpoint(check_point_path)
            # latest_checkpoint is None when the directory holds no
            # checkpoint; fail with a message that names the directory
            # instead of handing None to saver.restore.
            if latest_checkpoint is None:
                raise ValueError(
                    'No checkpoint found in {}'.format(check_point_path))
            saver.restore(self.sess, latest_checkpoint)

    @property
    def agents(self):
        ''' Get a list of agents for each position in the game

        Returns:
            agents (list): One NFSP agent per player

        Note: Each agent should be just like RL agent with step and eval_step
              functioning well.
        '''
        return self.nfsp_agents