add tournament

2020-03-05 20:10:15 -06:00 · 2020-03-05 20:10:15 -06:00 · 26cf44d15a
parent fd0f4517ee
commit 26cf44d15a
28 changed files with 370 additions and 0 deletions
--- a/tournaments/__init__.py
+++ b/tournaments/__init__.py
--- a/tournaments/__pycache__/pretrained_models.cpython-35.pyc
+++ b/tournaments/__pycache__/pretrained_models.cpython-35.pyc
--- a/tournaments/models/leduc_holdem_cfr/average_policy.pkl
+++ b/tournaments/models/leduc_holdem_cfr/average_policy.pkl
--- a/tournaments/models/leduc_holdem_cfr/iteration.pkl
+++ b/tournaments/models/leduc_holdem_cfr/iteration.pkl
@ -0,0 +1 @@
 <EFBFBD>M<>&.
--- a/tournaments/models/leduc_holdem_cfr/leduc_holdem_cfr/average_policy.pkl
+++ b/tournaments/models/leduc_holdem_cfr/leduc_holdem_cfr/average_policy.pkl
--- a/tournaments/models/leduc_holdem_cfr/leduc_holdem_cfr/iteration.pkl
+++ b/tournaments/models/leduc_holdem_cfr/leduc_holdem_cfr/iteration.pkl
@ -0,0 +1 @@
 <EFBFBD>M<>.
--- a/tournaments/models/leduc_holdem_cfr/leduc_holdem_cfr/policy.pkl
+++ b/tournaments/models/leduc_holdem_cfr/leduc_holdem_cfr/policy.pkl
--- a/tournaments/models/leduc_holdem_cfr/leduc_holdem_cfr/regrets.pkl
+++ b/tournaments/models/leduc_holdem_cfr/leduc_holdem_cfr/regrets.pkl
--- a/tournaments/models/leduc_holdem_cfr/policy.pkl
+++ b/tournaments/models/leduc_holdem_cfr/policy.pkl
--- a/tournaments/models/leduc_holdem_cfr/regrets.pkl
+++ b/tournaments/models/leduc_holdem_cfr/regrets.pkl
--- a/tournaments/models/leduc_holdem_dqn/checkpoint
+++ b/tournaments/models/leduc_holdem_dqn/checkpoint
@ -0,0 +1,2 @@
 model_checkpoint_path: "model"
 all_model_checkpoint_paths: "model"
--- a/tournaments/models/leduc_holdem_dqn/leduc_holdem_dqn/checkpoint
+++ b/tournaments/models/leduc_holdem_dqn/leduc_holdem_dqn/checkpoint
@ -0,0 +1,2 @@
 model_checkpoint_path: "model"
 all_model_checkpoint_paths: "model"
--- a/tournaments/models/leduc_holdem_dqn/leduc_holdem_dqn/model.data-00000-of-00001
+++ b/tournaments/models/leduc_holdem_dqn/leduc_holdem_dqn/model.data-00000-of-00001
--- a/tournaments/models/leduc_holdem_dqn/leduc_holdem_dqn/model.index
+++ b/tournaments/models/leduc_holdem_dqn/leduc_holdem_dqn/model.index
--- a/tournaments/models/leduc_holdem_dqn/leduc_holdem_dqn/model.meta
+++ b/tournaments/models/leduc_holdem_dqn/leduc_holdem_dqn/model.meta
--- a/tournaments/models/leduc_holdem_dqn/model.data-00000-of-00001
+++ b/tournaments/models/leduc_holdem_dqn/model.data-00000-of-00001
--- a/tournaments/models/leduc_holdem_dqn/model.index
+++ b/tournaments/models/leduc_holdem_dqn/model.index
--- a/tournaments/models/leduc_holdem_dqn/model.meta
+++ b/tournaments/models/leduc_holdem_dqn/model.meta
--- a/tournaments/models/leduc_holdem_dqn_bad/checkpoint
+++ b/tournaments/models/leduc_holdem_dqn_bad/checkpoint
@ -0,0 +1,2 @@
 model_checkpoint_path: "model"
 all_model_checkpoint_paths: "model"
--- a/tournaments/models/leduc_holdem_dqn_bad/model.data-00000-of-00001
+++ b/tournaments/models/leduc_holdem_dqn_bad/model.data-00000-of-00001
--- a/tournaments/models/leduc_holdem_dqn_bad/model.index
+++ b/tournaments/models/leduc_holdem_dqn_bad/model.index
--- a/tournaments/models/leduc_holdem_dqn_bad/model.meta
+++ b/tournaments/models/leduc_holdem_dqn_bad/model.meta
--- a/tournaments/models/leduc_holdem_nfsp/checkpoint
+++ b/tournaments/models/leduc_holdem_nfsp/checkpoint
@ -0,0 +1,2 @@
 model_checkpoint_path: "model"
 all_model_checkpoint_paths: "model"
--- a/tournaments/models/leduc_holdem_nfsp/model.data-00000-of-00001
+++ b/tournaments/models/leduc_holdem_nfsp/model.data-00000-of-00001
--- a/tournaments/models/leduc_holdem_nfsp/model.index
+++ b/tournaments/models/leduc_holdem_nfsp/model.index
--- a/tournaments/models/leduc_holdem_nfsp/model.meta
+++ b/tournaments/models/leduc_holdem_nfsp/model.meta
--- a/tournaments/pretrained_models.py
+++ b/tournaments/pretrained_models.py
@ -0,0 +1,240 @@
 ''' Wrappers of pretrained models. Designed for Tensorflow.
 '''
 import os
 import tensorflow as tf
 import rlcard
 from rlcard.agents.nfsp_agent import NFSPAgent
 from rlcard.agents.dqn_agent import DQNAgent
 from rlcard.agents.cfr_agent import CFRAgent
 from rlcard.agents.random_agent import RandomAgent
 from rlcard.models.model import Model
 from rlcard.models.leducholdem_rule_models import LeducholdemRuleAgentV1
 class LeducHoldemRuleModel(Model):
    ''' Model wrapper around the Leduc Holdem rule agent (version 1)
    '''

    def __init__(self):
        ''' Create the rule agent and assign it to every position of the game
        '''
        # The environment is only built to read how many players the game has.
        num_players = rlcard.make('leduc-holdem', allow_raw_data=True).player_num
        shared_agent = LeducholdemRuleAgentV1()
        # Every position refers to the very same agent instance.
        self.rule_agents = [shared_agent] * num_players

    @property
    def agents(self):
        ''' The agents of this model, one entry per position in the game

        Returns:
            agents (list): A list of agents

        Note: Each agent should behave like an RL agent, i.e. provide working
              step and eval_step functions.
        '''
        return self.rule_agents

    @property
    def use_raw(self):
        ''' Tell whether the agents work on raw states and actions

        Returns:
            use_raw (boolean): Always True for this model
        '''
        return True
 class LeducHoldemRandomModel(Model):
    ''' A baseline model on Leduc Holdem that plays with a RandomAgent.
    Nothing is loaded from disk for this model.
    '''

    def __init__(self, root_path):
        ''' Create the random agent

        Args:
            root_path (str): Unused. It is accepted only so that this model
                can be constructed with the same argument as the models in
                this file that load saved data.
        '''
        # The environment is only needed to size the agent's action space.
        env = rlcard.make('leduc-holdem')
        self.agent = RandomAgent(action_num=env.action_num)

    @property
    def agents(self):
        ''' Get a list of agents for each position in the game

        Returns:
            agents (list): A list holding the same random agent twice

        Note: Each agent should be just like RL agent with step and eval_step
              functioning well.
        '''
        return [self.agent, self.agent]

    @property
    def use_raw(self):
        ''' Indicate whether use raw state and action

        Returns:
            use_raw (boolean): True if using raw state and action; always
                False for this model
        '''
        return False
 class LeducHoldemCFRModel(Model):
    ''' A pretrained model on Leduc Holdem with CFR
    '''

    def __init__(self, root_path):
        ''' Create the CFR agent and load its saved model

        Args:
            root_path (str): Directory that contains the 'leduc_holdem_cfr' folder
        '''
        cfr_dir = os.path.join(root_path, 'leduc_holdem_cfr')
        self.agent = CFRAgent(rlcard.make('leduc-holdem'), model_path=cfr_dir)
        # If a model has been saved under cfr_dir, load it before playing.
        self.agent.load()

    @property
    def agents(self):
        ''' The agents of this model, one entry per position in the game

        Returns:
            agents (list): A list holding the same CFR agent twice

        Note: Each agent should behave like an RL agent, i.e. provide working
              step and eval_step functions.
        '''
        return [self.agent] * 2

    @property
    def use_raw(self):
        ''' Tell whether the agents work on raw states and actions

        Returns:
            use_raw (boolean): Always False for this model
        '''
        return False
 class LeducHoldemDQNModel2(Model):
    ''' A pretrained DQN model on Leduc Holdem, restored from the
    'leduc_holdem_dqn' checkpoint folder (mlp_layers=[128, 128])
    '''

    def __init__(self, root_path):
        ''' Build the DQN agent in its own graph and restore its variables

        Args:
            root_path (str): Directory that contains the 'leduc_holdem_dqn' folder
        '''
        self.root_path = root_path
        # A dedicated graph and session are created for this model.
        self.graph = tf.Graph()
        self.sess = tf.Session(graph=self.graph)
        env = rlcard.make('leduc-holdem')

        with self.graph.as_default():
            dqn = DQNAgent(
                self.sess,
                scope='dqn',
                action_num=env.action_num,
                replay_memory_size=int(1e5),
                replay_memory_init_size=1000,
                state_shape=env.state_shape,
                mlp_layers=[128, 128],
            )
            # Both positions are played by the same agent instance.
            self.dqn_agents = [dqn] * 2
            self.sess.run(tf.global_variables_initializer())

        ckpt_dir = os.path.join(self.root_path, 'leduc_holdem_dqn')
        with self.sess.as_default(), self.graph.as_default():
            restorer = tf.train.Saver(tf.model_variables())
            restorer.restore(self.sess, tf.train.latest_checkpoint(ckpt_dir))

    @property
    def agents(self):
        ''' The agents of this model, one entry per position in the game

        Returns:
            agents (list): A list of agents

        Note: Each agent should behave like an RL agent, i.e. provide working
              step and eval_step functions.
        '''
        return self.dqn_agents
 class LeducHoldemDQNModel1(Model):
    ''' A pretrained DQN model on Leduc Holdem, restored from the
    'leduc_holdem_dqn_bad' checkpoint folder (mlp_layers=[8, 8])
    '''

    def __init__(self, root_path):
        ''' Build the DQN agent in its own graph and restore its variables

        Args:
            root_path (str): Directory that contains the
                'leduc_holdem_dqn_bad' folder
        '''
        self.root_path = root_path
        # A dedicated graph and session are created for this model.
        self.graph = tf.Graph()
        self.sess = tf.Session(graph=self.graph)
        env = rlcard.make('leduc-holdem')

        with self.graph.as_default():
            dqn = DQNAgent(
                self.sess,
                scope='dqn',
                action_num=env.action_num,
                replay_memory_size=int(1e5),
                replay_memory_init_size=1000,
                state_shape=env.state_shape,
                mlp_layers=[8, 8],
            )
            # Both positions are played by the same agent instance.
            self.dqn_agents = [dqn] * 2
            self.sess.run(tf.global_variables_initializer())

        ckpt_dir = os.path.join(self.root_path, 'leduc_holdem_dqn_bad')
        with self.sess.as_default(), self.graph.as_default():
            restorer = tf.train.Saver(tf.model_variables())
            restorer.restore(self.sess, tf.train.latest_checkpoint(ckpt_dir))

    @property
    def agents(self):
        ''' The agents of this model, one entry per position in the game

        Returns:
            agents (list): A list of agents

        Note: Each agent should behave like an RL agent, i.e. provide working
              step and eval_step functions.
        '''
        return self.dqn_agents
 class LeducHoldemNFSPModel(Model):
    ''' A pretrained NFSP model on Leduc Holdem, restored from the
    'leduc_holdem_nfsp' checkpoint folder
    '''

    def __init__(self, root_path):
        ''' Build one NFSP agent per player in a dedicated graph and restore
        their variables

        Args:
            root_path (str): Directory that contains the 'leduc_holdem_nfsp' folder
        '''
        self.root_path = root_path
        self.graph = tf.Graph()
        self.sess = tf.Session(graph=self.graph)
        env = rlcard.make('leduc-holdem')

        with self.graph.as_default():
            # Each player gets its own agent under a distinct scope: nfsp0, nfsp1, ...
            self.nfsp_agents = [
                NFSPAgent(
                    self.sess,
                    scope='nfsp{}'.format(seat),
                    action_num=env.action_num,
                    state_shape=env.state_shape,
                    hidden_layers_sizes=[128, 128],
                    q_mlp_layers=[128, 128],
                    evaluate_with='best_response',
                )
                for seat in range(env.player_num)
            ]
            self.sess.run(tf.global_variables_initializer())

        ckpt_dir = os.path.join(self.root_path, 'leduc_holdem_nfsp')
        with self.sess.as_default(), self.graph.as_default():
            restorer = tf.train.Saver(tf.model_variables())
            restorer.restore(self.sess, tf.train.latest_checkpoint(ckpt_dir))

    @property
    def agents(self):
        ''' The agents of this model, one entry per position in the game

        Returns:
            agents (list): A list of agents

        Note: Each agent should behave like an RL agent, i.e. provide working
              step and eval_step functions.
        '''
        return self.nfsp_agents
--- a/tournaments/tournament.py
+++ b/tournaments/tournament.py
@ -0,0 +1,120 @@
 import os
 import json
 import tensorflow as tf
 import sys
 import rlcard
 from tqdm import tqdm
 from rlcard.agents.nfsp_agent import NFSPAgent
 from rlcard.agents.dqn_agent import DQNAgent
 from math import log10
 # from rlcard.agents.random_agent import RandomAgent
 from rlcard.utils.utils import set_global_seed
 from rlcard.utils.logger import Logger
 from pretrained_models import LeducHoldemDQNModel1, LeducHoldemNFSPModel, LeducHoldemCFRModel, LeducHoldemRandomModel, LeducHoldemRuleModel, LeducHoldemDQNModel2
 class Tournament(object):
    ''' Evaluate five models against each other on one RLCard environment.

    Only the first agent (``agents[0]``) of every model takes part.
    ``competition`` plays the first agent against agents 2, 3 and 4, while
    ``evaluate`` plays every agent against every other agent.
    '''

    def __init__(self,
                agent1,
                agent2,
                agent3,
                agent4,
                agent5,
                env_id,
                evaluate_num=10000):
        ''' Set up the agents and the fixed pairings used by ``competition``

        Args:
            agent1, agent2, agent3, agent4, agent5: Models exposing an
                ``agents`` list; the first entry of each list is used
            env_id (str): Environment id passed to ``rlcard.make``
            evaluate_num (int): Number of games played per pairing
        '''
        set_global_seed(0)
        self.env_id = env_id
        self.env1 = rlcard.make(env_id, allow_raw_data=True)
        self.env2 = rlcard.make(env_id, allow_raw_data=True)
        self.env3 = rlcard.make(env_id, allow_raw_data=True)
        self.agent1 = agent1.agents[0]
        self.agent2 = agent2.agents[0]
        self.agent3 = agent3.agents[0]
        self.agent4 = agent4.agents[0]
        self.agent5 = agent5.agents[0]
        self.evaluate_num = evaluate_num
        # agent1 always sits in the first position; agent5 has no fixed
        # pairing and only takes part in evaluate().
        self.env1.set_agents([self.agent1, self.agent2])
        self.env2.set_agents([self.agent1, self.agent3])
        self.env3.set_agents([self.agent1, self.agent4])

    def _play_match(self, env, show_progress=False):
        ''' Play ``evaluate_num`` games on ``env`` and average the payoffs

        Args:
            env: Environment whose agents have already been set
            show_progress (boolean): Wrap the game loop in a tqdm progress bar

        Returns:
            (tuple): Average payoff of the first and of the second position
        '''
        episodes = range(self.evaluate_num)
        if show_progress:
            episodes = tqdm(episodes)
        first_total = 0
        second_total = 0
        for _ in episodes:
            _, payoffs = env.run(is_training=False)
            first_total += payoffs[0]
            second_total += payoffs[1]
        return first_total / self.evaluate_num, second_total / self.evaluate_num

    def competition(self):
        ''' Play agent1 against agent2, agent3 and agent4 and print the
        average payoff of both sides for every pairing.

        The printed labels (DQN, Random, Rule-based, CFR) are fixed text; they
        match the agents only if the models are passed in that order.
        '''
        # The CFR round uses env3 (agent1 vs agent4); it previously re-ran
        # env2, which repeated the rule-based pairing under the CFR label.
        matches = [
            ('Random', self.env1),
            ('Rule-based', self.env2),
            ('CFR', self.env3),
        ]
        for index, (label, env) in enumerate(matches):
            if index > 0:
                # Blank separator between two consecutive rounds.
                print("\n")
            print("########## Play Against " + label + " Agent ##########")
            agent1_rate, agent2_rate = self._play_match(env, show_progress=True)
            print("DQN Agent average peroformance:", agent1_rate)
            print(label + " Agent avgerage performance:", agent2_rate)

    def evaluate(self):
        ''' Play every agent against every other agent and print the results.

        For each agent in the first position this prints its average payoff
        against each opponent, followed by "Average Performance": the fraction
        of opponents against which that average payoff is positive.
        '''
        agents = [self.agent1, self.agent2, self.agent3, self.agent4, self.agent5]
        for a1 in agents:
            avg_performance = 0.0
            print("########### Evaluating "+ str(a1) +" #########")
            for a2 in agents:
                # An agent does not play against itself.
                if a1 is a2:
                    continue
                env = rlcard.make(self.env_id, allow_raw_data=True)
                env.set_agents([a1, a2])
                agent1_rate, _ = self._play_match(env)
                if agent1_rate > 0:
                    avg_performance += 1.0
                print("Against "+str(a2)+":", agent1_rate)
            avg_performance /= len(agents)-1
            print("Average Performance:", avg_performance)
            print("\n")
 if __name__ == '__main__':
    # Saved policies and checkpoints are looked up below this folder.
    models_dir = './models'
    # The models are built in the order in which the tournament expects them.
    contestants = [
        LeducHoldemDQNModel1(models_dir),
        LeducHoldemRandomModel(models_dir),
        LeducHoldemRuleModel(),
        LeducHoldemCFRModel(models_dir),
        LeducHoldemDQNModel2(models_dir),
    ]
    tournament = Tournament(*contestants, env_id='leduc-holdem')
    # Only the all-against-all evaluation is run; competition() stays disabled.
    tournament.evaluate()
		`@ -0,0 +1,2 @@`
							`model_checkpoint_path: "model"`
							`all_model_checkpoint_paths: "model"`