add tournament
This commit is contained in:
parent fd0f4517ee
commit 26cf44d15a
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,2 @@
model_checkpoint_path: "model"
all_model_checkpoint_paths: "model"
@@ -0,0 +1,2 @@
model_checkpoint_path: "model"
all_model_checkpoint_paths: "model"
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,2 @@
model_checkpoint_path: "model"
all_model_checkpoint_paths: "model"
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,2 @@
model_checkpoint_path: "model"
all_model_checkpoint_paths: "model"
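The two-line 'checkpoint' files above are TensorFlow checkpoint index files; TensorFlow writes one next to the weight files whenever a model is saved with tf.train.Saver. A minimal TF 1.x sketch (hypothetical directory name, not part of this commit) of how such a file is produced:

import os
import tensorflow as tf

check_point_path = './models/leduc_holdem_dqn'  # assumed directory layout
os.makedirs(check_point_path, exist_ok=True)

v = tf.Variable(0, name='dummy')  # any variable, so the Saver has something to write
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    # Writes the weight files plus the 'checkpoint' index file that records
    # model_checkpoint_path / all_model_checkpoint_paths for the latest save.
    saver.save(sess, os.path.join(check_point_path, 'model'))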
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,240 @@
''' Wrappers of pretrained models. Designed for TensorFlow.
'''

import os
import tensorflow as tf

import rlcard
from rlcard.agents.nfsp_agent import NFSPAgent
from rlcard.agents.dqn_agent import DQNAgent
from rlcard.agents.cfr_agent import CFRAgent
from rlcard.agents.random_agent import RandomAgent
from rlcard.models.model import Model
from rlcard.models.leducholdem_rule_models import LeducholdemRuleAgentV1

class LeducHoldemRuleModel(Model):
    ''' Leduc Hold'em rule model, version 1
    '''

    def __init__(self):
        ''' Build the rule-based agents (no checkpoint is needed)
        '''
        env = rlcard.make('leduc-holdem', allow_raw_data=True)

        rule_agent = LeducholdemRuleAgentV1()
        self.rule_agents = [rule_agent for _ in range(env.player_num)]

    @property
    def agents(self):
        ''' Get a list of agents, one for each position in the game

        Returns:
            agents (list): A list of agents

        Note: Each agent should behave like an RL agent, with step and
        eval_step implemented.
        '''
        return self.rule_agents

    @property
    def use_raw(self):
        ''' Indicate whether the agents use raw state and action

        Returns:
            use_raw (boolean): True if raw state and action are used
        '''
        return True

class LeducHoldemRandomModel(Model):
    ''' A random model for Leduc Hold'em
    '''

    def __init__(self, root_path):
        ''' Build the random agent (no checkpoint is loaded)
        '''
        # root_path is kept only so every model wrapper shares the same constructor signature
        env = rlcard.make('leduc-holdem')
        self.agent = RandomAgent(action_num=env.action_num)

    @property
    def agents(self):
        ''' Get a list of agents, one for each position in the game

        Returns:
            agents (list): A list of agents

        Note: Each agent should behave like an RL agent, with step and
        eval_step implemented.
        '''
        return [self.agent, self.agent]

    @property
    def use_raw(self):
        ''' Indicate whether the agents use raw state and action

        Returns:
            use_raw (boolean): True if raw state and action are used
        '''
        return False

class LeducHoldemCFRModel(Model):
    ''' A pretrained CFR model on Leduc Hold'em
    '''

    def __init__(self, root_path):
        ''' Load pretrained model
        '''
        env = rlcard.make('leduc-holdem')
        model_path = os.path.join(root_path, 'leduc_holdem_cfr')
        self.agent = CFRAgent(env, model_path=model_path)
        self.agent.load()  # Load the saved policy from model_path

    @property
    def agents(self):
        ''' Get a list of agents, one for each position in the game

        Returns:
            agents (list): A list of agents

        Note: Each agent should behave like an RL agent, with step and
        eval_step implemented.
        '''
        return [self.agent, self.agent]

    @property
    def use_raw(self):
        ''' Indicate whether the agents use raw state and action

        Returns:
            use_raw (boolean): True if raw state and action are used
        '''
        return False

class LeducHoldemDQNModel2(Model):
    ''' A pretrained DQN model on Leduc Hold'em (128-128 MLP, loaded from leduc_holdem_dqn)
    '''

    def __init__(self, root_path):
        ''' Load pretrained model
        '''
        self.graph = tf.Graph()
        self.sess = tf.Session(graph=self.graph)
        self.root_path = root_path

        env = rlcard.make('leduc-holdem')
        with self.graph.as_default():
            agent = DQNAgent(self.sess,
                             scope='dqn',
                             action_num=env.action_num,
                             replay_memory_size=int(1e5),
                             replay_memory_init_size=1000,
                             state_shape=env.state_shape,
                             mlp_layers=[128, 128])
            self.dqn_agents = [agent, agent]
            self.sess.run(tf.global_variables_initializer())

        check_point_path = os.path.join(self.root_path, 'leduc_holdem_dqn')
        with self.sess.as_default():
            with self.graph.as_default():
                saver = tf.train.Saver(tf.model_variables())
                saver.restore(self.sess, tf.train.latest_checkpoint(check_point_path))

    @property
    def agents(self):
        ''' Get a list of agents, one for each position in the game

        Returns:
            agents (list): A list of agents

        Note: Each agent should behave like an RL agent, with step and
        eval_step implemented.
        '''
        return self.dqn_agents

class LeducHoldemDQNModel1(Model):
    ''' A weaker pretrained DQN model on Leduc Hold'em (8-8 MLP, loaded from leduc_holdem_dqn_bad)
    '''

    def __init__(self, root_path):
        ''' Load pretrained model
        '''
        self.graph = tf.Graph()
        self.sess = tf.Session(graph=self.graph)
        self.root_path = root_path

        env = rlcard.make('leduc-holdem')
        with self.graph.as_default():
            agent = DQNAgent(self.sess,
                             scope='dqn',
                             action_num=env.action_num,
                             replay_memory_size=int(1e5),
                             replay_memory_init_size=1000,
                             state_shape=env.state_shape,
                             mlp_layers=[8, 8])
            self.dqn_agents = [agent, agent]
            self.sess.run(tf.global_variables_initializer())

        check_point_path = os.path.join(self.root_path, 'leduc_holdem_dqn_bad')
        with self.sess.as_default():
            with self.graph.as_default():
                saver = tf.train.Saver(tf.model_variables())
                saver.restore(self.sess, tf.train.latest_checkpoint(check_point_path))

    @property
    def agents(self):
        ''' Get a list of agents, one for each position in the game

        Returns:
            agents (list): A list of agents

        Note: Each agent should behave like an RL agent, with step and
        eval_step implemented.
        '''
        return self.dqn_agents

class LeducHoldemNFSPModel(Model):
    ''' A pretrained NFSP model on Leduc Hold'em
    '''

    def __init__(self, root_path):
        ''' Load pretrained model
        '''
        self.graph = tf.Graph()
        self.sess = tf.Session(graph=self.graph)
        self.root_path = root_path

        env = rlcard.make('leduc-holdem')
        with self.graph.as_default():
            self.nfsp_agents = []
            for i in range(env.player_num):
                agent = NFSPAgent(self.sess,
                                  scope='nfsp' + str(i),
                                  action_num=env.action_num,
                                  state_shape=env.state_shape,
                                  hidden_layers_sizes=[128, 128],
                                  q_mlp_layers=[128, 128],
                                  evaluate_with='best_response')
                self.nfsp_agents.append(agent)
            self.sess.run(tf.global_variables_initializer())

        check_point_path = os.path.join(self.root_path, 'leduc_holdem_nfsp')
        with self.sess.as_default():
            with self.graph.as_default():
                saver = tf.train.Saver(tf.model_variables())
                saver.restore(self.sess, tf.train.latest_checkpoint(check_point_path))

    @property
    def agents(self):
        ''' Get a list of agents, one for each position in the game

        Returns:
            agents (list): A list of agents

        Note: Each agent should behave like an RL agent, with step and
        eval_step implemented.
        '''
        return self.nfsp_agents

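A minimal usage sketch (not part of this commit) of plugging one of these wrappers into an RLCard environment; it assumes the pretrained checkpoints live under ./models, as in the tournament script below:

import rlcard
from pretrained_models import LeducHoldemCFRModel

model = LeducHoldemCFRModel('./models')    # loads the CFR checkpoint from ./models/leduc_holdem_cfr
env = rlcard.make('leduc-holdem')
env.set_agents(model.agents)               # one agent per position
_, payoffs = env.run(is_training=False)    # play a single hand
print(payoffs)                             # per-player payoffs for that hand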
@@ -0,0 +1,120 @@
import os
import json
import tensorflow as tf
import sys
import rlcard
from tqdm import tqdm
from rlcard.agents.nfsp_agent import NFSPAgent
from rlcard.agents.dqn_agent import DQNAgent
from math import log10
# from rlcard.agents.random_agent import RandomAgent
from rlcard.utils.utils import set_global_seed
from rlcard.utils.logger import Logger
from pretrained_models import LeducHoldemDQNModel1, LeducHoldemNFSPModel, LeducHoldemCFRModel, LeducHoldemRandomModel, LeducHoldemRuleModel, LeducHoldemDQNModel2

class Tournament(object):

    def __init__(self,
                 agent1,
                 agent2,
                 agent3,
                 agent4,
                 agent5,
                 env_id,
                 evaluate_num=10000):

        set_global_seed(0)
        self.env_id = env_id
        self.env1 = rlcard.make(env_id, allow_raw_data=True)
        self.env2 = rlcard.make(env_id, allow_raw_data=True)
        self.env3 = rlcard.make(env_id, allow_raw_data=True)
        self.agent1 = agent1.agents[0]
        self.agent2 = agent2.agents[0]
        self.agent3 = agent3.agents[0]
        self.agent4 = agent4.agents[0]
        self.agent5 = agent5.agents[0]
        self.evaluate_num = evaluate_num
        # Fixed head-to-head matchups used by competition():
        # env1: agent1 vs. agent2, env2: agent1 vs. agent3, env3: agent1 vs. agent4
        self.env1.set_agents([self.agent1, self.agent2])
        self.env2.set_agents([self.agent1, self.agent3])
        self.env3.set_agents([self.agent1, self.agent4])

    def competition(self):

        agent1_wins = 0
        agent2_wins = 0
        print("########## Play Against Random Agent ##########")
        for eval_episode in tqdm(range(self.evaluate_num)):
            _, payoffs = self.env1.run(is_training=False)

            agent1_wins += payoffs[0]
            agent2_wins += payoffs[1]

        # Average payoff per episode, not a win rate
        agent1_rate = agent1_wins / self.evaluate_num
        agent2_rate = agent2_wins / self.evaluate_num
        print("DQN Agent average performance:", agent1_rate)
        print("Random Agent average performance:", agent2_rate)
        print("\n")

        print("########## Play Against Rule-based Agent ##########")
        agent1_wins = 0
        agent2_wins = 0
        for eval_episode in tqdm(range(self.evaluate_num)):
            _, payoffs = self.env2.run(is_training=False)

            agent1_wins += payoffs[0]
            agent2_wins += payoffs[1]

        agent1_rate = agent1_wins / self.evaluate_num
        agent2_rate = agent2_wins / self.evaluate_num
        print("DQN Agent average performance:", agent1_rate)
        print("Rule-based Agent average performance:", agent2_rate)
        print("\n")

        agent1_wins = 0
        agent2_wins = 0
        print("########## Play Against CFR Agent ##########")
        for eval_episode in tqdm(range(self.evaluate_num)):
            _, payoffs = self.env3.run(is_training=False)  # env3 holds the agent1 vs. CFR matchup

            agent1_wins += payoffs[0]
            agent2_wins += payoffs[1]

        agent1_rate = agent1_wins / self.evaluate_num
        agent2_rate = agent2_wins / self.evaluate_num
        print("DQN Agent average performance:", agent1_rate)
        print("CFR Agent average performance:", agent2_rate)

    def evaluate(self):
        agents = [self.agent1, self.agent2, self.agent3, self.agent4, self.agent5]
        for a1 in agents:
            avg_performance = 0.0
            print("########### Evaluating " + str(a1) + " #########")
            for a2 in agents:
                if a1 == a2:
                    continue
                agent1_wins = 0
                env = rlcard.make(self.env_id, allow_raw_data=True)
                env.set_agents([a1, a2])
                for eval_episode in range(self.evaluate_num):
                    _, payoffs = env.run(is_training=False)
                    agent1_wins += payoffs[0]
                agent1_rate = agent1_wins / self.evaluate_num
                if agent1_rate > 0:
                    # Count this matchup as a win if a1's average payoff is positive
                    avg_performance += 1.0
                print("Against " + str(a2) + ":", agent1_rate)
            # Fraction of opponents against which a1 has a positive average payoff
            avg_performance /= len(agents) - 1
            print("Average Performance:", avg_performance)
            print("\n")


if __name__ == '__main__':
    root_path = './models'
    agent1 = LeducHoldemDQNModel1(root_path)
    agent2 = LeducHoldemRandomModel(root_path)
    agent3 = LeducHoldemRuleModel()
    agent4 = LeducHoldemCFRModel(root_path)
    agent5 = LeducHoldemDQNModel2(root_path)
    t = Tournament(agent1, agent2, agent3, agent4, agent5, 'leduc-holdem')
    # t.competition()
    t.evaluate()
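For reference, a minimal sketch (not part of this commit, same imports as above) of running the fixed head-to-head matches via competition() instead of the round robin, with a smaller episode count for a quick check:

root_path = './models'
t = Tournament(LeducHoldemDQNModel1(root_path),
               LeducHoldemRandomModel(root_path),
               LeducHoldemRuleModel(),
               LeducHoldemCFRModel(root_path),
               LeducHoldemDQNModel2(root_path),
               'leduc-holdem',
               evaluate_num=1000)  # 1,000 hands per matchup instead of 10,000
t.competition()                    # DQN vs. Random / Rule-based / CFR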