''' Tournament script: evaluate pretrained Leduc Hold'em agents in RLCard.

Loads several pretrained models and plays them against each other,
reporting each agent's average payoff per episode.
'''
import rlcard
from tqdm import tqdm

from rlcard.utils.utils import set_global_seed
from pretrained_models import (LeducHoldemDQNModel1, LeducHoldemRandomModel,
                               LeducHoldemRuleModel, LeducHoldemCFRModel,
                               LeducHoldemDQNModel2)


class Tournament(object):
    ''' Pit a DQN agent (agent1) against four opponents in Leduc Hold'em. '''

    def __init__(self,
                 agent1,
                 agent2,
                 agent3,
                 agent4,
                 agent5,
                 env_id,
                 evaluate_num=10000):

        set_global_seed(0)
        self.env_id = env_id
        # One environment per fixed match-up used by competition().
        self.env1 = rlcard.make(env_id, allow_raw_data=True)
        self.env2 = rlcard.make(env_id, allow_raw_data=True)
        self.env3 = rlcard.make(env_id, allow_raw_data=True)
        # Each pretrained model wraps a single agent in a one-element list.
        self.agent1 = agent1.agents[0]
        self.agent2 = agent2.agents[0]
        self.agent3 = agent3.agents[0]
        self.agent4 = agent4.agents[0]
        self.agent5 = agent5.agents[0]
        self.evaluate_num = evaluate_num
        self.env1.set_agents([self.agent1, self.agent2])   # DQN vs Random
        self.env2.set_agents([self.agent1, self.agent3])   # DQN vs Rule-based
        self.env3.set_agents([self.agent1, self.agent4])   # DQN vs CFR

    def competition(self):
        ''' Run the three fixed match-ups and report average payoffs.

        env.run returns (trajectories, payoffs); payoffs[i] is player i's
        payoff for the episode, so the totals below are average payoffs
        per episode, not win counts.
        '''
        agent1_wins = 0
        agent2_wins = 0
        print("########## Play Against Random Agent ##########")
        for eval_episode in tqdm(range(self.evaluate_num)):
            _, payoffs = self.env1.run(is_training=False)
            agent1_wins += payoffs[0]
            agent2_wins += payoffs[1]

        agent1_rate = agent1_wins / self.evaluate_num
        agent2_rate = agent2_wins / self.evaluate_num
        print("DQN Agent average performance:", agent1_rate)
        print("Random Agent average performance:", agent2_rate)
        print("\n")

        print("########## Play Against Rule-based Agent ##########")
        agent1_wins = 0
        agent2_wins = 0
        for eval_episode in tqdm(range(self.evaluate_num)):
            _, payoffs = self.env2.run(is_training=False)
            agent1_wins += payoffs[0]
            agent2_wins += payoffs[1]

        agent1_rate = agent1_wins / self.evaluate_num
        agent2_rate = agent2_wins / self.evaluate_num
        print("DQN Agent average performance:", agent1_rate)
        print("Rule-based Agent average performance:", agent2_rate)
        print("\n")

        print("########## Play Against CFR Agent ##########")
        agent1_wins = 0
        agent2_wins = 0
        for eval_episode in tqdm(range(self.evaluate_num)):
            # env3 holds the DQN-vs-CFR match-up.
            _, payoffs = self.env3.run(is_training=False)
            agent1_wins += payoffs[0]
            agent2_wins += payoffs[1]

        agent1_rate = agent1_wins / self.evaluate_num
        agent2_rate = agent2_wins / self.evaluate_num
        print("DQN Agent average performance:", agent1_rate)
        print("CFR Agent average performance:", agent2_rate)
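
    # The three blocks above are near-identical. A compact alternative is
    # sketched below as an editor's suggestion built on the same rlcard calls;
    # competition() does not use it, and the method name is illustrative.
    def _head_to_head(self, env, opponent_label):
        ''' Play evaluate_num episodes on env and report average payoffs. '''
        hero_total, opponent_total = 0.0, 0.0
        print("########## Play Against " + opponent_label + " ##########")
        for _ in tqdm(range(self.evaluate_num)):
            _, payoffs = env.run(is_training=False)
            hero_total += payoffs[0]
            opponent_total += payoffs[1]
        print("DQN Agent average performance:", hero_total / self.evaluate_num)
        print(opponent_label + " average performance:", opponent_total / self.evaluate_num)
        print("\n")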

    def evaluate(self):
        ''' Full round-robin: every agent plays every other agent. '''
        agents = [self.agent1, self.agent2, self.agent3, self.agent4, self.agent5]
        for a1 in agents:
            avg_performance = 0.0
            print("########## Evaluating " + str(a1) + " ##########")
            for a2 in agents:
                if a1 == a2:
                    continue
                agent1_wins = 0
                env = rlcard.make(self.env_id, allow_raw_data=True)
                env.set_agents([a1, a2])
                for eval_episode in range(self.evaluate_num):
                    _, payoffs = env.run(is_training=False)
                    agent1_wins += payoffs[0]
                agent1_rate = agent1_wins / self.evaluate_num
                # Count the match-up as won when the average payoff is positive.
                if agent1_rate > 0:
                    avg_performance += 1.0
                print("Against " + str(a2) + ":", agent1_rate)
            # Fraction of opponents beaten on average payoff.
            avg_performance /= len(agents) - 1
            print("Average Performance:", avg_performance)
            print("\n")
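

# A round-robin cross-table is often more informative than a single scalar.
# The sketch below is an editor's addition built on the same rlcard calls as
# evaluate(); the function name and structure are illustrative, not part of
# the original script. It records each ordered pair's average payoff.
def cross_table(agents, env_id, evaluate_num=1000):
    ''' Return {str(a1): {str(a2): a1's average payoff against a2}}. '''
    table = {}
    for a1 in agents:
        for a2 in agents:
            if a1 is a2:
                continue
            env = rlcard.make(env_id, allow_raw_data=True)
            env.set_agents([a1, a2])
            total = 0.0
            for _ in range(evaluate_num):
                _, payoffs = env.run(is_training=False)
                total += payoffs[0]
            table.setdefault(str(a1), {})[str(a2)] = total / evaluate_num
    return table

# Example use (commented out; t is defined in __main__ below):
# print(cross_table([t.agent1, t.agent3], t.env_id, evaluate_num=1000))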


if __name__ == '__main__':
    root_path = './models'
    agent1 = LeducHoldemDQNModel1(root_path)
    agent2 = LeducHoldemRandomModel(root_path)
    agent3 = LeducHoldemRuleModel()
    agent4 = LeducHoldemCFRModel(root_path)
    agent5 = LeducHoldemDQNModel2(root_path)
    t = Tournament(agent1, agent2, agent3, agent4, agent5, 'leduc-holdem')
    # t.competition()
    t.evaluate()