Refinement

This commit is contained in:
Daochen 2020-05-13 13:08:00 -05:00
parent 131607a3f9
commit 1a48af157d
3 changed files with 10 additions and 90 deletions

2
.gitignore vendored
View File

@ -25,4 +25,4 @@ yarn-error.log*
db.sqlite3 db.sqlite3
__pycache__ __pycache__
*.swp *.swp
/uploaded_agents uploaded_agents

View File

@ -1,84 +0,0 @@
''' Leduc Hold 'em rule model
'''
import rlcard
from rlcard.models.model import Model
class LeducHoldemRuleAgentV2(object):
    ''' Rule-based Leduc Hold 'em agent, version 2

    The agent sets ``use_raw`` to True and reads the 'raw_obs' and
    'raw_legal_actions' entries of the state it is given.
    '''

    def __init__(self):
        self.use_raw = True

    def step(self, state):
        ''' Pick an action for a raw state with a small set of fixed rules.

        Args:
            state (dict): Must contain 'raw_legal_actions' and 'raw_obs';
                'raw_obs' must contain 'hand' and 'public_card'.

        Returns:
            action (str): The preferred action when it is legal, otherwise
                its fallback ('raise' -> 'call', 'check' -> 'fold',
                'call' -> 'raise', anything else is returned unchanged).
        '''
        allowed = state['raw_legal_actions']
        obs = state['raw_obs']
        hand = obs['hand']
        public_card = obs['public_card']

        # The inline note that used to sit here described a two-card
        # starting-hand chart (pairs; AK, AQ, AJ, AT; suited A9..A2;
        # KQ, KJ, QJ, JT; fold everything else). The rules below do not
        # implement that chart; they only inspect single characters of
        # the hand and of the public card.
        #
        # NOTE(review): with a public card the rule compares index 1 of
        # both cards, while without one it compares hand[0] against
        # 'K'/'Q'. Confirm which index actually holds the rank.
        if public_card:
            preferred = 'raise' if public_card[1] == hand[1] else 'fold'
        elif hand[0] == 'K':
            preferred = 'raise'
        elif hand[0] == 'Q':
            preferred = 'check'
        else:
            preferred = 'fold'

        if preferred in allowed:
            return preferred

        # Preferred action is not legal: substitute its fallback, or keep
        # the action as-is when no fallback is defined for it.
        fallbacks = {'raise': 'call', 'check': 'fold', 'call': 'raise'}
        return fallbacks.get(preferred, preferred)

    def eval_step(self, state):
        ''' Return the action chosen by ``step`` paired with an empty list.
        '''
        return self.step(state), []
class LeducHoldemRuleModelV2(Model):
    ''' Model wrapper exposing the version 2 Leduc Hold 'em rule agent
    '''

    def __init__(self):
        ''' Create one agent slot per player of a 'leduc-holdem' environment

        The environment is built only to read its ``player_num``. A single
        LeducHoldemRuleAgentV2 instance is shared by every slot.
        '''
        num_players = rlcard.make('leduc-holdem').player_num
        shared_agent = LeducHoldemRuleAgentV2()
        self.rule_agents = [shared_agent] * num_players

    @property
    def agents(self):
        ''' Get the list of agents, one entry per position in the game

        Returns:
            agents (list): A list of agents

        Note: Each agent should behave like an RL agent, i.e. provide
              working step and eval_step methods.
        '''
        return self.rule_agents

View File

@ -16,7 +16,7 @@ from .models import Game, Payoff, UploadedAgent
from .tournament import Tournament from .tournament import Tournament
def reset_model_ids(): def _reset_model_ids():
from .rlcard_wrap import rlcard, MODEL_IDS from .rlcard_wrap import rlcard, MODEL_IDS
agents = UploadedAgent.objects.all() agents = UploadedAgent.objects.all()
for agent in agents: for agent in agents:
@ -69,8 +69,12 @@ def query_payoff(request):
@transaction.atomic @transaction.atomic
def launch(request): def launch(request):
if request.method == 'GET': if request.method == 'GET':
eval_num = int(request.GET['eval_num']) try:
game = request.GET['name'] eval_num = int(request.GET['eval_num'])
game = request.GET['name']
except:
return HttpResponse(json.dumps({'value': -1, 'info': 'parameters error'}))
games_data, payoffs_data = Tournament(game, MODEL_IDS[game], eval_num).launch() games_data, payoffs_data = Tournament(game, MODEL_IDS[game], eval_num).launch()
Game.objects.filter(name=game).delete() Game.objects.filter(name=game).delete()
Payoff.objects.filter(name=game).delete() Payoff.objects.filter(name=game).delete()
@ -103,7 +107,7 @@ def upload_agent(request):
a = UploadedAgent(name=name, game=game, f=f, entry=entry) a = UploadedAgent(name=name, game=game, f=f, entry=entry)
a.save() a.save()
reset_model_ids() _reset_model_ids()
return HttpResponse(json.dumps({'value': 0, 'info': 'success'})) return HttpResponse(json.dumps({'value': 0, 'info': 'success'}))
def delete_agent(request): def delete_agent(request):
@ -113,7 +117,7 @@ def delete_agent(request):
return HttpResponse(json.dumps({'value': -1, 'info': 'name not exists'})) return HttpResponse(json.dumps({'value': -1, 'info': 'name not exists'}))
UploadedAgent.objects.filter(name=name).delete() UploadedAgent.objects.filter(name=name).delete()
reset_model_ids() _reset_model_ids()
return HttpResponse(json.dumps({'value': 0, 'info': 'success'})) return HttpResponse(json.dumps({'value': 0, 'info': 'success'}))
def list_agents(request): def list_agents(request):