diff --git a/.gitignore b/.gitignore index b0e9c03..a046d76 100644 --- a/.gitignore +++ b/.gitignore @@ -23,3 +23,4 @@ yarn-debug.log* yarn-error.log* db.sqlite3 +__pycache__ diff --git a/README.md b/README.md index 947f202..863fddc 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ **NOTE: This project is under final tesing. The one in the repo only supports the visualization of some sampled data. The full version will be available soon!** -# Server Setup +# Django Server +## Server Setup Install dependencies: ``` pip install -r requirements.txt @@ -17,7 +18,7 @@ python manage.py runserver ``` The default URL is [http://127.0.0.1:8000/](http://127.0.0.1:8000/) -# REST API +## REST API The definitions of the fields are as follows: * `eval_num`: Integer. The number of evaluation times. * `name`: String. The name of the environment. @@ -34,6 +35,19 @@ The definitions of the fields are as follows: | GET | tournament/query\_payoff | `name`, `agent0`, `agent1`, `payoff` | Query the payoffs with the given parameters | | GET | tournament/replay | `name`, `agent0`, `agent1`, `index` | Return the replay data (only support Leduc Holdem for now) | +## Example API +| API | Description | +|-----------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------| +| http://127.0.0.1:8000/tournament/launch?eval_num=200&name=leduc-holdem | Evaluate on Leduc Holdem with 200 games for each pair of models | +| http://127.0.0.1:8000/tournament/replay?name=leduc-holdem&agent0=leduc-holdem-rule-v1&agent1=leduc-holdem-cfr&index=3 | Obtain the replay data between rule model and CFR model. 
Obtain the data of the 3rd game | +| http://127.0.0.1:8000/tournament/query_game | Get all the game data | +| http://127.0.0.1:8000/tournament/query_game?name=leduc-holdem | Get all the game data of Leduc Holdem | +| http://127.0.0.1:8000/tournament/query_payoff | Get all the payoffs | +| http://127.0.0.1:8000/tournament/query_payoff?agent0=leduc-holdem-cfr&agent1=leduc-holdem-rule-v1 | Get all the payoffs between rule and CFR models | + + + + # Others This project was bootstrapped with [Create React App](https://github.com/facebook/create-react-app). diff --git a/server/server/__pycache__/__init__.cpython-35.pyc b/server/server/__pycache__/__init__.cpython-35.pyc deleted file mode 100644 index 68c34cf..0000000 Binary files a/server/server/__pycache__/__init__.cpython-35.pyc and /dev/null differ diff --git a/server/server/__pycache__/settings.cpython-35.pyc b/server/server/__pycache__/settings.cpython-35.pyc deleted file mode 100644 index e7383c0..0000000 Binary files a/server/server/__pycache__/settings.cpython-35.pyc and /dev/null differ diff --git a/server/server/__pycache__/urls.cpython-35.pyc b/server/server/__pycache__/urls.cpython-35.pyc deleted file mode 100644 index a7ae457..0000000 Binary files a/server/server/__pycache__/urls.cpython-35.pyc and /dev/null differ diff --git a/server/server/__pycache__/wsgi.cpython-35.pyc b/server/server/__pycache__/wsgi.cpython-35.pyc deleted file mode 100644 index 8e99008..0000000 Binary files a/server/server/__pycache__/wsgi.cpython-35.pyc and /dev/null differ diff --git a/server/tournament/.views.py.swp b/server/tournament/.views.py.swp deleted file mode 100644 index 3278d92..0000000 Binary files a/server/tournament/.views.py.swp and /dev/null differ diff --git a/server/tournament/__pycache__/__init__.cpython-35.pyc b/server/tournament/__pycache__/__init__.cpython-35.pyc deleted file mode 100644 index 5c65b9c..0000000 Binary files a/server/tournament/__pycache__/__init__.cpython-35.pyc and /dev/null differ diff --git 
a/server/tournament/__pycache__/admin.cpython-35.pyc b/server/tournament/__pycache__/admin.cpython-35.pyc deleted file mode 100644 index a912275..0000000 Binary files a/server/tournament/__pycache__/admin.cpython-35.pyc and /dev/null differ diff --git a/server/tournament/__pycache__/models.cpython-35.pyc b/server/tournament/__pycache__/models.cpython-35.pyc deleted file mode 100644 index ff93652..0000000 Binary files a/server/tournament/__pycache__/models.cpython-35.pyc and /dev/null differ diff --git a/server/tournament/__pycache__/tournament.cpython-35.pyc b/server/tournament/__pycache__/tournament.cpython-35.pyc deleted file mode 100644 index 1a72ad4..0000000 Binary files a/server/tournament/__pycache__/tournament.cpython-35.pyc and /dev/null differ diff --git a/server/tournament/__pycache__/urls.cpython-35.pyc b/server/tournament/__pycache__/urls.cpython-35.pyc deleted file mode 100644 index 3c9280e..0000000 Binary files a/server/tournament/__pycache__/urls.cpython-35.pyc and /dev/null differ diff --git a/server/tournament/__pycache__/views.cpython-35.pyc b/server/tournament/__pycache__/views.cpython-35.pyc deleted file mode 100644 index 1825b4e..0000000 Binary files a/server/tournament/__pycache__/views.cpython-35.pyc and /dev/null differ diff --git a/server/tournament/migrations/__pycache__/0001_initial.cpython-35.pyc b/server/tournament/migrations/__pycache__/0001_initial.cpython-35.pyc deleted file mode 100644 index 97ef20d..0000000 Binary files a/server/tournament/migrations/__pycache__/0001_initial.cpython-35.pyc and /dev/null differ diff --git a/server/tournament/migrations/__pycache__/__init__.cpython-35.pyc b/server/tournament/migrations/__pycache__/__init__.cpython-35.pyc deleted file mode 100644 index 6aa2f0d..0000000 Binary files a/server/tournament/migrations/__pycache__/__init__.cpython-35.pyc and /dev/null differ diff --git a/server/tournament/rlcard_wrap/__init__.py b/server/tournament/rlcard_wrap/__init__.py index 5bf495f..721f55e 100644 --- 
a/server/tournament/rlcard_wrap/__init__.py +++ b/server/tournament/rlcard_wrap/__init__.py @@ -1,10 +1,14 @@ import rlcard from .leduc_holdem_random_model import LeducHoldemRandomModelSpec +from .doudizhu_random_model import DoudizhuRandomModelSpec # Register Leduc Holdem Random Model rlcard.models.registration.model_registry.model_specs['leduc-holdem-random'] = LeducHoldemRandomModelSpec() +# Register Doudizhu Random Model +rlcard.models.registration.model_registry.model_specs['doudizhu-random'] = DoudizhuRandomModelSpec() + # The models we are concerned MODEL_IDS = {} MODEL_IDS['leduc-holdem'] = [ @@ -13,3 +17,8 @@ MODEL_IDS['leduc-holdem'] = [ 'leduc-holdem-rule-v1', ] +MODEL_IDS['doudizhu'] = [ + 'doudizhu-random', + 'doudizhu-random', + ] + diff --git a/server/tournament/rlcard_wrap/__pycache__/__init__.cpython-35.pyc b/server/tournament/rlcard_wrap/__pycache__/__init__.cpython-35.pyc deleted file mode 100644 index 7fb4e1f..0000000 Binary files a/server/tournament/rlcard_wrap/__pycache__/__init__.cpython-35.pyc and /dev/null differ diff --git a/server/tournament/rlcard_wrap/__pycache__/leduc_holdem_random_model.cpython-35.pyc b/server/tournament/rlcard_wrap/__pycache__/leduc_holdem_random_model.cpython-35.pyc deleted file mode 100644 index bf08cfe..0000000 Binary files a/server/tournament/rlcard_wrap/__pycache__/leduc_holdem_random_model.cpython-35.pyc and /dev/null differ diff --git a/server/tournament/rlcard_wrap/__pycache__/tournament.cpython-35.pyc b/server/tournament/rlcard_wrap/__pycache__/tournament.cpython-35.pyc deleted file mode 100644 index c728737..0000000 Binary files a/server/tournament/rlcard_wrap/__pycache__/tournament.cpython-35.pyc and /dev/null differ diff --git a/server/tournament/rlcard_wrap/doudizhu_random_model.py b/server/tournament/rlcard_wrap/doudizhu_random_model.py new file mode 100644 index 0000000..70ab55c --- /dev/null +++ b/server/tournament/rlcard_wrap/doudizhu_random_model.py @@ -0,0 +1,46 @@ +# A wrap for rlcard +# Here, we 
include a random model as the default baseline +import rlcard +from rlcard.agents import RandomAgent +from rlcard.models.model import Model + +class DoudizhuRandomModelSpec(object): + def __init__(self): + self.model_id = 'doudizhu-random' + self._entry_point = DoudizhuRandomModel + + def load(self): + model = self._entry_point() + return model + +class DoudizhuRandomModel(Model): + ''' A random model + ''' + + def __init__(self): + ''' Load random model + ''' + env = rlcard.make('doudizhu') + self.agent = RandomAgent(action_num=env.action_num) + self.player_num = env.player_num + + @property + def agents(self): + ''' Get a list of agents for each position in a the game + + Returns: + agents (list): A list of agents + + Note: Each agent should be just like RL agent with step and eval_step + functioning well. + ''' + return [self.agent for _ in range(self.player_num)] + + @property + def use_raw(self): + ''' Indicate whether use raw state and action + + Returns: + use_raw (boolean): True if using raw state and action + ''' + return False diff --git a/server/tournament/tournament.py b/server/tournament/tournament.py index 0b52a84..fb99668 100644 --- a/server/tournament/tournament.py +++ b/server/tournament/tournament.py @@ -5,10 +5,19 @@ import numpy as np from .rlcard_wrap import rlcard +def cards2str(cards): + response = '' + for card in cards: + if card.rank == '': + response += card.suit[0] + else: + response += card.rank + return response + class Tournament(object): def __init__(self, game, model_ids, evaluate_num=100): - """ Defalt for two player games + """ Default for two player games For Dou Dizhu, the two peasants use the same model """ self.game = game @@ -28,7 +37,13 @@ class Tournament(object): if j == i: continue print(self.game, '-', self.model_ids[i], 'VS', self.model_ids[j]) - data, payoffs, wins = tournament(self.game, [self.models[i].agents[0], self.models[j].agents[1]], self.evaluate_num) + if self.game == 'doudizhu': + agents = 
[self.models[i].agents[0], self.models[j].agents[1], self.models[j].agents[2]] + names = [self.model_ids[i], self.model_ids[j], self.model_ids[j]] + data, payoffs, wins = doudizhu_tournament(self.game, agents, names, self.evaluate_num) + elif self.game == 'leduc-holdem': + agents = [self.models[i].agents[0], self.models[j].agents[1]] + data, payoffs, wins = leduc_holdem_tournament(self.game, agents, self.evaluate_num) mean_payoff = np.mean(payoffs) print('Average payoff:', mean_payoff) print() @@ -53,7 +68,50 @@ class Tournament(object): payoffs_data.append(payoff_data) return games_data, payoffs_data -def tournament(game, agents, num): +def doudizhu_tournament(game, agents, names, num): + import rlcard + env = rlcard.make(game, config={'allow_raw_data': True}) + print(env.reset()) + print(env.step(87, False)) + exit() + env.set_agents(agents) + payoffs = [] + json_data = [] + wins = [] + for _ in tqdm(range(num)): + data = {} + roles = ['landlord', 'peasant', 'peasant'] + data['playerInfo'] = [{'id': i, 'index': i, 'role': roles[i], 'agentInfo': {'name': names[i]}} for i in range(env.player_num)] + state, player_id = env.reset() + #perfect = env.get_perfect_information() + #data['initHands'] = perfect['hand_cards'] + data['initHands'] =[cards2str(env.game.players[i].current_hand) for i in range(env.player_num)] + data['moveHistory'] = [] + while not env.is_over(): + action, probs = env.agents[player_id].eval_step(state) + history = {} + history['playerIdx'] = player_id + if env.agents[player_id].use_raw: + history['move'] = action + else: + history['move'] = env._decode_action(action) + + data['moveHistory'].append(history) + print(action, player_id, env.agents[player_id].use_raw) + state, player_id = env.step(action, env.agents[player_id].use_raw) + data = json.dumps(data) + #data = json.dumps(data, indent=2, sort_keys=True) + print(data) + exit() + json_data.append(data) + if env.get_payoffs()[0] > 0: + wins.append(True) + else: + wins.append(False) + 
payoffs.append(env.get_payoffs()[0]) + return json_data, payoffs, wins + +def leduc_holdem_tournament(game, agents, num): env = rlcard.make(game, config={'allow_raw_data': True}) env.set_agents(agents) payoffs = [] @@ -65,7 +123,9 @@ def tournament(game, agents, num): state, player_id = env.reset() perfect = env.get_perfect_information() data['initHands'] = perfect['hand_cards'] - data['moveHistory'] = [[]] + data['moveHistory'] = [] + round_history = [] + round_id = 0 while not env.is_over(): action, probs = env.agents[player_id].eval_step(state) history = {} @@ -85,9 +145,16 @@ def tournament(game, agents, num): p = -1 probabilities.append({'move':a, 'probability': p}) history['probabilities'] = probabilities - data['moveHistory'][0].append(history) + round_history.append(history) + perfect = env.get_perfect_information() + if round_id < perfect['current_round']: + round_id = perfect['current_round'] + data['moveHistory'].append(round_history) + round_history = [] state, player_id = env.step(action, env.agents[player_id].use_raw) perfect = env.get_perfect_information() + if round_id < perfect['current_round']: + data['moveHistory'].append(round_history) data['publicCard'] = perfect['public_card'] data = json.dumps(data) #data = json.dumps(data, indent=2, sort_keys=True)