Leduc

2020-05-11 16:42:08 -05:00 · 2020-05-11 16:42:08 -05:00 · 796d6f46e3
parent c92edef65c
commit 796d6f46e3
21 changed files with 144 additions and 7 deletions
--- a/.gitignore
+++ b/.gitignore
@ -23,3 +23,4 @@ yarn-debug.log*
 yarn-error.log*

 db.sqlite3
+__pycache__
--- a/README.md
+++ b/README.md
@ -1,6 +1,7 @@
 **NOTE: This project is under final testing. The one in the repo only supports the visualization of some sampled data. The full version will be available soon!**

-# Server Setup
+# Django Server
+## Server Setup
 Install dependencies:
 ```
 pip install -r requirements.txt
@ -17,7 +18,7 @@ python manage.py runserver
 ```
 The default URL is [http://127.0.0.1:8000/](http://127.0.0.1:8000/)

-# REST API
+## REST API
 The definitions of the fields are as follows:
 *   `eval_num`: Integer. The number of evaluation times.
 *   `name`: String. The name of the environment.
@ -34,6 +35,19 @@ The definitions of the fields are as follows:
 | GET  | tournament/query\_payoff  | `name`, `agent0`, `agent1`, `payoff`                 | Query the payoffs with the given parameters                                                                        |
 | GET  | tournament/replay         | `name`, `agent0`, `agent1`, `index`                  | Return the replay data (only support Leduc Holdem for now)                                                         |

+## Example API
+| API                                                                                                                   | Description                                                                              |
+|-----------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------|
+| http://127.0.0.1:8000/tournament/launch?eval_num=200&name=leduc-holdem                                                | Evaluate on Leduc Holdem with 200 games for each pair of models                          |
+| http://127.0.0.1:8000/tournament/replay?name=leduc-holdem&agent0=leduc-holdem-rule-v1&agent1=leduc-holdem-cfr&index=3 | Obtain the replay data between rule model and CFR model. Obtain the data of the 3rd game |
+| http://127.0.0.1:8000/tournament/query_game                                                                           | Get all the game data                                                                    |
+| http://127.0.0.1:8000/tournament/query_game?name=leduc-holdem                                                         | Get all the game data of Leduc Holdem                                                    |
+| http://127.0.0.1:8000/tournament/query_payoff                                                                         | Get all the payoffs                                                                      |
+| http://127.0.0.1:8000/tournament/query_payoff?agent0=leduc-holdem-cfr&agent1=leduc-holdem-rule-v1                     | Get all the payoffs between rule and CFR models                                          |
+
+
+
+
 # Others
 This project was bootstrapped with [Create React App](https://github.com/facebook/create-react-app).

--- a/server/server/pycache/init.cpython-35.pyc
+++ b/server/server/pycache/init.cpython-35.pyc
--- a/server/server/pycache/settings.cpython-35.pyc
+++ b/server/server/pycache/settings.cpython-35.pyc
--- a/server/server/pycache/urls.cpython-35.pyc
+++ b/server/server/pycache/urls.cpython-35.pyc
--- a/server/server/pycache/wsgi.cpython-35.pyc
+++ b/server/server/pycache/wsgi.cpython-35.pyc
--- a/server/tournament/.views.py.swp
+++ b/server/tournament/.views.py.swp
--- a/server/tournament/pycache/init.cpython-35.pyc
+++ b/server/tournament/pycache/init.cpython-35.pyc
--- a/server/tournament/pycache/admin.cpython-35.pyc
+++ b/server/tournament/pycache/admin.cpython-35.pyc
--- a/server/tournament/pycache/models.cpython-35.pyc
+++ b/server/tournament/pycache/models.cpython-35.pyc
--- a/server/tournament/pycache/tournament.cpython-35.pyc
+++ b/server/tournament/pycache/tournament.cpython-35.pyc
--- a/server/tournament/pycache/urls.cpython-35.pyc
+++ b/server/tournament/pycache/urls.cpython-35.pyc
--- a/server/tournament/pycache/views.cpython-35.pyc
+++ b/server/tournament/pycache/views.cpython-35.pyc
--- a/server/tournament/migrations/pycache/0001_initial.cpython-35.pyc
+++ b/server/tournament/migrations/pycache/0001_initial.cpython-35.pyc
--- a/server/tournament/migrations/pycache/init.cpython-35.pyc
+++ b/server/tournament/migrations/pycache/init.cpython-35.pyc
--- a/server/tournament/rlcard_wrap/init.py
+++ b/server/tournament/rlcard_wrap/init.py
@ -1,10 +1,14 @@
 import rlcard
 from .leduc_holdem_random_model import LeducHoldemRandomModelSpec
+from .doudizhu_random_model import DoudizhuRandomModelSpec


 # Register Leduc Holdem Random Model
 rlcard.models.registration.model_registry.model_specs['leduc-holdem-random'] = LeducHoldemRandomModelSpec()

+# Register Doudizhu Random Model
+rlcard.models.registration.model_registry.model_specs['doudizhu-random'] = DoudizhuRandomModelSpec()
+
 # The models we are concerned
 MODEL_IDS = {}
 MODEL_IDS['leduc-holdem'] = [
@ -13,3 +17,8 @@ MODEL_IDS['leduc-holdem'] = [
        'leduc-holdem-rule-v1',
        ]

+MODEL_IDS['doudizhu'] = [
+        'doudizhu-random',
+        'doudizhu-random',
+        ]
+
--- a/server/tournament/rlcard_wrap/pycache/init.cpython-35.pyc
+++ b/server/tournament/rlcard_wrap/pycache/init.cpython-35.pyc
--- a/server/tournament/rlcard_wrap/pycache/leduc_holdem_random_model.cpython-35.pyc
+++ b/server/tournament/rlcard_wrap/pycache/leduc_holdem_random_model.cpython-35.pyc
--- a/server/tournament/rlcard_wrap/pycache/tournament.cpython-35.pyc
+++ b/server/tournament/rlcard_wrap/pycache/tournament.cpython-35.pyc
--- a/server/tournament/rlcard_wrap/doudizhu_random_model.py
+++ b/server/tournament/rlcard_wrap/doudizhu_random_model.py
@ -0,0 +1,46 @@
+# A wrap for rlcard
+# Here, we include a random model as the default baseline
+import rlcard
+from rlcard.agents import RandomAgent
+from rlcard.models.model import Model
+
+class DoudizhuRandomModelSpec(object):
+    ''' Spec object that knows how to build the Dou Dizhu random model
+    '''
+
+    def __init__(self):
+        ''' Record the model id and the class that `load` instantiates
+        '''
+        self._entry_point = DoudizhuRandomModel
+        self.model_id = 'doudizhu-random'
+
+    def load(self):
+        ''' Instantiate the model class stored as the entry point
+
+        Returns:
+            model (DoudizhuRandomModel): A newly constructed model
+        '''
+        return self._entry_point()
+
+class DoudizhuRandomModel(Model):
+    ''' A random model for Dou Dizhu: one RandomAgent shared by every position
+    '''
+
+    def __init__(self):
+        ''' Make a 'doudizhu' environment and build the random agent from its
+            action number; the environment's player number is kept as well
+        '''
+        doudizhu_env = rlcard.make('doudizhu')
+        self.player_num = doudizhu_env.player_num
+        self.agent = RandomAgent(action_num=doudizhu_env.action_num)
+
+    @property
+    def agents(self):
+        ''' Get a list of agents, one for each position in the game
+
+        Returns:
+            agents (list): A list of `player_num` entries, each one being the
+                           same RandomAgent instance
+
+        Note: Each agent should be just like RL agent with step and eval_step
+              functioning well.
+        '''
+        return [self.agent] * self.player_num
+
+    @property
+    def use_raw(self):
+        ''' Indicate whether the model uses raw state and action
+
+        Returns:
+            use_raw (boolean): Always False for this model
+        '''
+        return False
--- a/server/tournament/tournament.py
+++ b/server/tournament/tournament.py
@ -5,10 +5,19 @@ import numpy as np

 from .rlcard_wrap import rlcard

+def cards2str(cards):
+    """ Encode a sequence of cards as a single string
+
+    Args:
+        cards (iterable): Objects exposing `rank` and `suit` attributes
+
+    Returns:
+        response (str): The concatenation, in order, of each card's `rank`;
+                        a card whose rank is the empty string contributes
+                        the first element of its `suit` instead
+    """
+    return ''.join(
+        card.suit[0] if card.rank == '' else card.rank
+        for card in cards
+    )
+
 class Tournament(object):
    
    def __init__(self, game, model_ids, evaluate_num=100):
-        """ Defalt for two player games
+        """ Default for two player games
            For Dou Dizhu, the two peasants use the same model
        """
        self.game = game
@ -28,7 +37,13 @@ class Tournament(object):
                if j == i:
                    continue
                print(self.game, '-', self.model_ids[i], 'VS', self.model_ids[j])
-                data, payoffs, wins = tournament(self.game, [self.models[i].agents[0], self.models[j].agents[1]], self.evaluate_num)
+                if self.game == 'doudizhu':
+                    agents = [self.models[i].agents[0], self.models[j].agents[1], self.models[j].agents[2]]
+                    names = [self.model_ids[i], self.model_ids[j], self.model_ids[j]]
+                    data, payoffs, wins = doudizhu_tournament(self.game, agents, names, self.evaluate_num)
+                elif self.game == 'leduc-holdem':
+                    agents = [self.models[i].agents[0], self.models[j].agents[1]]
+                    data, payoffs, wins = leduc_holdem_tournament(self.game, agents, self.evaluate_num)
                mean_payoff = np.mean(payoffs)
                print('Average payoff:', mean_payoff)
                print()
@ -53,7 +68,50 @@ class Tournament(object):
                payoffs_data.append(payoff_data)
        return games_data, payoffs_data

-def tournament(game, agents, num):
+def doudizhu_tournament(game, agents, names, num):
+    """ Play `num` Dou Dizhu games with the given agents and record each game
+
+    Args:
+        game (str): Environment id passed to `rlcard.make`
+        agents (list): One agent per position; position 0 is recorded as the
+                       landlord, positions 1 and 2 as peasants
+        names (list): Name of the agent in each position, stored under
+                      `agentInfo.name` in the game record
+        num (int): Number of games to play
+
+    Returns:
+        json_data (list): One JSON string per game with the keys
+                          `playerInfo`, `initHands` and `moveHistory`
+        payoffs (list): Payoff of position 0 in each game
+        wins (list): For each game, True if the payoff of position 0 is
+                     positive and False otherwise
+    """
+    env = rlcard.make(game, config={'allow_raw_data': True})
+    env.set_agents(agents)
+    roles = ['landlord', 'peasant', 'peasant']
+    payoffs = []
+    json_data = []
+    wins = []
+    for _ in tqdm(range(num)):
+        record = {}
+        record['playerInfo'] = [
+            {'id': i, 'index': i, 'role': roles[i], 'agentInfo': {'name': names[i]}}
+            for i in range(env.player_num)
+        ]
+        state, player_id = env.reset()
+        # Hands are read right after reset, before any move is played
+        record['initHands'] = [
+            cards2str(env.game.players[i].current_hand)
+            for i in range(env.player_num)
+        ]
+        record['moveHistory'] = []
+        while not env.is_over():
+            agent = env.agents[player_id]
+            action, _ = agent.eval_step(state)
+            # Agents flagged `use_raw` have their action stored as-is;
+            # otherwise the action is decoded through the environment first
+            move = action if agent.use_raw else env._decode_action(action)
+            record['moveHistory'].append({'playerIdx': player_id, 'move': move})
+            state, player_id = env.step(action, agent.use_raw)
+        json_data.append(json.dumps(record))
+        # Only the payoff of position 0 is tracked
+        first_payoff = env.get_payoffs()[0]
+        wins.append(bool(first_payoff > 0))
+        payoffs.append(first_payoff)
+    return json_data, payoffs, wins
+
+def leduc_holdem_tournament(game, agents, num):
    env = rlcard.make(game, config={'allow_raw_data': True})
    env.set_agents(agents)
    payoffs = []
@ -65,7 +123,9 @@ def tournament(game, agents, num):
        state, player_id = env.reset()
        perfect = env.get_perfect_information()
        data['initHands'] = perfect['hand_cards']
-        data['moveHistory'] = [[]]
+        data['moveHistory'] = []
+        round_history = []
+        round_id = 0
        while not env.is_over():
            action, probs = env.agents[player_id].eval_step(state)
            history = {}
@ -85,9 +145,16 @@ def tournament(game, agents, num):
                    p = -1
                probabilities.append({'move':a, 'probability': p})
            history['probabilities'] = probabilities
-            data['moveHistory'][0].append(history)
+            round_history.append(history)
+            perfect = env.get_perfect_information()
+            if round_id < perfect['current_round']:
+                round_id = perfect['current_round']
+                data['moveHistory'].append(round_history)
+                round_history = []
            state, player_id = env.step(action, env.agents[player_id].use_raw)
        perfect = env.get_perfect_information()
+        if round_id < perfect['current_round']:
+            data['moveHistory'].append(round_history)
        data['publicCard'] = perfect['public_card']
        data = json.dumps(data)
        #data = json.dumps(data, indent=2, sort_keys=True)