Update server

2021-05-27 10:18:30 -05:00 · 2021-05-27 10:18:30 -05:00 · 21e3ead424
parent 3d9f7cebd9
commit 21e3ead424
6 changed files with 52 additions and 55 deletions
--- a/docs/api.md
+++ b/docs/api.md
@ -17,8 +17,8 @@ The definitions of the fields are as follows:
 *   `index`: Integer. The index of the game of the same environent and same agent. It is in the range \[0, eval_num-1\]

 | type | Resource                            |  Parameters                                                                               |  Description                                                                                                       |
-|------|-------------------------------------|-------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------|
-| GET  | tournament/launch                   | `eval_num`, `name`                                                                        | Launch tournment on the game. Each pair of models will play `eval_num` times. Results will be saved in database.   |
+|------|-------------------------------------|-------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------|
+| GET  | tournament/launch                   | `num_eval_games`, `name`                                                                  | Launch tournament on the game. Each pair of models will play `num_eval_games` times. Results will be saved in database. |
 | GET  | tournament/query\_game              | `name`, `index`, `agent0`, `agent1`, `win`, `payoff`, `elements_every_page`, `page_index` | Query the games with the given parameters                                                                              |
 | GET  | tournament/query\_payoff            | `name`, `agent0`, `agent1`, `payoff`                                                      | Query the payoffs with the given parameters                                                                            |
 | GET  | tournament/query\_agent\_payoff     | `name`, `elements_every_page`, `page_index`,                                              | Query the payoffs of all the agents                                                                                    |
@ -32,16 +32,16 @@ The definitions of the fields are as follows:
 ### Example API
 | API                                                                                                                                      | Description                                                                              |
 |------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------|
-| http://127.0.0.1:8000/tournament/launch?eval\_num=200&name=leduc-holdem                                                                  | Evaluate on Leduc Holdem with 200 games for each pair of models                          |
+| http://127.0.0.1:8000/tournament/launch?num_eval_games=200&name=leduc-holdem                                                                  | Evaluate on Leduc Holdem with 200 games for each pair of models                          |
 | http://127.0.0.1:8000/tournament/replay?name=leduc-holdem&agent0=leduc-holdem-rule-v1&agent1=leduc-holdem-cfr&index=3                    | Obtain the replay data between rule model and CFR model. Obtain the data of the 3rd game |
-| http://127.0.0.1:8000/tournament/query\_game&elements\_every\_page=10&page\_index=0                                                      | Get all the game data                                                                    |
-| http://127.0.0.1:8000/tournament/query\_game?name=leduc-holdem&elements\_every\_page=10&page\_index=0                                    | Get all the game data of Leduc Holdem                                                    |
-| http://127.0.0.1:8000/tournament/query\_payoff                                                                                           | Get all the payoffs                                                                      |
-| http://127.0.0.1:8000/tournament/query\_payoff?agent0=leduc-holdem-cfr&agent1=leduc-holdem-rule-v1                                       | Get all the payoffs between rule and CFR models                                          |
-| http://127.0.0.1:8000/tournament/query\_agent\_payoff?name=leduc-holdem&elements\_every\_page=1&page\_index=1                            | Get the payoffs of all the agents of leduc-holdem                                        |
-| http://127.0.0.1:8000/tournament/list\_uploaded\_agents?game=leduc-holdem                                                                | List the uploaded agents of leduc-holdem                                                 |
-| http://127.0.0.1:8000/tournament/list\_baseline\_agents?game=leduc-holdem                                                                | List the baseline agents of leduc-holdem                                                 |
-| http://127.0.0.1:8000/tournament/download\_examples?name=example\_luduc\_nfsp\_model                                                     | Download the NFSP example model for Leduc Hold'em                                        |
+| http://127.0.0.1:8000/tournament/query_game?elements_every_page=10&page_index=0                                                      | Get all the game data                                                                    |
+| http://127.0.0.1:8000/tournament/query_game?name=leduc-holdem&elements_every_page=10&page_index=0                                    | Get all the game data of Leduc Holdem                                                    |
+| http://127.0.0.1:8000/tournament/query_payoff                                                                                           | Get all the payoffs                                                                      |
+| http://127.0.0.1:8000/tournament/query_payoff?agent0=leduc-holdem-cfr&agent1=leduc-holdem-rule-v1                                       | Get all the payoffs between rule and CFR models                                          |
+| http://127.0.0.1:8000/tournament/query_agent_payoff?name=leduc-holdem&elements_every_page=1&page_index=1                            | Get the payoffs of all the agents of leduc-holdem                                        |
+| http://127.0.0.1:8000/tournament/list_uploaded_agents?game=leduc-holdem                                                                | List the uploaded agents of leduc-holdem                                                 |
+| http://127.0.0.1:8000/tournament/list_baseline_agents?game=leduc-holdem                                                                | List the baseline agents of leduc-holdem                                                 |
+| http://127.0.0.1:8000/tournament/download_examples?name=example_luduc_nfsp_model                                                     | Download the NFSP example model for Leduc Hold'em                                        |

 ## Registered Models
 Some models have been pre-registered as baselines
@ -64,7 +64,7 @@ curl -F 'model=@example_luduc_nfsp_model.zip' -F "name=leduc-nfsp" -F "game=ledu
 ```
 Launch the tounament with:
 ```
-curl 'http://127.0.0.1:8000/tournament/launch?eval_num=200&name=leduc-holdem'
+curl 'http://127.0.0.1:8000/tournament/launch?num_eval_games=200&name=leduc-holdem'
 ```
 We list the uploaded agent with
 ```
--- a/requirements.txt
+++ b/requirements.txt
@ -1,5 +1,10 @@
 rlcard
 Django
 tqdm
-tensorflow==1.14
 django-cors-headers
+torch==1.6.0
+flask==1.1
+flask-cors
+onnx
+onnxruntime
+
--- a/server/tournament/rlcard_wrap/doudizhu_random_model.py
+++ b/server/tournament/rlcard_wrap/doudizhu_random_model.py
@ -21,8 +21,8 @@ class DoudizhuRandomModel(Model):
        ''' Load random model
        '''
        env = rlcard.make('doudizhu')
-        self.agent = RandomAgent(action_num=env.action_num)
-        self.player_num = env.player_num
+        self.agent = RandomAgent(num_actions=env.num_actions)
+        self.num_players = env.num_players

    @property
    def agents(self):
@ -34,7 +34,7 @@ class DoudizhuRandomModel(Model):
        Note: Each agent should be just like RL agent with step and eval_step
              functioning well.
        '''
-        return [self.agent for _ in range(self.player_num)]
+        return [self.agent for _ in range(self.num_players)]

    @property
    def use_raw(self):
--- a/server/tournament/rlcard_wrap/leduc_holdem_random_model.py
+++ b/server/tournament/rlcard_wrap/leduc_holdem_random_model.py
@ -21,8 +21,8 @@ class LeducHoldemRandomModel(Model):
        ''' Load random model
        '''
        env = rlcard.make('leduc-holdem')
-        self.agent = RandomAgent(action_num=env.action_num)
-        self.player_num = env.player_num
+        self.agent = RandomAgent(num_actions=env.num_actions)
+        self.num_players = env.num_players

    @property
    def agents(self):
@ -34,7 +34,7 @@ class LeducHoldemRandomModel(Model):
        Note: Each agent should be just like RL agent with step and eval_step
              functioning well.
        '''
-        return [self.agent for _ in range(self.player_num)]
+        return [self.agent for _ in range(self.num_players)]

    @property
    def use_raw(self):
--- a/server/tournament/tournament.py
+++ b/server/tournament/tournament.py
@ -7,34 +7,34 @@ from .rlcard_wrap import rlcard

 class Tournament(object):
    
-    def __init__(self, game, model_ids, evaluate_num=100):
+    def __init__(self, game, model_ids, num_eval_games=100):
        """ Default for two player games
            For Dou Dizhu, the two peasants use the same model
        """
        self.game = game
        self.model_ids = model_ids
-        self.evaluate_num = evaluate_num
+        self.num_eval_games = num_eval_games
        # Load the models
        self.models = [rlcard.models.load(model_id) for model_id in model_ids]

    def launch(self):
        """ Currently for two-player game only
        """
-        model_num = len(self.model_ids)
+        num_models = len(self.model_ids)
        games_data = []
        payoffs_data = []
-        for i in range(model_num):
-            for j in range(model_num):
+        for i in range(num_models):
+            for j in range(num_models):
                if j == i:
                    continue
                print(self.game, '-', self.model_ids[i], 'VS', self.model_ids[j])
                if self.game == 'doudizhu':
                    agents = [self.models[i].agents[0], self.models[j].agents[1], self.models[j].agents[2]]
                    names = [self.model_ids[i], self.model_ids[j], self.model_ids[j]]
-                    data, payoffs, wins = doudizhu_tournament(self.game, agents, names, self.evaluate_num)
+                    data, payoffs, wins = doudizhu_tournament(self.game, agents, names, self.num_eval_games)
                elif self.game == 'leduc-holdem':
                    agents = [self.models[i].agents[0], self.models[j].agents[1]]
-                    data, payoffs, wins = leduc_holdem_tournament(self.game, agents, self.evaluate_num)
+                    data, payoffs, wins = leduc_holdem_tournament(self.game, agents, self.num_eval_games)
                mean_payoff = np.mean(payoffs)
                print('Average payoff:', mean_payoff)
                print()
@ -59,16 +59,16 @@ class Tournament(object):
                payoffs_data.append(payoff_data)
        return games_data, payoffs_data

-def doudizhu_tournament(game, agents, names, num):
+def doudizhu_tournament(game, agents, names, num_eval_games):
    env = rlcard.make(game, config={'allow_raw_data': True})
    env.set_agents(agents)
    payoffs = []
    json_data = []
    wins = []
-    for _ in tqdm(range(num)):
+    for _ in tqdm(range(num_eval_games)):
        data = {}
        roles = ['landlord', 'peasant', 'peasant']
-        data['playerInfo'] = [{'id': i, 'index': i, 'role': roles[i], 'agentInfo': {'name': names[i]}} for i in range(env.player_num)]
+        data['playerInfo'] = [{'id': i, 'index': i, 'role': roles[i], 'agentInfo': {'name': names[i]}} for i in range(env.num_players)]
        state, player_id = env.reset()
        perfect = env.get_perfect_information()
        data['initHands'] = perfect['hand_cards_with_suit']
@ -77,7 +77,7 @@ def doudizhu_tournament(game, agents, names, num):
            current_hand_cards[i] = current_hand_cards[i].split()
        data['moveHistory'] = []
        while not env.is_over():
-            action, probs = env.agents[player_id].eval_step(state)
+            action, info = env.agents[player_id].eval_step(state)
            history = {}
            history['playerIdx'] = player_id
            if env.agents[player_id].use_raw:
@ -85,6 +85,7 @@ def doudizhu_tournament(game, agents, names, num):
            else:
                _action = env._decode_action(action)
            history['move'] = _calculate_doudizhu_move(_action, player_id, current_hand_cards)
+            history['info'] = info

            data['moveHistory'].append(history)
            state, player_id = env.step(action, env.agents[player_id].use_raw)
@ -115,15 +116,15 @@ def _calculate_doudizhu_move(action, player_id, current_hand_cards):
                    break
    return ' '.join(cards_with_suit)

-def leduc_holdem_tournament(game, agents, num):
+def leduc_holdem_tournament(game, agents, num_eval_games):
    env = rlcard.make(game, config={'allow_raw_data': True})
    env.set_agents(agents)
    payoffs = []
    json_data = []
    wins = []
-    for _ in tqdm(range(num)):
+    for _ in tqdm(range(num_eval_games)):
        data = {}
-        data['playerInfo'] = [{'id': i, 'index': i} for i in range(env.player_num)]
+        data['playerInfo'] = [{'id': i, 'index': i} for i in range(env.num_players)]
        state, player_id = env.reset()
        perfect = env.get_perfect_information()
        data['initHands'] = perfect['hand_cards']
@ -131,7 +132,7 @@ def leduc_holdem_tournament(game, agents, num):
        round_history = []
        round_id = 0
        while not env.is_over():
-            action, probs = env.agents[player_id].eval_step(state)
+            action, info = env.agents[player_id].eval_step(state)
            history = {}
            history['playerIdx'] = player_id
            if env.agents[player_id].use_raw:
@ -139,16 +140,7 @@ def leduc_holdem_tournament(game, agents, num):
            else:
                history['move'] = env._decode_action(action)

-            probabilities = []
-            for i, a in enumerate(env.actions):
-                if len(probs) == 0:
-                    p = -2
-                elif a in state['raw_legal_actions']:
-                    p = probs[i]
-                else:
-                    p = -1
-                probabilities.append({'move':a, 'probability': p})
-            history['probabilities'] = probabilities
+            history['info'] = info
            round_history.append(history)
            state, player_id = env.step(action, env.agents[player_id].use_raw)
            perfect = env.get_perfect_information()
--- a/server/tournament/views.py
+++ b/server/tournament/views.py
@ -102,13 +102,13 @@ def query_agent_payoff(request):
 def launch(request):
    if request.method == 'GET':
        try:
-            eval_num = int(request.GET['eval_num'])
+            num_eval_games = int(request.GET['num_eval_games'])
            game = request.GET['name']
        except:
            return HttpResponse(json.dumps({'value': -1, 'info': 'parameters error'}))

        MODEL_IDS_ALL = _get_model_ids_all()
-        games_data, payoffs_data = Tournament(game, MODEL_IDS_ALL[game], eval_num).launch()
+        games_data, payoffs_data = Tournament(game, MODEL_IDS_ALL[game], num_eval_games).launch()
        Game.objects.filter(name=game).delete()
        Payoff.objects.filter(name=game).delete()
        for game_data in games_data: