移除根据胜率叫地主逻辑（4人场景下，胜率计算未适配）

2021-12-09 20:02:40 +08:00 · 2021-12-09 20:02:40 +08:00 · c239085c24
parent a755ffe719
commit c239085c24
3 changed files with 225 additions and 9 deletions
--- a/douzero/env/env.py
+++ b/douzero/env/env.py
@ -129,11 +129,11 @@ class Env:
                    with torch.no_grad():
                        action = model.forward("bidding", torch.tensor(bidding_obs["z_batch"], device=device),
                                               torch.tensor(bidding_obs["x_batch"], device=device), flags=flags)
-                    if bid_limit <= 0:
-                        wr = BidModel.predict_env(card_play_data[bidding_player])
-                        if wr >= 0.7:
-                            action = {"action": 1}  # debug
-                            bid_limit += 1
+                    # if bid_limit <= 0:
+                    #     wr = BidModel.predict_env(card_play_data[bidding_player])
+                    #     if wr >= 0.7:
+                    #         action = {"action": 1}  # debug
+                    #         bid_limit += 1

                    bid_obs_buffer.append({
                        "x_batch": bidding_obs["x_batch"][action["action"]],
--- a/evaluate.py
+++ b/evaluate.py
@ -5,6 +5,12 @@ from douzero.evaluation.simulation import evaluate


 def make_evaluate(args, t, frame, adp_frame, folder_a = 'baselines', folder_b = 'baselines'):
+    if t == 0:
+        args.landlord = 'random'
+        args.landlord_up = 'random'
+        args.landlord_front = 'random'
+        args.landlord_down = 'random'
+        print('random vs random')
    if t == 1:
        args.landlord = '%s/resnet_landlord_%i.ckpt' % (folder_a, frame)
        args.landlord_up = 'random'
@ -96,8 +102,13 @@ if __name__ == '__main__':
    # ]
    
    eval_list = [
-        [4968800, 8697600, 'baselines', 'baselines2'],
-        [4968800, 4968800, 'baselines', 'baselines'],
+        # [4968800, 8697600, 'baselines', 'baselines2'],
+        # [4968800, 4968800, 'baselines', 'baselines'],
+        # [14102400, 4968800, 'baselines', 'baselines'],
+        # [14102400, 13252000, 'baselines', 'baselines2'],
+        # [14102400, 15096800, 'baselines', 'baselines2'],
+        [14102400, 14102400, 'baselines', 'baselines'],
+        # [14102400, None, 'baselines', 'baselines'],
    ]

    for vs in reversed(eval_list):
@ -106,8 +117,11 @@ if __name__ == '__main__':
        folder_a = vs[2]
        folder_b = vs[3]
        if adp_frame is None:
-            make_evaluate(args, 1, frame, None)
-            make_evaluate(args, 2, frame, None)
+            if frame is None:
+                make_evaluate(args, 0, None, None)
+            else:
+                make_evaluate(args, 1, frame, None)
+                make_evaluate(args, 2, frame, None)
        else:
            make_evaluate(args, 3, frame, adp_frame, folder_a , folder_b)
            if frame != adp_frame:
--- a/generate_eval_data_with_bid.py
+++ b/generate_eval_data_with_bid.py
@ -0,0 +1,202 @@
+import argparse
+import pickle
+import numpy as np
+import torch
+import random
+import douzero
+
+from douzero.dmc.models import Model
+
+# Master deck of 108 card codes in ascending order: codes 3..14 and 17 appear
+# eight times each, followed by two 20s and two 30s.
+# NOTE(review): presumably 17 encodes the "2" and 20/30 the small/big jokers
+# (four decks' worth of ranks, two of jokers) -- confirm against the env code.
+deck = (
+    [rank for rank in range(3, 15) for _ in range(8)]
+    + [17] * 8
+    + [20, 20, 30, 30]
+)
+
+def get_parser():
+    """Build the command-line parser for this data generator.
+
+    Recognised options (all optional):
+      --output       stem of the pickle file to write (default 'eval_data')
+      --path         checkpoint loaded into the bidding model
+      --num_games    number of games to generate (default 10000)
+      --exp_epsilon  float kept on the parsed namespace (default 0.01); the
+                     namespace as a whole is what the script passes around
+                     as ``flags``
+
+    Returns:
+        argparse.ArgumentParser: the configured parser (not yet parsed).
+    """
+    option_table = (
+        ('--output', 'eval_data', str),
+        ('--path', 'baselines/resnet_bidding_15419200.ckpt', str),
+        ('--num_games', 10000, int),
+        ('--exp_epsilon', 0.01, float),
+    )
+    cli = argparse.ArgumentParser(description='DouZero: random data generator')
+    for option, default_value, value_type in option_table:
+        cli.add_argument(option, default=default_value, type=value_type)
+    return cli
+
+
+def _query_bid(model, device, player, bid_info, hand):
+    """Run the bidding network for `player` and return its action dict.
+
+    The observation is built from the bid history so far and the player's own
+    hand; the forward pass runs without gradient tracking.
+
+    NOTE: `flags` is resolved as a module-level global. It is only assigned in
+    the ``__main__`` section of this file, so calling this from an importing
+    module raises NameError unless that name is defined first.
+    """
+    obs = douzero.env.env._get_obs_for_bid(player, bid_info, hand)
+    with torch.no_grad():
+        return model.forward("bidding", torch.tensor(obs["z_batch"], device=device),
+                             torch.tensor(obs["x_batch"], device=device), flags=flags)
+
+
+def generate_with_bid(num_games, bid_model_path):
+    """Generate `num_games` four-player deals whose landlord is chosen by the bidding model.
+
+    For every game the 108-card deck is shuffled and dealt as 4 hands of 25
+    plus 8 landlord cards. Starting from a random seat, each of the four
+    players is asked once whether to bid. If nobody bids the deal is thrown
+    away and redealt. If more than one player bid, the first bidder is asked
+    once more (round index 4) and becomes landlord on a second "yes";
+    otherwise the last player who bid is the landlord. The landlord receives
+    the 8 extra cards.
+
+    Win-rate based overrides of the model's decision are intentionally not
+    applied here; the network's action is used as-is.
+
+    Args:
+        num_games: number of games to generate; values <= 0 yield an empty list.
+        bid_model_path: path of the checkpoint loaded into the "bidding" model.
+
+    Returns:
+        list[dict]: one entry per game with two keys:
+          "play": dict mapping 'landlord', 'landlord_up', 'landlord_front',
+                  'landlord_down' to that seat's sorted card list;
+          "bid":  dict with the same keys, each a 5x4 integer array holding the
+                  bid history with columns reordered to
+                  [previous seat, own seat, next seat, opposite seat].
+                  Rows that were never played stay at -1.
+    """
+    if num_games <= 0:
+        # Same result as the original empty loop, and nothing gets loaded.
+        return []
+
+    # The network does not depend on the game, so build it and load the
+    # checkpoint once instead of once per generated game.
+    device = torch.device("cpu")
+    model = Model(device='cpu')
+    bid_model = model.get_model("bidding")
+    bid_model.load_state_dict(torch.load(bid_model_path, map_location=device))
+    bid_model.eval()
+
+    data_list = []
+    for _ in range(num_games):
+        # Keep dealing until at least one player bids.
+        while True:
+            _deck = deck.copy()
+            np.random.shuffle(_deck)
+            hands = [sorted(_deck[start:start + 25]) for start in range(0, 100, 25)]
+            landlord_cards = sorted(_deck[100:108])
+            # One row per bidding round (4 regular + 1 tie-break), one column
+            # per seat; -1 marks "round not played yet".
+            bid_info = np.full((5, 4), -1)
+            bidding_player = random.randint(0, 3)
+            first_bid = -1
+            last_bid = -1
+            for r in range(4):
+                action = _query_bid(model, device, bidding_player, bid_info,
+                                    hands[bidding_player])
+                # A played round is all zeros, with a single 1 at the seat
+                # that bid (if it did).
+                bid_info[r] = 0
+                if action["action"] == 1:
+                    bid_info[r][bidding_player] = 1
+                    last_bid = bidding_player
+                    if first_bid == -1:
+                        first_bid = bidding_player
+                bidding_player = (bidding_player + 1) % 4
+
+            one_count = np.count_nonzero(bid_info == 1)
+            if one_count == 0:
+                continue  # nobody bid: redeal
+            if one_count > 1:
+                # Several bidders: the first bidder gets one more decision.
+                action = _query_bid(model, device, first_bid, bid_info, hands[first_bid])
+                if action["action"] == 1:
+                    last_bid = first_bid
+                    bid_info[4] = 0
+                    bid_info[4][first_bid] = 1
+            break
+
+        # The winner of the bidding takes the extra cards.
+        hands[last_bid].extend(landlord_cards)
+        hands[last_bid].sort()
+
+        # Seat index of every role, relative to the landlord. The insertion
+        # order fixes the key order of the emitted dicts.
+        player_ids = {
+            'landlord': last_bid,
+            'landlord_up': (last_bid - 1) % 4,
+            'landlord_front': (last_bid + 2) % 4,
+            'landlord_down': (last_bid + 1) % 4,
+        }
+        data_list.append({
+            "play": {pos: hands[pid] for pos, pid in player_ids.items()},
+            # Fancy indexing returns a copy with the columns rotated so that
+            # each role sees the history from its own seat.
+            "bid": {
+                pos: bid_info[:, [(pid - 1) % 4, pid, (pid + 1) % 4, (pid + 2) % 4]]
+                for pos, pid in player_ids.items()
+            },
+        })
+    return data_list
+
+
+
+if __name__ == '__main__':
+    # `flags` has to remain a module-level name: the bidding forward pass in
+    # this file reads it as a global rather than receiving it as an argument.
+    flags = get_parser().parse_args()
+    output_pickle = f"{flags.output}.pkl"
+
+    print("output_pickle:", output_pickle)
+    print("generating data...")
+
+    # Materialise all generated games, echo them, then persist them.
+    data = list(generate_with_bid(flags.num_games, flags.path))
+    print(data)
+    print("saving pickle file...")
+    with open(output_pickle, 'wb') as pickle_file:
+        pickle.dump(data, pickle_file)
+
+
+
+