diff --git a/cuda_train.cmd b/cuda_train.cmd
index 5b395c3..2270905 100644
--- a/cuda_train.cmd
+++ b/cuda_train.cmd
@@ -1 +1 @@
-python train.py --load_model --batch_size 8 --learning_rate 0.0003
\ No newline at end of file
+python train.py --load_model --batch_size 8 --learning_rate 0.0003 --enable_onnx
\ No newline at end of file
diff --git a/douzero/dmc/dmc.py b/douzero/dmc/dmc.py
index 482e84d..00aa6f9 100644
--- a/douzero/dmc/dmc.py
+++ b/douzero/dmc/dmc.py
@@ -68,8 +68,9 @@ def learn(position, actor_models, model, batch, optimizer, flags, lock):
         nn.utils.clip_grad_norm_(model.parameters(), flags.max_grad_norm)
         optimizer.step()

-        for actor_model in actor_models.values():
-            actor_model.get_model(position).load_state_dict(model.state_dict())
+        if not flags.enable_onnx:
+            for actor_model in actor_models.values():
+                actor_model.get_model(position).load_state_dict(model.state_dict())
         return stats

 def train(flags):
@@ -103,9 +104,9 @@ def train(flags):
     models = {}
     for device in device_iterator:
         if flags.old_model:
-            model = OldModel(device="cpu")
+            model = OldModel(device="cpu", flags = flags)
         else:
-            model = Model(device="cpu")
+            model = Model(device="cpu", flags = flags)
         model.share_memory()
         model.eval()
         models[device] = model
@@ -149,8 +150,9 @@ def train(flags):
         for k in ['landlord', 'landlord_up', 'landlord_front', 'landlord_down', 'bidding']:  # ['landlord', 'landlord_up', 'landlord_down']
             learner_model.get_model(k).load_state_dict(checkpoint_states["model_state_dict"][k])
             optimizers[k].load_state_dict(checkpoint_states["optimizer_state_dict"][k])
-            for device in device_iterator:
-                models[device].get_model(k).load_state_dict(checkpoint_states["model_state_dict"][k])
+            if not flags.enable_onnx:
+                for device in device_iterator:
+                    models[device].get_model(k).load_state_dict(checkpoint_states["model_state_dict"][k])
         stats = checkpoint_states["stats"]

         if not 'mean_episode_return_bidding' in stats:
diff --git a/douzero/dmc/models.py b/douzero/dmc/models.py
index c876152..72b8f00 100644
--- a/douzero/dmc/models.py
+++ b/douzero/dmc/models.py
@@ -2,6 +2,7 @@
 This file includes the torch models. We wrap the
 three models into one class for convenience.
 """
+import os

 import numpy as np

@@ -331,8 +332,8 @@ class GeneralModel(nn.Module):
     def get_onnx_params(self):
         return {
             'args': (
-                torch.tensor(np.zeros([1, 40, 108]), dtype=torch.float32),
-                torch.tensor(np.zeros((1, 80)), dtype=torch.float32)
+                torch.tensor(np.zeros([1, 40, 108]), dtype=torch.float32, device='cuda:0'),
+                torch.tensor(np.zeros((1, 80)), dtype=torch.float32, device='cuda:0')
             ),
             'input_names': ['z_batch','x_batch'],
             'output_names': ['values'],
@@ -478,7 +479,7 @@ class OldModel:
     The wrapper for the three models. We also wrap several
     interfaces such as share_memory, eval, etc.
     """
-    def __init__(self, device=0):
+    def __init__(self, device=0, flags=None):
         self.models = {}
         if not device == "cpu":
             device = 'cuda:' + str(device)
@@ -519,17 +520,19 @@ class OldModel:
         return dict(action=action)

     def share_memory(self):
-        self.models['landlord'].share_memory()
-        self.models['landlord_up'].share_memory()
-        self.models['landlord_front'].share_memory()
-        self.models['landlord_down'].share_memory()
+        if self.models['landlord'] is not None:
+            self.models['landlord'].share_memory()
+            self.models['landlord_up'].share_memory()
+            self.models['landlord_front'].share_memory()
+            self.models['landlord_down'].share_memory()
         self.models['bidding'].share_memory()

     def eval(self):
-        self.models['landlord'].eval()
-        self.models['landlord_up'].eval()
-        self.models['landlord_front'].eval()
-        self.models['landlord_down'].eval()
+        if self.models['landlord'] is not None:
+            self.models['landlord'].eval()
+            self.models['landlord_up'].eval()
+            self.models['landlord_front'].eval()
+            self.models['landlord_down'].eval()
         self.models['bidding'].eval()

     def parameters(self, position):
@@ -547,32 +550,43 @@ class Model:
     The wrapper for the three models. We also wrap several
     interfaces such as share_memory, eval, etc.
     """
-    def __init__(self, device=0):
+    def __init__(self, device=0, flags=None):
         self.models = {}
+        self.onnx_models = {}
+        self.flags = flags
         if not device == "cpu":
             device = 'cuda:' + str(device)
         # model = GeneralModel().to(torch.device(device))
-        self.models['landlord'] = GeneralModel().to(torch.device(device))
-        self.models['landlord_up'] = GeneralModel().to(torch.device(device))
-        self.models['landlord_front'] = GeneralModel().to(torch.device(device))
-        self.models['landlord_down'] = GeneralModel().to(torch.device(device))
-        self.models['bidding'] = BidModel().to(torch.device(device))
-        self.onnx_models = {
-            'landlord': None,
-            'landlord_up': None,
-            'landlord_front': None,
-            'landlord_down': None,
-            'bidding': None
-        }
-        self.models['bidding'] = BidModel().to(torch.device(device))
+        positions = ['landlord', 'landlord_up', 'landlord_front', 'landlord_down']
+        if flags is not None and flags.enable_onnx:
+            self.models['bidding'] = BidModel().to(torch.device(device))
+            for position in positions:
+                self.models[position] = None
+        else:
+            for position in positions:
+                self.models[position] = GeneralModel().to(torch.device(device))
+            self.models['bidding'] = BidModel().to(torch.device(device))
+            self.onnx_models = {
+                'landlord': None,
+                'landlord_up': None,
+                'landlord_front': None,
+                'landlord_down': None,
+                'bidding': None
+            }

-    def set_onnx_model(self, position, model_path):
-        self.onnx_models[position] = onnxruntime.InferenceSession(get_example(model_path))
+    def set_onnx_model(self):
+        positions = ['landlord', 'landlord_up', 'landlord_front', 'landlord_down']
+        for position in positions:
+            model_path = os.path.abspath('%s/%s/model_%s.onnx' % (self.flags.savedir, self.flags.xpid, position))
+            self.onnx_models[position] = onnxruntime.InferenceSession(get_example(model_path), providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
+        self.onnx_models['bidding'] = None

     def get_onnx_params(self, position):
         self.models[position].get_onnx_params()

     def forward(self, position, z, x, return_value=False, flags=None, debug=False):
+        if self.flags.enable_onnx and len(self.onnx_models) == 0:
+            self.set_onnx_model()
         model = self.onnx_models[position]
         if model is None:
             model = self.models[position]
@@ -590,17 +604,19 @@ class Model:
         return dict(action=action)

     def share_memory(self):
-        self.models['landlord'].share_memory()
-        self.models['landlord_up'].share_memory()
-        self.models['landlord_front'].share_memory()
-        self.models['landlord_down'].share_memory()
+        if self.models['landlord'] is not None:
+            self.models['landlord'].share_memory()
+            self.models['landlord_up'].share_memory()
+            self.models['landlord_front'].share_memory()
+            self.models['landlord_down'].share_memory()
         self.models['bidding'].share_memory()

     def eval(self):
-        self.models['landlord'].eval()
-        self.models['landlord_up'].eval()
-        self.models['landlord_front'].eval()
-        self.models['landlord_down'].eval()
+        if self.models['landlord'] is not None:
+            self.models['landlord'].eval()
+            self.models['landlord_up'].eval()
+            self.models['landlord_front'].eval()
+            self.models['landlord_down'].eval()
         self.models['bidding'].eval()

     def parameters(self, position):
diff --git a/douzero/dmc/utils.py b/douzero/dmc/utils.py
index d49aded..3df3f34 100644
--- a/douzero/dmc/utils.py
+++ b/douzero/dmc/utils.py
@@ -83,8 +83,11 @@ def create_optimizers(flags, learner_model):

 def act(i, device, batch_queues, model, flags, onnx_frame):
     positions = ['landlord', 'landlord_up', 'landlord_front', 'landlord_down', 'bidding']
-    for pos in positions:
-        model.models[pos].to(torch.device(device if device == "cpu" else ("cuda:"+str(device))))
+    if not flags.enable_onnx:
+        for pos in positions:
+            model.models[pos].to(torch.device(device if device == "cpu" else ("cuda:"+str(device))))
+    else:
+        model.models['bidding'].to(torch.device(device if device == "cpu" else ("cuda:" + str(device))))
     try:
         T = flags.unroll_length
         log.info('Device %s Actor %i started.', str(device), i)
@@ -117,9 +120,7 @@ def act(i, device, batch_queues, model, flags, onnx_frame):
                 last_onnx_frame = onnx_frame.value
                 for p in positions:
                     if p != 'bidding':
-                        model_path = '%s/%s/model_%s.onnx' % (flags.savedir, flags.xpid, p)
-                        if os.path.exists(model_path):
-                            model.set_onnx_model(p, os.path.abspath(model_path))
+                        model.set_onnx_model()

             for bid_obs in bid_obs_buffer:
                 obs_z_buf["bidding"].append(bid_obs['z_batch'])
diff --git a/requirements.txt b/requirements.txt
index 41b5f02..2dfca1a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,4 +4,4 @@ gitdb2
 rlcard
 psutil
 onnx
-onnxruntime
+onnxruntime-gpu
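Note: the actor side of this patch assumes ONNX files already exist at <savedir>/<xpid>/model_<position>.onnx when Model.set_onnx_model() is called; the learner-side export step is not shown in the diff. A rough sketch of what that step could look like, using the get_onnx_params() dict visible above (the helper name export_onnx_models and its call site are hypothetical, not part of this patch):

    import os
    import torch

    def export_onnx_models(learner_model, flags):
        # Hypothetical helper: write one ONNX graph per playing position so the
        # actor processes can reload it with onnxruntime-gpu via set_onnx_model().
        for position in ['landlord', 'landlord_up', 'landlord_front', 'landlord_down']:
            net = learner_model.get_model(position)
            params = net.get_onnx_params()  # provides 'args', 'input_names', 'output_names'
            path = os.path.join(flags.savedir, flags.xpid, 'model_%s.onnx' % position)
            # get_onnx_params() now builds its dummy inputs on cuda:0 (see the patch
            # above), so the model is assumed to live on cuda:0 at export time.
            torch.onnx.export(net, params['args'], path,
                              input_names=params['input_names'],
                              output_names=params['output_names'])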