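修复BUG (fix bug): make the third argument of the _get_one_hot_array calls conditional on compressed_form (0 when it is false), and pass compressed_form instead of a literal 8 to _cards2array for the landlord_up played cards.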

This commit is contained in:
zhiyang7 2021-12-23 09:34:03 +08:00
parent 016d77aeb0
commit 177cb04c03
1 changed file with 17 additions and 17 deletions

34
douzero/env/env.py vendored
View File

@ -496,25 +496,25 @@ def _get_obs_landlord(infoset, use_legacy = False, compressed_form = False):
my_action_batch[j, :] = _cards2array(action, compressed_form) my_action_batch[j, :] = _cards2array(action, compressed_form)
landlord_up_num_cards_left = _get_one_hot_array( landlord_up_num_cards_left = _get_one_hot_array(
infoset.num_cards_left_dict['landlord_up'], 25, 15) infoset.num_cards_left_dict['landlord_up'], 25, 15 if compressed_form else 0)
landlord_up_num_cards_left_batch = np.repeat( landlord_up_num_cards_left_batch = np.repeat(
landlord_up_num_cards_left[np.newaxis, :], landlord_up_num_cards_left[np.newaxis, :],
num_legal_actions, axis=0) num_legal_actions, axis=0)
landlord_front_num_cards_left = _get_one_hot_array( landlord_front_num_cards_left = _get_one_hot_array(
infoset.num_cards_left_dict['landlord_front'], 25, 8) infoset.num_cards_left_dict['landlord_front'], 25, 8 if compressed_form else 0)
landlord_front_num_cards_left_batch = np.repeat( landlord_front_num_cards_left_batch = np.repeat(
landlord_front_num_cards_left[np.newaxis, :], landlord_front_num_cards_left[np.newaxis, :],
num_legal_actions, axis=0) num_legal_actions, axis=0)
landlord_down_num_cards_left = _get_one_hot_array( landlord_down_num_cards_left = _get_one_hot_array(
infoset.num_cards_left_dict['landlord_down'], 25, 8) infoset.num_cards_left_dict['landlord_down'], 25, 8 if compressed_form else 0)
landlord_down_num_cards_left_batch = np.repeat( landlord_down_num_cards_left_batch = np.repeat(
landlord_down_num_cards_left[np.newaxis, :], landlord_down_num_cards_left[np.newaxis, :],
num_legal_actions, axis=0) num_legal_actions, axis=0)
landlord_up_played_cards = _cards2array( landlord_up_played_cards = _cards2array(
infoset.played_cards['landlord_up'], 8) infoset.played_cards['landlord_up'], compressed_form)
landlord_up_played_cards_batch = np.repeat( landlord_up_played_cards_batch = np.repeat(
landlord_up_played_cards[np.newaxis, :], landlord_up_played_cards[np.newaxis, :],
num_legal_actions, axis=0) num_legal_actions, axis=0)
@ -601,7 +601,7 @@ def _get_obs_landlord_up(infoset, use_legacy = False, compressed_form = False):
last_landlord_action[np.newaxis, :], last_landlord_action[np.newaxis, :],
num_legal_actions, axis=0) num_legal_actions, axis=0)
landlord_num_cards_left = _get_one_hot_array( landlord_num_cards_left = _get_one_hot_array(
infoset.num_cards_left_dict['landlord'], 33, 15) infoset.num_cards_left_dict['landlord'], 33, 15 if compressed_form else 0)
landlord_num_cards_left_batch = np.repeat( landlord_num_cards_left_batch = np.repeat(
landlord_num_cards_left[np.newaxis, :], landlord_num_cards_left[np.newaxis, :],
num_legal_actions, axis=0) num_legal_actions, axis=0)
@ -618,7 +618,7 @@ def _get_obs_landlord_up(infoset, use_legacy = False, compressed_form = False):
last_teammate_action[np.newaxis, :], last_teammate_action[np.newaxis, :],
num_legal_actions, axis=0) num_legal_actions, axis=0)
teammate_num_cards_left = _get_one_hot_array( teammate_num_cards_left = _get_one_hot_array(
infoset.num_cards_left_dict['landlord_down'], 25, 8) infoset.num_cards_left_dict['landlord_down'], 25, 8 if compressed_form else 0)
teammate_num_cards_left_batch = np.repeat( teammate_num_cards_left_batch = np.repeat(
teammate_num_cards_left[np.newaxis, :], teammate_num_cards_left[np.newaxis, :],
num_legal_actions, axis=0) num_legal_actions, axis=0)
@ -635,7 +635,7 @@ def _get_obs_landlord_up(infoset, use_legacy = False, compressed_form = False):
last_teammate_front_action[np.newaxis, :], last_teammate_front_action[np.newaxis, :],
num_legal_actions, axis=0) num_legal_actions, axis=0)
teammate_front_num_cards_left = _get_one_hot_array( teammate_front_num_cards_left = _get_one_hot_array(
infoset.num_cards_left_dict['landlord_front'], 25, 8) infoset.num_cards_left_dict['landlord_front'], 25, 8 if compressed_form else 0)
teammate_front_num_cards_left_batch = np.repeat( teammate_front_num_cards_left_batch = np.repeat(
teammate_front_num_cards_left[np.newaxis, :], teammate_front_num_cards_left[np.newaxis, :],
num_legal_actions, axis=0) num_legal_actions, axis=0)
@ -722,7 +722,7 @@ def _get_obs_landlord_front(infoset, use_legacy = False, compressed_form = False
last_landlord_action[np.newaxis, :], last_landlord_action[np.newaxis, :],
num_legal_actions, axis=0) num_legal_actions, axis=0)
landlord_num_cards_left = _get_one_hot_array( landlord_num_cards_left = _get_one_hot_array(
infoset.num_cards_left_dict['landlord'], 33, 15) infoset.num_cards_left_dict['landlord'], 33, 15 if compressed_form else 0)
landlord_num_cards_left_batch = np.repeat( landlord_num_cards_left_batch = np.repeat(
landlord_num_cards_left[np.newaxis, :], landlord_num_cards_left[np.newaxis, :],
num_legal_actions, axis=0) num_legal_actions, axis=0)
@ -739,7 +739,7 @@ def _get_obs_landlord_front(infoset, use_legacy = False, compressed_form = False
last_teammate_action[np.newaxis, :], last_teammate_action[np.newaxis, :],
num_legal_actions, axis=0) num_legal_actions, axis=0)
teammate_num_cards_left = _get_one_hot_array( teammate_num_cards_left = _get_one_hot_array(
infoset.num_cards_left_dict['landlord_down'], 25, 8) infoset.num_cards_left_dict['landlord_down'], 25, 8 if compressed_form else 0)
teammate_num_cards_left_batch = np.repeat( teammate_num_cards_left_batch = np.repeat(
teammate_num_cards_left[np.newaxis, :], teammate_num_cards_left[np.newaxis, :],
num_legal_actions, axis=0) num_legal_actions, axis=0)
@ -756,7 +756,7 @@ def _get_obs_landlord_front(infoset, use_legacy = False, compressed_form = False
last_teammate_front_action[np.newaxis, :], last_teammate_front_action[np.newaxis, :],
num_legal_actions, axis=0) num_legal_actions, axis=0)
teammate_front_num_cards_left = _get_one_hot_array( teammate_front_num_cards_left = _get_one_hot_array(
infoset.num_cards_left_dict['landlord_front'], 25, 8) infoset.num_cards_left_dict['landlord_front'], 25, 8 if compressed_form else 0)
teammate_front_num_cards_left_batch = np.repeat( teammate_front_num_cards_left_batch = np.repeat(
teammate_front_num_cards_left[np.newaxis, :], teammate_front_num_cards_left[np.newaxis, :],
num_legal_actions, axis=0) num_legal_actions, axis=0)
@ -843,7 +843,7 @@ def _get_obs_landlord_down(infoset, use_legacy = False, compressed_form = False)
last_landlord_action[np.newaxis, :], last_landlord_action[np.newaxis, :],
num_legal_actions, axis=0) num_legal_actions, axis=0)
landlord_num_cards_left = _get_one_hot_array( landlord_num_cards_left = _get_one_hot_array(
infoset.num_cards_left_dict['landlord'], 33, 15) infoset.num_cards_left_dict['landlord'], 33, 15 if compressed_form else 0)
landlord_num_cards_left_batch = np.repeat( landlord_num_cards_left_batch = np.repeat(
landlord_num_cards_left[np.newaxis, :], landlord_num_cards_left[np.newaxis, :],
num_legal_actions, axis=0) num_legal_actions, axis=0)
@ -860,7 +860,7 @@ def _get_obs_landlord_down(infoset, use_legacy = False, compressed_form = False)
last_teammate_action[np.newaxis, :], last_teammate_action[np.newaxis, :],
num_legal_actions, axis=0) num_legal_actions, axis=0)
teammate_num_cards_left = _get_one_hot_array( teammate_num_cards_left = _get_one_hot_array(
infoset.num_cards_left_dict['landlord_up'], 25, 8) infoset.num_cards_left_dict['landlord_up'], 25, 8 if compressed_form else 0)
teammate_num_cards_left_batch = np.repeat( teammate_num_cards_left_batch = np.repeat(
teammate_num_cards_left[np.newaxis, :], teammate_num_cards_left[np.newaxis, :],
num_legal_actions, axis=0) num_legal_actions, axis=0)
@ -877,7 +877,7 @@ def _get_obs_landlord_down(infoset, use_legacy = False, compressed_form = False)
last_teammate_front_action[np.newaxis, :], last_teammate_front_action[np.newaxis, :],
num_legal_actions, axis=0) num_legal_actions, axis=0)
teammate_front_num_cards_left = _get_one_hot_array( teammate_front_num_cards_left = _get_one_hot_array(
infoset.num_cards_left_dict['landlord_front'], 25, 8) infoset.num_cards_left_dict['landlord_front'], 25, 8 if compressed_form else 0)
teammate_front_num_cards_left_batch = np.repeat( teammate_front_num_cards_left_batch = np.repeat(
teammate_front_num_cards_left[np.newaxis, :], teammate_front_num_cards_left[np.newaxis, :],
num_legal_actions, axis=0) num_legal_actions, axis=0)
@ -949,16 +949,16 @@ def _get_obs_general(infoset, position, compressed_form = False):
my_action_batch[j, :] = _cards2array(action, compressed_form) my_action_batch[j, :] = _cards2array(action, compressed_form)
landlord_num_cards_left = _get_one_hot_array( landlord_num_cards_left = _get_one_hot_array(
infoset.num_cards_left_dict['landlord'], 33, 15) infoset.num_cards_left_dict['landlord'], 33, 15 if compressed_form else 0)
landlord_up_num_cards_left = _get_one_hot_array( landlord_up_num_cards_left = _get_one_hot_array(
infoset.num_cards_left_dict['landlord_up'], 25, 8) infoset.num_cards_left_dict['landlord_up'], 25, 8 if compressed_form else 0)
landlord_front_num_cards_left = _get_one_hot_array( landlord_front_num_cards_left = _get_one_hot_array(
infoset.num_cards_left_dict['landlord_front'], 25, 8) infoset.num_cards_left_dict['landlord_front'], 25, 8 if compressed_form else 0)
landlord_down_num_cards_left = _get_one_hot_array( landlord_down_num_cards_left = _get_one_hot_array(
infoset.num_cards_left_dict['landlord_down'], 25, 8) infoset.num_cards_left_dict['landlord_down'], 25, 8 if compressed_form else 0)
landlord_played_cards = _cards2array( landlord_played_cards = _cards2array(
infoset.played_cards['landlord'], compressed_form) infoset.played_cards['landlord'], compressed_form)