# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import functools
import logging

from .. import trial

_logger = logging.getLogger(__name__)

_MUTABLE_LAYER_SPACE_PREFIX = "_mutable_layer"
_namespace = {}
_tf_variables = {}
_arch_logits_list = []
_optimizer = None
_train_op = None

def classic_mode(
        mutable_id,
        mutable_layer_id,
        funcs,
        funcs_args,
        fixed_inputs,
        optional_inputs,
        optional_input_size):
    '''Execute the chosen function and inputs directly.
    In this mode, the trial code only runs the chosen subgraph (i.e., the chosen ops
    and inputs), without building the full model graph.'''
    if trial.get_current_parameter() is None:
        trial.get_next_parameter()
    chosen_layer, chosen_inputs = _get_layer_and_inputs_from_tuner(mutable_id, mutable_layer_id,
                                                                   list(optional_inputs.keys()))
    real_chosen_inputs = [optional_inputs[input_name] for input_name in chosen_inputs]
    layer_out = funcs[chosen_layer]([fixed_inputs, real_chosen_inputs], **funcs_args[chosen_layer])
    return layer_out
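
# A minimal usage sketch (hypothetical names, not part of the NNI API): a mutable
# layer with two candidate ops and one optional input could be executed as
#
#     funcs = {'conv3x3': conv3x3_fn, 'maxpool': maxpool_fn}
#     funcs_args = {'conv3x3': {'filters': 16}, 'maxpool': {}}
#     out = classic_mode('block1', 'layer0', funcs, funcs_args,
#                        fixed_inputs=[prev_out],
#                        optional_inputs={'skip': skip_out},
#                        optional_input_size=1)
#
# Only the op and inputs chosen by the tuner are actually built and run.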

def enas_mode(
        mutable_id,
        mutable_layer_id,
        funcs,
        funcs_args,
        fixed_inputs,
        optional_inputs,
        optional_input_size,
        tf):
    '''In enas mode, we build the full model graph in the trial but only run a subgraph.
    This is implemented by masking inputs and branching ops.
    Specifically, based on the subgraph received through nni.get_next_parameter,
    we know which inputs should be masked and which op should be executed.'''
    name_prefix = "{}_{}".format(mutable_id, mutable_layer_id)
    # store namespace
    _namespace[mutable_id] = True
    _namespace[name_prefix] = dict()
    _namespace[name_prefix]['funcs'] = list(funcs)
    _namespace[name_prefix]['optional_inputs'] = list(optional_inputs)
    # create tensorflow variables as 1/0 signals used to form the subgraph
    name_for_optional_inputs = name_prefix + '_optional_inputs'
    name_for_funcs = name_prefix + '_funcs'
    _tf_variables[name_prefix] = dict()
    _tf_variables[name_prefix]['optional_inputs'] = tf.get_variable(
        name_for_optional_inputs,
        [len(optional_inputs)],
        dtype=tf.bool,
        trainable=False
    )
    _tf_variables[name_prefix]['funcs'] = tf.get_variable(
        name_for_funcs, [], dtype=tf.int64, trainable=False)
    # get real values using their variable names
    real_optional_inputs_value = [optional_inputs[name]
                                  for name in _namespace[name_prefix]['optional_inputs']]
    real_func_value = [funcs[name]
                       for name in _namespace[name_prefix]['funcs']]
    real_funcs_args = [funcs_args[name]
                       for name in _namespace[name_prefix]['funcs']]
    # build tensorflow graph of getting chosen inputs by masking
    real_chosen_inputs = tf.boolean_mask(
        real_optional_inputs_value, _tf_variables[name_prefix]['optional_inputs'])
    # build tensorflow graph of different branches by using tf.case
    branches = dict()
    func_output = None
    for func_id in range(len(funcs)):
        func_output = real_func_value[func_id]([fixed_inputs, real_chosen_inputs], **real_funcs_args[func_id])
        # bind func_output as a default argument so each branch captures its own
        # output instead of the loop's last value (late-binding pitfall)
        branches[tf.equal(_tf_variables[name_prefix]['funcs'], func_id)] = lambda out=func_output: out
    layer_out = tf.case(branches, exclusive=True, default=lambda: func_output)
    return layer_out
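
# Sketch of the selection mechanism (illustrative values): suppose this layer has
# three candidate ops and two optional inputs. After the tuner samples a subgraph,
# reload_tensorflow_variables() loads the signals, conceptually:
#
#     _tf_variables['block1_0']['funcs'].load(1, session)            # run the 2nd op
#     _tf_variables['block1_0']['optional_inputs'].load([1, 0], session)  # keep 1st input
#
# tf.boolean_mask then keeps only the flagged inputs, and tf.case executes only
# the branch whose index matches the 'funcs' signal.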

def oneshot_mode(
        mutable_id,
        mutable_layer_id,
        funcs,
        funcs_args,
        fixed_inputs,
        optional_inputs,
        optional_input_size,
        tf):
    '''Similar to enas mode, oneshot mode also builds the full model graph.
    The difference is that oneshot mode does not receive a subgraph.
    Instead, it uses dropout to randomly drop inputs and ops.'''
    # NNI requires calling get_next_parameter before reporting a result,
    # but the parameter is not used in this mode
    if trial.get_current_parameter() is None:
        trial.get_next_parameter()
    optional_inputs = list(optional_inputs.values())
    inputs_num = len(optional_inputs)
    # Calculate the dropout rate according to the formula r^(1/k),
    # where r is a hyper-parameter and k is the number of inputs
    if inputs_num > 0:
        rate = 0.01 ** (1 / inputs_num)
        noise_shape = [inputs_num] + [1] * len(optional_inputs[0].get_shape())
        optional_inputs = tf.nn.dropout(
            optional_inputs, rate=rate, noise_shape=noise_shape)
        optional_inputs = [optional_inputs[idx] for idx in range(inputs_num)]
    layer_outs = [func([fixed_inputs, optional_inputs], **funcs_args[func_name])
                  for func_name, func in funcs.items()]
    output_num = len(layer_outs)
    rate = 0.01 ** (1 / output_num)
    noise_shape = [output_num] + [1] * len(layer_outs[0].get_shape())
    layer_outs = tf.nn.dropout(layer_outs, rate=rate, noise_shape=noise_shape)
    layer_out = tf.reduce_sum(layer_outs, axis=0)
    return layer_out
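
# Worked example of the rate formula above, with r = 0.01: for k = 4 optional
# inputs, rate = 0.01 ** (1 / 4) ≈ 0.316, so each input is zeroed with probability
# ~0.316 on every forward pass (survivors are rescaled by tf.nn.dropout). Because
# noise_shape is [k, 1, ..., 1], each input tensor is kept or dropped as a whole
# rather than element-wise. The same formula is reused for the candidate ops'
# outputs before they are summed.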

def darts_mode(
        mutable_id,
        mutable_layer_id,
        funcs,
        funcs_args,
        fixed_inputs,
        optional_inputs,
        optional_input_size,
        tf):
    '''In darts mode, the layer output is a softmax-weighted sum of all candidate ops.
    The architecture logits are collected in _arch_logits_list and trained by darts_training.'''
    optional_inputs = list(optional_inputs.values())
    layer_outs = [func([fixed_inputs, optional_inputs], **funcs_args[func_name])
                  for func_name, func in funcs.items()]
    # create architecture weights for every func (op)
    var_name = "{}_{}_arch_weights".format(mutable_id, mutable_layer_id)
    arch_logits = tf.get_variable(var_name, shape=[len(funcs)], trainable=False)
    _arch_logits_list.append(arch_logits)
    arch_weights = tf.nn.softmax(arch_logits)
    layer_out = tf.add_n([arch_weights[idx] * out for idx, out in enumerate(layer_outs)])
    return layer_out
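
# The mixture computed above is the DARTS-style continuous relaxation:
#
#     layer_out = sum_i softmax(alpha)_i * op_i(x)
#
# where alpha are the per-op architecture logits stored in _arch_logits_list and
# updated by darts_training() below.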

def reload_tensorflow_variables(tf, session):
    '''In enas mode, this function reloads every signal variable created in `enas_mode`, so
    that the whole tensorflow graph is switched to the subgraph received from the tuner.
    ---------------
    session: the tensorflow session created by users
    tf: tensorflow module
    '''
    subgraph_from_tuner = trial.get_next_parameter()
    mutable_layers = set()
    for subgraph_key in subgraph_from_tuner:
        if "/" in subgraph_key:
            # strip the last component, which could be layer_choice or another suffix
            mutable_id, mutable_layer_id = _decompose_general_key(subgraph_key[:subgraph_key.rfind("/")])
            if mutable_id is not None:
                mutable_layers.add((mutable_id, mutable_layer_id))
    mutable_layers = sorted(list(mutable_layers))
    for mutable_id, mutable_layer_id in mutable_layers:
        if mutable_id not in _namespace:
            _logger.warning("%s not found in name space", mutable_id)
            continue
        name_prefix = "{}_{}".format(mutable_id, mutable_layer_id)
        # get the names of the optional inputs
        optional_inputs = _namespace[name_prefix]['optional_inputs']
        # extract layer information from the subgraph sampled by the tuner
        chosen_layer, chosen_inputs = _get_layer_and_inputs_from_tuner(mutable_id, mutable_layer_id, optional_inputs)
        chosen_layer = _namespace[name_prefix]['funcs'].index(chosen_layer)
        chosen_inputs = [1 if inp in chosen_inputs else 0 for inp in optional_inputs]
        # load this information into the pre-defined tensorflow variables
        _tf_variables[name_prefix]['funcs'].load(chosen_layer, session)
        _tf_variables[name_prefix]['optional_inputs'].load(
            chosen_inputs, session)

def _construct_general_key(mutable_id, mutable_layer_id):
    # Mutable layer key in a general (search space) format,
    # that is, prefix/mutable_id/mutable_layer_id
    return _MUTABLE_LAYER_SPACE_PREFIX + "/" + mutable_id + "/" + mutable_layer_id

def _decompose_general_key(key):
    # inverse operation of _construct_general_key
    if not key.startswith(_MUTABLE_LAYER_SPACE_PREFIX):
        return None, None
    else:
        _, mutable_id, mutable_layer_id = key.split("/", maxsplit=2)
        return mutable_id, mutable_layer_id
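
# Round-trip example (illustrative values):
#
#     key = _construct_general_key('block1', 'mutable_layer_0')
#     # key == '_mutable_layer/block1/mutable_layer_0'
#     _decompose_general_key(key)
#     # -> ('block1', 'mutable_layer_0')
#     _decompose_general_key('unrelated/key')
#     # -> (None, None)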

def darts_training(tf, session, loss, feed_dict):
    global _optimizer, _train_op
    if _optimizer is None:
        # momentum=0.9 is an assumed default; tf.train.MomentumOptimizer requires
        # a momentum argument
        _optimizer = tf.train.MomentumOptimizer(learning_rate=0.025, momentum=0.9)
        # TODO: Calculate loss
        grads_and_vars = _optimizer.compute_gradients(loss, _arch_logits_list)
        _train_op = _optimizer.apply_gradients(grads_and_vars)
    session.run(_train_op, feed_dict=feed_dict)

def training_update(nas_mode, tf=None, session=None, loss=None, feed_dict=None):
    if nas_mode == 'darts_mode':
        darts_training(tf, session, loss, feed_dict)
    elif nas_mode == 'enas_mode':
        reload_tensorflow_variables(tf, session)
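
# Usage sketch (an assumed trial-side call pattern, not prescribed by this module):
# invoke training_update once per mini-batch so the mode-specific bookkeeping runs:
#
#     for batch in batches:
#         training_update(nas_mode='enas_mode', tf=tf, session=sess)
#         # ... run the regular training step on this batch ...
#
# In darts mode, also pass loss and feed_dict so the architecture logits are updated.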

def _get_layer_and_inputs_from_tuner(mutable_id, mutable_layer_id, optional_inputs):
    # optional_inputs should be the names (keys) of the optional inputs
    try:
        mutable_block = trial.get_current_parameter(mutable_id)
        # There is a NAS tuner
        chosen_layer = mutable_block[mutable_layer_id]["chosen_layer"]
        chosen_inputs = mutable_block[mutable_layer_id]["chosen_inputs"]
    except KeyError:
        # Try to find converted NAS parameters
        params = trial.get_current_parameter()
        expected_prefix = _construct_general_key(mutable_id, mutable_layer_id)
        chosen_layer = params[expected_prefix + "/layer_choice"]
        # find how many inputs to choose
        optional_input_size = int(params[expected_prefix + "/optional_input_size"])  # convert uniform to randint
        # find which inputs to choose; duplicates are allowed
        optional_input_state = params[expected_prefix + "/optional_input_chosen_state"]
        chosen_inputs = []
        # make sure dict -> list produces a stable result by sorting
        optional_inputs_keys = sorted(optional_inputs)
        for _ in range(optional_input_size):
            chosen_inputs.append(optional_inputs_keys[optional_input_state % len(optional_inputs)])
            optional_input_state //= len(optional_inputs)
    _logger.info("%s_%s: layer: %s, optional inputs: %s", mutable_id, mutable_layer_id, chosen_layer, chosen_inputs)
    return chosen_layer, chosen_inputs
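
# Worked example of the state decoding above (illustrative values): with
# optional_inputs_keys = ['a', 'b', 'c'], optional_input_size = 2 and
# optional_input_chosen_state = 7, the state is read as base-3 digits:
#
#     7 % 3 == 1  -> 'b',   7 // 3 == 2
#     2 % 3 == 2  -> 'c',   2 // 3 == 0
#
# so chosen_inputs == ['b', 'c']. A state of 4 (digits 1, 1) would pick
# ['b', 'b'], which is why duplicates are possible.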

def convert_nas_search_space(search_space):
    """
    :param search_space: raw search space
    :return: the new search space; mutable_layers are converted into choices
    """
    if not isinstance(search_space, dict):
        return search_space
    ret = dict()
    for k, v in search_space.items():
        if "_type" not in v:
            # this should not happen
            _logger.warning("There is no _type in one of your search space values with key '%s'"
                            ". Please check your search space", k)
            ret[k] = v
        elif v["_type"] != "mutable_layer":
            ret[k] = v
        else:
            _logger.info("Converting mutable_layer search space with key '%s'", k)
            # v["_value"] looks like {'mutable_layer_1': {'layer_choice': ...} ...}
            values = v["_value"]
            for layer_name, layer_data in values.items():
                # there should be at most layer_choice, optional_inputs, optional_input_size in layer_data
                # add "_mutable_layer" as prefix so that they can be recovered later
                layer_key = _construct_general_key(k, layer_name)
                if layer_data.get("layer_choice"):  # filter out empty choice and no choice
                    layer_choice = layer_data["layer_choice"]
                else:
                    raise ValueError("No layer choice found in %s" % layer_key)
                if layer_data.get("optional_input_size"):
                    input_size = layer_data["optional_input_size"]
                    if isinstance(input_size, int):
                        input_size = [input_size, input_size]
                    if input_size[0] > input_size[1] or input_size[0] < 0:
                        _logger.error("Might not be able to handle optional_input_size < 0, please double check")
                    input_size[1] += 1  # randint upper bound is exclusive
                else:
                    _logger.info("Optional input choices are set to empty by default in %s", layer_key)
                    input_size = [0, 1]
                if layer_data.get("optional_inputs"):
                    total_state_size = len(layer_data["optional_inputs"]) ** (input_size[1] - 1)
                else:
                    _logger.info("Optional inputs not found in %s", layer_key)
                    total_state_size = 1
                converted = {
                    layer_key + "/layer_choice": {
                        "_type": "choice", "_value": layer_choice
                    },
                    layer_key + "/optional_input_size": {
                        "_type": "randint", "_value": input_size
                    },
                    layer_key + "/optional_input_chosen_state": {
                        "_type": "randint", "_value": [0, total_state_size]
                    }
                }
                _logger.info(converted)
                ret.update(converted)
    return ret
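
# Example conversion (illustrative values): a raw entry such as
#
#     {'block1': {'_type': 'mutable_layer', '_value': {
#         'mutable_layer_0': {
#             'layer_choice': ['conv3x3', 'maxpool'],
#             'optional_inputs': ['a', 'b'],
#             'optional_input_size': 1}}}}
#
# is flattened into three HPO parameters:
#
#     _mutable_layer/block1/mutable_layer_0/layer_choice                 choice over ops
#     _mutable_layer/block1/mutable_layer_0/optional_input_size          randint [1, 2]
#     _mutable_layer/block1/mutable_layer_0/optional_input_chosen_state  randint [0, 2]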

def rewrite_nas_space(func):
    @functools.wraps(func)
    def wrap(self, search_space):
        search_space = convert_nas_search_space(search_space)
        return func(self, search_space)
    return wrap
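
# Typical usage (hypothetical tuner class, not part of this module): decorate a
# tuner's update_search_space so mutable_layer entries are converted transparently:
#
#     class MyTuner(Tuner):
#         @rewrite_nas_space
#         def update_search_space(self, search_space):
#             self.searchspace_json = search_space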