Source code for robustx.lib.intabs.IntervalAbstractionPyTorch

from robustx.lib.models.pytorch_models.SimpleNNModel import SimpleNNModel


class IntervalAbstractionPytorch:
    """
    Converts a provided PyTorch neural network into a representation of an
    Interval Neural Network (INN): every weight and bias is replaced by the
    interval [value - delta, value + delta] around its current value.

    ...

    Attributes / Properties
    -------
    bias_intervals: Dict[str, [lower, upper]]
        The interval for each bias in the NN, keyed by
        'bias_into_l{layer_idx}_n{dest_idx}'. The input layer has a layer_idx
        of 0, so the first bias keys use layer_idx 1.

    weight_intervals: Dict[str, [lower, upper]]
        The interval for each weight in the NN, keyed by
        'weight_l{layer_idx}_n{src_idx}_to_l{layer_idx + 1}_n{dest_idx}',
        where the input layer has a layer_idx of 0.

    layers: list
        Stores the number of nodes in each layer (input, hidden..., output)

    model: SimpleNNModel
        The original model

    delta: float
        The perturbation to each weight in the Neural Network

    bias_delta: float
        The perturbation to each bias in the Neural Network

    -------

    Methods
    -------
    create_weights_and_bias_dictionary() -> (dict, dict):
        Builds the weight and bias interval dictionaries described above

    -------
    """

    def __init__(self, model: "SimpleNNModel", delta: float, bias_delta=None):
        """
        @param model: SimpleNNModel, the Neural network to create an INN of
        @param delta: float, perturbation to weights
        @param bias_delta: float, perturbation to bias, default is delta itself
        """
        # Unpacking (rather than list concatenation) accepts hidden_dim given
        # as any iterable, e.g. a tuple as well as a list.
        self.layers = [model.input_dim, *model.hidden_dim, model.output_dim]
        self.model = model
        self.delta = delta
        # Only None falls back to delta; an explicit 0 is a valid bias_delta.
        self.bias_delta = delta if bias_delta is None else bias_delta
        self.weight_intervals, self.bias_intervals = self.create_weights_and_bias_dictionary()

    def create_weights_and_bias_dictionary(self):
        """
        Generates the intervals for each weight and bias in the Neural Network

        The parameters of the torch model are looked up by the names
        '{2 * i}.weight' and '{2 * i}.bias' for i = 0, 1, ..., i.e. the code
        expects parameterised modules at the even positions of the torch
        model (presumably a Sequential alternating linear layers with
        activations - verify against SimpleNNModel).

        @return: (weight_dict, bias_dict), each mapping a key to a two-element
                 list [lower, upper]; weights are perturbed by self.delta and
                 biases by self.bias_delta
        """
        # Extract the weights and biases as numpy arrays for each layer.
        # .cpu() returns the tensor itself when it is already in CPU memory,
        # and is required before .numpy() when the parameters live on another
        # device (e.g. CUDA), where .numpy() alone raises a TypeError.
        params = {
            name: param.detach().cpu().numpy()
            for name, param in self.model.get_torch_model().named_parameters()
        }

        weight_dict = {}
        bias_dict = {}

        # Each layer contributes two parameter tensors (weight and bias)
        for layer_idx in range(len(params) // 2):
            weights = params[f'{layer_idx * 2}.weight']
            biases = params[f'{layer_idx * 2}.bias']

            # Rows of the weight matrix are indexed by destination node,
            # columns by source node.
            for dest_idx, weight_row in enumerate(weights):
                bias = biases[dest_idx]
                bias_key = f'bias_into_l{layer_idx + 1}_n{dest_idx}'
                bias_dict[bias_key] = [bias - self.bias_delta, bias + self.bias_delta]

                for src_idx, weight in enumerate(weight_row):
                    weight_key = f'weight_l{layer_idx}_n{src_idx}_to_l{layer_idx + 1}_n{dest_idx}'
                    weight_dict[weight_key] = [weight - self.delta, weight + self.delta]

        return weight_dict, bias_dict