Source code for robustx.generators.robust_CE_methods.RNCE

import numpy as np
import pandas as pd
from sklearn.neighbors import KDTree

from robustx.generators.CEGenerator import CEGenerator
from robustx.robustness_evaluations.DeltaRobustnessEvaluator import DeltaRobustnessEvaluator
from robustx.lib.tasks.Task import Task
from functools import lru_cache


class RNCE(CEGenerator):
    """
    A counterfactual explanation generator that finds robust nearest counterfactual examples using KDTree.

    Inherits from the CEGenerator class and implements the _generation_method to find counterfactual
    examples that are robust to perturbations. It leverages KDTree for nearest neighbor search and uses
    a robustness evaluator to identify robust instances in the training data.

    Attributes:
        intabs (DeltaRobustnessEvaluator): An evaluator for checking the robustness of instances to
            perturbations.
    """

    # Maximum number of candidate sets memoised per generator instance.
    # 128 mirrors the default ``maxsize`` of ``functools.lru_cache``.
    _CANDIDATE_CACHE_SIZE = 128

    def __init__(self, task: Task):
        """
        Initializes the RNCE CE generator with a given task and robustness evaluator.

        @param task: The task to solve, provided as a Task instance.
        """
        super().__init__(task)
        self.intabs = DeltaRobustnessEvaluator(task)
        # Per-instance memo for getCandidates, keyed by its argument tuple.
        # Keeping it on the instance (rather than in a class-level lru_cache)
        # lets the cached DataFrames be released together with the generator.
        self._candidate_cache = {}

    def _generation_method(
        self,
        x,
        robustInit=True,
        optimal=True,
        column_name="target",
        neg_value=0,
        delta=0.005,
        bias_delta=0.005,
        k=1,
        **kwargs,
    ):
        """
        Generates counterfactual explanations using nearest neighbor search.

        @param x: The instance for which to generate a counterfactual. Can be a DataFrame or Series.
        @param robustInit: If True, only robust instances are considered for counterfactual generation.
        @param optimal: Accepted for interface compatibility; not used by this method.
        @param column_name: The name of the target column.
        @param neg_value: The value considered negative in the target variable.
        @param delta: The tolerance for robustness in the feature space.
        @param bias_delta: The bias tolerance for robustness in the feature space.
        @param k: The number of counterfactuals to return. If fewer than k candidates exist,
                  all available candidates are returned instead.
        @param kwargs: Additional keyword arguments (ignored).
        @return: A DataFrame containing the counterfactual explanation(s). When no candidate
                 exists, the input instance itself is returned as ``pd.DataFrame(x).T``.
        """
        S = self.getCandidates(robustInit, delta, bias_delta, column_name=column_name, neg_value=neg_value)
        if S.empty:
            print("No instance in the dataset is robust for the given perturbations!")
            return pd.DataFrame(x).T

        treer = KDTree(S, leaf_size=40)
        x_df = pd.DataFrame(x).T

        # KDTree.query raises ValueError when asked for more neighbours than
        # there are indexed points, so never request more than S can supply.
        n_neighbours = min(k, len(S))

        # query() returns (distances, indices); only the indices are needed.
        idxs = np.array(treer.query(x_df, k=n_neighbours)[1]).flatten()
        if n_neighbours > 1:
            res = pd.DataFrame(S.iloc[idxs])
        else:
            # A single positional lookup yields a Series; transpose it back
            # into a one-row DataFrame.
            res = pd.DataFrame(S.iloc[idxs[0]]).T
        return res

    def getCandidates(self, robustInit, delta, bias_delta, column_name="target", neg_value=0):
        """
        Retrieves candidate instances from the dataset that are robust to perturbations.

        Results are memoised per generator instance, keyed by the argument values, so repeated
        calls with the same arguments do not re-scan the training data. The same DataFrame
        object is returned on a cache hit; callers should treat it as read-only.

        @param robustInit: If True, only robust instances are considered.
        @param delta: The tolerance for robustness in the feature space.
        @param bias_delta: The bias tolerance for robustness in the feature space.
        @param column_name: The name of the target column.
        @param neg_value: The value considered negative in the target variable.
        @return: A DataFrame containing robust instances from the dataset.
        """
        # Create the cache lazily so instances built without __init__
        # (e.g. via __new__ or an unusual subclass) still work.
        cache = getattr(self, "_candidate_cache", None)
        if cache is None:
            cache = self._candidate_cache = {}

        key = (robustInit, delta, bias_delta, column_name, neg_value)
        if key in cache:
            return cache[key]

        S = []
        for _, instance in self.task.training_data.data.iterrows():
            instance_x = instance.drop(column_name)
            if robustInit:
                keep = self.intabs.evaluate(
                    instance_x, delta=delta, bias_delta=bias_delta, desired_output=1 - neg_value
                )
            else:
                # NOTE(review): this branch keeps any instance with a truthy
                # prediction and does not consult neg_value — confirm this is
                # the intended behaviour when neg_value != 0.
                keep = self.task.model.predict_single(instance_x)
            if keep:
                S.append(instance_x)
        candidates = pd.DataFrame(S)

        # Keep the memo bounded: dicts preserve insertion order, so the first
        # key is the oldest entry.
        if len(cache) >= self._CANDIDATE_CACHE_SIZE:
            cache.pop(next(iter(cache)))
        cache[key] = candidates
        return candidates