Source code for robustx.evaluations.DistanceEvaluator

import numpy as np

from robustx.evaluations.CEEvaluator import CEEvaluator
from robustx.lib.distance_functions.DistanceFunctions import euclidean



[docs]
class DistanceEvaluator(CEEvaluator):
    """
    An Evaluator class which evaluates the average distance of counterfactuals from their original instances

        ...

    Attributes / Properties
    -------

    task: Task
        Stores the Task for which we are evaluating the distance of CEs; it supplies the negative
        instances when evaluate() is called without a subset

    -------

    Methods
    -------

    evaluate() -> float:
        Returns the average distance of each x' from x. The distance function and the value that
        marks a valid counterfactual are passed as arguments to evaluate() (distance_func, valid_val)

    -------
    """

    def evaluate(self, counterfactuals, valid_val=1, distance_func=euclidean, column_name="target", subset=None, **kwargs):
        """
        Determines the average distance of the CEs from their original instances
        @param counterfactuals: pd.DataFrame, dataset containing CEs in same order as negative instances in dataset
        @param valid_val: int, what the target value of a valid counterfactual is defined as, default 1;
                          the negative instances are looked up with the value 1 - valid_val
        @param distance_func: Function, function which takes in 2 single-row dataframes and returns a number
                              representing their distance, defaulted to euclidean
        @param column_name: name of target column, used when looking up the negative instances
        @param subset: optional DataFrame, contains the original instances the CEs were generated for;
                       when None the negative instances of the task are used instead
        @param kwargs: other arguments (unused here)
        @return: float, average distance of CEs from their original instances (the result of np.mean,
                 so NaN when there are no rows)
        @raise AssertionError: if the CEs and the original instances do not have the same shape
        """
        # 'predicted' and 'Loss' are stripped (and the rest cast to float32) only when BOTH are present.
        # NOTE(review): presumably bookkeeping columns added by a CE generator - confirm against callers.
        if 'predicted' in counterfactuals.columns and 'Loss' in counterfactuals.columns:
            counterfactuals = counterfactuals.drop(columns=['predicted', 'Loss']).astype(np.float32)

        if subset is None:
            originals = self.task.get_negative_instances(neg_value=1 - valid_val, column_name=column_name)
        else:
            originals = subset

        # Explicit raise instead of an `assert` statement: asserts are removed under `python -O`,
        # which would let mismatched frames through. AssertionError and its message are kept so
        # existing callers that catch it keep working.
        if counterfactuals.shape != originals.shape:
            raise AssertionError("DataFrames must have the same shape")

        # Rows are paired by position; each one is passed as a single-row DataFrame (slice, not
        # Series) because distance_func works on dataframes.
        distances = [
            distance_func(counterfactuals.iloc[i:i + 1], originals.iloc[i:i + 1])
            for i in range(len(counterfactuals))
        ]

        return np.mean(distances)