import numpy as np
from robustx.evaluations.CEEvaluator import CEEvaluator
from robustx.lib.distance_functions.DistanceFunctions import euclidean
# [docs]  <- link residue from the rendered Sphinx "view source" page; not part of the code
class DistanceEvaluator(CEEvaluator):
    """
    An Evaluator class which evaluates the average distance of counterfactuals from their original instances

        ...

    Attributes / Properties
    -------

    task: Task
        Stores the Task for which we are evaluating the distance of CEs; read here only through
        task.get_negative_instances(...)

    Note: distance_func and valid_val are arguments of evaluate(); this class does not set them as attributes.

    distance_func: Function
        A function which takes in 2 single-row dataframes and returns a number representing distance,
        defaulted to euclidean

    valid_val: int
        What the target value of a valid counterfactual is defined as

    -------

    Methods
    -------

    evaluate() -> float:
        Returns the average distance of each x' from x

    -------
    """

    def evaluate(self, counterfactuals, valid_val=1, distance_func=euclidean, column_name="target", subset=None, **kwargs):
        """
        Determines the average distance of the CEs from their original instances

        Rows are paired by position: the i-th row of the counterfactuals is compared with the i-th row of the
        original instances.

        @param counterfactuals: pd.DataFrame, dataset containing CEs in same order as negative instances in dataset
        @param valid_val: int, what the target value of a valid counterfactual is defined as, default 1;
                          the original instances are requested with neg_value = 1 - valid_val
        @param distance_func: Function, function which takes in 2 single-row dataframes and returns a number
                              representing distance, defaulted to euclidean
        @param column_name: name of target column
        @param subset: optional DataFrame, contains instances to generate CEs on; when given it is used as the
                       original instances instead of the task's negative instances
        @param kwargs: other arguments (not used by this method)
        @return: float, average distance of CEs from their original instances, as computed by np.mean
                 (NaN, with a NumPy RuntimeWarning, when there are no rows)
        @raise AssertionError: if the counterfactuals and the original instances do not have the same shape
        """
        # The bookkeeping columns are stripped (and the frame cast to float32) only when BOTH are present.
        # NOTE(review): a frame carrying just one of 'predicted' / 'Loss' is left untouched - confirm intended.
        if 'predicted' in counterfactuals.columns and 'Loss' in counterfactuals.columns:
            counterfactuals = counterfactuals.drop(columns=['predicted', 'Loss']).astype(np.float32)

        if subset is None:
            originals = self.task.get_negative_instances(neg_value=1 - valid_val, column_name=column_name)
        else:
            originals = subset

        # Raised explicitly instead of via `assert` so the check survives `python -O`; the exception type
        # and message are the same as before.
        if counterfactuals.shape != originals.shape:
            raise AssertionError("DataFrames must have the same shape")

        # iloc[pos:pos + 1] keeps each row as a one-row DataFrame, which is what distance_func is handed.
        distances = [
            distance_func(counterfactuals.iloc[pos:pos + 1], originals.iloc[pos:pos + 1])
            for pos in range(len(counterfactuals))
        ]

        return np.mean(distances)