Source code for robustx.lib.tasks.ClassificationTask

import pandas as pd
import numpy as np
from robustx.lib.tasks.Task import Task


class ClassificationTask(Task):
    """
    Task specialisation for classification problems.

    Adds helpers on top of the base Task class for picking a random
    training instance that the model classifies as positive, and for
    collecting every training instance the model classifies as negative.

    Attributes:
        model: The model queried for predictions.
        _training_data: The dataset the instances are drawn from.
    """

    def get_random_positive_instance(self, neg_value, column_name="target") -> pd.Series:
        """
        Draws random positive instances from the training data until one is
        found whose model prediction differs from the negative value.

        Note that the sampling loop has no upper bound: it only terminates
        once the model predicts something other than neg_value for a
        sampled instance.

        @param neg_value: The value considered negative in the target variable.
        @param column_name: The name of the target column used to identify
                            positive instances.
        @return: A Pandas Series representing a random positive instance.
        """
        while True:
            candidate = self._training_data.get_random_positive_instance(
                neg_value, column_name=column_name
            )
            predicted_negative = self.model.predict_single(candidate) == neg_value
            # Accept only instances that the model itself does not label negative.
            if not predicted_negative:
                return candidate

    def get_negative_instances(self, neg_value=0, column_name="target") -> pd.DataFrame:
        """
        Collects every training instance that the model predicts as negative.

        Predictions are thresholded at 0.5: when neg_value is 0, instances
        with a prediction below 0.5 are negative; for any other neg_value,
        instances with a prediction of at least 0.5 are negative.

        @param neg_value: What target value counts as a "negative" instance
        @param column_name: Target column's name, dropped from the result
        @return: A DataFrame of the feature columns of all instances predicted
                 negative, rebuilt from raw values (so it carries a fresh
                 default index rather than the original one)
        """
        # Assumes predict() returns an object exposing `.values`
        # (e.g. a DataFrame) -- TODO confirm against the model wrapper.
        predictions = self.model.predict(self.training_data.X).values.flatten()

        if neg_value == 0:
            is_negative = predictions < 0.5
        else:
            is_negative = predictions >= 0.5
        row_positions = np.where(is_negative)[0]

        features = self.training_data.data.drop(columns=[column_name])
        return pd.DataFrame(features.values[row_positions], columns=features.columns)