Source code for robustx.datasets.DatasetLoader

from abc import ABC, abstractmethod

import pandas as pd


class DatasetLoader(ABC):
    """
    An abstract class used to outline the minimal functionality of a dataset loader

    ...

    Attributes / Properties
    ------------------------

    _data: pd.DataFrame
        Stores the whole dataset as a DataFrame; None until a value is assigned
        through the ``data`` setter

    _seed: Any
        Value forwarded to pandas as ``random_state`` when sampling an instance

    X: pd.DataFrame
        Feature columns as a DataFrame (abstract, provided by subclasses)

    y: pd.Series
        Target column as a Series (abstract, provided by subclasses)

    -------

    Methods
    -------

    get_negative_instances() -> pd.DataFrame:
        Filters all the negative instances in the dataset and returns them

    get_random_positive_instance() -> pd.DataFrame:
        Returns a random instance (as a one-row DataFrame) where the target
        variable is NOT the neg_value

    -------
    """

    def __init__(self, seed=None):
        """
        Creates a loader with no dataset attached yet

        @param seed: Passed to pandas as random_state when sampling; None means
                     pandas picks its own source of randomness
        """
        self._data = None
        self._seed = seed

    @property
    def data(self) -> pd.DataFrame:
        """
        Returns whole dataset as DataFrame

        @return: pd.DataFrame (None if no dataset has been assigned yet)
        """
        return self._data

    @data.setter
    def data(self, value):
        """
        Replaces the stored dataset

        @param value: The new dataset; stored as given, without copying
        """
        self._data = value

    @property
    @abstractmethod
    def X(self) -> pd.DataFrame:
        """
        Returns only feature variables as DataFrame

        @return: pd.DataFrame
        """
        pass

    @property
    @abstractmethod
    def y(self) -> pd.Series:
        """
        Returns only target variable as Series

        @return: pd.Series
        """
        pass

    def get_negative_instances(self, neg_value, column_name="target") -> pd.DataFrame:
        """
        Filters all the negative instances in the dataset and returns them

        @param neg_value: What target value counts as a "negative" instance
        @param column_name: Target column's name
        @return: All instances with a negative target value, with the target
                 column removed
        """
        is_negative = self.data[column_name] == neg_value
        return self.data[is_negative].drop(columns=[column_name])

    def get_random_positive_instance(self, neg_value, column_name="target") -> pd.DataFrame:
        """
        Returns a random instance where the target variable is NOT the neg_value

        The result is a one-row DataFrame (not a Series), because that is what
        DataFrame.sample() produces. The stored seed is handed to pandas on every
        call, so with an integer seed repeated calls return the same row.

        @param neg_value: What target value counts as a "negative" instance
        @param column_name: Target column's name
        @return: One-row DataFrame holding a random instance with a positive
                 target value, with the target column removed
        """
        is_positive = self.data[column_name] != neg_value
        positives = self.data[is_positive].drop(columns=[column_name])
        return positives.sample(random_state=self._seed)