Source code for robustx.datasets.provided_datasets.IonosphereDatasetLoader

import pandas as pd
from sklearn.preprocessing import StandardScaler

from robustx.datasets.provided_datasets.ExampleDatasetLoader import ExampleDatasetLoader


[docs] class IonosphereDatasetLoader(ExampleDatasetLoader): """ A DataLoader class responsible for loading the Ionosphere dataset """ def __init__(self, seed=None): categoricals = [] numericals = [f"feature_{i}" for i in range(34)] super().__init__(categoricals, numericals, seed=seed)
[docs] def load_data(self): url = "https://archive.ics.uci.edu/ml/machine-learning-databases/ionosphere/ionosphere.data" column_names = self.numerical + ["target"] self._data = pd.read_csv(url, header=None, names=column_names)
[docs] def get_default_preprocessed_features(self): # We will map the target variable here for default preprocessing self.data['target'] = self.data['target'].map({'g': 1, 'b': 0}) features = self.X # Standardize the features scaler = StandardScaler() features_scaled = scaler.fit_transform(features) data_preprocessed = pd.DataFrame(features_scaled, columns=features.columns) # Add target column to standardized features return data_preprocessed
@property def X(self): return self.data.drop(columns=["target"]) @property def y(self) -> pd.Series: return self.data[["target"]]