Source code for robustx.lib.models.pytorch_models.CustomPyTorchModel

import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

from robustx.lib.models.BaseModel import BaseModel


class CustomPyTorchModel(BaseModel):
    """
    A custom PyTorch model that can be trained, used for predictions, and
    evaluated for performance.

    Attributes
    ----------
    _model: nn.Module
        The PyTorch model architecture. It is handed to ``BaseModel.__init__``
        and accessed in this class as ``self._model``
        (NOTE(review): the attribute is presumably set by BaseModel - confirm).
    criterion: nn.Module
        The loss function used for training (``nn.CrossEntropyLoss`` by default).
    optimizer: torch.optim.Optimizer
        The optimizer used for training the model (``optim.Adam`` by default).

    Methods
    -------
    train(X: pd.DataFrame, y: pd.DataFrame, epochs: int = 10, batch_size: int = 32) -> None:
        Trains the model on the provided data for a specified number of epochs and batch size.
    predict(X: pd.DataFrame) -> pd.DataFrame:
        Predicts outcomes for multiple instances of the provided feature data.
    predict_single(X: pd.DataFrame) -> int:
        Predicts the outcome for a single instance of the provided feature data.
    predict_proba(X: pd.DataFrame) -> pd.DataFrame:
        Predicts the probability of each outcome for multiple instances.
    evaluate(X: pd.DataFrame, y: pd.DataFrame) -> float:
        Evaluates the model's accuracy on the provided feature and target data.
    """

    def __init__(self, model, criterion=nn.CrossEntropyLoss(), optimizer_class=optim.Adam, learning_rate=0.001):
        """
        Initializes the CustomPyTorchModel with a specified model, loss function, and optimizer.

        @param model: The PyTorch model architecture.
        @param criterion: The loss function, default is CrossEntropyLoss.
        @param optimizer_class: The optimizer class, default is Adam.
        @param learning_rate: The learning rate for the optimizer, default is 0.001.
        """
        super().__init__(model)
        self.criterion = criterion
        # The optimizer is bound to the parameters of the wrapped model.
        self.optimizer = optimizer_class(self._model.parameters(), lr=learning_rate)

    def _build_targets(self, y) -> torch.Tensor:
        """
        Convert the training targets into the tensor layout the criterion expects.

        CrossEntropyLoss / NLLLoss take class indices as a 1-D integer tensor, so a
        single label column is flattened and cast to ``long`` for them. Every other
        case (multi-column targets such as class probabilities, or any other
        criterion) keeps the ``float32`` conversion.

        @param y: Target variable as a pandas DataFrame (or Series).
        @return: Target tensor ready to be passed to ``self.criterion``.
        """
        y_values = y.values
        single_column = y_values.ndim == 1 or y_values.shape[1] == 1
        if single_column and isinstance(self.criterion, (nn.CrossEntropyLoss, nn.NLLLoss)):
            return torch.tensor(y_values, dtype=torch.long).reshape(-1)
        return torch.tensor(y_values, dtype=torch.float32)

    def _forward(self, X, single_instance: bool = False) -> torch.Tensor:
        """
        Run the model in evaluation mode, without gradient tracking, on X.

        @param X: Input data as a pandas DataFrame.
        @param single_instance: When True, a 1-D input (e.g. one row passed as a
                                Series) is promoted to a batch of size one.
        @return: Raw model outputs as a tensor.
        """
        self._model.eval()
        features = torch.tensor(X.values, dtype=torch.float32)
        if single_instance and features.dim() == 1:
            features = features.unsqueeze(0)
        with torch.no_grad():
            return self._model(features)

    def train(self, X: pd.DataFrame, y: pd.DataFrame, epochs=10, batch_size=32, **kwargs):
        """
        Train the PyTorch model using the provided data.

        The average batch loss is printed after every epoch.

        @param X: Feature variables as a pandas DataFrame.
        @param y: Target variable as a pandas DataFrame.
        @param epochs: Number of training epochs, default is 10.
        @param batch_size: Size of each mini-batch, default is 32.
        @param kwargs: Accepted for interface compatibility; not used here.
        """
        # Convert pandas data to torch tensors
        X_tensor = torch.tensor(X.values, dtype=torch.float32)
        y_tensor = self._build_targets(y)

        dataset = TensorDataset(X_tensor, y_tensor)
        loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

        self._model.train()
        for epoch in range(epochs):
            running_loss = 0.0
            for X_batch, y_batch in loader:
                self.optimizer.zero_grad()
                outputs = self._model(X_batch)
                loss = self.criterion(outputs, y_batch)
                loss.backward()
                self.optimizer.step()
                running_loss += loss.item()

            print(f"Epoch {epoch + 1}/{epochs}, Loss: {running_loss / len(loader)}")

    def predict(self, X: pd.DataFrame) -> pd.DataFrame:
        """
        Predict outcomes for the given features X (multiple instances).

        The predicted class of each row is the index of its largest model output.

        @param X: Input data as a pandas DataFrame.
        @return: Predictions as a pandas DataFrame with a single 'Prediction' column.
        """
        outputs = self._forward(X)
        _, predicted = torch.max(outputs, 1)
        return pd.DataFrame(predicted.numpy(), columns=['Prediction'])

    def predict_single(self, X: pd.DataFrame) -> int:
        """
        Predict the outcome for a single instance.

        @param X: Input data for a single instance as a pandas DataFrame
                  (a 1-D input such as a single-row Series is also accepted).
        @return: Predicted class as an integer.
        """
        outputs = self._forward(X, single_instance=True)
        _, predicted = torch.max(outputs, 1)
        # .item() requires exactly one prediction, i.e. exactly one input row.
        return int(predicted.item())

    def predict_proba(self, X: pd.DataFrame) -> pd.DataFrame:
        """
        Predict the probability of outcomes for multiple instances.

        Probabilities are obtained by applying softmax over the model outputs
        of each row.

        @param X: Input data as a pandas DataFrame.
        @return: Probabilities as a pandas DataFrame.
        """
        outputs = self._forward(X)
        probabilities = torch.nn.functional.softmax(outputs, dim=1)
        return pd.DataFrame(probabilities.numpy())

    def evaluate(self, X: pd.DataFrame, y: pd.DataFrame) -> float:
        """
        Evaluate the model's performance on a test set (X, y).

        @param X: Feature variables as a pandas DataFrame.
        @param y: Target variable as a pandas DataFrame holding one class label per row.
        @return: Accuracy of the model as a float.
        """
        # Flatten the labels: a one-column DataFrame yields an (n, 1) tensor, and
        # comparing that with the (n,) predictions would broadcast to (n, n) and
        # miscount the number of correct predictions.
        y_tensor = torch.tensor(y.values, dtype=torch.long).reshape(-1)

        outputs = self._forward(X)
        _, predicted = torch.max(outputs, 1)

        correct = (predicted == y_tensor).sum().item()
        total = y_tensor.size(0)
        accuracy = correct / total
        return accuracy