Source code for pyccea.evaluation.wrapper

import copy
import logging
import warnings
import numpy as np
from ..utils.datasets import DataLoader
from ..utils.models import ClassificationModel, RegressionModel
from ..utils.metrics import ClassificationMetrics, RegressionMetrics

warnings.filterwarnings(action="ignore", category=UserWarning, message="y_pred contains classes")


class WrapperEvaluation():
    """Evaluate selected features based on the predictive performance of a machine learning model.

    Attributes
    ----------
    model_evaluator : object of one of the metrics classes
        Responsible for computing performance metrics to evaluate models.
    base_model : sklearn model object
        Model that has not been fitted. Works as a template to avoid multiple model
        initializations. As each model evaluates a subset of features (individual), the base
        model is copied and fitted for each individual.
    model : sklearn model object
        Model that has been fitted to evaluate the current individual.
    estimators : list of sklearn model objects
        Estimators used in the current evaluation. It is one when 'eval_mode' is set to
        "hold_out" and k when 'eval_mode' is set to "k_fold" or "leave_one_out".
    """

    models = {"classification": ClassificationModel, "regression": RegressionModel}
    metrics = {"classification": ClassificationMetrics, "regression": RegressionMetrics}
    eval_modes = ["hold_out", "k_fold", "leave_one_out"]

    def __init__(
        self,
        task: str,
        model_type: str,
        eval_function: str,
        eval_mode: str,
        n_classes: int = None
    ):
        """
        Parameters
        ----------
        task : str
            Name of the supervised learning task (e.g., regression, classification).
        model_type : str
            Name of the machine learning model that will be fitted for the task.
        eval_function : str
            Metric that will be used to evaluate the performance of the model trained with the
            selected subset of features (it makes up the fitness of the individual).
        eval_mode : str
            Evaluation mode. It can be 'hold_out', 'leave_one_out', or 'k_fold'.
        n_classes : int, default None
            Number of classes when the task parameter is set to 'classification'.
        """
        # Check if the chosen task is available
        if task not in WrapperEvaluation.metrics.keys():
            raise NotImplementedError(
                f"Task '{task}' is not implemented. "
                f"The available tasks are {', '.join(WrapperEvaluation.metrics.keys())}."
            )
        # Initialize the model evaluator according to the task
        task_kwargs = {
            "classification": {"n_classes": n_classes},
            "regression": {},
        }
        self.model_evaluator = WrapperEvaluation.metrics[task](**task_kwargs[task])
        self.task = task
        # Check if the chosen evaluation function is available
        if eval_function not in self.model_evaluator.metrics:
            raise NotImplementedError(
                f"Evaluation function '{eval_function}' is not implemented. "
                f"The available {task} metrics are "
                f"{', '.join(self.model_evaluator.metrics)}."
            )
        self.eval_function = eval_function
        # Initialize the model present in the wrapper model_evaluator
        self.base_model = WrapperEvaluation.models[task](model_type=model_type)
        self.model_type = model_type
        # Check if the chosen evaluation mode is available
        if eval_mode not in WrapperEvaluation.eval_modes:
            raise NotImplementedError(
                f"Evaluation mode '{eval_mode}' is not implemented. "
                f"The available evaluation modes are {', '.join(WrapperEvaluation.eval_modes)}."
            )
        self.eval_mode = eval_mode
        # Initialize logger with info level
        logging.basicConfig(encoding="utf-8", level=logging.INFO)

    def _hold_out_validation(self, solution_mask: np.ndarray, data: DataLoader) -> None:
        """Evaluate an individual using hold-out validation (train/test)."""
        # Get a model that has not been previously fitted
        self.model = copy.deepcopy(self.base_model)
        # Select the subset of features in the training set
        X_train = data.X_train[:, solution_mask].copy()
        y_train = data.y_train.copy()
        # Select the subset of features in the test set
        X_test = data.X_test[:, solution_mask].copy()
        y_test = data.y_test.copy()
        # Train the model with the current subset of features
        self.model.train(X_train=X_train, y_train=y_train, optimize=False, verbose=False)
        self.estimators.append(copy.deepcopy(self.model.estimator))
        # Evaluate the individual
        self.model_evaluator.compute(
            estimator=self.model.estimator,
            X_test=X_test,
            y_test=y_test,
            verbose=False
        )
        # Get the evaluation on the test set
        self.evaluations = self.model_evaluator.values

    def _cross_validation(self, solution_mask: np.ndarray, data: DataLoader) -> None:
        """Evaluate an individual using cross-validation (leave-one-out or k-fold)."""
        for k in range(data.kfolds):
            # Get the training and validation subsets built from the full training set
            X_train, y_train = data.train_folds[k]
            X_val, y_val = data.val_folds[k]
            # Select the subset of features in the training subset
            X_train = X_train[:, solution_mask].copy()
            # Select the subset of features in the validation subset
            X_val = X_val[:, solution_mask].copy()
            # Get a model that has not been previously fitted
            self.model = copy.deepcopy(self.base_model)
            # Train the model with the current subset of features
            self.model.train(X_train=X_train, y_train=y_train, optimize=False, verbose=False)
            self.estimators.append(copy.deepcopy(self.model.estimator))
            # Evaluate the individual
            self.model_evaluator.compute(
                estimator=self.model.estimator,
                X_test=X_val,
                y_test=y_val,
                verbose=False
            )
            # Accumulate the fold results
            for metric in self.evaluations.keys():
                self.evaluations[metric] += self.model_evaluator.values[metric]
            del self.model
        # Calculate the average performance over the k folds
        for metric in self.evaluations.keys():
            self.evaluations[metric] = round(self.evaluations[metric] / data.kfolds, 4)
    def evaluate(self, solution: np.ndarray, data: DataLoader) -> dict:
        """
        Evaluate an individual, represented by a complete solution, through the predictive
        performance of a machine learning model.

        Parameters
        ----------
        solution : np.ndarray
            Solution represented by a binary n-dimensional array, where n is the number of
            features.
        data : DataLoader
            Container with the processed data and the training and test sets.

        Returns
        -------
        : dict
            Evaluation metrics.
        """
        # Estimator(s) used for the current evaluation
        self.estimators = list()
        # If no feature is selected, return zero for every metric
        self.evaluations = {metric: 0 for metric in self.model_evaluator.metrics}
        if solution.sum() == 0:
            return self.evaluations
        # Boolean array used to filter which features will be used to fit the model
        solution_mask = solution.astype(bool).copy()
        # Hold-out validation
        if self.eval_mode == "hold_out":
            self._hold_out_validation(
                solution_mask=solution_mask,
                data=data,
            )
        # K-fold cross-validation or leave-one-out cross-validation
        elif self.eval_mode in ["k_fold", "leave_one_out"]:
            self._cross_validation(
                solution_mask=solution_mask,
                data=data,
            )

        return self.evaluations
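
The snippet below is a minimal usage sketch and is not part of the module above. It assumes that "random_forest" is a valid model_type for ClassificationModel and that "accuracy" is among the metrics exposed by ClassificationMetrics; a plain namespace object stands in for a DataLoader, since only the X_train, y_train, X_test, and y_test attributes are read on the "hold_out" path.

# Hypothetical usage sketch; the model identifier, metric name, and the data
# stand-in are assumptions and must match the options supported by the package.
import numpy as np
from types import SimpleNamespace
from pyccea.evaluation.wrapper import WrapperEvaluation

# Stand-in for a DataLoader: the "hold_out" path only reads these four attributes.
rng = np.random.default_rng(seed=0)
X = rng.normal(size=(100, 5))
y = rng.integers(low=0, high=2, size=100)
data = SimpleNamespace(X_train=X[:80], y_train=y[:80], X_test=X[80:], y_test=y[80:])

evaluator = WrapperEvaluation(
    task="classification",
    model_type="random_forest",  # assumed identifier; must be supported by ClassificationModel
    eval_function="accuracy",    # assumed metric name; must be in model_evaluator.metrics
    eval_mode="hold_out",
    n_classes=2,
)

# Binary mask over the 5 features: 1 keeps a feature, 0 drops it.
solution = np.array([1, 0, 1, 1, 0])
scores = evaluator.evaluate(solution=solution, data=data)
print(scores)                           # dict mapping metric name to value
print(scores[evaluator.eval_function])  # value used as the individual's fitness

For the "k_fold" or "leave_one_out" modes, the data object would additionally need the kfolds, train_folds, and val_folds attributes read by _cross_validation.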