Source code for pyccea.projection.vip

import numpy as np



[docs]
class VIP:
    """Variable Importance in Projection (VIP).

    Scores each input feature by how much it contributes to the latent space of a fitted
    Partial Least Squares model, weighting every component by the sum of squares it explains.

    Mehmood, Tahir, et al. "A review of variable selection methods in partial least squares
    regression." Chemometrics and intelligent laboratory systems 118 (2012): 62-69.
    Source: https://github.com/scikit-learn/scikit-learn/issues/7050

    Attributes
    ----------
    n_features : int
        Number of variables.
    n_components : int
        Number of components.
    x_rotations_ : np.ndarray (n_features, n_components)
        Projection matrix used to transform X.
    x_scores_ : np.ndarray (n_samples, n_components)
        The transformed training samples (latent components).
    y_loadings_ : np.ndarray (n_targets, n_components)
        The loadings of Y.
    importances : np.ndarray (n_features,)
        Importance of each feature based on its contribution to yield the latent space.
        Only available after calling :meth:`compute`. Entries that could not be computed
        (negative squared importance or NaN) are set to -999.
    """

    def __init__(self, model):
        """Copy the fitted projection data out of ``model``.

        Parameters
        ----------
        model : sklearn model object
            Partial Least Squares regression model. It can be the traditional version (PLS) or the
            Covariance-free version (CIPLS). It must expose the fitted attributes
            ``x_rotations_``, ``x_scores_`` and ``y_loadings_``.
        """
        # Copies are stored so later changes to the model do not affect this object.
        # Projection matrix
        self.x_rotations_ = model.x_rotations_.copy()
        # Latent components
        self.x_scores_ = model.x_scores_.copy()
        # Loadings of Y
        self.y_loadings_ = model.y_loadings_.copy()
        # Number of features and number of components, respectively
        self.n_features, self.n_components = self.x_rotations_.shape

    def compute(self) -> None:
        """Calculate feature importances and store them in ``self.importances``.

        The squared importance of a feature is ``n_features`` times the weighted sum of its
        squared, column-normalized rotation entries, the weight of each component being its
        share of the total sum of squares. Features whose squared importance is negative or
        NaN receive the sentinel value -999.
        """
        # Sum of squares explained by each component (n_components,)
        sum_of_squares = np.diag(
            self.x_scores_.T @ self.x_scores_ @ self.y_loadings_.T @ self.y_loadings_
        )
        # Column vector (n_components, 1) so it can be right-multiplied by the weights
        sum_of_squares = sum_of_squares.reshape(self.n_components, -1)
        # Total sum of squares over all components
        total_sum_of_squares = np.sum(sum_of_squares)

        # Degenerate models (a zero-norm rotation column or a zero total sum of squares)
        # produce NaN/inf below. Those values are mapped to the sentinel afterwards, so the
        # floating-point warnings they would raise carry no extra information.
        with np.errstate(divide="ignore", invalid="ignore"):
            # Euclidean norm of each projection column (n_components,)
            weight_norm = np.linalg.norm(self.x_rotations_, axis=0)
            # Squared, column-normalized weights (n_features, n_components)
            weights = (self.x_rotations_ / weight_norm[np.newaxis, :]) ** 2
            # Squared Variable Importances in Projection (n_features,)
            squared_importances = (
                self.n_features * (weights @ sum_of_squares).ravel() / total_sum_of_squares
            )
            # Negative values have no real square root; flag them as NaN before the sqrt
            squared_importances[squared_importances < 0] = np.nan
            importances = np.sqrt(squared_importances)

        # Replace every undefined importance with the sentinel value
        importances[np.isnan(importances)] = -999
        self.importances = importances