import numpy as np
class VIP:
    """Variable Importance in Projection (VIP).

    Mehmood, Tahir, et al. "A review of variable selection methods in partial least squares
    regression." Chemometrics and intelligent laboratory systems 118 (2012): 62-69.

    Source: https://github.com/scikit-learn/scikit-learn/issues/7050

    Attributes
    ----------
    n_features : int
        Number of variables.
    n_components : int
        Number of components.
    x_rotations_ : np.ndarray (n_features, n_components)
        Projection matrix used to transform X.
    x_scores_ : np.ndarray (n_samples, n_components)
        The transformed training samples (latent components).
    y_loadings_ : np.ndarray (n_targets, n_components)
        The loadings of Y.
    importances : np.ndarray (n_features,)
        Importance of each feature based on its contribution to yield the latent space.
        Only set once :meth:`compute` has been called. Entries whose squared importance
        is negative or NaN are stored as -999.
    """

    # Sentinel written into ``importances`` where the VIP score is undefined
    # (negative or NaN squared importance).
    _INVALID_IMPORTANCE = -999

    def __init__(self, model):
        """
        Parameters
        ----------
        model : sklearn model object
            Partial Least Squares regression model. It can be the traditional version (PLS)
            or the Covariance-free version (CIPLS). It must expose the fitted attributes
            ``x_rotations_``, ``x_scores_`` and ``y_loadings_``.
        """
        # Copies are taken so later changes to the model do not affect this object.
        # Projection matrix
        self.x_rotations_ = model.x_rotations_.copy()
        # Latent components
        self.x_scores_ = model.x_scores_.copy()
        # Loadings of Y
        self.y_loadings_ = model.y_loadings_.copy()
        # Number of features and number of components, respectively
        self.n_features, self.n_components = self.x_rotations_.shape

    def compute(self):
        """Calculate feature importances.

        The result is stored in ``self.importances`` and also returned.

        Returns
        -------
        np.ndarray (n_features,)
            VIP score of each feature. Features whose squared importance is negative
            or NaN receive the sentinel value -999.
        """
        # Sum of squares explained by each component (n_components,)
        sum_of_squares = np.diag(
            self.x_scores_.T @ self.x_scores_ @ self.y_loadings_.T @ self.y_loadings_
        )
        # Total sum of squares over all components
        total_sum_of_squares = np.sum(sum_of_squares)

        # Norm of each column of the projection matrix (n_components,)
        weight_norm = np.linalg.norm(self.x_rotations_, axis=0)
        # An all-zero column has zero norm; dividing by it would yield NaN, which the
        # matrix product below would then spread to *every* feature. Dividing by 1
        # instead keeps that column at zero so it simply contributes nothing.
        safe_norm = np.where(weight_norm == 0, 1.0, weight_norm)
        # Squared normalized weights (n_features, n_components)
        weights = (self.x_rotations_ / safe_norm) ** 2

        # Squared Variable Importances in Projection (n_features,)
        squared_importances = (
            self.n_features * (weights @ sum_of_squares) / total_sum_of_squares
        )

        # Negative values have no real square root; mark them as NaN beforehand to avoid
        # "RuntimeWarning: invalid value encountered in sqrt".
        squared_importances[squared_importances < 0] = np.nan
        importances = np.sqrt(squared_importances)
        # Replace every undefined score with the sentinel.
        importances[np.isnan(importances)] = self._INVALID_IMPORTANCE

        self.importances = importances
        return self.importances