Source code for pyccea.projection.cipls

import copy
import numpy as np
from sklearn.utils import check_array
from sklearn.base import BaseEstimator
from sklearn.preprocessing import normalize
from sklearn.utils.validation import FLOAT_DTYPES


class CIPLS(BaseEstimator):
    """Covariance-free Partial Least Squares (CIPLS).

    Jordao, Artur, et al. "Covariance-free partial least squares: An incremental
    dimensionality reduction method." Proceedings of the IEEE/CVF Winter Conference
    on Applications of Computer Vision (2021).
    Source: https://github.com/arturjordao/IncrementalDimensionalityReduction

    Attributes
    ----------
    n: int
        Number of iterations. It starts at 0 and incrementally goes up to the number
        of samples (n_samples).
    n_features: int
        Number of variables.
    x_weights_: np.ndarray (n_features, n_components)
        Projection matrix.
    x_scores_: np.ndarray (n_samples, n_components)
        The transformed training samples (latent components).
    x_loadings_: np.ndarray (n_features, n_components)
        The loadings of X.
    y_loadings_: np.ndarray (n_targets, n_components)
        The loadings of Y, where n_targets is the number of response variables.
    x_rotations_: np.ndarray (n_components, n_features)
        Transposed and non-normalized projection matrix.
    sum_x: np.ndarray (n_features,)
        The sum of each feature individually across all training samples.
    sum_y: np.ndarray (1,)
        The sum of targets across all training samples.
    """

    def __init__(self, n_components=10, copy=True):
        """
        Parameters
        ----------
        n_components: int or None, default 10
            Number of components to keep. If 'n_components' is None, its value is set
            to min(n_samples, n_features).
        copy: bool, default True
            If False, X will be overwritten. 'copy=False' can be used to save memory
            but is unsafe for general use.
        """
        self.__name__ = 'Covariance-free Partial Least Squares'
        self.n_components = n_components
        self.n = 0
        self.copy = copy
        self.sum_x = None
        self.sum_y = None
        self.n_features = None
        self.x_rotations_ = None
        self.x_loadings_ = None
        self.y_loadings_ = None
        self.x_scores_ = None
        self.x_weights_ = None
    def normalize(self, x):
        """Scale input vectors individually to unit norm (vector length)."""
        # The scikit-learn normalize function expects a two-dimensional array, so add
        # a new axis to the one-dimensional vector x, normalize along axis 0, and
        # return the result flattened back to one dimension.
        return normalize(x[:, np.newaxis], axis=0).ravel()
    def fit(self, X, Y):
        """Fit the model to data.

        Parameters
        ----------
        X: np.ndarray (n_samples, n_features)
            Training data.
        Y: np.ndarray (n_samples,) or (n_samples, n_targets)
            Target data.
        """
        X = check_array(X, dtype=FLOAT_DTYPES, copy=self.copy)
        Y = check_array(Y, dtype=FLOAT_DTYPES, copy=self.copy, ensure_2d=False)
        if Y.ndim == 1:
            Y = Y.reshape(-1, 1)
        # Map binary {0, 1} targets to {-1, +1}
        if np.unique(Y).shape[0] == 2:
            Y[np.where(Y == 0)[0]] = -1
        n_samples, n_features = X.shape
        if self.n == 0:
            self.x_rotations_ = np.zeros((self.n_components, n_features))
            self.x_loadings_ = np.zeros((n_features, self.n_components))
            self.y_loadings_ = np.zeros((Y.shape[1], self.n_components))
            self.n_features = n_features
        for j in range(0, n_samples):
            self.n = self.n + 1
            u = X[j]
            l = Y[j]
            if self.n == 1:
                self.sum_x = u
                self.sum_y = l
            else:
                # Compute the incremental mean
                old_mean = 1 / (self.n - 1) * self.sum_x
                self.sum_x = self.sum_x + u
                mean_x = 1 / self.n * self.sum_x
                u = u - mean_x
                delta_x = mean_x - old_mean
                # Deflation process
                self.x_rotations_[0] = self.x_rotations_[0] - delta_x * self.sum_y
                self.x_rotations_[0] = self.x_rotations_[0] + (u * l)
                self.sum_y = self.sum_y + l
                t = np.dot(u, self.normalize(self.x_rotations_[0].T))
                self.x_loadings_[:, 0] = self.x_loadings_[:, 0] + (u * t)
                self.y_loadings_[:, 0] = self.y_loadings_[:, 0] + (l * t)
                # Compute the remaining components of the c-dimensional space
                for c in range(1, self.n_components):
                    u -= np.dot(t, self.x_loadings_[:, c - 1])
                    l -= np.dot(t, self.y_loadings_[:, c - 1])
                    # Deflation process
                    self.x_rotations_[c] = self.x_rotations_[c] + (u * l)
                    self.x_loadings_[:, c] = self.x_loadings_[:, c] + (u * t)
                    self.y_loadings_[:, c] = self.y_loadings_[:, c] + (l * t)
                    t = np.dot(u, self.normalize(self.x_rotations_[c].T))
        # Apply the dimension reduction
        self.transform(X=X)
        return self
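    # Worked illustration of the incremental mean update in fit() above, with assumed
    # values, kept as comments so the listing stays valid Python: after two samples
    # with sum_x = [4, 6], the stored mean is old_mean = [2, 3]. A third sample
    # u = [1, 0] gives sum_x = [5, 6] and mean_x = [5/3, 2], so
    # delta_x = mean_x - old_mean = [-1/3, -1] is the correction applied to
    # x_rotations_[0] so earlier contributions stay centered on the new mean.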
    def transform(self, X, Y=None):
        """Apply the dimension reduction learned on the training data.

        Parameters
        ----------
        X: np.ndarray (n_samples, n_features)
            Data to be projected.
        Y: np.ndarray (n_samples,) or (n_samples, n_targets), default None
            Target data.
        """
        X = check_array(X, copy=self.copy, dtype=FLOAT_DTYPES)
        # Center the data using the incrementally accumulated mean
        mean = 1 / self.n * self.sum_x
        X -= mean
        # Scale each component of the projection matrix to unit norm
        w_rotation = np.zeros(self.x_rotations_.shape)
        for c in range(0, self.n_components):
            w_rotation[c] = self.normalize(self.x_rotations_[c])
        self.x_weights_ = w_rotation.T.copy()
        self.x_scores_ = np.dot(X, w_rotation.T)
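
A minimal usage sketch follows (illustrative, not part of the pyccea source; the synthetic data, chunk size, and variable names are assumptions). Because fit() keeps running sums keyed on the iteration counter n, calling it on successive chunks updates the same model incrementally, and transform() stores the projected samples in the x_scores_ attribute.

if __name__ == "__main__":
    # Illustrative usage sketch with assumed synthetic data (not part of the library).
    rng = np.random.RandomState(0)
    X = rng.rand(100, 20)                   # 100 samples, 20 features
    y = np.where(X[:, 0] > 0.5, 1.0, -1.0)  # binary target in {-1, +1}

    model = CIPLS(n_components=5)

    # fit() accumulates running sums keyed on self.n, so feeding the data in
    # chunks updates the same model incrementally (one pass over each sample).
    for start in range(0, X.shape[0], 25):
        model.fit(X[start:start + 25], y[start:start + 25])

    # transform() stores the projected samples in the x_scores_ attribute.
    model.transform(X)
    print(model.x_scores_.shape)  # (100, 5)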