Source code for pyccea.decomposition.ranking

import logging
import numpy as np
from ..decomposition.grouping import FeatureGrouping


class RankingFeatureGrouping(FeatureGrouping):
    """
    Decompose the problem (a collection of features) according to a score-based method.

    The features are ranked by their scores and the resulting order defines how
    the columns of the data are rearranged before being split into subcomponents.

    Two grouping methods are available:

    - ``"elitist"``: the columns are arranged exactly in ranking order.
    - ``"distributed"``: the ranked features are dealt to the groups in a
      round-robin fashion, so the top-ranked features are spread evenly among them.

    Attributes
    ----------
    methods: list
        Names of the available grouping methods.
    scores: np.ndarray
        Copy of the scores given at construction time.
    method: str
        Grouping method in use.
    ascending: bool
        Whether the features are ranked by ascending score.
    """

    methods = ["distributed", "elitist"]

    def __init__(
        self,
        n_subcomps: int = None,
        subcomp_sizes: list = None,
        scores: np.ndarray = np.empty(0),
        method: str = None,
        ascending: bool = True,
    ):
        """
        Parameters
        ----------
        n_subcomps: int
            Number of subcomponents, where each subcomponent is a subset of features.
        subcomp_sizes: list, default None
            Number of features in each subcomponent. None is treated as an empty list.
        scores: np.ndarray
            Scores relative to the features and that allows sorting them by priority.
            Expected to be one-dimensional, with one score per feature.
        method: str
            Grouping method used to decompose the problem according to scores.
            Must be one of ``RankingFeatureGrouping.methods``.
        ascending: bool, default True
            If True, sort in ascending order. Otherwise, sort in descending order.

        Raises
        ------
        AssertionError
            If ``method`` is not one of the available methods.
        """
        # Build a fresh list on every call so that a single default object is never
        # shared between instances (the parent constructor is free to keep or
        # mutate whatever it receives).
        if subcomp_sizes is None:
            subcomp_sizes = []
        super().__init__(n_subcomps, subcomp_sizes)
        # Check if the chosen method is available
        if method not in RankingFeatureGrouping.methods:
            raise AssertionError(
                f"Method {method} was not found. "
                f"The available methods are {', '.join(RankingFeatureGrouping.methods)}."
            )
        # The scores are copied, so neither the caller's array nor the shared
        # default array is ever aliased by the instance.
        self.scores = scores.copy()
        self.method = method
        self.ascending = ascending

    def decompose(self, X: np.ndarray, feature_idxs: np.ndarray = None):
        """
        Divide an n-dimensional problem into m subproblems.

        Parameters
        ----------
        X: np.ndarray
            n-dimensional input data. Its columns are indexed by ``feature_idxs``.
        feature_idxs: np.ndarray, default None
            Indexes of features sorted according to the score. It is passed as a parameter if
            it has been previously calculated. When None, it is computed from the scores.

        Returns
        -------
        subcomponents: list
            Subcomponents, where each subcomponent is an array that can be accessed by
            indexing the list.
        feature_idxs: np.ndarray
            Indexes of features sorted according to the score (the given ones, when
            they were passed as a parameter).

        Raises
        ------
        ValueError
            If the "distributed" method is used and the number of subcomponents
            cannot be determined (it is zero).
        """
        if feature_idxs is None:
            logging.info("Generating feature indexes according to the scores.")
            # Feature indexes ordered from the lowest to the highest score
            ranking = np.argsort(self.scores, axis=-1)
            # Reverse the ranking when higher scores should come first
            if not self.ascending:
                logging.info("Descending order of scores was chosen.")
                ranking = ranking[::-1].copy()
            if self.method == "elitist":
                # The order of features for decomposition is the ranking itself
                feature_idxs = ranking.copy()
            elif self.method == "distributed":
                # Distributes the top-ranked features evenly among the groups
                self.n_subcomps = self.n_subcomps or len(self.subcomp_sizes)
                if not self.n_subcomps:
                    raise ValueError(
                        "The number of subcomponents must be greater than zero "
                        "to distribute the features among the groups."
                    )
                # Group k receives ranking[k], ranking[k + m], ranking[k + 2m], ...
                # Slicing the ranking keeps the integer dtype even when a group
                # ends up empty, so the result is always a valid index array.
                feature_idxs = np.concatenate(
                    [ranking[start::self.n_subcomps] for start in range(self.n_subcomps)],
                    axis=0,
                )
        # Shuffle the data features according to the indexes
        X = X[:, feature_idxs].copy()
        # Decompose the problem (the split itself is delegated to the
        # `_get_subcomponents` method, which is not defined in this class)
        subcomponents = self._get_subcomponents(X=X)
        return subcomponents, feature_idxs