Source code for pyccea.decomposition.clustering

import numpy as np
from ..decomposition.grouping import FeatureGrouping



[docs]
class ClusteringFeatureGrouping(FeatureGrouping):
    """
    Decompose the problem (a collection of features) according to a clustering.

    Features that share a cluster index are placed next to each other, clusters
    being visited in increasing index order from 0 to ``n_subcomps - 1``.
    """

    def __init__(self, n_subcomps: int = None, clusters: np.ndarray = None):
        """
        Parameters
        ----------
        n_subcomps: int
            Number of subcomponents, where each subcomponent is a subset of features.
        clusters: np.ndarray, default None
            Index of the cluster each feature belongs to. Any array-like is accepted and
            copied into a new array. ``None`` is treated as an empty assignment.
        """
        super().__init__(n_subcomps)
        # np.array both copies (the caller's data is never aliased) and coerces plain
        # sequences, so that `self.clusters == cluster_id` in `decompose` is always an
        # element-wise comparison rather than a scalar `False`.
        self.clusters = np.empty(0) if clusters is None else np.array(clusters)
        self.n_subcomps = n_subcomps

    def decompose(self, X: np.ndarray, feature_idxs: np.ndarray = None) -> tuple:
        """
        Divide an n-dimensional problem into m subproblems.

        Parameters
        ----------
        X: np.ndarray
            n-dimensional input data. Features are indexed along the second axis.
        feature_idxs: np.ndarray, default None
            Feature indexes sorted according to clustering. It is passed as a parameter if it has
            been previously generated. When it is given, `self.subcomp_sizes` is not
            recomputed by this method.

        Returns
        -------
        subcomponents: list
            Subcomponents, where each subcomponent is an array that can be accessed by indexing
            the list.
        feature_idxs: np.ndarray
            Feature indexes sorted according to clustering. For example, if the first
            subpopulation has size x, the first x elements of this list will be the features of
            the first subcomponent and so on.
        """
        if feature_idxs is None:
            # Indexes of the features assigned to each cluster, in cluster order.
            # Features whose cluster index is outside [0, n_subcomps) are left out.
            members_per_cluster = [
                np.where(self.clusters == cluster_id)[0]
                for cluster_id in range(self.n_subcomps)
            ]
            self.subcomp_sizes = [len(members) for members in members_per_cluster]
            # Keep an integer dtype even when nothing matched: an empty float array
            # is not a valid index and would make the column selection below fail.
            if members_per_cluster:
                feature_idxs = np.concatenate(members_per_cluster)
            else:
                feature_idxs = np.empty(0, dtype=np.intp)

        # Reorder the data features according to the indexes
        X = X[:, feature_idxs].copy()
        # Decompose the problem
        subcomponents = self._get_subcomponents(X=X)

        return subcomponents, feature_idxs