Source code for pyccea.utils.stats

import logging
import numpy as np
from typing import Tuple
from scipy.stats import ttest_ind, mannwhitneyu, shapiro, levene


# Initialize logger with info level
logging.basicConfig(level=logging.INFO)
# Reset handlers
logging.getLogger().handlers = []
# Add a custom handler
handler = logging.StreamHandler()
handler.setFormatter(logging.Formatter("%(message)s"))
logging.getLogger().addHandler(handler)



[docs]
def statistical_comparison_between_independent_samples(
        x: np.ndarray,
        y: np.ndarray,
        alpha: float = 0.05,
        alternative: str = "two-sided"
) -> Tuple[str, float, float, bool]:
    """Perform a statistical comparison between two independent samples.

    This function compares two independent samples `x` and `y` using a
    statistical test (e.g., t-test) to determine if there is a significant
    difference between them. The test is two-sided by default, but can be
    configured to perform one-sided tests. It returns the test statistic
    name, the computed p-value, the test statistic, and whether the null
    hypothesis is rejected at the given significance level `alpha`.

    Parameters
    ----------
    x : np.ndarray
        The first independent sample.
    y : np.ndarray
        The second independent sample.
    alpha : float, optional
        The significance level for the hypothesis test (default is 0.05).
    alternative : str, optional
        Defines the alternative hypothesis. Options are 'two-sided', 
        'greater', or 'less' (default is 'two-sided').

    Returns
    -------
    test_name : str
        The name of the statistical test used (e.g., 't-test', 'mann-whitney-u').
    p_comparison : float
        The computed p-value of the test.
    statistic : float
        The test statistic from the hypothesis test.
    reject_null : bool
        True if the null hypothesis is rejected at the given significance level,
        False otherwise.

    Raises
    ------
    ValueError
        If the input arrays `x` and `y` have different lengths or are not 
        one-dimensional.
    ValueError
        If the `alternative` argument is not one of 'two-sided', 'greater', 
        or 'less'.
    """

    if alternative not in ["two-sided", "greater", "less"]:
        raise ValueError("Alternative hypothesis must be 'two-sided', 'greater', or 'less'.")

    logging.info(f"Sample sizes: x: {len(x)}, y: {len(y)}")

    if len(x) < 3:
        raise ValueError(f"1st sample has only {len(x)} observation(s).")
    if len(y) < 3:
        raise ValueError(f"2nd sample has only {len(y)} observation(s).")
    logging.info("")
    if len(x) >= 3 and len(y) >= 3:

        # Test for normality using Shapiro-Wilk test
        # Test for homogeneity of variances (homocedasticity) using Levene's test
        # The null-hypothesis of the Shapiro Wilk test is that the population is normally distributed.
        # Thus, if the p value is less than the chosen significance level, then the null hypothesis is
        # rejected and there is evidence that the data tested are not normally distributed.
        logging.info("Applying Shapiro-Wilk test to validate normality...")
        _, p_x_normality = shapiro(np.array(x))
        _, p_y_normality = shapiro(np.array(y))
        logging.info(f"p-value for sample x: {p_x_normality}.\n")
        logging.info(f"p-value for sample y: {p_y_normality}.\n")

        # The Levene test tests the null hypothesis that all input samples are from populations with equal variances.
        # Thus, if the p value is less than the chosen significance level, then the null hypothesis is rejected and
        # there is evidence that the samples are not from populations with equal variances.
        logging.info("Applying Levene's test to validate homocedasticity...")
        _, p_homocedasticity = levene(x, y)
        logging.info(f"p-value of Levene's test: {p_homocedasticity}.\n")

        # Decide which test to use based on normality and homocedasticity
        if (p_x_normality > alpha) and (p_y_normality > alpha) and (p_homocedasticity > alpha):
            logging.info("Applying t-test independent samples for comparison...")
            test_name = "t-test"
            statistic, p_comparison = ttest_ind(x, y, alternative=alternative)
        else:
            logging.info("Applying Mann-Whitney U rank test for comparison...")
            test_name = "mann-whitney-u"
            statistic, p_comparison = mannwhitneyu(x, y, alternative=alternative)

        # P-value small indicates statistical difference
        if p_comparison <= alpha:
            confidence_level = int((1-alpha)*100)
            alternative = 'different from' if alternative == 'two-sided' else f'{alternative} than'
            logging.info(f"There are statistical differences between the two samples with a confidence level of {confidence_level}%.")
            logging.info(f"The 1st sample is statistical {alternative} the 2nd sample.")
            reject_null = True
        else:
            logging.info("There are NOT statistical differences between the two samples.")
            reject_null = False
        logging.info(f"p-value of {p_comparison}.")

        return (test_name, statistic, p_comparison, reject_null)