Source code for pythonsi.test_statistics.fs

import numpy as np
import numpy.typing as npt
from typing import Tuple



[docs]
class FSTestStatistic:
    r"""Compute test statistic and other utilities for feature selection inference.

    This class computes test statistics for testing individual features
    after feature selection, implementing the post-selection inference
    framework for validating selected features.

    The test statistic is designed for testing:

    .. math::
        H_0: \beta_j = 0 \quad \text{vs} \quad H_1: \beta_j \neq 0

    for a specific feature :math:`j` in the active set, where :math:`\beta_j`
    is the coefficient of feature :math:`j` in the linear model.

    Parameters
    ----------
    x : array-like, shape (n, p)
        Design matrix containing all features
    y : array-like, shape (n, 1)
        Response vector

    Attributes
    ----------
    x_node : Data
        Node containing the design matrix
    y_node : Data
        Node containing the response vector
    """

    def __init__(self, x: npt.NDArray[np.floating], y: npt.NDArray[np.floating]):
        self.x_node = x
        self.y_node = y

    def __call__(
        self,
        active_set: npt.NDArray[np.floating],
        feature_id: int,
        Sigma: npt.NDArray[np.floating],
    ) -> Tuple[
        list, npt.NDArray[np.floating], npt.NDArray[np.floating], float, float, list
    ]:
        r"""Compute test statistic for a selected feature.

        Computes the test statistic and parametrization for testing whether
        a specific feature in the active set has a non-zero coefficient.

        The test statistic follows the form:

        .. math::
            T = \eta_j^T \mathbf{y}

        where :math:`\eta_j^ = x` is the direction of the test statistic:

        .. math::
            \eta_j = x_{A}(x_{A}^T x_{A})^{-1} \bm{e}_j

        Parameters
        ----------
        active_set : array-like, shape (k,)
            Indices of features in the active set
        feature_id : int
            Index of the feature to test (within active_set)
        Sigma : array-like, shape (n, n)
            Covariance matrix of the noise

        Returns
        -------
        test_statistic_direction : array-like, shape (n, 1)
            Direction vector for the test statistic
        a : array-like, shape (n, 1)
            Parametrized intercept
        b : array-like, shape (n, 1)
            Parametrized coefficient
        test_statistic : float
            Observed value of the test statistic
        variance : float
            Variance of the test statistic under null hypothesis
        deviation : float
            Standard deviation of the test statistic
        """
        x = self.x_node()
        y = self.y_node()

        x_active = x[:, active_set]
        ej = np.zeros((x_active.shape[1], 1))
        ej[feature_id, 0] = 1
        test_statistic_direction = x_active.dot(
            np.linalg.inv(x_active.T.dot(x_active))
        ).dot(ej)

        b = Sigma.dot(test_statistic_direction).dot(
            np.linalg.inv(
                test_statistic_direction.T.dot(Sigma).dot(test_statistic_direction)
            )
        )
        a = (np.identity(x_active.shape[0]) - b.dot(test_statistic_direction.T)).dot(y)

        test_statistic = test_statistic_direction.T.dot(y)[0, 0]
        variance = test_statistic_direction.T.dot(Sigma).dot(test_statistic_direction)[
            0, 0
        ]
        deviation = np.sqrt(variance)

        self.x_node.parametrize(data=x)
        self.y_node.parametrize(a=a, b=b)
        return (
            test_statistic_direction,
            a,
            b,
            test_statistic,
            variance,
            deviation,
            [-np.inf, np.inf],
        )