Source code for pythonsi.test_statistics.fs_after_da

import numpy as np
import numpy.typing as npt
from typing import Tuple, List
from scipy.linalg import block_diag


[docs] class SFS_DATestStatistic: r"""Test statistic for feature selection inference after domain adaptation. This class computes test statistics for testing individual features after feature selection on domain-adapted data, implementing the post-selection inference framework for cross-domain feature validation. The test statistic is designed for testing: .. math:: H_0: \beta_j = 0 \quad \text{vs} \quad H_1: \beta_j \neq 0 for a specific feature :math:`j` in the active set, where :math:`\beta_j` is the coefficient of feature :math:`j` in the target domain after domain adaptation via optimal transport. Parameters ---------- xs : array-like, shape (ns, p) Source domain design matrix ys : array-like, shape (ns, 1) Source domain response vector xt : array-like, shape (nt, p) Target domain design matrix yt : array-like, shape (nt, 1) Target domain response vector Attributes ---------- xs_node : Data Node containing the source domain design matrix ys_node : Data Node containing the source domain response vector xt_node : Data Node containing the target domain design matrix yt_node : Data Node containing the target domain response vector Notes ----- The test statistic accounts for the domain adaptation step by focusing the inference on the target domain data while using the source domain for adaptation. This allows for valid inference on features selected after optimal transport domain adaptation. """ def __init__( self, xs: npt.NDArray[np.floating], ys: npt.NDArray[np.floating], xt: npt.NDArray[np.floating], yt: npt.NDArray[np.floating], ): self.xs_node = xs self.ys_node = ys self.xt_node = xt self.yt_node = yt def __call__( self, active_set: npt.NDArray[np.floating], feature_id: int, Sigmas: List[npt.NDArray[np.floating]], ) -> Tuple[ list, npt.NDArray[np.floating], npt.NDArray[np.floating], float, float, list ]: r"""Compute test statistic for a selected feature after domain adaptation. Computes the test statistic and parametrization for testing whether a specific feature in the active set has a non-zero coefficient in the target domain after domain adaptation. The test statistic focuses on the target domain: .. math:: T = \eta_j^T \begin{bmatrix} \mathbf{y}_s \\ \mathbf{y}_t \end{bmatrix} where .. math:: \eta_j = \begin{bmatrix} \mathbf{0}_{ns} \\ \mathbf{x}_t^A(\mathbf{x}_t^{A^T}\mathbf{x}_t^A)^{-1}\mathbf{e}_j \end{bmatrix}, :math:`n_s` is the number of source domain samples and :math:`\mathbf{x}_t^A` is the target domain active set design matrix. Parameters ---------- active_set : array-like, shape (k,) Indices of features in the active set feature_id : int Index of the feature to test (within active_set) Sigmas : list of array-like List containing [Sigma_source, Sigma_target] covariance matrices Returns ------- test_statistic_direction : array-like, shape (ns+nt, 1) Direction vector for the test statistic a : array-like, shape (ns+nt, 1) Parametrization intercept for combined data b : array-like, shape (ns+nt, 1) Parametrization coefficient for combined data test_statistic : float Observed value of the test statistic variance : float Variance of the test statistic under null hypothesis deviation : float Standard deviation of the test statistic """ xs = self.xs_node() ys = self.ys_node() xt = self.xt_node() yt = self.yt_node() y = np.vstack((ys, yt)) Sigma_s = Sigmas[0] Sigma_t = Sigmas[1] Sigma = block_diag(Sigma_s, Sigma_t) x_active = xt[:, active_set] ej = np.zeros((len(active_set), 1)) ej[feature_id, 0] = 1 test_statistic_direction = np.vstack( ( np.zeros((xs.shape[0], 1)), x_active.dot(np.linalg.inv(x_active.T.dot(x_active))).dot(ej), ) ) b = Sigma.dot(test_statistic_direction).dot( np.linalg.inv( test_statistic_direction.T.dot(Sigma).dot(test_statistic_direction) ) ) a = ( np.identity(x_active.shape[0] + xs.shape[0]) - b.dot(test_statistic_direction.T) ).dot(y) test_statistic = test_statistic_direction.T.dot(y)[0, 0] variance = test_statistic_direction.T.dot(Sigma).dot(test_statistic_direction)[ 0, 0 ] deviation = np.sqrt(variance) self.xs_node.parametrize(data=xs) self.ys_node.parametrize(a=a[: xs.shape[0], :], b=b[: xs.shape[0], :]) self.xt_node.parametrize(data=xt) self.yt_node.parametrize(a=a[xs.shape[0] :, :], b=b[xs.shape[0] :, :]) return ( test_statistic_direction, a, b, test_statistic, variance, deviation, [-np.inf, np.inf], )