Source code for pythonsi.pipeline

from pythonsi.node import Data
import numpy as np
import numpy.typing as npt
from pythonsi.util import compute_p_value
from typing import List


def line_search(output_node: Data, z_min: float, z_max: float, step_size: float = 5e-4):
    list_intervals = []
    list_outputs = []
    z = z_min
    while z < z_max:
        # print(f"Line search at z = {z}")
        output, _, _, interval_of_z = output_node.inference(z=z)

        interval_of_z = [max(interval_of_z[0], z_min, z), min(interval_of_z[1], z_max)]

        list_intervals.append(interval_of_z)
        list_outputs.append(output)

        # # For debug:
        # print(f"z: {z}, interval: {interval_of_z}, output: {output}")

        z = interval_of_z[1] + step_size

    return list_intervals, list_outputs


[docs] class Pipeline: r"""Selective inference for Feature selection pipeline. Parameters ---------- inputs : list of Data Input datas for the pipeline output : Data Output data for the pipeline test_statistic : object Chosen test statistic for inference Attributes ---------- input_nodes : list of Data Input data nodes output_node : Data Output data node test_statistic : object Test statistic computation object """ def __init__(self, inputs: List[Data], output: Data, test_statistic: any): self.input_nodes = inputs self.output_node = output self.test_statistic = test_statistic def __call__( self, inputs: List[npt.NDArray[np.floating]], covariances: List[npt.NDArray[np.floating]], verbose: bool = False, ) -> npt.NDArray[np.floating]: r"""Execute the pipeline and inference on the observed output. Parameters ---------- inputs : list of array-like Input data arrays corresponding to input_nodes covariances : list of array-like Covariance matrices for noise in each input verbose : bool, optional Whether to print detailed progress information, default False Returns ------- outputs : array-like, shape (k,) Output data p_values : list of float Selective p-values for the output data """ for input_data, input_node in zip(inputs, self.input_nodes): input_node.update(input_data) output = self.output_node() if verbose: print(f"Selected output: {output}") list_p_value = [] for output_id, _ in enumerate(output): if verbose: print(f"Testing feature {output_id}") p_value = self.inference( output_id=output_id, output=output, covariances=covariances ) if verbose: print(f"Feature {output_id}: p-value = {p_value}") list_p_value.append(p_value) return output, list_p_value
[docs] def inference( self, output_id: int, covariances: List[npt.NDArray[np.floating]], output: npt.NDArray[np.floating], ) -> float: r"""Perform selective inference for a single specific data based on the chosen hypothesis and test statistic. For example, testing if a specific selected feature has a non-zero coefficient Parameters ---------- feature_id : int Index of the feature to test (within selected features) covariances : list of array-like Covariance matrices for noise in each input output : array-like, shape (k,) Array of selected feature indices Returns ------- p_value : float Selective p-value for the data """ test_statistic_direction, a, b, test_statistic, variance, deviation, itv = ( self.test_statistic(output, output_id, covariances) ) # # For debug: # print(f"Test statistic: {test_statistic}") # print(f"Variance: {variance}") # print(f"Deviation: {deviation}") list_intervals, list_outputs = line_search( self.output_node, z_min=min(max(-20 * deviation, itv[0]), test_statistic), z_max=max(min(20 * deviation, itv[1]), test_statistic), step_size=1e-4, ) p_value = compute_p_value( test_statistic, variance, list_intervals, list_outputs, output ) return p_value