add help functions

yztxwd · yztxwd · commit 0d1e592f7836 · 2026-01-08T16:29:46.000-05:00
diff --git a/README.md b/README.md
@@ -1,14 +1,10 @@
 # TPCAV (Testing with PCA projected Concept Activation Vectors)
 
-Analysis pipeline for TPCAV
+This repository contains code to compute TPCAV (Testing with PCA projected Concept Activation Vectors) on deep learning models. TPCAV is an extension of the original TCAV method that uses PCA to reduce the dimensionality of the activations at a selected intermediate layer before computing Concept Activation Vectors (CAVs).
 
-## Dependencies
+## Installation
 
-You can use your own environment for the model, in addition, you need to install the following packages:
 
-- captum 0.7
-- seqchromloader 0.8.5
-- scikit-learn 1.5.2
 
 ## Workflow
 
diff --git a/tpcav/cavs.py b/tpcav/cavs.py
@@ -246,6 +246,16 @@ def tpcav_score(
 
         return scores
 
+    def tpcav_score_all_concepts(self, attributions: torch.Tensor) -> dict:
+        """
+        Compute TPCAV scores for every trained concept.
+
+        Returns a dict mapping each key of `self.cav_weights` to the value of
+        `self.tpcav_score(name, attributions)`.
+        """
+        names = self.cav_weights.keys()
+        return {name: self.tpcav_score(name, attributions) for name in names}
+
     def tpcav_score_binary_log_ratio(
         self, concept_name: str, attributions: torch.Tensor, pseudocount: float = 1.0
     ) -> float:
@@ -259,6 +269,20 @@ def tpcav_score_binary_log_ratio(
 
         return np.log((pos_count + pseudocount) / (neg_count + pseudocount))
 
+    def tpcav_score_all_concepts_log_ratio(
+        self, attributions: torch.Tensor, pseudocount: float = 1.0
+    ) -> dict:
+        """
+        Compute TPCAV log ratio scores for every trained concept.
+
+        Returns a dict mapping each key of `self.cav_weights` to the value of
+        `self.tpcav_score_binary_log_ratio(name, attributions, pseudocount)`.
+        """
+        return {
+            name: self.tpcav_score_binary_log_ratio(name, attributions, pseudocount)
+            for name in self.cav_weights.keys()
+        }
+
     def plot_cavs_similaritiy_heatmap(
         self,
         attributions: torch.Tensor,
diff --git a/tpcav/tpcav_model.py b/tpcav/tpcav_model.py
@@ -11,10 +11,6 @@
 
 def _abs_attribution_func(multipliers, inputs, baselines):
     "Multiplier x abs(inputs - baselines) to avoid double-sign effects."
-    # print(f"inputs: {inputs[1][:5]}")
-    # print(f"baselines: {baselines[1][:5]}")
-    # print(f"multipliers: {multipliers[0][:5]}")
-    # print(f"multipliers: {multipliers[1][:5]}")
     return tuple(
         (input_ - baseline).abs() * multiplier
         for input_, baseline, multiplier in zip(inputs, baselines, multipliers)
@@ -174,8 +170,12 @@ def layer_attributions(
         target_batches: Iterable,
         baseline_batches: Iterable,
         multiply_by_inputs: bool = True,
+        abs_inputs_diff: bool = True,
     ) -> Dict[str, torch.Tensor]:
-        """Compute DeepLift attributions on PCA embedding space.
+        """
+        Compute DeepLift attributions on PCA embedding space.
+
+        By default, it computes (input - baseline).abs() * multiplier to avoid double-sign effects (abs_inputs_diff=True).
 
         target_batches and baseline_batches should yield (seq, chrom) pairs of matching length.
         """
@@ -184,6 +184,8 @@ def layer_attributions(
         self.forward = self.forward_from_embeddings_at_layer
         deeplift = DeepLift(self, multiply_by_inputs=multiply_by_inputs)
 
+        custom_attr_func = _abs_attribution_func if abs_inputs_diff else None
+
         attributions = []
         for inputs, binputs in zip(target_batches, baseline_batches):
             avs = self._layer_output(*[i.to(self.device) for i in inputs])
@@ -205,7 +207,7 @@ def layer_attributions(
                     ),
                     additional_forward_args=(inputs,),
                     custom_attribution_func=(
-                        None if not multiply_by_inputs else _abs_attribution_func
+                        None if not multiply_by_inputs else custom_attr_func
                     ),
                 )
                 attr_residual, attr_projected = attribution
@@ -219,7 +221,7 @@ def layer_attributions(
                         inputs,
                     ),
                     custom_attribution_func=(
-                        None if not multiply_by_inputs else _abs_attribution_func
+                        None if not multiply_by_inputs else custom_attr_func
                     ),
                 )[0]