minor bug fixes & improve log message

yztxwd · yztxwd · commit 364578e20259 · 2026-04-03T22:39:37.000-04:00
diff --git a/test/test_cav_trainer.py b/test/test_cav_trainer.py
@@ -279,7 +279,7 @@ def test_all(self):
         cav_trainer.set_control(builder.control_concepts[0], num_samples=100)
 
         cav_trainer.train_concepts(
-                builder.concepts, 100, output_dir="data/cavs/", num_processes=2, backend='torch', device='cuda:1' if torch.cuda.device_count() > 1 else 'cuda:0', 
+                builder.concepts, 100, output_dir="data/cavs/", num_processes=1, backend='torch', device='cuda:1' if torch.cuda.device_count() > 1 else 'cuda:0', 
         )
         cav_trainer.train_concepts(
             builder.concepts, 100, output_dir="data/cavs/", num_processes=2
diff --git a/tpcav/cavs.py b/tpcav/cavs.py
@@ -238,6 +238,7 @@ def _train(
     penalty: str = "l2",
     backend: str = "sklearn",
     device=None,
+    name=None,
 ) -> Tuple[float, torch.Tensor]:
     """
     Train a binary CAV classifier for a concept vs cached control embeddings.
@@ -270,16 +271,18 @@ def _eval(avs, l, name: str):
         precision, recall, fscore, support = precision_recall_fscore_support(
             l, y_preds, average="binary", pos_label=1
         )
-        logger.info("[%s] Accuracy: %.4f", name, acc)
+        #logger.info("[%s] Accuracy: %.4f", name, acc)
         (output_dir / f"classifier_perform_on_{name}.txt").write_text(
             f"Accuracy: {acc}\n"
         )
         return fscore
 
     output_dir.mkdir(parents=True, exist_ok=True)
-    _eval(train_avs, train_l, "train")
+    train_fscore = _eval(train_avs, train_l, "train")
     test_fscore = _eval(test_avs, test_l, "test")
 
+    logger.info("Concept %s: [train] F-score: %.4f, [test] F-score: %.4f", name, train_fscore, test_fscore)
+
     weights = clf.weights
     assert len(weights.shape) == 2 and weights.shape[0] == 2
     torch.save(weights, output_dir / "classifier_weights.pt")
@@ -357,11 +360,11 @@ def _cleanup_paths(paths: list[str]) -> None:
                 pass
 
     @classmethod
-    def _reap_done_futures(cls, futures: list):
+    def _reap_done_futures(cls, futures: list, results: list):
         pending = []
         for name, fut, paths in futures:
             if fut.done():
-                fut.result()  # raises if worker failed
+                results.append((name, fut.result()))  # raises if worker failed
                 cls._cleanup_paths(paths)
             else:
                 pending.append((name, fut, paths))
@@ -371,11 +374,12 @@ def _reap_done_futures(cls, futures: list):
     def _wait_for_capacity(
         cls,
         futures: list,
+        results: list,
         capacity: int,
         sleep_s: int = 5,
     ):
         while True:
-            futures = cls._reap_done_futures(futures)
+            futures = cls._reap_done_futures(futures, results)
             if len(futures) < capacity:
                 return futures
             time.sleep(sleep_s)
@@ -418,15 +422,16 @@ def train_concepts(
                     concept_dir,
                     self.penalty,
                     backend=backend,
-                    device=device
+                    device=device,
+                    name=c.name,
                 )
                 self.cav_fscores[c.name] = fscore
                 self.cav_weights[c.name] = weight
                 self.cavs_list.append(weight)
 
                 self._cleanup_paths([str(concept_memmap_path)])
         else:
-            futures = []
+            futures = []; results = []
             ctx = mp.get_context("spawn")
             with ProcessPoolExecutor(mp_context=ctx, max_workers=num_processes) as executor:
                 for c in concept_list:
@@ -441,7 +446,7 @@ def train_concepts(
 
                     # block the process to avoid too long queue
                     futures = self._wait_for_capacity(
-                        futures, capacity=(max_pending + num_processes), sleep_s=5
+                        futures, results, capacity=(max_pending + num_processes), sleep_s=5
                     )
 
                     future = executor.submit(
@@ -451,12 +456,12 @@ def train_concepts(
                         concept_dir,
                         self.penalty,
                         backend=backend,
-                        device=device
+                        device=device,
+                        name=c.name,
                     )
                     logger.info("Submitted CAV training for concept %s", c.name)
                     futures.append((c.name, future, [str(concept_memmap_path)]))
 
-                results = []
                 for name, fut, paths in futures:
                     results.append((name, fut.result()))
                     self._cleanup_paths(paths)
@@ -504,15 +509,16 @@ def train_concepts_pairs(self,
                     concept_dir,
                     self.penalty,
                     backend=backend,
-                    device=device
+                    device=device,
+                    name=c_test.name,
                 )
                 self.cav_fscores[c_test.name] = fscore
                 self.cav_weights[c_test.name] = weight
                 self.cavs_list.append(weight)
 
                 self._cleanup_paths([str(concept_memmap_path), str(control_memmap_path)])
         else:
-            futures = []
+            futures = []; results = []
             with ProcessPoolExecutor(max_workers=num_processes) as executor:
                 for c_test, c_control in concept_pair_list:
                     concept_embeddings = self.tpcav.concept_embeddings(
@@ -531,7 +537,7 @@ def train_concepts_pairs(self,
 
                     # block the process to avoid too long queue
                     futures = self._wait_for_capacity(
-                        futures, capacity=(max_pending + num_processes), sleep_s=5
+                        futures, results, capacity=(max_pending + num_processes), sleep_s=5
                     )
 
                     future = executor.submit(
@@ -541,7 +547,8 @@ def train_concepts_pairs(self,
                         concept_dir,
                         self.penalty,
                         backend=backend,
-                        device=device
+                        device=device,
+                        name=c_test.name,
                     )
                     logger.info("Submitted CAV training for concept %s", c_test.name)
                     futures.append(
@@ -552,7 +559,6 @@ def train_concepts_pairs(self,
                         )
                     )
 
-                results = []
                 for name, fut, paths in futures:
                     results.append((name, fut.result()))
                     self._cleanup_paths(paths)
diff --git a/tpcav/report.py b/tpcav/report.py
@@ -83,7 +83,7 @@ def compute_ic(row: Any) -> float:
         buf = io.BytesIO()
         fig.savefig(buf, format="png", dpi=200, bbox_inches="tight", transparent=True)
         plt.close(fig)
-        out[str(name)] = "data:image/png;base64," + base64.b64encode(
+        out[_utils.clean_motif_name(str(name))] = "data:image/png;base64," + base64.b64encode(
             buf.getvalue()
         ).decode("ascii")
 
@@ -343,10 +343,9 @@ def _new_trainer() -> Any:
     # -----------------------------------------------------------------------------
     # 5) Build JS payload (used by Plotly)
     # -----------------------------------------------------------------------------
-    motif_logo_concepts = selected_motif_concepts[:]
     motif_logo_dict = _maybe_build_motif_logo_data_uris(
             motif_file if motif_file_fmt == "meme" else None,
-            motif_logo_concepts,
+            selected_motif_concepts,
         )
     js_payload: dict[str, Any] = {
         "motif_file_fmt": motif_file_fmt,
@@ -435,7 +434,7 @@ def _to_list(x: Any) -> Any:
     if motif_auc_df is not None:
         # append motif logo column if exists
         if motif_file_fmt=='meme' and (len(motif_logo_dict)>0):
-            motif_auc_df['motif_logo'] = motif_auc_df.apply(lambda x: "<img src=\"" + motif_logo_dict[x['concept']] + "\" width=\"100\">", axis=1)
+            motif_auc_df['motif_logo'] = motif_auc_df.apply(lambda x: "<img src=\"" + motif_logo_dict.get(x['concept'], 'null') + "\" width=\"100\">", axis=1)
         motif_auc_table_html = _render_df_table(motif_auc_df, max_rows=5000)
 
     if embed_images:

Original file line number	Diff line number	Diff line change
`@@ -279,7 +279,7 @@ def test_all(self):`
`279`	`279`	`cav_trainer.set_control(builder.control_concepts[0], num_samples=100)`
`280`	`280`
`281`	`281`	`cav_trainer.train_concepts(`
`282`		`- builder.concepts, 100, output_dir="data/cavs/", num_processes=2, backend='torch', device='cuda:1' if torch.cuda.device_count() > 1 else 'cuda:0',`
	`282`	`+ builder.concepts, 100, output_dir="data/cavs/", num_processes=1, backend='torch', device='cuda:1' if torch.cuda.device_count() > 1 else 'cuda:0',`
`283`	`283`	`)`
`284`	`284`	`cav_trainer.train_concepts(`
`285`	`285`	`builder.concepts, 100, output_dir="data/cavs/", num_processes=2`