@@ -877,6 +877,7 @@ def run_tpcav(
877877 motif_control_type = "random" ,
878878 bed_seq_file : Optional [str ] = None ,
879879 bed_chrom_file : Optional [str ] = None ,
880+ synthetic_gc_concept_step : Optional [float ] = None ,
880881 layer_name : Optional [str ]= None ,
881882 layer = None ,
882883 output_dir : str = "tpcav/" ,
@@ -941,8 +942,8 @@ def run_tpcav(
941942 motif_concept_builders [nm ] = builder
942943
943944 ## bed concepts (optional)
944- if bed_seq_file is not None or bed_chrom_file is not None :
945- bed_builder = ConceptBuilder (
945+ if bed_seq_file is not None or bed_chrom_file is not None or synthetic_gc_concept_step is not None :
946+ non_motif_concept_builder = ConceptBuilder (
946947 genome_fasta = genome_fasta ,
947948 input_window_length = input_window_length ,
948949 bws = bws ,
@@ -953,23 +954,26 @@ def run_tpcav(
953954 rng_seed = seed ,
954955 )
955956 # use random regions as control
956- bed_builder .build_control ()
957+ non_motif_concept_builder .build_control ()
957958 if bed_seq_file is not None :
958959 # build concepts from fasta sequences in bed file
959- bed_builder .add_bed_sequence_concepts (bed_seq_file )
960+ non_motif_concept_builder .add_bed_sequence_concepts (bed_seq_file )
960961 if bed_chrom_file is not None :
961962 # build concepts from chromatin tracks in bed file
962- bed_builder .add_bed_chrom_concepts (bed_chrom_file )
963+ non_motif_concept_builder .add_bed_chrom_concepts (bed_chrom_file )
964+ if synthetic_gc_concept_step is not None :
965+ # build synthetic gc content concepts
966+ non_motif_concept_builder .add_synthetic_gc_content_concepts (synthetic_gc_concept_step )
963967 # apply transform to convert fasta sequences to one-hot encoded sequences
964- bed_builder .apply_transform (input_transform_func )
968+ non_motif_concept_builder .apply_transform (input_transform_func )
965969 else :
966- bed_builder = None
970+ non_motif_concept_builder = None
967971
968972 # create TPCAV model on top of the given model
969973 tpcav_model = TPCAV (model , layer_name = layer_name , layer = layer )
970974 # fit PCA on sampled all concept activations of the last builder (should have the most motifs)
971975 tpcav_model .fit_pca (
972- concepts = motif_concept_builders [num_motif_insertions [- 1 ]].concepts_for_pca () + bed_builder .concepts_for_pca () if bed_builder is not None else motif_concept_builders [num_motif_insertions [- 1 ]].concepts_for_pca (),
976+ concepts = motif_concept_builders [num_motif_insertions [- 1 ]].concepts_for_pca () + non_motif_concept_builder .concepts_for_pca () if non_motif_concept_builder is not None else motif_concept_builders [num_motif_insertions [- 1 ]].concepts_for_pca (),
973977 num_samples_per_concept = num_samples_for_pca ,
974978 num_pc = num_pc ,
975979 )
@@ -993,13 +997,14 @@ def run_tpcav(
993997 if save_cav_trainer :
994998 torch .save (cav_trainer , str (output_path / f"cavs_{ nm } _motifs/cav_trainer.pt" ))
995999 motif_cav_trainers [nm ] = cav_trainer
996- if bed_builder is not None :
1000+
1001+ if non_motif_concept_builder is not None :
9971002 bed_cav_trainer = CavTrainer (tpcav_model , penalty = "l2" )
9981003 bed_cav_trainer .set_control (
999- bed_builder .control_concepts [0 ], num_samples = num_samples_for_cav
1004+ non_motif_concept_builder .control_concepts [0 ], num_samples = num_samples_for_cav
10001005 )
10011006 bed_cav_trainer .train_concepts (
1002- bed_builder .concepts ,
1007+ non_motif_concept_builder .concepts ,
10031008 num_samples_for_cav ,
10041009 output_dir = str (output_path / f"cavs_bed_concepts/" ),
10051010 num_processes = p ,
0 commit comments