@@ -383,7 +383,9 @@ def main(par):
383383 gene_mask = np .logical_or (np .any (A , axis = 1 ), np .any (A , axis = 0 ))
384384 in_degrees = np .sum (A != 0 , axis = 0 )
385385 out_degrees = np .sum (A != 0 , axis = 1 )
386- idx = np .argsort (np .maximum (out_degrees , in_degrees ))[:- par ['n_top_genes' ]]
386+ # NOTE(review): n_genes = par['n_top_genes'] is bypassed; the value is hard-coded on the next line — confirm this is intended
387+ n_genes = 3000
388+ idx = np .argsort (np .maximum (out_degrees , in_degrees ))[:- n_genes ]
387389 gene_mask [idx ] = False
388390 X = X [:, gene_mask ]
389391 X = X .toarray () if isinstance (X , csr_matrix ) else X
@@ -445,52 +447,67 @@ def main(par):
445447 # Evaluate inferred GRN
446448 print ("\n ======== Evaluate inferred GRN ========" )
447449 scores = evaluate_grn (X_controls , delta_X , is_train , is_reporter , A , signed = use_signs )
448-
449- # Evaluate baseline GRN
450- print ("\n ======== Evaluate shuffled GRN ========" )
451- scores_baseline = evaluate_grn (X_controls , delta_X , is_train , is_reporter , A_baseline , signed = use_signs )
452-
453- # Keep only the genes for which both GRNs got a score
454- mask = ~ np .logical_or (np .isnan (scores ), np .isnan (scores_baseline ))
455- scores = scores [mask ]
456- scores_baseline = scores_baseline [mask ]
457-
458- rr_all = {}
459- # Perform rank test between actual scores and baseline
460- rr_all ['spearman' ] = float (np .mean (scores ))
461- rr_all ['spearman_shuffled' ] = float (np .mean (scores_baseline ))
462- if len (scores ) == 0 :
450+
451+ # Keep only valid scores (non-NaN)
452+ valid_scores = scores [~ np .isnan (scores )]
453+
454+ if len (valid_scores ) == 0 :
463455 # No valid genes to evaluate
464- df_results = pd .DataFrame ({'sem_precision' : [np .nan ], 'sem_balanced' : [np .nan ]})
465- elif np .all (scores - scores_baseline == 0 ):
466- # Identical performance (suspicious - likely an error)
467- print ("WARNING: Identical scores detected - possible evaluation error!" )
468- df_results = pd .DataFrame ({'sem_precision' : [1.0 ], 'sem_balanced' : [0.0 ]})
456+ print ("WARNING: No valid genes to evaluate!" )
457+ results = {'sem' : [0.0 ]}
469458 else :
470- res = wilcoxon (scores - scores_baseline , zero_method = 'wilcox' , alternative = 'greater' )
471- rr_all ['Wilcoxon pvalue' ] = float (res .pvalue )
472-
473- print (rr_all )
459+ # Final score is mean of valid R² scores
460+ final_score = float (np .mean (valid_scores ))
474461
475- eps = 1e-300 # very small number to avoid log(0)
476- pval_clipped = max (res .pvalue , eps )
462+ print (f"\n Method: { method_id } " )
463+ print (f"SEM score (mean R²): { final_score :.4f} " )
464+ print (f"Valid genes evaluated: { len (valid_scores )} /{ len (scores )} " )
465+ print (f"SEM score (min): { np .min (valid_scores ):.4f} " )
466+ print (f"SEM score (max): { np .max (valid_scores ):.4f} " )
477467
478- # Set to 0 if not significant (p >= 0.05)
479- if res .pvalue >= 0.05 :
480- score = 0.0
481- print (f"p-value: { res .pvalue :.6f} (not significant, p >= 0.05)" )
482- print (f"SEM score set to 0" )
468+ results = {'sem' : [float (final_score )]}
469+
470+ # Evaluate baseline GRN (currently disabled: the `if False` guard below means this branch never runs)
471+ if False :
472+ print ("\n ======== Evaluate shuffled GRN ========" )
473+ scores_baseline = evaluate_grn (X_controls , delta_X , is_train , is_reporter , A_baseline , signed = use_signs )
474+
475+ # Keep only the genes for which both GRNs got a score
476+ mask = ~ np .logical_or (np .isnan (scores ), np .isnan (scores_baseline ))
477+ scores = scores [mask ]
478+ scores_baseline = scores_baseline [mask ]
479+
480+ rr_all = {}
481+ # Perform rank test between actual scores and baseline
482+ rr_all ['spearman' ] = float (np .mean (scores ))
483+ rr_all ['spearman_shuffled' ] = float (np .mean (scores_baseline ))
484+ if len (scores ) == 0 :
485+ raise ValueError ("No valid scores to compare between inferred GRN and baseline GRN." )
486+ elif np .all (scores - scores_baseline == 0 ):
487+ # Identical performance (suspicious - likely an error)
488+ raise ValueError ("Identical performance between inferred GRN and baseline GRN - likely an error." )
483489 else :
484- # Compute final score
485- score = - np .log10 (pval_clipped )
486- print (f"p-value: { res .pvalue :.6f} (significant)" )
487-
488- print (f"Final score: { score } " )
489-
490- results = {
491- 'sem_precision' : [float (np .log2 (np .mean (scores ) / (np .mean (scores_baseline ) + 1e-6 )))],
492- 'sem' : [float (score )]
493- }
494-
495- df_results = pd .DataFrame (results )
490+ res = wilcoxon (scores - scores_baseline , zero_method = 'wilcox' , alternative = 'greater' )
491+ rr_all ['Wilcoxon pvalue' ] = float (res .pvalue )
492+
493+ print (rr_all )
494+
495+ eps = 1e-300 # very small number to avoid log(0)
496+ pval_clipped = max (res .pvalue , eps )
497+
498+ # Set to 0 if not significant (p >= 0.05)
499+ if res .pvalue >= 0.05 :
500+ score = 0.0
501+ print (f"p-value: { res .pvalue :.6f} (not significant, p >= 0.05)" )
502+ print (f"SEM score set to 0" )
503+ else :
504+ # Compute final score
505+ score = - np .log10 (pval_clipped )
506+ print (f"p-value: { res .pvalue :.6f} (significant)" )
507+
508+ print (f"Final score: { score } " )
509+ results ['sem_precision' ] = [float (np .log2 (np .mean (scores ) / (np .mean (scores_baseline ) + 1e-6 )))]
510+ results ['sem_n' ] = [float (score )]
511+
512+ df_results = pd .DataFrame (results )
496513 return df_results
0 commit comments