@@ -366,7 +366,7 @@ def _sorter_key(sh):
366366"""
367367
368368
369- def split_hog (hog :HOG , * partitions ):
369+ def split_hog (hog :HOG , level_name : str , * partitions ):
370370 """splits a hog into parts based on the partitions provided.
371371
372372 The partitions need to be lists of Representatives (or simply members)
@@ -402,4 +402,17 @@ def split_hog(hog:HOG, *partitions):
402402 h = HOG (subhogs , taxnomic_range = hog .taxlevel , rhogid = hog .rhogid , msa = hog .get_msa (), representatives = partitions [p ])
403403 hogs .append (h )
404404 return hogs
405+ else :
406+ rep_to_subhog_list = {rep : hog .get_subhog_path (rep .get_id (), max_depth = - 1 ) for part in partitions for rep in part }
407+ for depth in range (max (len (sh ) for sh in rep_to_subhog_list .values ())):
408+ rep_sets = [set (rep_to_subhog_list [rep ][depth ] for rep in part if len (rep_to_subhog_list [rep ]) > depth ) for part in partitions ]
409+ if all (s1 .isdisjoint (s2 ) for s1 , s2 in itertools .combinations (rep_sets , 2 )):
410+ break
411+ depth -= 1
412+ rep_sets = [set (rep_to_subhog_list [rep ][depth ] for rep in part if len (rep_to_subhog_list [rep ]) > depth ) for part in partitions ]
413+ merged_in = set .union (* (s1 .intersection (s2 ) for s1 , s2 in itertools .combinations (rep_sets , 2 )))
414+ involved_reps = [(r , h [depth ].hogid , h [depth ].taxname ) for r , h in rep_to_subhog_list .items () if len (h )> depth and h [depth ] in merged_in ]
415+ logger .warning (f"{ hog } should be split in { len (partitions )} partitions at { level_name } . Not implemented yet." )
416+ for rep in involved_reps :
417+ logger .warning (f" - Rep { rep [0 ]} merged into { rep [1 ]} at level { rep [2 ]} " )
405418 #raise RuntimeError("this part of the code needs more thinking")
0 commit comments