Skip to content

Commit 92980b3

Browse files
committed
improve diagnostics logging to identify split candidates
1 parent bd470a5 commit 92980b3

2 files changed

Lines changed: 15 additions & 2 deletions

File tree

FastOMA/_hog_class.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -366,7 +366,7 @@ def _sorter_key(sh):
366366
"""
367367

368368

369-
def split_hog(hog:HOG, *partitions):
369+
def split_hog(hog:HOG, level_name:str, *partitions):
370370
"""splits a hog into parts based on the partitions provided.
371371
372372
The partitions need to be lists of Representatives (or simply members)
@@ -402,4 +402,17 @@ def split_hog(hog:HOG, *partitions):
402402
h = HOG(subhogs, taxnomic_range=hog.taxlevel, rhogid=hog.rhogid, msa=hog.get_msa(), representatives=partitions[p])
403403
hogs.append(h)
404404
return hogs
405+
else:
406+
rep_to_subhog_list = {rep: hog.get_subhog_path(rep.get_id(), max_depth=-1) for part in partitions for rep in part}
407+
for depth in range(max(len(sh) for sh in rep_to_subhog_list.values())):
408+
rep_sets = [set(rep_to_subhog_list[rep][depth] for rep in part if len(rep_to_subhog_list[rep]) > depth) for part in partitions]
409+
if all(s1.isdisjoint(s2) for s1, s2 in itertools.combinations(rep_sets, 2)):
410+
break
411+
depth -= 1
412+
rep_sets = [set(rep_to_subhog_list[rep][depth] for rep in part if len(rep_to_subhog_list[rep]) > depth) for part in partitions]
413+
merged_in = set.union(*(s1.intersection(s2) for s1, s2 in itertools.combinations(rep_sets, 2)))
414+
involved_reps = [(r, h[depth].hogid, h[depth].taxname) for r, h in rep_to_subhog_list.items() if len(h)>depth and h[depth] in merged_in]
415+
logger.warning(f"{hog} should be split in {len(partitions)} partitions at {level_name}. Not implemented yet.")
416+
for rep in involved_reps:
417+
logger.warning(f" - Rep {rep[0]} merged into {rep[1]} at level {rep[2]}")
405418
#raise RuntimeError("this part of the code needs more thinking")

FastOMA/_infer_subhog.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -587,7 +587,7 @@ def merge_subhogs(self, reconciled_genetree:TreeNode, msa:MultipleSeqAlignment):
587587
if len(subtrees) > 1:
588588
logger.info(f"Representaives of {hogid} are split among {len(subtrees)} candidate subtrees.")
589589
split_parts = [list(n.name for n in sub.iter_leaves() if n.hogid == hogid) for sub in subtrees]
590-
split_hogs = split_hog(self.subhogs[hogid], *split_parts)
590+
split_hogs = split_hog(self.subhogs[hogid], self.node_species_tree.name, *split_parts)
591591
if split_hogs and len(split_hogs) > 1:
592592
# we could split the current hog.
593593
self.subhogs.pop(hogid)

0 commit comments

Comments
 (0)