From 2e7d0c565227bc8f167b3374c3275429155e9362 Mon Sep 17 00:00:00 2001
From: Brandon Seah
Date: Wed, 6 Aug 2025 17:06:08 +0800
Subject: [PATCH 1/2] Check for flipped coordinates when checking overlaps

Genomic coordinates can be flipped to order (end,start) for
reverse-strand features when adding hmmer and interpro domains.
Unflip flipped coordinates in the `has_overlap` and `get_overlap`
functions. See https://github.com/HerwigLab/IsoTools2/issues/23
---
 src/isotools/_utils.py | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/src/isotools/_utils.py b/src/isotools/_utils.py
index c6a1fce..ca772cb 100644
--- a/src/isotools/_utils.py
+++ b/src/isotools/_utils.py
@@ -277,14 +277,21 @@ def find_orfs(sequence, start_codons=None, stop_codons=None, ref_cds=None):
 
 def has_overlap(r1, r2):
     "check the overlap of two intervals"
-    # assuming start < end
-    return r1[1] > r2[0] and r2[1] > r1[0]
+    # Interval objects have lengths >=2, first two elements are coordinates
+    assert len(r1) >= 2, f"Should be length >=2, instead saw: {str(r1)}"
+    assert len(r2) >= 2, f"Should be length >=2, instead saw: {str(r2)}"
+    # Flip coordinates if given as (end, start)
+    r1_start, r1_end = (r1[0], r1[1]) if r1[0] < r1[1] else (r1[1], r1[0])
+    r2_start, r2_end = (r2[0], r2[1]) if r2[0] < r2[1] else (r2[1], r2[0])
+    return r1_end > r2_start and r2_end > r1_start
 
 
 def get_overlap(r1, r2):
     "check the overlap of two intervals"
-    # assuming start < end
-    return max(0, min(r1[1], r2[1]) - max(r1[0], r2[0]))
+    # Flip coordinates if given as (end, start)
+    r1_start, r1_end = (r1[0], r1[1]) if r1[0] < r1[1] else (r1[1], r1[0])
+    r2_start, r2_end = (r2[0], r2[1]) if r2[0] < r2[1] else (r2[1], r2[0])
+    return max(0, min(r1_end, r2_end) - max(r1_start, r2_start))
 
 
 def get_intersects(tr1, tr2):

From eed8321c85e71465557d8b1c6bc9217f20057f06 Mon Sep 17 00:00:00 2001
From: Brandon Seah
Date: Fri, 8 Aug 2025 10:58:30 +0800
Subject: [PATCH 2/2] Document function behavior

---
 src/isotools/_utils.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/isotools/_utils.py b/src/isotools/_utils.py
index ca772cb..b4c0698 100644
--- a/src/isotools/_utils.py
+++ b/src/isotools/_utils.py
@@ -276,7 +276,10 @@ def find_orfs(sequence, start_codons=None, stop_codons=None, ref_cds=None):
 
 
 def has_overlap(r1, r2):
-    "check the overlap of two intervals"
+    """check the overlap of two intervals
+
+    Does not distinguish between (start,end) or (end,start) order in coordinates
+    """
     # Interval objects have lengths >=2, first two elements are coordinates
     assert len(r1) >= 2, f"Should be length >=2, instead saw: {str(r1)}"
     assert len(r2) >= 2, f"Should be length >=2, instead saw: {str(r2)}"
@@ -287,7 +290,10 @@ def has_overlap(r1, r2):
 
 
 def get_overlap(r1, r2):
-    "check the overlap of two intervals"
+    """get the overlap length of two intervals
+
+    Does not distinguish between (start,end) or (end,start) order in coordinates
+    """
     # Flip coordinates if given as (end, start)
     r1_start, r1_end = (r1[0], r1[1]) if r1[0] < r1[1] else (r1[1], r1[0])
     r2_start, r2_end = (r2[0], r2[1]) if r2[0] < r2[1] else (r2[1], r2[0])