Skip to content

Commit 775f66f

Browse files
committed
Add multi-region query support for Bam class
1 parent 803045f commit 775f66f

2 files changed

Lines changed: 93 additions & 7 deletions

File tree

lib/hts/bam.rb

Lines changed: 61 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -211,19 +211,42 @@ def each(copy: false, &block)
211211
end
212212
end
213213

214-
# Iterate records in a genomic region.
214+
# Iterate records in a genomic region or multiple regions.
215215
# See {#each} for copy semantics. When copy: false, the yielded Record is reused and should not be stored.
216+
#
217+
# @param region [String, Array<String>] Region specification(s)
218+
# - Single region: "chr1:100-200" or "chr1" with beg/end parameters
219+
# - Multiple regions: ["chr1:100-200", "chr2:500-600", ...]
220+
# @param beg [Integer, nil] Start position (used with single string region)
221+
# @param end_ [Integer, nil] End position (used with single string region)
222+
# @param copy [Boolean] Whether to deep-copy records (see {#each})
223+
#
224+
# @example Single region query
225+
# bam.query("chr1:100-200") { |r| puts r.qname }
226+
# bam.query("chr1", 100, 200) { |r| puts r.qname }
227+
#
228+
# @example Multi-region query
229+
# bam.query(["chr1:100-200", "chr2:500-600"]) { |r| puts r.qname }
216230
def query(region, beg = nil, end_ = nil, copy: false, &block)
217231
check_closed
218232
raise "Index file is required to call the query method." unless index_loaded?
219233

220-
if beg && end_
221-
tid = header.get_tid(region)
222-
queryi(tid, beg, end_, copy:, &block)
223-
elsif beg.nil? && end_.nil?
224-
querys(region, copy:, &block)
234+
case region
235+
when Array
236+
raise ArgumentError, "beg and end_ cannot be used with array of regions" if beg || end_
237+
238+
query_regions(region, copy:, &block)
239+
when String
240+
if beg && end_
241+
tid = header.get_tid(region)
242+
queryi(tid, beg, end_, copy:, &block)
243+
elsif beg.nil? && end_.nil?
244+
querys(region, copy:, &block)
245+
else
246+
raise ArgumentError, "beg and end_ must be specified together"
247+
end
225248
else
226-
raise ArgumentError, "beg and end_ must be specified together"
249+
raise ArgumentError, "region must be String or Array"
227250
end
228251
end
229252

@@ -266,6 +289,15 @@ def querys(region, copy: false, &block)
266289
end
267290
end
268291

292+
# Multi-region query implementation
293+
def query_regions(regions, copy: false, &block)
294+
if copy
295+
query_regions_copy(regions, &block)
296+
else
297+
query_regions_reuse(regions, &block)
298+
end
299+
end
300+
269301
# Internal: yield a single reused Record over the entire file.
270302
# The underlying bam1_t is mutated on each iteration for speed.
271303
def each_record_reuse
@@ -355,5 +387,27 @@ def query_copy(qiter)
355387
ensure
356388
LibHTS.hts_itr_destroy(qiter)
357389
end
390+
391+
# Multi-region query using sequential single-region queries
392+
# Note: This is a fallback implementation. Ideally we would use sam_itr_regarray
393+
# but there seem to be issues with the multi-region iterator in the current setup.
394+
def query_regions_reuse(regions, &block)
395+
return to_enum(__method__, regions) unless block_given?
396+
397+
regions.each do |region|
398+
querys_reuse(region, &block)
399+
end
400+
self
401+
end
402+
403+
# Multi-region query with copied Records using sequential queries
404+
def query_regions_copy(regions, &block)
405+
return to_enum(__method__, regions) unless block_given?
406+
407+
regions.each do |region|
408+
querys_copy(region, &block)
409+
end
410+
self
411+
end
358412
end
359413
end

test/bam_test.rb

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,38 @@ def assert_cram_getter_equal(ft, method_name, &block)
289289
end
290290
end
291291

292+
# CRAM multi-region iterator has issues, only test BAM formats
293+
%i[bam_string bam_uri].each do |ft|
294+
define_method "test_query_multi_regions_#{ft}" do
295+
arr = []
296+
bam(ft).query(["chr1:100-200", "chr2:350-700"]) do |aln|
297+
arr << aln.pos
298+
end
299+
# Should get records from both regions
300+
assert_includes arr, 341
301+
assert_includes arr, 658
302+
end
303+
304+
define_method "test_query_multi_regions_copy_#{ft}" do
305+
arr = []
306+
bam(ft).query(["chr1:100-200", "chr2:350-700"], copy: true) do |aln|
307+
arr << aln.pos
308+
end
309+
# Should get records from both regions
310+
assert_includes arr, 341
311+
assert_includes arr, 658
312+
end
313+
314+
define_method "test_query_multi_regions_single_#{ft}" do
315+
# Single region as array should also work
316+
arr = []
317+
bam(ft).query(["chr2:350-700"]) do |aln|
318+
arr << aln.pos
319+
end
320+
assert_equal [341, 658], arr
321+
end
322+
end
323+
292324
def test_initialize_no_file_bam
293325
stderr_old = $stderr.dup
294326
$stderr.reopen(File::NULL)

0 commit comments

Comments
 (0)