33require_relative "../htslib"
44
55require_relative "hts"
6+ require_relative "bcf/errors"
67require_relative "bcf/header"
78require_relative "bcf/info"
89require_relative "bcf/format"
@@ -38,16 +39,16 @@ def self.build_index(file_name, index_name = nil, min_shift = 14, threads = 0, v
3839
3940 case LibHTS . bcf_index_build3 ( file_name , index_name , min_shift , threads )
4041 when 0 # successful
41- when -1 then raise "indexing failed"
42- when -2 then raise "opening #{ file_name } failed"
43- when -3 then raise "format not indexable"
44- when -4 then raise "failed to create and/ or save the index"
45- else raise "unknown error"
42+ when -1 then raise IndexError , "Indexing failed for #{ file_name } "
43+ when -2 then raise IndexError , "Opening #{ file_name } failed while building the index "
44+ when -3 then raise IndexError , " #{ file_name } is not in an indexable format "
45+ when -4 then raise IndexError , "Failed to create or save the index for #{ file_name } "
46+ else raise IndexError , "Unknown index build error for #{ file_name } "
4647 end
4748 end
4849
4950 def initialize ( file_name , mode = "r" , index : nil , threads : nil ,
50- build_index : false )
51+ build_index : false , subset : nil )
5152 if block_given?
5253 message = "HTS::Bcf.new() does not take block; Please use HTS::Bcf.open() instead"
5354 raise message
@@ -61,13 +62,16 @@ def initialize(file_name, mode = "r", index: nil, threads: nil,
6162 @nthreads = threads
6263 @hts_file = LibHTS . hts_open ( @file_name , mode )
6364
64- raise Errno :: ENOENT , "Failed to open #{ @file_name } " if @hts_file . null?
65+ raise OpenError , "Failed to open #{ @file_name } " if @hts_file . null?
6566
6667 set_threads ( threads ) if threads
6768
69+ raise SubsetError , "Sample subsetting is only available when reading BCF/VCF files" if subset && @mode [ 0 ] == "w"
70+
6871 return if @mode [ 0 ] == "w"
6972
70- @header = Bcf ::Header . new ( @hts_file )
73+ @read_header = Bcf ::Header . new ( @hts_file )
74+ @header = subset ? @read_header . subset ( subset ) : @read_header
7175 build_index ( index ) if build_index
7276 @idx = load_index ( index )
7377 @start_position = tell
@@ -219,8 +223,8 @@ def each(copy: false, &block)
219223 def query ( region , beg = nil , end_ = nil , copy : false , &block )
220224 check_closed
221225
222- raise "query is only available for BCF files" unless file_format == "bcf"
223- raise "Index file is required to call the query method. " unless index_loaded?
226+ raise QueryError , "Query is only available for BCF files" unless file_format == "bcf"
227+ raise MissingIndexError , "Index file is required to call the query method for #{ @file_name } " unless index_loaded?
224228
225229 case region
226230 when Array
@@ -269,7 +273,7 @@ def queryi_reuse(tid, beg, end_, &block)
269273 return to_enum ( __method__ , tid , beg , end_ ) unless block_given?
270274
271275 qiter = LibHTS . bcf_itr_queryi ( @idx , tid , beg , end_ )
272- raise "Failed to query region #{ tid } #{ beg } #{ end_ } " if qiter . null?
276+ raise QueryError , "Failed to query region #{ tid } : #{ beg } - #{ end_ } in #{ @file_name } " if qiter . null?
273277
274278 query_reuse_yield ( qiter , &block )
275279 self
@@ -278,8 +282,8 @@ def queryi_reuse(tid, beg, end_, &block)
278282 def querys_reuse ( region , &block )
279283 return to_enum ( __method__ , region ) unless block_given?
280284
281- qiter = LibHTS . bcf_itr_querys ( @idx , header , region )
282- raise "Failed to query region #{ region } " if qiter . null?
285+ qiter = LibHTS . bcf_itr_querys ( @idx , read_header , region )
286+ raise QueryError , "Failed to query region #{ region . inspect } in #{ @file_name } " if qiter . null?
283287
284288 query_reuse_yield ( qiter , &block )
285289 self
@@ -303,6 +307,7 @@ def query_reuse_yield(qiter)
303307 break if slen == -1
304308 raise if slen < -1
305309
310+ apply_subset! ( record )
306311 yield record
307312 end
308313 ensure
@@ -314,7 +319,7 @@ def queryi_copy(tid, beg, end_, &block)
314319 return to_enum ( __method__ , tid , beg , end_ ) unless block_given?
315320
316321 qiter = LibHTS . bcf_itr_queryi ( @idx , tid , beg , end_ )
317- raise "Failed to query region #{ tid } #{ beg } #{ end_ } " if qiter . null?
322+ raise QueryError , "Failed to query region #{ tid } : #{ beg } - #{ end_ } in #{ @file_name } " if qiter . null?
318323
319324 query_copy_yield ( qiter , &block )
320325 self
@@ -323,8 +328,8 @@ def queryi_copy(tid, beg, end_, &block)
323328 def querys_copy ( region , &block )
324329 return to_enum ( __method__ , region ) unless block_given?
325330
326- qiter = LibHTS . bcf_itr_querys ( @idx , header , region )
327- raise "Failed to query region #{ region } " if qiter . null?
331+ qiter = LibHTS . bcf_itr_querys ( @idx , read_header , region )
332+ raise QueryError , "Failed to query region #{ region . inspect } in #{ @file_name } " if qiter . null?
328333
329334 query_copy_yield ( qiter , &block )
330335 self
@@ -346,7 +351,9 @@ def query_copy_yield(qiter)
346351 break if slen == -1
347352 raise if slen < -1
348353
349- yield Record . new ( header , bcf1 )
354+ record = Record . new ( header , bcf1 )
355+ apply_subset! ( record )
356+ yield record
350357 end
351358 ensure
352359 LibHTS . bcf_itr_destroy ( qiter )
@@ -359,7 +366,10 @@ def each_record_reuse
359366
360367 bcf1 = LibHTS . bcf_init
361368 record = Record . new ( header , bcf1 )
362- yield record while LibHTS . bcf_read ( @hts_file , header , bcf1 ) != -1
369+ while LibHTS . bcf_read ( @hts_file , read_header , bcf1 ) != -1
370+ apply_subset! ( record )
371+ yield record
372+ end
363373 self
364374 end
365375
@@ -368,11 +378,25 @@ def each_record_copy
368378
369379 return to_enum ( __method__ ) unless block_given?
370380
371- while LibHTS . bcf_read ( @hts_file , header , bcf1 = LibHTS . bcf_init ) != -1
381+ while LibHTS . bcf_read ( @hts_file , read_header , bcf1 = LibHTS . bcf_init ) != -1
372382 record = Record . new ( header , bcf1 )
383+ apply_subset! ( record )
373384 yield record
374385 end
375386 self
376387 end
388+
# Returns the header to hand to LibHTS when reading from the file.
#
# This is `@read_header` when it has been set; otherwise it falls back to
# `header`. In the visible code `initialize` assigns `@read_header` only on
# the read path (it returns early for write mode), so the fallback covers
# instances where that assignment never happened.
#
# The visible callers pass this value to `LibHTS.bcf_read` and
# `LibHTS.bcf_itr_querys`, while `Record.new` receives `header` instead.
# NOTE(review): presumably this keeps decoding tied to the on-disk sample
# layout when a subset header is active -- confirm against Bcf::Header#subset.
389+ def read_header
390+ @read_header || header
391+ end
392+
# Applies the active sample subset to +record+ via `LibHTS.bcf_subset`.
#
# Does nothing (returns nil) unless `header.subset?` is truthy.
#
# @param record [Record] object whose `struct` is passed to `bcf_subset`
#   (presumably modified in place by htslib -- TODO confirm).
# @return [nil]
# @raise [SubsetError] when `bcf_subset` returns a negative status code.
393+ def apply_subset! ( record )
394+ return unless header . subset?
395+
# A missing imap pointer is replaced with an explicit NULL pointer so the
# FFI call always receives a pointer argument.
396+ rc = LibHTS . bcf_subset ( header . struct , record . struct , header . subset_sample_count , header . subset_imap_pointer || ::FFI ::Pointer ::NULL )
# Any non-negative return code is treated as success.
397+ return if rc >= 0
398+
399+ raise SubsetError , "Failed to subset samples #{ header . subset_samples . inspect } while reading #{ @file_name } "
400+ end
377401 end
378402end
0 commit comments