Skip to content

Commit 2030863

Browse files
committed
Update BCF INFO handling with int64 support and enhance tests for new functionalities
1 parent fc4dd11 commit 2030863

6 files changed

Lines changed: 114 additions & 28 deletions

File tree

TUTORIAL.md

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -254,6 +254,23 @@ in.close
254254
out.close
255255
```
256256

257+
Update INFO fields
258+
259+
```ruby
260+
bcf = HTS::Bcf.open("in.vcf")
261+
record = bcf.first
262+
info = record.info
263+
264+
info.update_int("DP", [30])
265+
# info.update_int64("DP", [2**40]) # Not implemented yet: currently raises NotImplementedError (BCF_HT_LONG)
266+
info.update_float("AF", [0.25])
267+
info.update_string("STR", "sample")
268+
info.update_flag("SOMATIC", true)
269+
270+
info["DP"] = 100
271+
# Out-of-int32 integers raise RangeError here; update_int64 is not implemented yet (raises NotImplementedError)
272+
```
273+
257274
Writing and modifying auxiliary tags
258275

259276
```ruby

lib/hts/bcf/format.rb

Lines changed: 12 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,6 @@ class Bcf < Hts
55
class Format
66
def initialize(record)
77
@record = record
8-
@p1 = FFI::MemoryPointer.new(:pointer) # FIXME: naming
98
end
109

1110
# @note: Why is this method named "get" instead of "fetch"?
@@ -14,15 +13,22 @@ def initialize(record)
1413
# I think they are better than `fetch_int` and `fetch_float`.
1514
def get(key, type = nil)
1615
n = FFI::MemoryPointer.new(:int)
17-
p1 = @p1
16+
p1 = FFI::MemoryPointer.new(:pointer)
17+
p1.write_pointer(FFI::Pointer::NULL)
1818
h = @record.header.struct
1919
r = @record.struct
2020

21-
format_values = proc do |typ|
21+
format_values = proc do |typ, reader|
2222
ret = LibHTS.bcf_get_format_values(h, r, key, p1, n, typ)
2323
return nil if ret < 0 # return from method.
2424

25-
p1.read_pointer
25+
dst = p1.read_pointer
26+
begin
27+
reader.call(dst, n.read_int)
28+
ensure
29+
LibHTS.hts_free(dst) unless dst.null?
30+
p1.write_pointer(FFI::Pointer::NULL)
31+
end
2632
end
2733

2834
# The GT FORMAT field is special in that it is marked as a string in the header,
@@ -35,11 +41,9 @@ def get(key, type = nil)
3541

3642
case type&.to_sym
3743
when :int, :int32
38-
format_values.call(LibHTS::BCF_HT_INT)
39-
.read_array_of_int32(n.read_int)
44+
format_values.call(LibHTS::BCF_HT_INT, ->(dst, len) { dst.read_array_of_int32(len) })
4045
when :float, :real
41-
format_values.call(LibHTS::BCF_HT_REAL)
42-
.read_array_of_float(n.read_int)
46+
format_values.call(LibHTS::BCF_HT_REAL, ->(dst, len) { dst.read_array_of_float(len) })
4347
when :flag
4448
raise NotImplementedError, "Flag type not implemented yet. " \
4549
"Please file an issue on GitHub."

lib/hts/bcf/info.rb

Lines changed: 51 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,6 @@ class Bcf < Hts
66
class Info
77
def initialize(record)
88
@record = record
9-
@p1 = FFI::MemoryPointer.new(:pointer) # FIXME: naming
109
end
1110

1211
# @note Specify the type. If you don't specify a type, it will still work, but it will be slower.
@@ -16,37 +15,49 @@ def initialize(record)
1615
# I think they are better than `fetch_int` and `fetch_float`.
1716
def get(key, type = nil)
1817
n = FFI::MemoryPointer.new(:int)
19-
p1 = @p1
18+
p1 = FFI::MemoryPointer.new(:pointer)
19+
p1.write_pointer(FFI::Pointer::NULL)
2020
h = @record.header.struct
2121
r = @record.struct
2222

23-
info_values = proc do |typ|
23+
info_values = proc do |typ, reader|
2424
ret = LibHTS.bcf_get_info_values(h, r, key, p1, n, typ)
2525
return nil if ret < 0 # return from method.
2626

27-
p1.read_pointer
27+
dst = p1.read_pointer
28+
begin
29+
reader.call(dst, n.read_int)
30+
ensure
31+
LibHTS.hts_free(dst) unless dst.null?
32+
p1.write_pointer(FFI::Pointer::NULL)
33+
end
2834
end
2935

3036
type ||= ht_type_to_sym(get_info_type(key))
3137

3238
case type&.to_sym
3339
when :int, :int32
34-
info_values.call(LibHTS::BCF_HT_INT)
35-
.read_array_of_int32(n.read_int)
40+
info_values.call(LibHTS::BCF_HT_INT, ->(dst, len) { dst.read_array_of_int32(len) })
41+
when :int64, :long
42+
info_values.call(LibHTS::BCF_HT_LONG, ->(dst, len) { dst.read_array_of_int64(len) })
3643
when :float, :real
37-
info_values.call(LibHTS::BCF_HT_REAL)
38-
.read_array_of_float(n.read_int)
44+
info_values.call(LibHTS::BCF_HT_REAL, ->(dst, len) { dst.read_array_of_float(len) })
3945
when :flag, :bool
40-
case ret = LibHTS.bcf_get_info_flag(h, r, key, p1, n)
41-
when 1 then true
42-
when 0 then false
43-
when -1 then nil
44-
else
45-
raise "Unknown return value from bcf_get_info_flag: #{ret}"
46+
begin
47+
case ret = LibHTS.bcf_get_info_flag(h, r, key, p1, n)
48+
when 1 then true
49+
when 0 then false
50+
when -1 then nil
51+
else
52+
raise "Unknown return value from bcf_get_info_flag: #{ret}"
53+
end
54+
ensure
55+
dst = p1.read_pointer
56+
LibHTS.hts_free(dst) unless dst.null?
57+
p1.write_pointer(FFI::Pointer::NULL)
4658
end
4759
when :string, :str
48-
info_values.call(LibHTS::BCF_HT_STR)
49-
.read_string
60+
info_values.call(LibHTS::BCF_HT_STR, ->(dst, _len) { dst.read_string })
5061
end
5162
end
5263

@@ -60,6 +71,11 @@ def get_float(key)
6071
get(key, :float)
6172
end
6273

74+
# For compatibility with HTS.cr.
75+
def get_int64(key)
76+
get(key, :int64)
77+
end
78+
6379
# For compatibility with HTS.cr.
6480
def get_string(key)
6581
get(key, :string)
@@ -89,6 +105,9 @@ def []=(key, value)
89105
when true, false
90106
update_flag(key, value)
91107
when Integer
108+
unless int32_range?(value)
109+
raise RangeError, "Integer out of int32 range for []=. Current htslib backend does not support int64 INFO update."
110+
end
92111
update_int(key, [value])
93112
when Float
94113
update_float(key, [value])
@@ -98,6 +117,9 @@ def []=(key, value)
98117
if value.empty?
99118
raise ArgumentError, "Cannot set INFO field to empty array. Use nil to delete."
100119
elsif value.all? { |v| v.is_a?(Integer) }
120+
unless value.all? { |v| int32_range?(v) }
121+
raise RangeError, "Integer array contains out-of-int32 values for []=. Current htslib backend does not support int64 INFO update."
122+
end
101123
update_int(key, value)
102124
elsif value.all? { |v| v.is_a?(Numeric) }
103125
update_float(key, value)
@@ -130,6 +152,14 @@ def update_int(key, values)
130152
ret
131153
end
132154

155+
# Update INFO field with int64 value(s).
156+
# @note int64 INFO values are primarily relevant for VCF output.
157+
# @param key [String] INFO tag name
158+
# @param values [Array<Integer>] integer values (use single-element array for scalar)
159+
def update_int64(key, values)
160+
raise NotImplementedError, "htslib backend does not implement int64 INFO update (BCF_HT_LONG)"
161+
end
162+
133163
# Update INFO field with float value(s).
134164
# For compatibility with HTS.cr.
135165
# @param key [String] INFO tag name
@@ -295,9 +325,13 @@ def ht_type_to_sym(t)
295325
when LibHTS::BCF_HT_INT then :int
296326
when LibHTS::BCF_HT_REAL then :float
297327
when LibHTS::BCF_HT_STR then :string
298-
when LibHTS::BCF_HT_LONG then :float
328+
when LibHTS::BCF_HT_LONG then :int64
299329
end
300330
end
331+
332+
def int32_range?(value)
333+
value >= -2_147_483_648 && value <= 2_147_483_647
334+
end
301335
end
302336
end
303337
end

lib/hts/libhts/vcf_funcs.rb

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -97,6 +97,11 @@ def bcf_update_info_int32(hdr, line, key, values, n)
9797
bcf_update_info(hdr, line, key, values, n, BCF_HT_INT)
9898
end
9999

100+
# Function for updating INFO fields (int64; VCF only)
101+
def bcf_update_info_int64(hdr, line, key, values, n)
102+
bcf_update_info(hdr, line, key, values, n, BCF_HT_LONG)
103+
end
104+
100105
# Function for updating INFO fields
101106
def bcf_update_info_float(hdr, line, key, values, n)
102107
bcf_update_info(hdr, line, key, values, n, BCF_HT_REAL)
@@ -175,6 +180,11 @@ def bcf_get_info_int32(hdr, line, tag, dst, ndst)
175180
bcf_get_info_values(hdr, line, tag, dst, ndst, BCF_HT_INT)
176181
end
177182

183+
# Get INFO values (int64; VCF only)
184+
def bcf_get_info_int64(hdr, line, tag, dst, ndst)
185+
bcf_get_info_values(hdr, line, tag, dst, ndst, BCF_HT_LONG)
186+
end
187+
178188
# Get INFO values
179189
def bcf_get_info_float(hdr, line, tag, dst, ndst)
180190
bcf_get_info_values(hdr, line, tag, dst, ndst, BCF_HT_REAL)

test/bcf/info_test.rb

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -27,8 +27,11 @@ def test_get_with_type
2727

2828
def test_get_like_crystal
2929
assert_equal [1, 2, 3, 4], @info.get_int("DP4")
30+
assert_equal [1, 2, 3, 4], @info.get_int64("DP4")
3031
assert_equal [4], @info.get_int("AN")
32+
assert_equal [4], @info.get_int64("AN")
3133
assert_equal [2], @info.get_int("AC")
34+
assert_equal [2], @info.get_int64("AC")
3235
assert_equal true, @info.get_flag("INDEL")
3336
assert_equal "test", @info.get_string("STR")
3437
end
@@ -53,6 +56,8 @@ def test_get_unknown_key
5356
assert_nil @info.get("UNKNOWN")
5457
assert_nil @info.get("UNKNOWN", :int)
5558
assert_nil @info.get_int("UNKNOWN")
59+
assert_nil @info.get("UNKNOWN", :int64)
60+
assert_nil @info.get_int64("UNKNOWN")
5661
assert_nil @info.get("UNKNOWN", :float)
5762
assert_nil @info.get_float("UNKNOWN")
5863
assert_nil @info.get("UNKNOWN", :flag)

test/bcf_test.rb

Lines changed: 19 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -230,9 +230,25 @@ def test_info_update_float
230230
end
231231

232232
def test_info_update_string
233-
# String INFO fields are rare in VCF, skip for now
234-
# (would need to add string INFO to header first)
235-
skip "String INFO fields require header definition"
233+
require "tempfile"
234+
235+
Tempfile.create(["test_bcf_info_string_source", ".vcf"]) do |src|
236+
src.write <<~VCF
237+
##fileformat=VCFv4.2
238+
##contig=<ID=1,length=1000>
239+
##INFO=<ID=STRX,Number=1,Type=String,Description="string info test">
240+
#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO
241+
1\t10\t.\tA\tT\t.\tPASS\t.
242+
VCF
243+
src.flush
244+
245+
bcf = HTS::Bcf.new(src.path)
246+
record = bcf.first
247+
info = record.info
248+
info.update_string("STRX", "hello")
249+
assert_equal "hello", info.get_string("STRX")
250+
bcf.close
251+
end
236252
end
237253

238254
def test_info_update_flag

0 commit comments

Comments
 (0)