Skip to content

Commit 0ee0306

Browse files
committed
Refactor Faidx class
1 parent bb9b65b commit 0ee0306

3 files changed

Lines changed: 123 additions & 30 deletions

File tree

lib/hts/faidx.rb

Lines changed: 82 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55

66
module HTS
77
class Faidx
8+
include Enumerable
9+
810
attr_reader :file_name
911

1012
def self.open(*args, **kw)
@@ -20,12 +22,9 @@ def self.open(*args, **kw)
2022
end
2123

2224
def initialize(file_name)
23-
if block_given?
24-
message = "HTS::Faidx.new() does not take block; Please use HTS::Faidx.open() instead"
25-
raise message
26-
end
25+
raise ArgumentError, "HTS::Faidx.new() does not take block; Please use HTS::Faidx.open() instead" if block_given?
2726

28-
@file_name = file_name
27+
@file_name = file_name.freeze
2928
@fai = case File.extname(@file_name)
3029
when ".fq", ".fastq"
3130
LibHTS.fai_load_format(@file_name, 2)
@@ -52,105 +51,160 @@ def closed?
5251
end
5352

5453
# Read the +:format+ field from the loaded index struct.
# @raise [IOError] if the index has been closed
def file_format
  check_closed
  format = @fai[:format]
  format
end
5757

58+
# Walk through the index, yielding one Sequence object per sequence name.
# @yield [Sequence] each sequence object
# @return [Enumerator] if no block given
# @raise [IOError] if a block is given and the index has been closed
def each
  return enum_for(:each) unless block_given?

  check_closed
  names.each do |seq_name|
    yield self[seq_name]
  end
end
67+
5868
# Count the sequences recorded in the index.
# @return [Integer] the number of sequences
# @raise [IOError] if the index has been closed
def length
  check_closed
  nseq = LibHTS.faidx_nseq(@fai)
  nseq
end

alias size length
6375

64-
# return the length of the requested chromosome.
76+
# Collect the name of every sequence in the index, by position 0...length.
# @return [Array<String>] sequence names
# @raise [IOError] if the index has been closed
def names
  check_closed
  total = length
  Array.new(total) do |idx|
    LibHTS.faidx_iseq(@fai, idx)
  end
end

alias keys names
7084

85+
# Tell whether the index contains a sequence with the given name.
# @param key [String, Symbol] sequence name
# @return [Boolean] true if the sequence exists
# @raise [IOError] if the index has been closed
# @raise [ArgumentError] if key is neither a String nor a Symbol
# @raise [HTS::Error] if faidx_has_seq returns something other than 0 or 1
def has_key?(key)
  check_closed
  acceptable = key.is_a?(String) || key.is_a?(Symbol)
  raise ArgumentError, "Expect chrom to be String or Symbol" unless acceptable

  status = LibHTS.faidx_has_seq(@fai, key.to_s)
  return true if status == 1
  return false if status == 0

  raise HTS::Error, "Unexpected return value from faidx_has_seq"
end
8199

100+
# Get a Sequence object by name or index.
# @param name [String, Symbol, Integer] sequence name, or 0-based position in the index
# @return [Sequence] the sequence object
# @raise [IOError] if the index has been closed
# @raise [ArgumentError] if the sequence does not exist or the index is out of range
def [](name)
  check_closed
  if name.is_a?(Integer)
    # Bounds-check before calling into the C library: the raw integer is
    # passed straight to faidx_iseq, and nothing else in this method guards
    # against positions outside 0...length.
    # NOTE(review): htslib's faidx_iseq appears to index its name table
    # without a range check -- confirm against faidx.h / faidx.c.
    raise ArgumentError, "Sequence index out of range: #{name}" unless (0...length).cover?(name)

    name = LibHTS.faidx_iseq(@fai, name)
  end
  Sequence.new(self, name)
end
86109

87-
# return the length of the requested chromosome.
110+
# Look up how long the requested chromosome is.
# @param chrom [String, Symbol] chromosome name
# @return [Integer] sequence length
# @raise [IOError] if the index has been closed
# @raise [ArgumentError] if chrom is neither a String nor a Symbol,
#   or if faidx_seq_len reports -1 for it
def seq_len(chrom)
  check_closed
  case chrom
  when String, Symbol
    chrom_name = chrom.to_s
  else
    raise ArgumentError, "Expect chrom to be String or Symbol"
  end

  bases = LibHTS.faidx_seq_len(@fai, chrom_name)
  return bases unless bases == -1

  raise ArgumentError, "Sequence not found: #{chrom_name}"
end
95124

96-
# @overload seq(name)
125+
# @overload fetch_seq(name)
#   Fetch the sequence as a String.
#   @param name [String, Symbol] chr1:0-10
#   @return [String] the sequence
# @overload fetch_seq(name, start, stop)
#   Fetch the sequence as a String.
#   @param name [String, Symbol] the name of the chromosome
#   @param start [Integer] the start position of the sequence (0-based)
#   @param stop [Integer] the end position of the sequence (0-based)
#   @return [String] the sequence
# @raise [IOError] if the index has been closed
# @raise [ArgumentError] if the range is invalid or the reported length is -2
# @raise [HTS::Error] if the reported length is -1
def fetch_seq(name, start = nil, stop = nil)
  check_closed
  name = name.to_s
  # The *64 fetch functions report the length through a 64-bit (hts_pos_t)
  # out-parameter, so the buffer must be 8 bytes wide and read back as int64.
  # A 4-byte :int buffer would be overrun by the C side.
  rlen = FFI::MemoryPointer.new(:int64)

  result =
    if start.nil? && stop.nil?
      LibHTS.fai_fetch64(@fai, name, rlen)
    else
      validate_range!(name, start, stop)
      LibHTS.faidx_fetch_seq64(@fai, name, start, stop, rlen)
    end

  # Negative lengths are error codes rather than sizes.
  case rlen.read_int64
  when -2 then raise ArgumentError, "Invalid chromosome name: #{name}"
  when -1 then raise HTS::Error, "Error fetching sequence: #{name}:#{start}-#{stop}"
  end

  result
end
128154

129155
alias seq fetch_seq
130156

157+
# @overload fetch_qual(name)
#   Fetch the quality string.
#   @param name [String, Symbol] sequence name
#   @return [String] the quality string
# @overload fetch_qual(name, start, stop)
#   Fetch the quality string.
#   @param name [String, Symbol] the name of the chromosome
#   @param start [Integer] the start position of the sequence (0-based)
#   @param stop [Integer] the end position of the sequence (0-based)
#   @return [String] the quality string
# @raise [IOError] if the index has been closed
# @raise [ArgumentError] if the range is invalid or the reported length is -2
# @raise [HTS::Error] if the reported length is -1
def fetch_qual(name, start = nil, stop = nil)
  check_closed
  name = name.to_s
  # The *64 fetch functions report the length through a 64-bit (hts_pos_t)
  # out-parameter, so the buffer must be 8 bytes wide and read back as int64.
  # A 4-byte :int buffer would be overrun by the C side.
  rlen = FFI::MemoryPointer.new(:int64)

  result =
    if start.nil? && stop.nil?
      LibHTS.fai_fetchqual64(@fai, name, rlen)
    else
      validate_range!(name, start, stop)
      LibHTS.faidx_fetch_qual64(@fai, name, start, stop, rlen)
    end

  # Negative lengths are error codes rather than sizes.
  case rlen.read_int64
  when -2 then raise ArgumentError, "Invalid chromosome name: #{name}"
  when -1 then raise HTS::Error, "Error fetching quality: #{name}:#{start}-#{stop}"
  end

  result
end
153186

154187
alias qual fetch_qual
188+
189+
private
190+
191+
# Guard clause for the public methods: refuse to operate once closed? is true.
# @raise [IOError] if the index has been closed
def check_closed
  return unless closed?

  raise IOError, "closed Faidx"
end
194+
195+
# Validate the coordinates of a range query against the named sequence.
# @param name [String] sequence name
# @param start [Integer] start position (0-based)
# @param stop [Integer] stop position (0-based)
# @raise [ArgumentError] if either bound is missing or negative, if
#   start > stop, or if stop is not below the sequence length
def validate_range!(name, start, stop)
  # Callers reach this branch when only one of start/stop was supplied;
  # reject that explicitly instead of failing later with NoMethodError on nil.
  raise ArgumentError, "Expect start and stop to be given together" if start.nil? || stop.nil?
  raise ArgumentError, "Expect start to be >= 0" if start < 0
  raise ArgumentError, "Expect stop to be >= 0" if stop < 0
  raise ArgumentError, "Expect start to be <= stop" if start > stop

  len = seq_len(name)
  raise ArgumentError, "Sequence not found: #{name}" if len.nil?
  raise ArgumentError, "Expect stop to be < seq_len (#{len})" if stop >= len
end
155209
end
156210
end

lib/hts/faidx/sequence.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ class Sequence
44
attr_reader :name, :faidx
55

66
def initialize(faidx, name)
7-
raise unless faidx.has_key?(name)
7+
raise ArgumentError, "Sequence not found: #{name}" unless faidx.has_key?(name)
88

99
@faidx = faidx
1010
@name = name

test/faidx_test.rb

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ def test_seq_len
6060
assert_equal 500, @fai.seq_len("chr1")
6161
assert_equal 500, @fai.seq_len(:chr1)
6262
assert_raises(ArgumentError) { @fai.seq_len(nil) }
63-
assert_nil @fai.seq_len("chr")
63+
assert_raises(ArgumentError) { @fai.seq_len("chr") }
6464
end
6565

6666
def test_names
@@ -88,4 +88,43 @@ def test_qual
8888
fq = HTS::Faidx.new(Fixtures["moo.fastq"])
8989
assert_equal "2222222222222222222222222222222222222222", fq.qual(fq.names.first)
9090
end
91+
92+
# Iterating with a block yields a Sequence for each of the 5 indexed entries.
def test_each
  yielded = 0
  @fai.each do |sequence|
    assert_instance_of HTS::Faidx::Sequence, sequence
    yielded += 1
  end

  assert_equal 5, yielded
end
100+
101+
# Without a block, #each hands back an Enumerator covering all 5 entries.
def test_each_enumerator
  enumerator = @fai.each

  assert_instance_of Enumerator, enumerator
  assert_equal 5, enumerator.count
end
106+
107+
# Every public accessor guarded by check_closed must raise IOError once the
# index has been closed.
def test_closed_object_raises
  @fai.close
  # file_format is guarded by check_closed as well, so cover it here too.
  assert_raises(IOError) { @fai.file_format }
  assert_raises(IOError) { @fai.length }
  assert_raises(IOError) { @fai.names }
  assert_raises(IOError) { @fai.has_key?("chr1") }
  assert_raises(IOError) { @fai["chr1"] }
  assert_raises(IOError) { @fai.seq_len("chr1") }
  assert_raises(IOError) { @fai.seq("chr1") }
  assert_raises(IOError) { @fai.qual("chr1") }
  assert_raises(IOError) { @fai.each {} }
end
118+
119+
# Each argument triple below must be rejected by #seq with ArgumentError.
def test_invalid_range
  rejected = [
    ["chr1", -1, 10],        # negative start
    ["chr1", 0, -1],         # negative stop
    ["chr1", 10, 5],         # start after stop
    ["chr1", 0, 500],        # stop equals the sequence length (500)
    ["nonexistent", 0, 10]   # name not in the index
  ]

  rejected.each do |chrom, start, stop|
    assert_raises(ArgumentError) { @fai.seq(chrom, start, stop) }
  end
end
126+
127+
# Passing a block to Faidx.new must be rejected with ArgumentError.
def test_initialize_with_block_raises
  assert_raises(ArgumentError) do
    HTS::Faidx.new(Fixtures["random.fa"]) { nil }
  end
end
91130
end

0 commit comments

Comments
 (0)