Skip to content

Commit 09ec3cb

Browse files
committed
Implement add_pg method enhancements with validation and tests for duplicate IDs, unknown parents, and invalid characters
1 parent 8e59545 commit 09ec3cb

2 files changed

Lines changed: 116 additions & 6 deletions

File tree

lib/hts/bam/header.rb

Lines changed: 84 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -132,12 +132,94 @@ def get_tid(name)
132132
# header.add_pg("bwa", VN: "0.7.17", CL: "bwa mem ref.fa read.fq")
133133
# header.add_pg("samtools", VN: "1.15", PP: "bwa")
134134
def add_pg(program_name, **options)
135-
args = options.flat_map { |k, v| [:string, k.to_s, :string, v.to_s] }
136-
LibHTS.sam_hdr_add_pg(@sam_hdr, program_name, *args, :pointer, FFI::Pointer::NULL)
135+
line = build_pg_line(program_name.to_s, options)
136+
result = LibHTS.sam_hdr_add_lines(@sam_hdr, line, line.bytesize)
137+
raise "Failed to add @PG line" if result < 0
138+
139+
self
137140
end
138141

139142
private
140143

144+
def build_pg_line(program_name, options)
145+
ordered_tags = normalize_pg_tags(program_name, options)
146+
"@PG\t#{ordered_tags.map { |key, value| "#{key}:#{value}" }.join("\t")}\n"
147+
end
148+
149+
def normalize_pg_tags(program_name, options)
150+
existing_ids = pg_ids
151+
tag_map = options.each_with_object({}) do |(key, value), tags|
152+
string_key = key.to_s
153+
string_value = value.to_s
154+
validate_pg_tag(string_key, string_value)
155+
tags[string_key] = string_value
156+
end
157+
158+
pg_id = resolve_pg_id(program_name, tag_map, existing_ids)
159+
validate_pg_parent(tag_map["PP"], existing_ids)
160+
161+
ordered_tags = []
162+
ordered_tags << ["ID", pg_id]
163+
ordered_tags << ["PN", tag_map.fetch("PN", program_name)]
164+
tag_map.each do |key, value|
165+
next if key == "ID" || key == "PN"
166+
167+
ordered_tags << [key, value]
168+
end
169+
ordered_tags
170+
end
171+
172+
def validate_pg_tag(key, value)
173+
raise ArgumentError, "PG tag keys must not be empty" if key.empty?
174+
return unless value.include?("\t") || value.include?("\n") || value.include?("\r")
175+
176+
raise ArgumentError, "PG tag values must not contain tabs or newlines"
177+
end
178+
179+
def resolve_pg_id(program_name, tag_map, existing_ids)
180+
explicit_id = tag_map["ID"]
181+
if explicit_id
182+
raise ArgumentError, "PG ID already exists: #{explicit_id}" if existing_ids.include?(explicit_id)
183+
184+
explicit_id
185+
else
186+
next_pg_id(program_name, existing_ids)
187+
end
188+
end
189+
190+
def validate_pg_parent(parent_id, existing_ids)
191+
return unless parent_id
192+
return if existing_ids.include?(parent_id)
193+
194+
raise ArgumentError, "Unknown PG parent: #{parent_id}"
195+
end
196+
197+
def next_pg_id(program_name, existing_ids)
198+
candidate = program_name
199+
suffix = 0
200+
while existing_ids.include?(candidate)
201+
suffix += 1
202+
candidate = "#{program_name}.#{suffix}"
203+
end
204+
candidate
205+
end
206+
207+
def pg_ids
208+
ids = []
209+
to_s.each_line do |line|
210+
next unless line.start_with?("@PG\t")
211+
212+
line.chomp.split("\t")[1..].each do |field|
213+
key, value = field.split(":", 2)
214+
next unless key == "ID" && value
215+
216+
ids << value
217+
break
218+
end
219+
end
220+
ids
221+
end
222+
141223
def name2tid(name)
142224
LibHTS.sam_hdr_name2tid(@sam_hdr, name)
143225
end

test/header_test.rb

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ def teardown
1515
def test_add_pg
1616
# Test basic @PG line addition
1717
result = @header.add_pg("test_program", VN: "1.0.0", CL: "test_program input.bam")
18-
assert_equal 0, result
18+
assert_same @header, result
1919

2020
# Check that the @PG line was added
2121
header_text = @header.to_s
@@ -30,7 +30,7 @@ def test_add_pg_with_pp
3030

3131
# Add second program with PP reference
3232
result = @header.add_pg("program2", VN: "2.0", PP: "program1")
33-
assert_equal 0, result
33+
assert_same @header, result
3434

3535
header_text = @header.to_s
3636
assert_match(/@PG\t.*PN:program1/, header_text)
@@ -53,7 +53,7 @@ def test_add_pg_auto_id_generation
5353

5454
def test_add_pg_with_id
5555
result = @header.add_pg("myprogram", ID: "custom_id", VN: "0.1")
56-
assert_equal 0, result
56+
assert_same @header, result
5757

5858
header_text = @header.to_s
5959
assert_match(/ID:custom_id/, header_text)
@@ -62,9 +62,37 @@ def test_add_pg_with_id
6262

6363
def test_add_pg_empty_options
6464
result = @header.add_pg("simple_program")
65-
assert_equal 0, result
65+
assert_same @header, result
6666

6767
header_text = @header.to_s
6868
assert_match(/@PG\t.*PN:simple_program/, header_text)
6969
end
70+
71+
def test_add_pg_rejects_duplicate_id
72+
@header.add_pg("align", ID: "existing_pg")
73+
74+
error = assert_raises(ArgumentError) do
75+
@header.add_pg("sort", ID: "existing_pg")
76+
end
77+
assert_includes error.message, "PG ID already exists"
78+
end
79+
80+
def test_add_pg_rejects_unknown_parent
81+
error = assert_raises(ArgumentError) do
82+
@header.add_pg("sort", PP: "missing")
83+
end
84+
assert_includes error.message, "Unknown PG parent"
85+
end
86+
87+
def test_add_pg_rejects_tabs_and_newlines
88+
error = assert_raises(ArgumentError) do
89+
@header.add_pg("align", CL: "samtools\tview")
90+
end
91+
assert_includes error.message, "must not contain tabs or newlines"
92+
93+
error = assert_raises(ArgumentError) do
94+
@header.add_pg("align", CL: "samtools\nview")
95+
end
96+
assert_includes error.message, "must not contain tabs or newlines"
97+
end
7098
end

0 commit comments

Comments
 (0)