@@ -132,12 +132,94 @@ def get_tid(name)
132132 # header.add_pg("bwa", VN: "0.7.17", CL: "bwa mem ref.fa read.fq")
133133 # header.add_pg("samtools", VN: "1.15", PP: "bwa")
134134 def add_pg ( program_name , **options )
135- args = options . flat_map { |k , v | [ :string , k . to_s , :string , v . to_s ] }
136- LibHTS . sam_hdr_add_pg ( @sam_hdr , program_name , *args , :pointer , FFI ::Pointer ::NULL )
135+ line = build_pg_line ( program_name . to_s , options )
136+ result = LibHTS . sam_hdr_add_lines ( @sam_hdr , line , line . bytesize )
137+ raise "Failed to add @PG line" if result < 0
138+
139+ self
137140 end
138141
139142 private
140143
144+ def build_pg_line ( program_name , options )
145+ ordered_tags = normalize_pg_tags ( program_name , options )
146+ "@PG\t #{ ordered_tags . map { |key , value | "#{ key } :#{ value } " } . join ( "\t " ) } \n "
147+ end
148+
149+ def normalize_pg_tags ( program_name , options )
150+ existing_ids = pg_ids
151+ tag_map = options . each_with_object ( { } ) do |( key , value ) , tags |
152+ string_key = key . to_s
153+ string_value = value . to_s
154+ validate_pg_tag ( string_key , string_value )
155+ tags [ string_key ] = string_value
156+ end
157+
158+ pg_id = resolve_pg_id ( program_name , tag_map , existing_ids )
159+ validate_pg_parent ( tag_map [ "PP" ] , existing_ids )
160+
161+ ordered_tags = [ ]
162+ ordered_tags << [ "ID" , pg_id ]
163+ ordered_tags << [ "PN" , tag_map . fetch ( "PN" , program_name ) ]
164+ tag_map . each do |key , value |
165+ next if key == "ID" || key == "PN"
166+
167+ ordered_tags << [ key , value ]
168+ end
169+ ordered_tags
170+ end
171+
172+ def validate_pg_tag ( key , value )
173+ raise ArgumentError , "PG tag keys must not be empty" if key . empty?
174+ return unless value . include? ( "\t " ) || value . include? ( "\n " ) || value . include? ( "\r " )
175+
176+ raise ArgumentError , "PG tag values must not contain tabs or newlines"
177+ end
178+
179+ def resolve_pg_id ( program_name , tag_map , existing_ids )
180+ explicit_id = tag_map [ "ID" ]
181+ if explicit_id
182+ raise ArgumentError , "PG ID already exists: #{ explicit_id } " if existing_ids . include? ( explicit_id )
183+
184+ explicit_id
185+ else
186+ next_pg_id ( program_name , existing_ids )
187+ end
188+ end
189+
190+ def validate_pg_parent ( parent_id , existing_ids )
191+ return unless parent_id
192+ return if existing_ids . include? ( parent_id )
193+
194+ raise ArgumentError , "Unknown PG parent: #{ parent_id } "
195+ end
196+
197+ def next_pg_id ( program_name , existing_ids )
198+ candidate = program_name
199+ suffix = 0
200+ while existing_ids . include? ( candidate )
201+ suffix += 1
202+ candidate = "#{ program_name } .#{ suffix } "
203+ end
204+ candidate
205+ end
206+
207+ def pg_ids
208+ ids = [ ]
209+ to_s . each_line do |line |
210+ next unless line . start_with? ( "@PG\t " )
211+
212+ line . chomp . split ( "\t " ) [ 1 ..] . each do |field |
213+ key , value = field . split ( ":" , 2 )
214+ next unless key == "ID" && value
215+
216+ ids << value
217+ break
218+ end
219+ end
220+ ids
221+ end
222+
141223 def name2tid ( name )
142224 LibHTS . sam_hdr_name2tid ( @sam_hdr , name )
143225 end
0 commit comments