@@ -163,29 +163,44 @@ bam_set1_wrapper(bam1_t *bam,
163163 const int32_t mtid, const hts_pos_t mpos,
164164 const hts_pos_t isize, const size_t l_seq,
165165 const size_t l_aux) {
166- /* This is a modified version of bam_set1.
167- * It assigns the attributes of bam1_t object except for the sequence.
168- * Many checks are skipped, as we assume that many quantities have
169- * been validaded.
166+ /* This is based on how assignment is done in the `bam_set1`
167+ function defined in `sam.c` from htslib */
168+
169+ /*
170+ * This modification assigns variables of bam1_t struct but not the sequence.
171+ *
172+ * Many checks have been removed because they are checked in code
173+ * that calls this function, mostly because they already come from a
174+ * valid `bam1_t` struct and so the values have been individually
175+ * validated.
176+ *
170177 * Assumptions:
171- * cigar has been computed
178+ * cigar has been computed and is in the right format
172179 * rlen = isize
173180 * qlen = l_seq
174181 * l_qname <= 254
175182 * HTS_POS_MAX - rlen > pos
183+ *
184+ * ADS: what is HTS_POS_MAX?
185+ *
176186 * Number of bytes needed for the data is smaller than INT32_MAX
177- * qual = Null
187+ *
188+ * qual = NULL, because we do not keep the quality scores through
189+ * formatting the reads.
178190 */
179191
180- size_t qname_nuls = 4 - l_qname % 4 ;
192+ // `qname_nuls` below is the number of '\0' to use to pad the qname
193+ // so that the cigar has 4-byte alignment.
194+ const size_t qname_nuls = 4 - l_qname % 4 ;
181195 bam_set1_core (bam->core , l_qname, flag, tid, pos, mapq, n_cigar,
182- mtid, mpos, isize, l_seq, qname_nuls);
196+ mtid, mpos, isize, l_seq, qname_nuls);
197+
198+ const size_t data_len =
199+ (l_qname + qname_nuls + n_cigar*sizeof (uint32_t ) + (l_seq + 1 ) / 2 + l_seq);
183200
184- size_t data_len = l_qname + qname_nuls + n_cigar * 4 +
185- (l_seq + 1 ) / 2 + l_seq;
186201 bam->l_data = data_len;
187202 if (data_len + l_aux > bam->m_data ) {
188- int ret = sam_realloc_bam_data (bam, data_len + l_aux);
203+ const int ret = sam_realloc_bam_data (bam, data_len + l_aux);
189204 if (ret < 0 ) {
190205 throw fr_expt (ret, " Failed to allocate memory for BAM record" );
191206 }
@@ -196,14 +211,19 @@ bam_set1_wrapper(bam1_t *bam,
196211 std::fill_n (data_iter + l_qname, qname_nuls, ' \0 ' );
197212 data_iter += l_qname + qname_nuls;
198213
214+ // ADS: reinterpret here because we know the cigar is originally an
215+ // array of uint32_t and has been aligned for efficiency
199216 std::copy_n (cigar, n_cigar, reinterpret_cast <uint32_t *>(data_iter));
200217 data_iter += n_cigar * sizeof (uint32_t );
201218
202219 // skipping sequece assignment
203220 data_iter += (l_seq + 1 ) / 2 ;
204221
205222 std::fill (data_iter, data_iter + l_seq, ' \xff ' );
206- return (int )data_len;
223+ // ADS: this will never return a negative value, and either should
224+ // have the return type of this function changed, or change the type
225+ // of `data_len` so that it can be used to signal error.
226+ return static_cast <int >(data_len);
207227}
208228
209229static inline bool
@@ -363,23 +383,26 @@ get_full_and_partial_ops(const uint32_t *cig_in, const uint32_t in_ops,
363383 return i;
364384}
365385
386+
387+ // ADS: MN the table below needs some comments
366388const uint8_t byte_revcom_table[] = {
367- 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
368- 8 , 136 , 72 , 0 , 40 , 0 , 0 , 0 , 24 , 0 , 0 , 0 , 0 , 0 , 0 , 248 ,
369- 4 , 132 , 68 , 0 , 36 , 0 , 0 , 0 , 20 , 0 , 0 , 0 , 0 , 0 , 0 , 244 ,
370- 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
371- 2 , 130 , 66 , 0 , 34 , 0 , 0 , 0 , 18 , 0 , 0 , 0 , 0 , 0 , 0 , 242 ,
372- 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
373- 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
374- 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
375- 1 , 129 , 65 , 0 , 33 , 0 , 0 , 0 , 17 , 0 , 0 , 0 , 0 , 0 , 0 , 241 ,
376- 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
377- 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
378- 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
379- 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
380- 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
381- 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
382- 15 , 143 , 79 , 0 , 47 , 0 , 0 , 0 , 31 , 0 , 0 , 0 , 0 , 0 , 0 , 255 };
389+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
390+ 8 , 136 , 72 , 0 , 40 , 0 , 0 , 0 , 24 , 0 , 0 , 0 , 0 , 0 , 0 , 248 ,
391+ 4 , 132 , 68 , 0 , 36 , 0 , 0 , 0 , 20 , 0 , 0 , 0 , 0 , 0 , 0 , 244 ,
392+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
393+ 2 , 130 , 66 , 0 , 34 , 0 , 0 , 0 , 18 , 0 , 0 , 0 , 0 , 0 , 0 , 242 ,
394+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
395+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
396+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
397+ 1 , 129 , 65 , 0 , 33 , 0 , 0 , 0 , 17 , 0 , 0 , 0 , 0 , 0 , 0 , 241 ,
398+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
399+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
400+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
401+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
402+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
403+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
404+ 15 , 143 , 79 , 0 , 47 , 0 , 0 , 0 , 31 , 0 , 0 , 0 , 0 , 0 , 0 , 255
405+ };
383406
384407
385408static inline void
@@ -399,7 +422,7 @@ static void
399422revcomp_seq_by_byte (bam1_t *aln) {
400423 const size_t l_qseq = get_qlen (aln);
401424 auto seq = bam_get_seq (aln);
402- size_t num_bytes = ceil (l_qseq / 2.0 );
425+ const size_t num_bytes = ceil (l_qseq / 2.0 );
403426 auto seq_end = seq + num_bytes;
404427 revcom_byte_then_reverse (seq, seq_end);
405428 if (l_qseq % 2 == 1 ) {
0 commit comments