Skip to content

Commit cfe7350

Browse files
Addressed some of the issues brought up by Andrew
1 parent e051bc8 commit cfe7350

1 file changed

Lines changed: 14 additions & 9 deletions

File tree

src/utils/format-reads.cpp

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,6 @@ static inline void
110110
bam_copy_core(const bam1_t *a, bam1_t *b) {
111111
/* ADS: prepared for a possibly more efficient block copy to assign
112112
all variables at once */
113-
// ADS: confirm order of vars below matches order within the struct.
114113
b->core.pos = a->core.pos;
115114
b->core.tid = a->core.tid;
116115
b->core.bin = a->core.bin;
@@ -134,15 +133,15 @@ bam_set1_core(bam1_core_t &core,
134133
/* ADS: need to clarify what these mean. They are used in
135134
`hts_reg2bin` from `htslib/hts.h` and likely mean "region to bin"
136135
for indexing */
136+
// MN: hts_reg2bin categorizes the size of the reference region.
137+
// Here, we use the numbers used in htslib/cram/cram_samtools.h
137138
static const int min_shift = 14;
138139
static const int n_lvls = 5;
139140

140141
core.pos = pos;
141142
core.tid = tid;
142143
/* ADS: MN I recall we might not have needed this core.bin below */
143144
core.bin = hts_reg2bin(pos, pos + isize, min_shift, n_lvls);
144-
// used to be: core.bin = bam_reg2bin(pos, pos + rlen);
145-
// Changed based on htslib/cram/cram_samtools.h
146145
core.qual = mapq;
147146
core.l_extranul = qname_nuls - 1;
148147
core.flag = flag;
@@ -180,8 +179,8 @@ bam_set1_wrapper(bam1_t *bam,
180179
* qlen = l_seq
181180
* l_qname <= 254
182181
* HTS_POS_MAX - rlen > pos
183-
*
184-
* ADS: what is HTS_POS_MAX?
182+
* Where HTS_POS_MAX = ((((int64_t)INT_MAX)<<32)|INT_MAX) is the highest
183+
* supported position.
185184
*
186185
* Number of bytes needed for the data is smaller than INT32_MAX
187186
*
@@ -220,9 +219,7 @@ bam_set1_wrapper(bam1_t *bam,
220219
data_iter += (l_seq + 1) / 2;
221220

222221
std::fill(data_iter, data_iter + l_seq, '\xff');
223-
// ADS: this will never return a negative value, and either should
224-
// have the return type of this function changed, or change the type
225-
// of `data_len` so that it can be used to signal error.
222+
226223
return static_cast<int>(data_len);
227224
}
228225

@@ -384,7 +381,15 @@ get_full_and_partial_ops(const uint32_t *cig_in, const uint32_t in_ops,
384381
}
385382

386383

387-
// ADS: MN the table below needs some comments
384+
/* This table converts 2 bases packed in a byte to their reverse
385+
* complement. The input is therefore a uint8_t representing 2 bases.
386+
* It is assumed that the input uint8_t value is of form "xx" or "x-", where
387+
* 'x' is a 4-bit number representing either A, C, G, T, or N and '-' is 0000.
388+
* For example, the output for "AG" is "CT". The format "x-" is often used
389+
* at the end of an odd-length sequence.
390+
* The output of "A-" is "-T", and the output of "C-" is "-G", and so forth.
391+
* The user must handle this case separately.
392+
*/
388393
const uint8_t byte_revcom_table[] = {
389394
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
390395
8, 136, 72, 0, 40, 0, 0, 0, 24, 0, 0, 0, 0, 0, 0, 248,

0 commit comments

Comments
 (0)