Skip to content

Commit 002d706

Browse files
authored
Merge pull request #79 from ArcInstitute/integrate-cbq
Integrate cbq
2 parents 54dc048 + b3cb076 commit 002d706

24 files changed

Lines changed: 2345 additions & 32 deletions

Cargo.toml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[package]
22
name = "binseq"
33
version = "0.8.3"
4-
edition = "2021"
4+
edition = "2024"
55
description = "A high efficiency binary format for sequencing data"
66
license = "MIT"
77
authors = ["Noam Teyssier <noam.teyssier@arcinstitute.org>"]
@@ -13,13 +13,15 @@ keywords = ["bioinformatics", "nucleotide", "sequencing", "genomics", "fastq"]
1313
[dependencies]
1414
anyhow = "1.0.100"
1515
auto_impl = "1.3.0"
16-
bitnuc = "0.3.2"
17-
bytemuck = "1.24.0"
16+
bitnuc = "0.4.0"
17+
bytemuck = { version = "1.24.0", features = ["derive", "extern_crate_alloc"] }
1818
byteorder = "1.5.0"
19-
itoa = "1.0.15"
19+
itoa = "1.0.17"
20+
memchr = "2.7.6"
2021
memmap2 = "0.9.9"
2122
num_cpus = "1.17.0"
2223
rand = { version = "0.9.2", features = ["small_rng"] }
24+
sucds = "0.8.3"
2325
thiserror = "2.0.17"
2426
zstd = { version = "0.13.3", features = ["zstdmt"] }
2527

@@ -28,8 +30,6 @@ nucgen = "0.2.0"
2830
niffler = "3.0.0"
2931
seq_io = "0.3.4"
3032
parking_lot = "0.12.5"
31-
itoa = "1.0.15"
32-
memchr = "2.7.6"
3333

3434
[lints.clippy]
3535
pedantic = { level = "warn", priority = -1 }

README.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,13 @@
1010
BINSEQ is a binary file format family designed for efficient storage and processing of DNA sequences.
1111
These formats use two-bit encoding for nucleotides and are optimized for high-performance parallel processing.
1212

13-
BINSEQ currently has two flavors:
13+
BINSEQ currently has three variants:
1414

1515
1. **BQ**: (`*.bq`) files are for _fixed-length_ records **without** quality scores.
1616
2. **VBQ**: (`*.vbq`) files are for _variable-length_ records **with optional** quality scores and headers.
17+
3. **CBQ**: (`*.cbq`) files are for _columnar variable-length_ records **with optional** quality scores and headers.
1718

18-
Both flavors support both single and paired sequences.
19+
All variants support both single and paired sequences.
1920

2021
## Getting Started
2122

examples/example.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use std::fs::File;
2-
use std::io::{stdout, BufWriter, Write};
2+
use std::io::{BufWriter, Write, stdout};
33
use std::sync::Arc;
44

55
use anyhow::Result;

examples/parallel_processing.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,12 @@ use std::{
22
fs::File,
33
io::BufWriter,
44
sync::{
5-
atomic::{AtomicUsize, Ordering},
65
Arc,
6+
atomic::{AtomicUsize, Ordering},
77
},
88
};
99

10-
use anyhow::{bail, Result};
10+
use anyhow::{Result, bail};
1111
use binseq::{
1212
bq::{self, BinseqHeaderBuilder},
1313
context::SeqCtx,

examples/parallel_range.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use binseq::{BinseqReader, BinseqRecord, ParallelProcessor, ParallelReader, Result};
2-
use std::sync::atomic::{AtomicUsize, Ordering};
32
use std::sync::Arc;
3+
use std::sync::atomic::{AtomicUsize, Ordering};
44

55
#[derive(Clone)]
66
struct RangeProcessor {

examples/read_write.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,10 @@ use std::{
33
io::{BufReader, BufWriter},
44
};
55

6-
use anyhow::{bail, Result};
6+
use anyhow::{Result, bail};
77
use binseq::{
8-
bq::{BinseqHeaderBuilder, BinseqWriterBuilder, MmapReader},
98
BinseqRecord,
9+
bq::{BinseqHeaderBuilder, BinseqWriterBuilder, MmapReader},
1010
};
1111
use seq_io::fastq::{Reader, Record};
1212

src/bq/reader.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@ use memmap2::Mmap;
1919

2020
use super::header::{BinseqHeader, SIZE_HEADER};
2121
use crate::{
22-
error::{ReadError, Result},
2322
BinseqRecord, Error, ParallelProcessor, ParallelReader,
23+
error::{ReadError, Result},
2424
};
2525

2626
/// A reference to a binary sequence record in a memory-mapped file
@@ -692,10 +692,10 @@ impl<R: Read> StreamReader<R> {
692692
/// * The data format is invalid
693693
pub fn next_record(&mut self) -> Option<Result<RefRecord<'_>>> {
694694
// Ensure header is read
695-
if self.header.is_none() {
696-
if let Some(e) = self.read_header().err() {
697-
return Some(Err(e));
698-
}
695+
if self.header.is_none()
696+
&& let Some(e) = self.read_header().err()
697+
{
698+
return Some(Err(e));
699699
}
700700

701701
let config = self

src/bq/writer.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,12 @@
1010
use std::io::{BufWriter, Write};
1111

1212
use byteorder::{LittleEndian, WriteBytesExt};
13-
use rand::{rngs::SmallRng, SeedableRng};
13+
use rand::{SeedableRng, rngs::SmallRng};
1414

1515
use super::BinseqHeader;
1616
use crate::{
17-
error::{Result, WriteError},
1817
Policy, RNG_SEED,
18+
error::{Result, WriteError},
1919
};
2020

2121
/// Writes a single flag value to a writer in little-endian format

0 commit comments

Comments
 (0)