-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathgrep.rs
More file actions
76 lines (64 loc) · 1.82 KB
/
grep.rs
File metadata and controls
76 lines (64 loc) · 1.82 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
use std::sync::Arc;
use anyhow::Result;
use binseq::prelude::*;
use clap::Parser;
use memchr::memmem::Finder;
use parking_lot::Mutex;
/// Tallies records whose sequences contain a fixed byte pattern.
///
/// The type derives `Clone`: a clone gets its own `local_count` and its own
/// copy of the searcher, while `count` sits behind an `Arc`, so every clone
/// adds into the same shared total.
#[derive(Clone)]
pub struct GrepCounter {
    /// Substring searcher for the pattern (`memchr::memmem::Finder`, chosen
    /// for fast searching).
    pattern: Finder<'static>,
    /// Per-instance ("thread local") tally of matches that have not yet been
    /// folded into `count`.
    local_count: usize,
    /// Global total shared by all clones, guarded by a mutex.
    count: Arc<Mutex<usize>>,
}
impl GrepCounter {
    /// Creates a counter that searches for `pattern`.
    ///
    /// The searcher is converted to its owned form so the counter does not
    /// borrow from `pattern`. Both the local tally and the shared total
    /// start at zero.
    #[must_use]
    pub fn new(pattern: &[u8]) -> Self {
        let searcher = Finder::new(pattern).into_owned();
        let shared_total = Arc::new(Mutex::new(0));
        Self {
            local_count: 0,
            count: shared_total,
            pattern: searcher,
        }
    }

    /// Returns `true` when the pattern is found anywhere inside `seq`.
    fn match_sequence(&self, seq: &[u8]) -> bool {
        let first_hit = self.pattern.find(seq);
        first_hit.is_some()
    }

    /// Prints the shared total of matching records to standard output.
    fn pprint(&self) {
        // The guard stays alive for the duration of the print.
        let total = self.count.lock();
        println!("Matching records: {}", *total);
    }
}
impl ParallelProcessor for GrepCounter {
    /// Bumps the local tally when the pattern occurs in the value returned by
    /// `record.sseq()` or, failing that, in the one returned by
    /// `record.xseq()`. A record is counted at most once.
    fn process_record<R: binseq::BinseqRecord>(&mut self, record: R) -> binseq::Result<()> {
        // `||` short-circuits, so `xseq` is only searched when `sseq` misses.
        let is_hit =
            self.match_sequence(&record.sseq()) || self.match_sequence(&record.xseq());
        self.local_count += usize::from(is_hit);
        Ok(())
    }

    /// Folds the local tally into the shared total and resets it to zero.
    ///
    /// NOTE(review): presumably invoked by the parallel driver once per
    /// finished batch; confirm against the `ParallelProcessor` contract.
    fn on_batch_complete(&mut self) -> binseq::Result<()> {
        // `take` reads the pending tally and leaves 0 behind in one step.
        let pending = std::mem::take(&mut self.local_count);
        *self.count.lock() += pending;
        Ok(())
    }
}
// Command-line arguments, parsed by clap's derive API.
//
// NOTE: clap's derive turns the `///` doc comments on the fields below into
// the per-argument help text, so they are user-facing strings; edit them only
// when the CLI help should change. Plain `//` comments are used for reviewer
// notes for the same reason (a `///` on the struct would become the command's
// "about" text).
#[derive(Parser)]
struct Args {
// First positional argument; passed to `BinseqReader::new` in `main`.
/// Input BINSEQ path to grep
#[clap(required = true)]
input: String,
// Second positional argument; its raw bytes become the search pattern.
/// Pattern to search for (either sseq or xseq)
#[clap(required = true)]
pattern: String,
// `-T` / `--threads`; forwarded unchanged to `process_parallel`.
/// Threads to use [0: auto]
#[clap(short = 'T', long, default_value_t = 0)]
threads: usize,
}
fn main() -> Result<()> {
let args = Args::parse();
let reader = BinseqReader::new(&args.input)?;
let counter = GrepCounter::new(args.pattern.as_bytes());
reader.process_parallel(counter.clone(), args.threads)?;
counter.pprint();
Ok(())
}