Skip to content

Commit 44e038a

Browse files
authored
Merge pull request #1 from w15eacre/fix/fix_tokenizer
Add support for UTF-8 symbols
2 parents 347a972 + c8167e7 commit 44e038a

2 files changed

Lines changed: 52 additions & 51 deletions

File tree

  • calculator/src

calculator/src/math_expression_parser/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ impl MathExpressionParser {
3030
let mut parsed_expression = MathExpression { expression: vec![] };
3131
let mut braces = vec![];
3232

33-
while let Ok((token, idx)) = tokenizer.next_token() {
33+
while let Ok((idx, token)) = tokenizer.next_token() {
3434
match token {
3535
Token::OpenBrace => {
3636
braces.push(idx);

calculator/src/math_expression_tokenizer/mod.rs

Lines changed: 51 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,12 @@ pub enum MathExpressionTokenizerError {
2020

2121
pub struct MathExpressionTokenizer {
2222
expr: String,
23-
curr_idx: usize,
23+
curr_byte_idx: usize,
2424
}
2525

2626
pub trait TokenizerTraits {
2727
fn has_token(&self) -> bool;
28-
fn next_token(&mut self) -> Result<(Token, usize), MathExpressionTokenizerError>;
28+
fn next_token(&mut self) -> Result<(usize, Token), MathExpressionTokenizerError>;
2929
fn curr_index(&self) -> usize;
3030
}
3131

@@ -36,52 +36,55 @@ impl TokenizerTraits for MathExpressionTokenizer {
3636
}
3737

3838
fn curr_index(&self) -> usize {
39-
self.curr_idx
39+
self.curr_byte_idx
4040
}
4141

42-
fn next_token(&mut self) -> Result<(Token, usize), MathExpressionTokenizerError> {
42+
fn next_token(&mut self) -> Result<(usize, Token), MathExpressionTokenizerError> {
4343
if !self.has_token() {
4444
return Err(MathExpressionTokenizerError::NoToken);
4545
}
4646

47-
self.curr_idx = self.skip_spaces();
48-
49-
match self.expr.chars().nth(self.curr_idx).unwrap() {
50-
'(' => {
51-
self.curr_idx += 1;
52-
Ok((Token::OpenBrace, self.curr_idx - 1))
53-
}
54-
')' => {
55-
self.curr_idx += 1;
56-
Ok((Token::CloseBrace, self.curr_idx - 1))
57-
}
58-
op @ ('+' | '-' | '*' | '/') => {
59-
self.curr_idx += 1;
60-
Ok((Token::Operator(op), self.curr_idx - 1))
61-
}
47+
self.curr_byte_idx = self.skip_spaces();
48+
let old_value = self.curr_byte_idx;
49+
50+
match self.expr[self.curr_byte_idx..].chars().next().unwrap() {
51+
'(' => Ok((
52+
std::mem::replace(&mut self.curr_byte_idx, old_value + 1),
53+
Token::OpenBrace,
54+
)),
55+
')' => Ok((
56+
std::mem::replace(&mut self.curr_byte_idx, old_value + 1),
57+
Token::CloseBrace,
58+
)),
59+
op @ ('+' | '-' | '*' | '/') => Ok((
60+
std::mem::replace(&mut self.curr_byte_idx, old_value + 1),
61+
Token::Operator(op),
62+
)),
6263
_ => {
63-
let (digit, mut idx) = self.parse_digits()?;
64-
std::mem::swap(&mut self.curr_idx, &mut idx);
65-
Ok((Token::Digit(digit), idx))
64+
let (digit, idx) = self.parse_digits()?;
65+
Ok((
66+
std::mem::replace(&mut self.curr_byte_idx, idx),
67+
Token::Digit(digit),
68+
))
6669
}
6770
}
6871
}
6972
}
7073

7174
impl MathExpressionTokenizer {
72-
pub fn new(math_expr: String) -> Result<Self, MathExpressionTokenizerError> {
73-
if math_expr.is_empty() {
75+
pub fn new(expr: String) -> Result<Self, MathExpressionTokenizerError> {
76+
if expr.is_empty() {
7477
return Err(MathExpressionTokenizerError::InvalidArgument);
7578
}
7679

7780
Ok(Self {
78-
expr: math_expr,
79-
curr_idx: 0,
81+
expr,
82+
curr_byte_idx: 0,
8083
})
8184
}
8285

8386
fn parse_digits(&self) -> Result<(f64, usize), MathExpressionTokenizerError> {
84-
let s = &self.expr[self.curr_idx..];
87+
let s = &self.expr[self.curr_byte_idx..];
8588

8689
let offset = s
8790
.char_indices()
@@ -90,23 +93,24 @@ impl MathExpressionTokenizer {
9093
.unwrap_or(s.len());
9194

9295
match s[..offset].parse::<f64>() {
93-
Ok(number) => Ok((number, self.curr_idx + offset)),
96+
Ok(number) => Ok((number, self.curr_byte_idx + offset)),
9497
Err(_) => Err(MathExpressionTokenizerError::InvalidToken {
95-
idx: self.curr_idx,
98+
idx: self.curr_byte_idx,
9699
ch: s.chars().nth(0).unwrap(),
97100
}),
98101
}
99102
}
100103

101104
fn skip_spaces(&self) -> usize {
102-
self.expr[self.curr_idx..]
105+
self.expr[self.curr_byte_idx..]
103106
.char_indices()
104-
.position(|(_, char)| !char.is_whitespace())
105-
.map(|idx| self.curr_idx + idx)
107+
.find(|(_, char)| !char.is_whitespace())
108+
.map(|(idx, _)| self.curr_byte_idx + idx)
106109
.unwrap_or(self.expr.len())
107110
}
108111
}
109112

113+
// Unit tests
110114
#[cfg(test)]
111115
mod tests {
112116
use super::*;
@@ -121,7 +125,7 @@ mod tests {
121125
fn test_zero_number_tokens() {
122126
let mut tokenizer = MathExpressionTokenizer::new("0".to_string()).unwrap();
123127
assert!(tokenizer.has_token());
124-
let (token, idx) = tokenizer.next_token().unwrap();
128+
let (idx, token) = tokenizer.next_token().unwrap();
125129
assert_eq!(idx, 0);
126130

127131
if let Token::Digit(number) = token {
@@ -132,12 +136,12 @@ mod tests {
132136

133137
let mut tokenizer = MathExpressionTokenizer::new("-0".to_string()).unwrap();
134138
assert!(tokenizer.has_token());
135-
let (token, idx) = tokenizer.next_token().unwrap();
139+
let (idx, token) = tokenizer.next_token().unwrap();
136140
assert_eq!(idx, 0);
137141
assert!(matches!(token, Token::Operator('-')));
138142

139143
assert!(tokenizer.has_token());
140-
let (token, idx) = tokenizer.next_token().unwrap();
144+
let (idx, token) = tokenizer.next_token().unwrap();
141145
assert_eq!(idx, 1);
142146

143147
if let Token::Digit(number) = token {
@@ -152,7 +156,7 @@ mod tests {
152156
fn test_valid_positive_number_tokens(n in any::<f64>().prop_filter("Positive numbers", |&x| x > 0.0)) {
153157
let mut tokenizer = MathExpressionTokenizer::new(format!("{}", n)).unwrap();
154158
assert!(tokenizer.has_token());
155-
let (token, idx) = tokenizer.next_token().unwrap();
159+
let (idx, token) = tokenizer.next_token().unwrap();
156160
assert_eq!(idx, 0);
157161

158162
if let Token::Digit(number) = token
@@ -168,12 +172,12 @@ mod tests {
168172
fn test_valid_negative_number_tokens(n in any::<f64>().prop_filter("Positive numbers", |&x| x < 0.0)) {
169173
let mut tokenizer = MathExpressionTokenizer::new(format!("{}", n)).unwrap();
170174
assert!(tokenizer.has_token());
171-
let (token, idx) = tokenizer.next_token().unwrap();
175+
let (idx, token) = tokenizer.next_token().unwrap();
172176
assert_eq!(idx, 0);
173177
assert!(matches!(token, Token::Operator('-')));
174178

175179
assert!(tokenizer.has_token());
176-
let (token, idx) = tokenizer.next_token().unwrap();
180+
let (idx, token) = tokenizer.next_token().unwrap();
177181
assert_eq!(idx, 1);
178182

179183
if let Token::Digit(number) = token
@@ -186,25 +190,25 @@ mod tests {
186190
}
187191

188192
#[test]
189-
fn test_valid_operator_tokens(s in r"[+\-*/ ]{1,50}".prop_filter("no leading space", |s| !s.starts_with(char::is_whitespace))) {
193+
fn test_valid_operator_tokens(s in r"[+\-*/\s]{1,50}".prop_filter("no leading space", |s| !s.starts_with(char::is_whitespace))) {
190194
let mut tokenizer = MathExpressionTokenizer::new(s.clone()).unwrap();
191195
assert!(tokenizer.has_token());
192196

193-
while let Ok((token, idx)) = tokenizer.next_token() {
194-
let op = s.chars().nth(idx).unwrap();
197+
while let Ok((idx, token)) = tokenizer.next_token() {
198+
let op = s[idx..].chars().next().unwrap();
195199
assert_eq!(token, Token::Operator(op));
196200
}
197201

198202
assert!(!tokenizer.has_token());
199203
}
200204

201205
#[test]
202-
fn test_braces_tokens(s in r"[() ]{1,50}".prop_filter("no leading space", |s| !s.starts_with(char::is_whitespace))) {
206+
fn test_braces_tokens(s in r"[()\s]{1,50}".prop_filter("no leading space", |s| !s.starts_with(char::is_whitespace))) {
203207
let mut tokenizer = MathExpressionTokenizer::new(s.clone()).unwrap();
204208
assert!(tokenizer.has_token());
205209

206-
while let Ok((token, idx)) = tokenizer.next_token() {
207-
let op = s.chars().nth(idx).unwrap();
210+
while let Ok((idx, token)) = tokenizer.next_token() {
211+
let op = s[idx..].chars().next().unwrap();
208212
if op == '('
209213
{
210214
assert_eq!(token, Token::OpenBrace);
@@ -219,15 +223,12 @@ mod tests {
219223
}
220224

221225
#[test]
222-
fn test_valid_sequence_tokens(s in r"[0-9+\-*/() ]{1,10}".prop_filter("no leading space", |s| !s.starts_with(char::is_whitespace))) {
226+
fn test_valid_sequence_tokens(s in r"[0-9+\-*/()\s]{1,10}".prop_filter("no leading space", |s| !s.starts_with(char::is_whitespace))) {
223227
let mut tokenizer = MathExpressionTokenizer::new(s.clone()).unwrap();
224228
assert!(tokenizer.has_token());
225229

226-
println!("Ch:{}",s);
227-
228-
while let Ok((token, idx)) = tokenizer.next_token() {
229-
let ch = s.chars().nth(idx).unwrap();
230-
println!("Ch: {} - {}", ch, s);
230+
while let Ok((idx, token)) = tokenizer.next_token() {
231+
let ch = s[idx..].chars().next().unwrap();
231232
match token {
232233
Token::OpenBrace => {
233234
assert_eq!(ch, '(');

0 commit comments

Comments
 (0)