@@ -20,12 +20,12 @@ pub enum MathExpressionTokenizerError {
2020
/// Tokenizer over a math expression held as an owned string.
///
/// The read position is tracked as a *byte* offset so it can be used
/// directly to slice `expr` without walking the string from the start.
pub struct MathExpressionTokenizer {
    /// The expression text being tokenized.
    expr: String,
    /// Byte offset into `expr` at which the search for the next token starts.
    curr_byte_idx: usize,
}
2525
2626pub trait TokenizerTraits {
2727 fn has_token ( & self ) -> bool ;
28- fn next_token ( & mut self ) -> Result < ( Token , usize ) , MathExpressionTokenizerError > ;
28+ fn next_token ( & mut self ) -> Result < ( usize , Token ) , MathExpressionTokenizerError > ;
2929 fn curr_index ( & self ) -> usize ;
3030}
3131
@@ -36,52 +36,55 @@ impl TokenizerTraits for MathExpressionTokenizer {
3636 }
3737
3838 fn curr_index ( & self ) -> usize {
39- self . curr_idx
39+ self . curr_byte_idx
4040 }
4141
42- fn next_token ( & mut self ) -> Result < ( Token , usize ) , MathExpressionTokenizerError > {
42+ fn next_token ( & mut self ) -> Result < ( usize , Token ) , MathExpressionTokenizerError > {
4343 if !self . has_token ( ) {
4444 return Err ( MathExpressionTokenizerError :: NoToken ) ;
4545 }
4646
47- self . curr_idx = self . skip_spaces ( ) ;
48-
49- match self . expr . chars ( ) . nth ( self . curr_idx ) . unwrap ( ) {
50- '(' => {
51- self . curr_idx += 1 ;
52- Ok ( ( Token :: OpenBrace , self . curr_idx - 1 ) )
53- }
54- ')' => {
55- self . curr_idx += 1 ;
56- Ok ( ( Token :: CloseBrace , self . curr_idx - 1 ) )
57- }
58- op @ ( '+' | '-' | '*' | '/' ) => {
59- self . curr_idx += 1 ;
60- Ok ( ( Token :: Operator ( op) , self . curr_idx - 1 ) )
61- }
47+ self . curr_byte_idx = self . skip_spaces ( ) ;
48+ let old_value = self . curr_byte_idx ;
49+
50+ match self . expr [ self . curr_byte_idx ..] . chars ( ) . next ( ) . unwrap ( ) {
51+ '(' => Ok ( (
52+ std:: mem:: replace ( & mut self . curr_byte_idx , old_value + 1 ) ,
53+ Token :: OpenBrace ,
54+ ) ) ,
55+ ')' => Ok ( (
56+ std:: mem:: replace ( & mut self . curr_byte_idx , old_value + 1 ) ,
57+ Token :: CloseBrace ,
58+ ) ) ,
59+ op @ ( '+' | '-' | '*' | '/' ) => Ok ( (
60+ std:: mem:: replace ( & mut self . curr_byte_idx , old_value + 1 ) ,
61+ Token :: Operator ( op) ,
62+ ) ) ,
6263 _ => {
63- let ( digit, mut idx) = self . parse_digits ( ) ?;
64- std:: mem:: swap ( & mut self . curr_idx , & mut idx) ;
65- Ok ( ( Token :: Digit ( digit) , idx) )
64+ let ( digit, idx) = self . parse_digits ( ) ?;
65+ Ok ( (
66+ std:: mem:: replace ( & mut self . curr_byte_idx , idx) ,
67+ Token :: Digit ( digit) ,
68+ ) )
6669 }
6770 }
6871 }
6972}
7073
7174impl MathExpressionTokenizer {
72- pub fn new ( math_expr : String ) -> Result < Self , MathExpressionTokenizerError > {
73- if math_expr . is_empty ( ) {
75+ pub fn new ( expr : String ) -> Result < Self , MathExpressionTokenizerError > {
76+ if expr . is_empty ( ) {
7477 return Err ( MathExpressionTokenizerError :: InvalidArgument ) ;
7578 }
7679
7780 Ok ( Self {
78- expr : math_expr ,
79- curr_idx : 0 ,
81+ expr,
82+ curr_byte_idx : 0 ,
8083 } )
8184 }
8285
8386 fn parse_digits ( & self ) -> Result < ( f64 , usize ) , MathExpressionTokenizerError > {
84- let s = & self . expr [ self . curr_idx ..] ;
87+ let s = & self . expr [ self . curr_byte_idx ..] ;
8588
8689 let offset = s
8790 . char_indices ( )
@@ -90,23 +93,24 @@ impl MathExpressionTokenizer {
9093 . unwrap_or ( s. len ( ) ) ;
9194
9295 match s[ ..offset] . parse :: < f64 > ( ) {
93- Ok ( number) => Ok ( ( number, self . curr_idx + offset) ) ,
96+ Ok ( number) => Ok ( ( number, self . curr_byte_idx + offset) ) ,
9497 Err ( _) => Err ( MathExpressionTokenizerError :: InvalidToken {
95- idx : self . curr_idx ,
98+ idx : self . curr_byte_idx ,
9699 ch : s. chars ( ) . nth ( 0 ) . unwrap ( ) ,
97100 } ) ,
98101 }
99102 }
100103
101104 fn skip_spaces ( & self ) -> usize {
102- self . expr [ self . curr_idx ..]
105+ self . expr [ self . curr_byte_idx ..]
103106 . char_indices ( )
104- . position ( |( _, char) | !char. is_whitespace ( ) )
105- . map ( |idx| self . curr_idx + idx)
107+ . find ( |( _, char) | !char. is_whitespace ( ) )
108+ . map ( |( idx, _ ) | self . curr_byte_idx + idx)
106109 . unwrap_or ( self . expr . len ( ) )
107110 }
108111}
109112
113+ // Unit tests
110114#[ cfg( test) ]
111115mod tests {
112116 use super :: * ;
@@ -121,7 +125,7 @@ mod tests {
121125 fn test_zero_number_tokens ( ) {
122126 let mut tokenizer = MathExpressionTokenizer :: new ( "0" . to_string ( ) ) . unwrap ( ) ;
123127 assert ! ( tokenizer. has_token( ) ) ;
124- let ( token , idx ) = tokenizer. next_token ( ) . unwrap ( ) ;
128+ let ( idx , token ) = tokenizer. next_token ( ) . unwrap ( ) ;
125129 assert_eq ! ( idx, 0 ) ;
126130
127131 if let Token :: Digit ( number) = token {
@@ -132,12 +136,12 @@ mod tests {
132136
133137 let mut tokenizer = MathExpressionTokenizer :: new ( "-0" . to_string ( ) ) . unwrap ( ) ;
134138 assert ! ( tokenizer. has_token( ) ) ;
135- let ( token , idx ) = tokenizer. next_token ( ) . unwrap ( ) ;
139+ let ( idx , token ) = tokenizer. next_token ( ) . unwrap ( ) ;
136140 assert_eq ! ( idx, 0 ) ;
137141 assert ! ( matches!( token, Token :: Operator ( '-' ) ) ) ;
138142
139143 assert ! ( tokenizer. has_token( ) ) ;
140- let ( token , idx ) = tokenizer. next_token ( ) . unwrap ( ) ;
144+ let ( idx , token ) = tokenizer. next_token ( ) . unwrap ( ) ;
141145 assert_eq ! ( idx, 1 ) ;
142146
143147 if let Token :: Digit ( number) = token {
@@ -152,7 +156,7 @@ mod tests {
152156 fn test_valid_positive_number_tokens( n in any:: <f64 >( ) . prop_filter( "Positive numbers" , |& x| x > 0.0 ) ) {
153157 let mut tokenizer = MathExpressionTokenizer :: new( format!( "{}" , n) ) . unwrap( ) ;
154158 assert!( tokenizer. has_token( ) ) ;
155- let ( token , idx ) = tokenizer. next_token( ) . unwrap( ) ;
159+ let ( idx , token ) = tokenizer. next_token( ) . unwrap( ) ;
156160 assert_eq!( idx, 0 ) ;
157161
158162 if let Token :: Digit ( number) = token
@@ -168,12 +172,12 @@ mod tests {
168172 fn test_valid_negative_number_tokens( n in any:: <f64 >( ) . prop_filter( "Positive numbers" , |& x| x < 0.0 ) ) {
169173 let mut tokenizer = MathExpressionTokenizer :: new( format!( "{}" , n) ) . unwrap( ) ;
170174 assert!( tokenizer. has_token( ) ) ;
171- let ( token , idx ) = tokenizer. next_token( ) . unwrap( ) ;
175+ let ( idx , token ) = tokenizer. next_token( ) . unwrap( ) ;
172176 assert_eq!( idx, 0 ) ;
173177 assert!( matches!( token, Token :: Operator ( '-' ) ) ) ;
174178
175179 assert!( tokenizer. has_token( ) ) ;
176- let ( token , idx ) = tokenizer. next_token( ) . unwrap( ) ;
180+ let ( idx , token ) = tokenizer. next_token( ) . unwrap( ) ;
177181 assert_eq!( idx, 1 ) ;
178182
179183 if let Token :: Digit ( number) = token
@@ -186,25 +190,25 @@ mod tests {
186190 }
187191
188192 #[ test]
189- fn test_valid_operator_tokens( s in r"[+\-*/ ]{1,50}" . prop_filter( "no leading space" , |s| !s. starts_with( char :: is_whitespace) ) ) {
193+ fn test_valid_operator_tokens( s in r"[+\-*/\s ]{1,50}" . prop_filter( "no leading space" , |s| !s. starts_with( char :: is_whitespace) ) ) {
190194 let mut tokenizer = MathExpressionTokenizer :: new( s. clone( ) ) . unwrap( ) ;
191195 assert!( tokenizer. has_token( ) ) ;
192196
193- while let Ok ( ( token , idx ) ) = tokenizer. next_token( ) {
194- let op = s. chars( ) . nth ( idx ) . unwrap( ) ;
197+ while let Ok ( ( idx , token ) ) = tokenizer. next_token( ) {
198+ let op = s[ idx.. ] . chars( ) . next ( ) . unwrap( ) ;
195199 assert_eq!( token, Token :: Operator ( op) ) ;
196200 }
197201
198202 assert!( !tokenizer. has_token( ) ) ;
199203 }
200204
201205 #[ test]
202- fn test_braces_tokens( s in r"[() ]{1,50}" . prop_filter( "no leading space" , |s| !s. starts_with( char :: is_whitespace) ) ) {
206+ fn test_braces_tokens( s in r"[()\s ]{1,50}" . prop_filter( "no leading space" , |s| !s. starts_with( char :: is_whitespace) ) ) {
203207 let mut tokenizer = MathExpressionTokenizer :: new( s. clone( ) ) . unwrap( ) ;
204208 assert!( tokenizer. has_token( ) ) ;
205209
206- while let Ok ( ( token , idx ) ) = tokenizer. next_token( ) {
207- let op = s. chars( ) . nth ( idx ) . unwrap( ) ;
210+ while let Ok ( ( idx , token ) ) = tokenizer. next_token( ) {
211+ let op = s[ idx.. ] . chars( ) . next ( ) . unwrap( ) ;
208212 if op == '('
209213 {
210214 assert_eq!( token, Token :: OpenBrace ) ;
@@ -219,15 +223,12 @@ mod tests {
219223 }
220224
221225 #[ test]
222- fn test_valid_sequence_tokens( s in r"[0-9+\-*/() ]{1,10}" . prop_filter( "no leading space" , |s| !s. starts_with( char :: is_whitespace) ) ) {
226+ fn test_valid_sequence_tokens( s in r"[0-9+\-*/()\s ]{1,10}" . prop_filter( "no leading space" , |s| !s. starts_with( char :: is_whitespace) ) ) {
223227 let mut tokenizer = MathExpressionTokenizer :: new( s. clone( ) ) . unwrap( ) ;
224228 assert!( tokenizer. has_token( ) ) ;
225229
226- println!( "Ch:{}" , s) ;
227-
228- while let Ok ( ( token, idx) ) = tokenizer. next_token( ) {
229- let ch = s. chars( ) . nth( idx) . unwrap( ) ;
230- println!( "Ch: {} - {}" , ch, s) ;
230+ while let Ok ( ( idx, token) ) = tokenizer. next_token( ) {
231+ let ch = s[ idx..] . chars( ) . next( ) . unwrap( ) ;
231232 match token {
232233 Token :: OpenBrace => {
233234 assert_eq!( ch, '(' ) ;
0 commit comments