Skip to content

Commit f29fdca

Browse files
committed
Expression engine: refactoring operator lexing.
1 parent 605417f commit f29fdca

4 files changed

Lines changed: 90 additions & 188 deletions

File tree

src/expression/expr_lexer.h

Lines changed: 36 additions & 147 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,29 @@
77
/* TODO: provide feedback to user */
88
#define lex_error trace
99

10+
/* Try to lex an operator token at the start of `s`.
 *
 * op_tbl is scanned in index order over [0, OP_IF_THEN_ELSE) and the first
 * entry whose name is a prefix of `s` wins. The scan therefore relies on
 * op_tbl listing longer names before the shorter names they start with
 * (e.g. ">=" before ">", "&&" before "&", "?:" before "?"). The final
 * OP_IF_THEN_ELSE entry is excluded from the scan.
 *
 * On a match the token `t` is filled in:
 *   - if the entry is flagged `assignment` and the name is immediately
 *     followed by '=', `t` becomes TOK_ASSIGN_OP with var.op_idx set to the
 *     operator index, and the '=' is consumed as well;
 *   - otherwise `t` becomes TOK_OP (arity > 1) or TOK_OP_UNARY (arity <= 1)
 *     with op.idx set to the operator index.
 *
 * Returns the number of characters consumed from `s`, or 0 if no operator
 * matched (in which case `t` is left untouched). The return value is a
 * character count, not an operator index, so it is typed `int`.
 *
 * NOTE(review): every arity-1 entry, including prime ("'"), is tagged
 * TOK_OP_UNARY here; confirm the parser expects that for postfix operators.
 */
static int op_lookup(etoken t, const char *s)
{
    int i;
    for (i = 0; i < OP_IF_THEN_ELSE; i++) {
        const char *name = op_tbl[i].name;
        size_t len = strlen(name);

        if (strncmp(s, name, len) != 0)
            continue;

        /* check for augmented assignment operator, e.g. "+=" */
        if (op_tbl[i].assignment && ('=' == s[len])) {
            t->toktype = TOK_ASSIGN_OP;
            t->var.op_idx = i;
            return (int)len + 1;
        }

        t->toktype = op_tbl[i].arity > 1 ? TOK_OP : TOK_OP_UNARY;
        t->op.idx = i;
        return (int)len;
    }
    return 0;
}
32+
1033
/* TODO: move to expr_variable.h */
1134
static int var_lookup(etoken tok, const char *s, int len)
1235
{
@@ -151,24 +174,8 @@ static int expr_lex(const char *str, int idx, etoken tok)
151174
c = str[++idx];
152175
etoken_set_dbl(tok, atof(str+i));
153176
return idx;
154-
case '+':
155-
switch (str[++idx]) {
156-
case '+':
157-
tok->toktype = TOK_OP_UNARY;
158-
tok->op.idx = OP_INCREMENT_PRE;
159-
++idx;
160-
break;
161-
case '=':
162-
tok->toktype = TOK_ASSIGN_OP;
163-
tok->var.op_idx = OP_ADD;
164-
++idx;
165-
break;
166-
default:
167-
etoken_set_op(tok, OP_ADD);
168-
break;
169-
}
170-
return idx;
171177
case '-':
178+
/* handle '-' separately (for now) since it could be part of an arrow */
172179
/* could be either subtraction, negation, or lambda */
173180
switch (str[++idx]) {
174181
case '>':
@@ -195,125 +202,6 @@ static int expr_lex(const char *str, int idx, etoken tok)
195202
else
196203
tok->toktype = TOK_NEGATE;
197204
return idx;
198-
case '/':
199-
if (str[++idx] == '=') {
200-
tok->toktype = TOK_ASSIGN_OP;
201-
tok->var.op_idx = OP_DIVIDE;
202-
++idx;
203-
}
204-
else
205-
etoken_set_op(tok, OP_DIVIDE);
206-
return idx;
207-
case '*':
208-
if (str[++idx] == '=') {
209-
tok->toktype = TOK_ASSIGN_OP;
210-
tok->var.op_idx = OP_MULTIPLY;
211-
++idx;
212-
}
213-
else
214-
etoken_set_op(tok, OP_MULTIPLY);
215-
return idx;
216-
case '%':
217-
if (str[++idx] == '=') {
218-
tok->toktype = TOK_ASSIGN_OP;
219-
tok->var.op_idx = OP_MODULO;
220-
++idx;
221-
}
222-
else {
223-
etoken_set_op(tok, OP_MODULO);
224-
}
225-
return idx;
226-
case '=':
227-
/* could be '=', '==' */
228-
if (str[++idx] == '=') {
229-
etoken_set_op(tok, OP_IS_EQUAL);
230-
++idx;
231-
}
232-
else {
233-
tok->toktype = TOK_ASSIGN;
234-
}
235-
return idx;
236-
case '<':
237-
/* could be '<', '<=', '<<' */
238-
etoken_set_op(tok, OP_IS_LESS_THAN);
239-
c = str[++idx];
240-
if (c == '=') {
241-
tok->op.idx = OP_IS_LESS_THAN_OR_EQUAL;
242-
++idx;
243-
}
244-
else if (c == '<') {
245-
tok->op.idx = OP_LEFT_BIT_SHIFT;
246-
++idx;
247-
}
248-
return idx;
249-
case '>':
250-
/* could be '>', '>=', '>>' */
251-
etoken_set_op(tok, OP_IS_GREATER_THAN);
252-
c = str[++idx];
253-
if (c == '=') {
254-
tok->op.idx = OP_IS_GREATER_THAN_OR_EQUAL;
255-
++idx;
256-
}
257-
else if (c == '>') {
258-
tok->op.idx = OP_RIGHT_BIT_SHIFT;
259-
++idx;
260-
}
261-
return idx;
262-
case '!':
263-
/* could be '!', '!=' */
264-
/* TODO: handle factorial case */
265-
c = str[++idx];
266-
if (c == '=') {
267-
etoken_set_op(tok, OP_IS_NOT_EQUAL);
268-
++idx;
269-
}
270-
else {
271-
tok->toktype = TOK_OP_UNARY;
272-
tok->op.idx = OP_LOGICAL_NOT;
273-
}
274-
return idx;
275-
case '&':
276-
/* could be '&', '&&' */
277-
etoken_set_op(tok, OP_BITWISE_AND);
278-
c = str[++idx];
279-
if (c == '&') {
280-
tok->op.idx = OP_LOGICAL_AND;
281-
++idx;
282-
}
283-
else if (c == '=') {
284-
tok->toktype = TOK_ASSIGN_OP;
285-
tok->var.op_idx = OP_BITWISE_AND;
286-
++idx;
287-
}
288-
return idx;
289-
case '|':
290-
/* could be '|', '||' */
291-
etoken_set_op(tok, OP_BITWISE_OR);
292-
c = str[++idx];
293-
if (c == '|') {
294-
tok->op.idx = OP_LOGICAL_OR;
295-
++idx;
296-
}
297-
else if (c == '=') {
298-
tok->toktype = TOK_ASSIGN_OP;
299-
tok->var.op_idx = OP_BITWISE_OR;
300-
++idx;
301-
}
302-
return idx;
303-
case '^':
304-
/* bitwise XOR */
305-
etoken_set_op(tok, OP_BITWISE_XOR);
306-
if (str[++idx] == '=') {
307-
tok->toktype = TOK_ASSIGN_OP;
308-
tok->var.op_idx = OP_BITWISE_XOR;
309-
++idx;
310-
}
311-
return idx;
312-
case '\'':
313-
/* prime */
314-
tok->toktype = TOK_OP;
315-
tok->op.idx = OP_PRIME;
316-
return ++idx;
317205
case '(':
318206
tok->toktype = TOK_OPEN_PAREN;
319207
return ++idx;
@@ -347,15 +235,6 @@ static int expr_lex(const char *str, int idx, etoken tok)
347235
case ',':
348236
tok->toktype = TOK_COMMA;
349237
return ++idx;
350-
case '?':
351-
/* conditional */
352-
etoken_set_op(tok, OP_IF);
353-
c = str[++idx];
354-
if (c == ':') {
355-
tok->op.idx = OP_IF_ELSE;
356-
++idx;
357-
}
358-
return idx;
359238
case ':':
360239
tok->toktype = TOK_COLON;
361240
return ++idx;
@@ -367,8 +246,18 @@ static int expr_lex(const char *str, int idx, etoken tok)
367246
return ++idx;
368247
default:
369248
if (!isalpha(c)) {
370-
lex_error("unknown character '%c' in lexer\n", c);
371-
break;
249+
int len = op_lookup(tok, str+i);
250+
if (len) {
251+
return idx + len;
252+
}
253+
else if ('=' == c) {
254+
tok->toktype = TOK_ASSIGN;
255+
return idx + 1;
256+
}
257+
else {
258+
lex_error("unknown character '%c' in lexer\n", c);
259+
break;
260+
}
372261
}
373262
while (c && (isalpha(c) || isdigit(c) || c == '_'))
374263
c = str[++idx];

src/expression/expr_operator.h

Lines changed: 40 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -3,33 +3,34 @@
33

44
typedef enum {
55
OP_UNKNOWN = -1,
6-
OP_LOGICAL_NOT,
6+
OP_INCREMENT_PRE,
7+
OP_DECREMENT_PRE,
8+
OP_INCREMENT_POST,
9+
OP_DECREMENT_POST,
710
OP_MULTIPLY,
811
OP_DIVIDE,
912
OP_MODULO,
1013
OP_ADD,
1114
OP_SUBTRACT,
1215
OP_LEFT_BIT_SHIFT,
1316
OP_RIGHT_BIT_SHIFT,
14-
OP_IS_GREATER_THAN,
1517
OP_IS_GREATER_THAN_OR_EQUAL,
16-
OP_IS_LESS_THAN,
18+
OP_IS_GREATER_THAN,
1719
OP_IS_LESS_THAN_OR_EQUAL,
20+
OP_IS_LESS_THAN,
1821
OP_IS_EQUAL,
1922
OP_IS_NOT_EQUAL,
23+
OP_LOGICAL_AND,
24+
OP_LOGICAL_OR,
2025
OP_BITWISE_AND,
2126
OP_BITWISE_XOR,
2227
OP_BITWISE_OR,
23-
OP_LOGICAL_AND,
24-
OP_LOGICAL_OR,
28+
OP_LOGICAL_NOT,
2529
OP_PRIME,
26-
OP_INCREMENT_PRE,
27-
OP_DECREMENT_PRE,
28-
OP_INCREMENT_POST,
29-
OP_DECREMENT_POST,
30-
OP_IF,
3130
OP_IF_ELSE,
32-
OP_IF_THEN_ELSE
31+
OP_IF,
32+
OP_IF_THEN_ELSE,
33+
N_OP
3334
} expr_op_t;
3435

3536
#define NONE 0x0
@@ -43,36 +44,37 @@ static struct {
4344
uint8_t arity;
4445
uint8_t precedence;
4546
uint16_t optimize_const_ops;
47+
uint8_t assignment;
4648
} op_tbl[] = {
4749
/* left==0 | right==0 | left==1 | right==1 */
48-
{ "!", 1, 11, GET_ONE | GET_ONE <<4 | GET_ZERO <<8 | GET_ZERO <<12 },
49-
{ "*", 2, 10, GET_ZERO | GET_ZERO <<4 | GET_OPER <<8 | GET_OPER <<12 },
50-
{ "/", 2, 10, GET_ZERO | BAD_EXPR <<4 | NONE <<8 | GET_OPER <<12 },
51-
{ "%", 2, 10, GET_ZERO | GET_OPER <<4 | NONE <<8 | NONE <<12 },
52-
{ "+", 2, 9, GET_OPER | GET_OPER <<4 | NONE <<8 | NONE <<12 },
53-
{ "-", 2, 9, NONE | GET_OPER <<4 | NONE <<8 | NONE <<12 },
54-
{ "<<", 2, 8, GET_ZERO | GET_OPER <<4 | NONE <<8 | NONE <<12 },
55-
{ ">>", 2, 8, GET_ZERO | GET_OPER <<4 | NONE <<8 | NONE <<12 },
56-
{ ">", 2, 7, NONE | NONE <<4 | NONE <<8 | NONE <<12 },
57-
{ ">=", 2, 7, NONE | NONE <<4 | NONE <<8 | NONE <<12 },
58-
{ "<", 2, 7, NONE | NONE <<4 | NONE <<8 | NONE <<12 },
59-
{ "<=", 2, 7, NONE | NONE <<4 | NONE <<8 | NONE <<12 },
60-
{ "==", 2, 6, NONE | NONE <<4 | NONE <<8 | NONE <<12 },
61-
{ "!=", 2, 6, NONE | NONE <<4 | NONE <<8 | NONE <<12 },
62-
{ "&", 2, 5, GET_ZERO | GET_ZERO <<4 | NONE <<8 | NONE <<12 },
63-
{ "^", 2, 4, GET_OPER | GET_OPER <<4 | NONE <<8 | NONE <<12 },
64-
{ "|", 2, 3, GET_OPER | GET_OPER <<4 | GET_ONE <<8 | GET_ONE <<12 },
65-
{ "&&", 2, 2, GET_ZERO | GET_ZERO <<4 | NONE <<8 | NONE <<12 },
66-
{ "||", 2, 1, GET_OPER | GET_OPER <<4 | GET_ONE <<8 | GET_ONE <<12 },
67-
{ "'", 1, 12, GET_ZERO | NONE <<4 | GET_ZERO <<8 | NONE <<12 },
68-
{ "++.", 1, 12, GET_ONE | NONE <<4 | NONE <<8 | NONE <<12 },
69-
{ "--.", 1, 12, NONE | NONE <<4 | GET_ZERO <<8 | NONE <<12 },
70-
{ ".++", 1, 12, GET_ONE | NONE <<4 | NONE <<8 | NONE <<12 },
71-
{ ".--", 1, 12, NONE | NONE <<4 | GET_ZERO <<8 | NONE <<12 },
50+
{ "++", 1, 12, GET_ONE | NONE <<4 | NONE <<8 | NONE <<12, 0 },
51+
{ "--", 1, 12, NONE | NONE <<4 | GET_ZERO <<8 | NONE <<12, 0 },
52+
{ "++", 1, 12, GET_ONE | NONE <<4 | NONE <<8 | NONE <<12, 0 },
53+
{ "--", 1, 12, NONE | NONE <<4 | GET_ZERO <<8 | NONE <<12, 0 },
54+
{ "*", 2, 10, GET_ZERO | GET_ZERO <<4 | GET_OPER <<8 | GET_OPER <<12, 1 },
55+
{ "/", 2, 10, GET_ZERO | BAD_EXPR <<4 | NONE <<8 | GET_OPER <<12, 1 },
56+
{ "%", 2, 10, GET_ZERO | GET_OPER <<4 | NONE <<8 | NONE <<12, 1 },
57+
{ "+", 2, 9, GET_OPER | GET_OPER <<4 | NONE <<8 | NONE <<12, 1 },
58+
{ "-", 2, 9, NONE | GET_OPER <<4 | NONE <<8 | NONE <<12, 1 },
59+
{ "<<", 2, 8, GET_ZERO | GET_OPER <<4 | NONE <<8 | NONE <<12, 0 },
60+
{ ">>", 2, 8, GET_ZERO | GET_OPER <<4 | NONE <<8 | NONE <<12, 0 },
61+
{ ">=", 2, 7, NONE | NONE <<4 | NONE <<8 | NONE <<12, 0 },
62+
{ ">", 2, 7, NONE | NONE <<4 | NONE <<8 | NONE <<12, 0 },
63+
{ "<=", 2, 7, NONE | NONE <<4 | NONE <<8 | NONE <<12, 0 },
64+
{ "<", 2, 7, NONE | NONE <<4 | NONE <<8 | NONE <<12, 0 },
65+
{ "==", 2, 6, NONE | NONE <<4 | NONE <<8 | NONE <<12, 0 },
66+
{ "!=", 2, 6, NONE | NONE <<4 | NONE <<8 | NONE <<12, 0 },
67+
{ "&&", 2, 2, GET_ZERO | GET_ZERO <<4 | NONE <<8 | NONE <<12, 0 },
68+
{ "||", 2, 1, GET_OPER | GET_OPER <<4 | GET_ONE <<8 | GET_ONE <<12, 0 },
69+
{ "&", 2, 5, GET_ZERO | GET_ZERO <<4 | NONE <<8 | NONE <<12, 1 },
70+
{ "^", 2, 4, GET_OPER | GET_OPER <<4 | NONE <<8 | NONE <<12, 1 },
71+
{ "|", 2, 3, GET_OPER | GET_OPER <<4 | GET_ONE <<8 | GET_ONE <<12, 1 },
72+
{ "!", 1, 11, GET_ONE | GET_ONE <<4 | GET_ZERO <<8 | GET_ZERO <<12, 0 },
73+
{ "'", 1, 12, GET_ZERO | NONE <<4 | GET_ZERO <<8 | NONE <<12, 0 },
7274
/* TODO: handle optimization of ternary operator */
73-
{ "IFTHEN", 2, 0, NONE | NONE <<4 | NONE <<8 | NONE <<12 },
74-
{ "IFELSE", 2, 0, NONE | NONE <<4 | NONE <<8 | NONE <<12 },
75-
{ "IFTHENELSE", 3, 0, NONE | NONE <<4 | NONE <<8 | NONE <<12 },
75+
{ "?:", 2, 0, NONE | NONE <<4 | NONE <<8 | NONE <<12, 0 },
76+
{ "?", 2, 0, NONE | NONE <<4 | NONE <<8 | NONE <<12, 0 },
77+
{ "IFTHENELSE", 3, 0, NONE | NONE <<4 | NONE <<8 | NONE <<12, 0 },
7678
};
7779

7880
#endif /* __MPR_EXPR_OPERATOR_H__ */

src/expression/expr_parser.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1905,12 +1905,12 @@ int expr_parser_build_stack(mpr_expr expr, const char *str,
19051905
arg_substack_len = estack_get_substack_len(out, i - var_substack_len);
19061906

19071907
/* current order of tokens on the expression stack:
1908-
* [i] top of operator assignment subexpression of length N
1908+
* [i] top of augmented assignment operator subexpression of length N
19091909
* [i-N] top of argument subexpression of length M
19101910
* [i-N-M] top of preceding subexpression (if any)
19111911
*
19121912
* need to be expanded to:
1913-
* [i] top of assignment subexpression of length N
1913+
* [i] top of augmented assignment operator subexpression of length N
19141914
* [i-N] operator token
19151915
* [i-N-1] top of argument subexpression of length M
19161916
* [i-N-M-1] top of variable subexpression of length N
@@ -1921,7 +1921,9 @@ int expr_parser_build_stack(mpr_expr expr, const char *str,
19211921
newtok.toktype = TOK_OP;
19221922
newtok.op.idx = t->var.op_idx;
19231923
newtok.gen.datatype = t->gen.datatype;
1924+
newtok.gen.casttype = 0;
19241925
newtok.gen.vec_len = t->gen.vec_len;
1926+
newtok.gen.flags = 0;
19251927

19261928
/* 2) convert assign*_op tokens to assign* */
19271929
j = i;
@@ -1981,7 +1983,9 @@ int expr_parser_build_stack(mpr_expr expr, const char *str,
19811983

19821984
/* copy datatype from variable */
19831985
newtok.gen.datatype = t->gen.datatype;
1986+
newtok.gen.casttype = 0;
19841987
newtok.gen.vec_len = t->gen.vec_len;
1988+
newtok.gen.flags = 0;
19851989

19861990
/* convert OP to literal 1 */
19871991
t->toktype = TOK_LITERAL;

src/expression/expr_token.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -447,7 +447,14 @@ static void etoken_print(etoken tok, expr_var_t *vars, int show_locks)
447447
--d;
448448
snprintf(s + d, l - d, "]");
449449
break;
450-
case TOK_OP: snprintf(s, l, "OP\t\t%s", op_tbl[tok->op.idx].name); break;
450+
case TOK_OP:
451+
if (OP_INCREMENT_PRE == tok->op.idx || OP_DECREMENT_PRE == tok->op.idx)
452+
snprintf(s, l, "OP\t\t%s.", op_tbl[tok->op.idx].name);
453+
else if (OP_INCREMENT_POST == tok->op.idx || OP_DECREMENT_POST == tok->op.idx)
454+
snprintf(s, l, "OP\t\t.%s", op_tbl[tok->op.idx].name);
455+
else
456+
snprintf(s, l, "OP\t\t%s", op_tbl[tok->op.idx].name);
457+
break;
451458
case TOK_OPEN_CURLY: snprintf(s, l, "{\t"); break;
452459
case TOK_OPEN_PAREN: snprintf(s, l, "(\t\tarity %d", tok->fn.arity); break;
453460
case TOK_OPEN_SQUARE: snprintf(s, l, "["); break;

0 commit comments

Comments
 (0)