@@ -26,10 +26,18 @@ static Lexer make_lexer(const std::string& input) {
// Test helper, shown here as a diff hunk: '-' lines are the removed version,
// '+' lines are the replacement.
// Builds a Lexer over `input` and collects the tokens it yields into a vector.
// New version: stops right after pushing a token whose type is TokenType::End;
// if is_at_end() reports true first, it pushes one more next_token() result and stops.
// Old version: looped while !is_at_end(), then unconditionally pushed one more token.
// NOTE(review): presumably the rewrite avoids pushing a second End token when the
// lexer returns End before is_at_end() reports true - confirm against Lexer.
2626static std::vector<Token> tokenize (const std::string& input) {
2727 Lexer lexer (input);
2828 std::vector<Token> tokens;
29- while (!lexer.is_at_end ()) {
30- tokens.push_back (lexer.next_token ());
29+ while (true ) {
30+ Token token = lexer.next_token ();
31+ tokens.push_back (token);
32+ if (token.type () == TokenType::End) {
33+ break ;
34+ }
35+ if (lexer.is_at_end ()) {
36+ // Input exhausted but the last token wasn't End - fetch one more token (expected to be End)
37+ tokens.push_back (lexer.next_token ());
38+ break ;
39+ }
3140 }
32- tokens.push_back (lexer.next_token ()); // Get End token
3341 return tokens;
3442}
3543
@@ -84,7 +92,7 @@ TEST(LexerTests, KeywordsCaseInsensitive) {
8492
8593TEST (LexerTests, KeywordsVariety) {
8694 auto tokens = tokenize (" SELECT DISTINCT id, name FROM users WHERE age > 18 ORDER BY name ASC" );
87- ASSERT_GE (tokens.size (), 12 );
95+ ASSERT_GE (tokens.size (), 15 );
8896
8997 EXPECT_EQ (tokens[0 ].type (), TokenType::Select);
9098 EXPECT_EQ (tokens[1 ].type (), TokenType::Distinct);
@@ -174,11 +182,15 @@ TEST(LexerTests, EmptyString) {
174182}
175183
// Pins the lexer's current handling of a doubled single quote: the '+' lines
// assert exactly three tokens (String, String, End) and check both string values,
// replacing the looser size-only check on the '-' lines.
// NOTE(review): the leading spaces inside the string literals below look like
// rendering artifacts of this diff view - verify against the real test file.
176184TEST (LexerTests, StringWithEscapedQuote) {
177- // Note: Lexer doesn't handle SQL-style '' escaping
178- // 'it''s' is parsed as 'it' (Error?) then 's cool'
185+ // Note: Lexer does NOT handle SQL-style '' escaping
186+ // 'it''s cool' is parsed as two strings: 'it' and 's cool'
179187 auto tokens = tokenize (" 'it''s cool'" );
180- // Multiple strings because '' is two separate quote chars
181- ASSERT_GE (tokens.size (), 2 );
188+ // First token is the string 'it' (lexer stops at second ')
189+ ASSERT_EQ (tokens.size (), 3 ); // 'it', 's cool', End
190+ EXPECT_EQ (tokens[0 ].type (), TokenType::String);
191+ EXPECT_EQ (tokens[0 ].as_string (), " it" );
192+ EXPECT_EQ (tokens[1 ].type (), TokenType::String);
193+ EXPECT_EQ (tokens[1 ].as_string (), " s cool" );
182194}
183195
184196// ============= Operator Tests =============
0 commit comments