Skip to content

Commit 6a9197d

Browse files
committed
Refactor psqlplus_downcase_identifier to take an encoding parameter so that multi-byte characters are handled correctly
1 parent 7bcdd6f commit 6a9197d

1 file changed

Lines changed: 32 additions & 7 deletions

File tree

src/bin/psql/psqlplusscan.l

Lines changed: 32 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -56,7 +56,7 @@ static const int NumKeywords = lengthof(psqlplus_keywords);
5656

5757
static const PsqlScanKeyword *PsqlplusKeywordLookup(const char *text);
5858
static int process_integer_literal(const char *token, YYSTYPE *lval);
59-
static char *psqlplus_downcase_identifier(const char *str, int len);
59+
static char *psqlplus_downcase_identifier(const char *str, int len, int encoding);
6060
static void startlit(yyscan_t yyscanner);
6161
static void addlitchar(unsigned char ychar, yyscan_t yyscanner);
6262
static void addlit(char *ytext, int yleng, yyscan_t yyscanner);
@@ -358,7 +358,8 @@ xlinside [^ ]+
358358
return keyword->value;
359359
}
360360

361-
ident = psqlplus_downcase_identifier(yytext, yyleng);
361+
ident = psqlplus_downcase_identifier(yytext, yyleng,
362+
cur_state->encoding);
362363
yylval->str = ident;
363364
cur_state->token_count++;
364365
return IDENT;
@@ -454,18 +455,42 @@ psqlplus_scanner_finish(yyscan_t yyscanner)
454455
* Downcase the identifier and return a pg_malloc'd string.
455456
*/
456457
static char *
457-
psqlplus_downcase_identifier(const char *str, int len)
458+
psqlplus_downcase_identifier(const char *str, int len, int encoding)
458459
{
459460
char *result;
460-
int i;
461+
int i = 0;
462+
int out = 0;
463+
bool enc_is_single_byte = (pg_encoding_max_length(encoding) == 1);
461464

462465
result = (char *) pg_malloc(len + 1);
463-
for (i = 0; i < len; i++)
466+
467+
while (i < len)
464468
{
465-
result[i] = (char) pg_tolower((unsigned char) str[i]);
469+
unsigned char ch = (unsigned char) str[i];
470+
471+
if (!enc_is_single_byte && IS_HIGHBIT_SET(ch))
472+
{
473+
int mblen = pg_encoding_mblen(encoding, str + i);
474+
475+
if (mblen <= 0 || i + mblen > len)
476+
mblen = 1;
477+
memcpy(result + out, str + i, mblen);
478+
out += mblen;
479+
i += mblen;
480+
}
481+
else
482+
{
483+
if (ch >= 'A' && ch <= 'Z')
484+
ch += 'a' - 'A';
485+
else if (enc_is_single_byte && IS_HIGHBIT_SET(ch) && isupper(ch))
486+
ch = tolower(ch);
487+
488+
result[out++] = (char) ch;
489+
i++;
490+
}
466491
}
467492

468-
result[i] = '\0';
493+
result[out] = '\0';
469494

470495
return result;
471496
}

0 commit comments

Comments
 (0)