2424
2525#include "oracle_parser/ora_keywords.h"
2626
27+ #include "mb/pg_wchar.h"
28+
2729
2830static PQExpBuffer oraDefaultGetLocalPQExpBuffer (void );
2931
@@ -64,12 +66,13 @@ oraDefaultGetLocalPQExpBuffer(void)
6466 * since we re-use the same return buffer each time.
6567 */
6668const char *
67- ora_fmtId (const char * rawid )
69+ ora_fmtId (const char * rawid , int encoding )
6870{
6971 PQExpBuffer id_return = oraDefaultGetLocalPQExpBuffer ();
7072
7173 const char * cp ;
7274 bool need_quotes = false;
75+ size_t remaining = strlen (rawid );
7376
7477 /*
7578 * These checks need to match the identifier production in scan.l. Don't
@@ -85,7 +88,8 @@ ora_fmtId(const char *rawid)
8588 else if (rawid [0 ] != '"' )
8689 {
8790 /* otherwise check the entire string */
88- for (cp = rawid ; * cp ; cp ++ )
91+ cp = rawid ;
92+ for (size_t i = 0 ; i < remaining ; i ++ , cp ++ )
8993 {
9094 if (!((* cp >= 'a' && * cp <= 'z' )
9195 || (* cp >= '0' && * cp <= '9' )
@@ -121,16 +125,88 @@ ora_fmtId(const char *rawid)
121125 else
122126 {
123127 appendPQExpBufferChar (id_return , '"' );
124- for (cp = rawid ; * cp ; cp ++ )
128+
129+ cp = & rawid [0 ];
130+ while (remaining > 0 )
125131 {
126- /*
127- * Did we find a double-quote in the string? Then make this a
128- * double double-quote per SQL99. Before, we put in a
129- * backslash/double-quote pair. - thomas 2000-08-05
130- */
131- if (* cp == '"' )
132- appendPQExpBufferChar (id_return , '"' );
133- appendPQExpBufferChar (id_return , * cp );
132+ int charlen ;
133+
134+ /* Fast path for plain ASCII */
135+ if (!IS_HIGHBIT_SET (* cp ))
136+ {
137+ /*
138+ * Did we find a double-quote in the string? Then make this a
139+ * double double-quote per SQL99. Before, we put in a
140+ * backslash/double-quote pair. - thomas 2000-08-05
141+ */
142+ if (* cp == '"' )
143+ appendPQExpBufferChar (id_return , '"' );
144+ appendPQExpBufferChar (id_return , * cp );
145+ remaining -- ;
146+ cp ++ ;
147+ continue ;
148+ }
149+
150+ /* Slow path for possible multibyte characters */
151+ charlen = pg_encoding_mblen (encoding , cp );
152+
153+ if (remaining < charlen )
154+ {
155+ /*
156+ * If the character is longer than the available input,
157+ * replace the string with an invalid sequence. The invalid
158+ * sequence ensures that the escaped string will trigger an
159+ * error on the server-side, even if we can't directly report
160+ * an error here.
161+ */
162+ enlargePQExpBuffer (id_return , 2 );
163+ pg_encoding_set_invalid (encoding ,
164+ id_return -> data + id_return -> len );
165+ id_return -> len += 2 ;
166+ id_return -> data [id_return -> len ] = '\0' ;
167+
168+ /* there's no more input data, so we can stop */
169+ break ;
170+ }
171+ else if (pg_encoding_verifymbchar (encoding , cp , charlen ) == -1 )
172+ {
173+ /*
174+ * Multibyte character is invalid. It's important to verify
175+ * that as invalid multi-byte characters could e.g. be used to
176+ * "skip" over quote characters, e.g. when parsing
177+ * character-by-character.
178+ *
179+ * Replace the bytes corresponding to the invalid character
180+ * with an invalid sequence, for the same reason as above.
181+ *
182+ * It would be a bit faster to verify the whole string the
183+ * first time we encounter a set highbit, but this way we can
184+ * replace just the invalid characters, which probably makes
185+ * it easier for users to find the invalidly encoded portion
186+ * of a larger string.
187+ */
188+ enlargePQExpBuffer (id_return , 2 );
189+ pg_encoding_set_invalid (encoding ,
190+ id_return -> data + id_return -> len );
191+ id_return -> len += 2 ;
192+ id_return -> data [id_return -> len ] = '\0' ;
193+
194+ /*
195+ * Copy the rest of the string after the invalid multi-byte
196+ * character.
197+ */
198+ remaining -= charlen ;
199+ cp += charlen ;
200+ }
201+ else
202+ {
203+ for (int i = 0 ; i < charlen ; i ++ )
204+ {
205+ appendPQExpBufferChar (id_return , * cp );
206+ remaining -- ;
207+ cp ++ ;
208+ }
209+ }
134210 }
135211 appendPQExpBufferChar (id_return , '"' );
136212 }
0 commit comments