44 * @author Michal Vasko <mvasko@cesnet.cz>
55 * @brief Generic XML parser implementation for libyang
66 *
7- * Copyright (c) 2015 - 2021 CESNET, z.s.p.o.
7+ * Copyright (c) 2015 - 2026 CESNET, z.s.p.o.
88 *
99 * This source code is licensed under BSD 3-Clause License (the "License").
1010 * You may not use this file except in compliance with the License.
4444 ly_in_skip(c->in, 1); \
4545 }
4646
47- static LY_ERR lyxml_next_attr_content (struct lyxml_ctx * xmlctx , const char * * value , size_t * value_len , ly_bool * ws_only ,
47+ static LY_ERR lyxml_next_attr_content (struct lyxml_ctx * xmlctx , const char * * value , uint32_t * value_len , ly_bool * ws_only ,
4848 ly_bool * dynamic );
4949
5050/**
@@ -57,9 +57,9 @@ static LY_ERR lyxml_next_attr_content(struct lyxml_ctx *xmlctx, const char **val
5757 * @param[in] sectname Section name to refer in error message.
5858 */
5959LY_ERR
60- skip_section (struct lyxml_ctx * xmlctx , const char * delim , size_t delim_len , const char * sectname )
60+ skip_section (struct lyxml_ctx * xmlctx , const char * delim , uint32_t delim_len , const char * sectname )
6161{
62- size_t i ;
62+ uint32_t i ;
6363 register const char * input , * a , * b ;
6464 uint64_t parsed = 0 , newlines = 0 ;
6565
@@ -154,7 +154,7 @@ lyxml_parse_identifier(struct lyxml_ctx *xmlctx, const char **start, const char
154154 * @return LY_ERR values.
155155 */
156156LY_ERR
157- lyxml_ns_add (struct lyxml_ctx * xmlctx , const char * prefix , size_t prefix_len , char * uri )
157+ lyxml_ns_add (struct lyxml_ctx * xmlctx , const char * prefix , uint32_t prefix_len , char * uri )
158158{
159159 LY_ERR rc = LY_SUCCESS ;
160160 struct lyxml_ns * ns ;
@@ -253,7 +253,7 @@ lyxml_ns_rm(struct lyxml_ctx *xmlctx)
253253}
254254
255255const struct lyxml_ns *
256- lyxml_ns_get (const struct ly_set * ns_set , const char * prefix , size_t prefix_len )
256+ lyxml_ns_get (const struct ly_set * ns_set , const char * prefix , uint32_t prefix_len )
257257{
258258 struct lyxml_ns * ns ;
259259 uint32_t u ;
@@ -292,7 +292,7 @@ lyxml_skip_until_end_or_after_otag(struct lyxml_ctx *xmlctx)
292292{
293293 const struct ly_ctx * ctx = xmlctx -> ctx ; /* shortcut */
294294 const char * endtag , * sectname ;
295- size_t endtag_len ;
295+ uint32_t endtag_len ;
296296
297297 while (1 ) {
298298 ign_xmlws (xmlctx );
@@ -351,7 +351,7 @@ lyxml_skip_until_end_or_after_otag(struct lyxml_ctx *xmlctx)
351351 * @return LY_ERR value.
352352 */
353353static LY_ERR
354- lyxml_parse_qname (struct lyxml_ctx * xmlctx , const char * * prefix , size_t * prefix_len , const char * * name , size_t * name_len )
354+ lyxml_parse_qname (struct lyxml_ctx * xmlctx , const char * * prefix , uint32_t * prefix_len , const char * * name , uint32_t * name_len )
355355{
356356 const char * start , * end ;
357357
@@ -361,13 +361,23 @@ lyxml_parse_qname(struct lyxml_ctx *xmlctx, const char **prefix, size_t *prefix_
361361 LY_CHECK_RET (lyxml_parse_identifier (xmlctx , & start , & end ));
362362 if (end [0 ] == ':' ) {
363363 /* we have prefixed identifier */
364+ if (end - start > UINT32_MAX ) {
365+ LOGVAL (xmlctx -> ctx , NULL , LYVE_SYNTAX , "XML qualified name prefix too long." );
366+ return LY_EINVAL ;
367+ }
368+
364369 * prefix = start ;
365370 * prefix_len = end - start ;
366371
367372 move_input (xmlctx , 1 );
368373 LY_CHECK_RET (lyxml_parse_identifier (xmlctx , & start , & end ));
369374 }
370375
376+ if (end - start > UINT32_MAX ) {
377+ LOGVAL (xmlctx -> ctx , NULL , LYVE_SYNTAX , "XML qualified name too long." );
378+ return LY_EINVAL ;
379+ }
380+
371381 * name = start ;
372382 * name_len = end - start ;
373383 return LY_SUCCESS ;
@@ -386,8 +396,8 @@ lyxml_parse_qname(struct lyxml_ctx *xmlctx, const char **prefix, size_t *prefix_
386396 * @return LY_ERR value.
387397 */
388398static LY_ERR
389- lyxml_parse_value_use_buf (const struct ly_ctx * ctx , const char * * in , size_t * offset , size_t need_space , char * * buf ,
390- size_t * len , size_t * size )
399+ lyxml_parse_value_use_buf (const struct ly_ctx * ctx , const char * * in , uint32_t * offset , uint32_t need_space , char * * buf ,
400+ uint32_t * len , uint32_t * size )
391401{
392402#define BUFSIZE 24
393403#define BUFSIZE_STEP 128
@@ -399,6 +409,12 @@ lyxml_parse_value_use_buf(const struct ly_ctx *ctx, const char **in, size_t *off
399409 * size = BUFSIZE ;
400410 }
401411
412+ /* overflow check */
413+ if (* len + * offset + need_space < * size ) {
414+ LOGVAL (ctx , NULL , LYVE_SYNTAX , "XML value too long." );
415+ return LY_EINVAL ;
416+ }
417+
402418 /* allocate needed space */
403419 while (* len + * offset + need_space >= * size ) {
404420 * buf = ly_realloc (* buf , * size + BUFSIZE_STEP );
@@ -432,14 +448,21 @@ lyxml_parse_value_use_buf(const struct ly_ctx *ctx, const char **in, size_t *off
432448 * @return LY_ERR value.
433449 */
434450static LY_ERR
435- lyxml_parse_value (struct lyxml_ctx * xmlctx , char endchar , char * * value , size_t * length , ly_bool * ws_only , ly_bool * dynamic )
451+ lyxml_parse_value (struct lyxml_ctx * xmlctx , char endchar , char * * value , uint32_t * length , ly_bool * ws_only , ly_bool * dynamic )
436452{
/*
 * Add num to the unsigned counter var; on wrap-around log "XML value too long."
 * and jump to err_label, leaving var unchanged.
 *
 * The wrap test relies on unsigned arithmetic: if var + num wrapped, the sum is
 * smaller than var. The logging call uses a variable named ctx, which must be
 * visible at the point where the macro is expanded.
 *
 * Wrapped in do { } while (0) so that the expansion is a single statement and
 * is safe inside an unbraced if/else; arguments are parenthesized so that
 * expressions can be passed. Note that var is evaluated more than once.
 */
#define ADD_CHECK_OVERFLOW(var, num, err_label) \
    do { \
        if ((var) + (num) < (var)) { \
            LOGVAL(ctx, NULL, LYVE_SYNTAX, "XML value too long."); \
            goto err_label; \
        } \
        (var) += (num); \
    } while (0)
459+
437460 const struct ly_ctx * ctx = xmlctx -> ctx ; /* shortcut */
438461 const char * in = xmlctx -> in -> current , * start , * in_aux , * p ;
439462 char * buf = NULL ;
440- size_t offset ; /* read offset in input buffer */
441- size_t len ; /* length of the output string (write offset in output buffer) */
442- size_t size = 0 ; /* size of the output buffer */
463+ uint32_t offset ; /* read offset in input buffer */
464+ uint32_t len ; /* length of the output string (write offset in output buffer) */
465+ uint32_t size = 0 ; /* size of the output buffer */
443466 uint32_t n , u ;
444467 ly_bool ws = 1 ;
445468
@@ -460,7 +483,7 @@ lyxml_parse_value(struct lyxml_ctx *xmlctx, char endchar, char **value, size_t *
460483 * (one-char) entities and character references */
461484 LY_CHECK_RET (lyxml_parse_value_use_buf (ctx , & in , & offset , 4 , & buf , & len , & size ));
462485
463- ++ offset ;
486+ ADD_CHECK_OVERFLOW ( offset , 1 , error ) ;
464487 if (in [offset ] != '#' ) {
465488 /* entity reference - only predefined references are supported */
466489 if (!strncmp (& in [offset ], "lt;" , ly_strlen_const ("lt;" ))) {
@@ -488,7 +511,7 @@ lyxml_parse_value(struct lyxml_ctx *xmlctx, char endchar, char **value, size_t *
488511 } else {
489512 p = & in [offset - 1 ];
490513 /* character reference */
491- ++ offset ;
514+ ADD_CHECK_OVERFLOW ( offset , 1 , error ) ;
492515 if (isdigit (in [offset ])) {
493516 for (n = 0 ; isdigit (in [offset ]); offset ++ ) {
494517 n = (LY_BASE_DEC * n ) + (in [offset ] - '0' );
@@ -514,7 +537,7 @@ lyxml_parse_value(struct lyxml_ctx *xmlctx, char endchar, char **value, size_t *
514537 LOGVAL (ctx , NULL , LY_VCODE_INSTREXP , LY_VCODE_INSTREXP_len (& in [offset ]), & in [offset ], ";" );
515538 goto error ;
516539 }
517- ++ offset ;
540+ ADD_CHECK_OVERFLOW ( offset , 1 , error ) ;
518541 if (ly_pututf8 (& buf [len ], n , & u )) {
519542 LOGVAL (ctx , NULL , LYVE_SYNTAX , "Invalid character reference \"%.12s\" (0x%08" PRIx32 ")." , p , n );
520543 goto error ;
@@ -585,7 +608,7 @@ lyxml_parse_value(struct lyxml_ctx *xmlctx, char endchar, char **value, size_t *
585608 /* continue */
586609 in_aux = & in [offset ];
587610 LY_CHECK_ERR_GOTO (ly_getutf8 (& in_aux , & n , & u ), LOGVAL (ctx , NULL , LY_VCODE_INCHAR , in [offset ]), error );
588- offset += u ;
611+ ADD_CHECK_OVERFLOW ( offset , u , error ) ;
589612 }
590613 }
591614
@@ -623,7 +646,7 @@ lyxml_parse_value(struct lyxml_ctx *xmlctx, char endchar, char **value, size_t *
623646 * @return LY_ERR value.
624647 */
625648static LY_ERR
626- lyxml_close_element (struct lyxml_ctx * xmlctx , const char * prefix , size_t prefix_len , const char * name , size_t name_len ,
649+ lyxml_close_element (struct lyxml_ctx * xmlctx , const char * prefix , uint32_t prefix_len , const char * name , uint32_t name_len ,
627650 ly_bool empty )
628651{
629652 struct lyxml_elem * e ;
@@ -681,14 +704,14 @@ lyxml_close_element(struct lyxml_ctx *xmlctx, const char *prefix, size_t prefix_
681704 * @return LY_ERR value.
682705 */
683706static LY_ERR
684- lyxml_open_element (struct lyxml_ctx * xmlctx , const char * prefix , size_t prefix_len , const char * name , size_t name_len )
707+ lyxml_open_element (struct lyxml_ctx * xmlctx , const char * prefix , uint32_t prefix_len , const char * name , uint32_t name_len )
685708{
686709 LY_ERR ret = LY_SUCCESS ;
687710 struct lyxml_elem * e ;
688711 const char * prev_input ;
689712 uint64_t prev_line ;
690713 char * value ;
691- size_t value_len ;
714+ uint32_t value_len ;
692715 ly_bool ws_only , dynamic , is_ns ;
693716 uint32_t c , parsed ;
694717
@@ -768,7 +791,7 @@ lyxml_open_element(struct lyxml_ctx *xmlctx, const char *prefix, size_t prefix_l
768791 * @return LY_ERR value.
769792 */
770793static LY_ERR
771- lyxml_next_attr_content (struct lyxml_ctx * xmlctx , const char * * value , size_t * value_len , ly_bool * ws_only , ly_bool * dynamic )
794+ lyxml_next_attr_content (struct lyxml_ctx * xmlctx , const char * * value , uint32_t * value_len , ly_bool * ws_only , ly_bool * dynamic )
772795{
773796 char quot ;
774797
@@ -823,12 +846,12 @@ lyxml_next_attr_content(struct lyxml_ctx *xmlctx, const char **value, size_t *va
823846 * @return LY_ERR value.
824847 */
825848static LY_ERR
826- lyxml_next_attribute (struct lyxml_ctx * xmlctx , const char * * prefix , size_t * prefix_len , const char * * name , size_t * name_len )
849+ lyxml_next_attribute (struct lyxml_ctx * xmlctx , const char * * prefix , uint32_t * prefix_len , const char * * name , uint32_t * name_len )
827850{
828851 const char * in ;
829852 char * value ;
830853 uint32_t c , parsed ;
831- size_t value_len ;
854+ uint32_t value_len ;
832855 ly_bool ws_only , dynamic ;
833856
834857 /* skip WS */
@@ -879,7 +902,7 @@ lyxml_next_attribute(struct lyxml_ctx *xmlctx, const char **prefix, size_t *pref
879902 * @return LY_ERR value.
880903 */
881904static LY_ERR
882- lyxml_next_element (struct lyxml_ctx * xmlctx , const char * * prefix , size_t * prefix_len , const char * * name , size_t * name_len ,
905+ lyxml_next_element (struct lyxml_ctx * xmlctx , const char * * prefix , uint32_t * prefix_len , const char * * name , uint32_t * name_len ,
883906 ly_bool * closing )
884907{
885908 /* skip WS until EOF or after opening tag '<' */
@@ -1089,7 +1112,7 @@ lyxml_ctx_peek(struct lyxml_ctx *xmlctx, enum LYXML_PARSER_STATUS *next)
10891112{
10901113 LY_ERR ret = LY_SUCCESS ;
10911114 const char * prefix , * name , * prev_input ;
1092- size_t prefix_len , name_len ;
1115+ uint32_t prefix_len , name_len ;
10931116 ly_bool closing ;
10941117
10951118 prev_input = xmlctx -> in -> current ;
0 commit comments