From 8c88dc5b8cf57b09912b836dfd81f3dfc2660da7 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 3 Jun 2026 21:11:39 -0500 Subject: [PATCH 1/3] maybe fix the string printing --- src/nanoarrow/common/schema.c | 169 ++++++++++++++++++++-------------- 1 file changed, 98 insertions(+), 71 deletions(-) diff --git a/src/nanoarrow/common/schema.c b/src/nanoarrow/common/schema.c index bd9733b6e..e06ffc31d 100644 --- a/src/nanoarrow/common/schema.c +++ b/src/nanoarrow/common/schema.c @@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -1370,47 +1371,34 @@ ArrowErrorCode ArrowSchemaViewInit(struct ArrowSchemaView* schema_view, return NANOARROW_OK; } -static int64_t ArrowSchemaTypeToStringInternal(struct ArrowSchemaView* schema_view, - char* out, int64_t n) { - const char* type_string = ArrowTypeString(schema_view->type); - switch (schema_view->type) { - case NANOARROW_TYPE_DECIMAL32: - case NANOARROW_TYPE_DECIMAL64: - case NANOARROW_TYPE_DECIMAL128: - case NANOARROW_TYPE_DECIMAL256: - return snprintf(out, n, "%s(%" PRId32 ", %" PRId32 ")", type_string, - schema_view->decimal_precision, schema_view->decimal_scale); - case NANOARROW_TYPE_TIMESTAMP: - return snprintf(out, n, "%s('%s', '%s')", type_string, - ArrowTimeUnitString(schema_view->time_unit), schema_view->timezone); - case NANOARROW_TYPE_TIME32: - case NANOARROW_TYPE_TIME64: - case NANOARROW_TYPE_DURATION: - return snprintf(out, n, "%s('%s')", type_string, - ArrowTimeUnitString(schema_view->time_unit)); - case NANOARROW_TYPE_FIXED_SIZE_BINARY: - case NANOARROW_TYPE_FIXED_SIZE_LIST: - return snprintf(out, n, "%s(%" PRId32 ")", type_string, schema_view->fixed_size); - case NANOARROW_TYPE_SPARSE_UNION: - case NANOARROW_TYPE_DENSE_UNION: - return snprintf(out, n, "%s([%s])", type_string, schema_view->union_type_ids); - default: - return snprintf(out, n, "%s", type_string); +static inline void ArrowSchemaToStringSNPrintF(char** out, int64_t* n_remaining, + int64_t* n_chars, const char* fmt, ...) { + // With _FORTIFY_SOURCE=3, vsnprintf on a null output warns, so ensure we never pass + // a NULL to vsnprintf + char tmp_not_null[1]; + char* out_not_null; + size_t out_not_null_size; + if (*out == NULL) { + out_not_null = tmp_not_null; + out_not_null_size = sizeof(tmp_not_null); + } else { + out_not_null = *out; + out_not_null_size = (size_t)*n_remaining; } -} -// Helper for bookkeeping to emulate sprintf()-like behaviour spread -// among multiple sprintf calls. -static inline void ArrowToStringLogChars(char** out, int64_t n_chars_last, - int64_t* n_remaining, int64_t* n_chars) { + va_list args; + va_start(args, fmt); + int n_chars_last = vsnprintf(out_not_null, out_not_null_size, fmt, args); + va_end(args); + // In the unlikely snprintf() returning a negative value (encoding error), // ensure the result won't cause an out-of-bounds access. if (n_chars_last < 0) { n_chars_last = 0; } - *n_chars += n_chars_last; *n_remaining -= n_chars_last; + *n_chars += n_chars_last; // n_remaining is never less than 0 if (*n_remaining < 0) { @@ -1423,93 +1411,132 @@ static inline void ArrowToStringLogChars(char** out, int64_t n_chars_last, } } -int64_t ArrowSchemaToString(const struct ArrowSchema* schema, char* out, int64_t n, - char recursive) { +static void ArrowSchemaTypeToStringInternal(struct ArrowSchemaView* schema_view, + char** out, int64_t* n_remaining, + int64_t* n_chars) { + const char* type_string = ArrowTypeString(schema_view->type); + switch (schema_view->type) { + case NANOARROW_TYPE_DECIMAL32: + case NANOARROW_TYPE_DECIMAL64: + case NANOARROW_TYPE_DECIMAL128: + case NANOARROW_TYPE_DECIMAL256: + ArrowSchemaToStringSNPrintF( + out, n_remaining, n_chars, "%s(%" PRId32 ", %" PRId32 ")", type_string, + schema_view->decimal_precision, schema_view->decimal_scale); + return; + case NANOARROW_TYPE_TIMESTAMP: + ArrowSchemaToStringSNPrintF( + out, n_remaining, n_chars, "%s('%s', '%s')", type_string, + ArrowTimeUnitString(schema_view->time_unit), schema_view->timezone); + return; + case NANOARROW_TYPE_TIME32: + case NANOARROW_TYPE_TIME64: + case NANOARROW_TYPE_DURATION: + ArrowSchemaToStringSNPrintF(out, n_remaining, n_chars, "%s('%s')", type_string, + ArrowTimeUnitString(schema_view->time_unit)); + return; + case NANOARROW_TYPE_FIXED_SIZE_BINARY: + case NANOARROW_TYPE_FIXED_SIZE_LIST: + ArrowSchemaToStringSNPrintF(out, n_remaining, n_chars, "%s(%" PRId32 ")", + type_string, schema_view->fixed_size); + return; + case NANOARROW_TYPE_SPARSE_UNION: + case NANOARROW_TYPE_DENSE_UNION: + ArrowSchemaToStringSNPrintF(out, n_remaining, n_chars, "%s([%s])", type_string, + schema_view->union_type_ids); + return; + default: + ArrowSchemaToStringSNPrintF(out, n_remaining, n_chars, "%s", type_string); + return; + } +} + +static void ArrowSchemaToStringInternal(const struct ArrowSchema* schema, char** out, + int64_t* n_remaining, int64_t* n_chars, + char recursive) { if (schema == NULL) { - return snprintf(out, n, "[invalid: pointer is null]"); + ArrowSchemaToStringSNPrintF(out, n_remaining, n_chars, "[invalid: pointer is null]"); + return; } if (schema->release == NULL) { - return snprintf(out, n, "[invalid: schema is released]"); + ArrowSchemaToStringSNPrintF(out, n_remaining, n_chars, + "[invalid: schema is released]"); + return; } struct ArrowSchemaView schema_view; struct ArrowError error; if (ArrowSchemaViewInit(&schema_view, schema, &error) != NANOARROW_OK) { - return snprintf(out, n, "[invalid: %s]", ArrowErrorMessage(&error)); + ArrowSchemaToStringSNPrintF(out, n_remaining, n_chars, "[invalid: %s]", + ArrowErrorMessage(&error)); + return; } // Extension type and dictionary should include both the top-level type // and the storage type. int is_extension = schema_view.extension_name.size_bytes > 0; int is_dictionary = schema->dictionary != NULL; - int64_t n_chars = 0; - int64_t n_chars_last = 0; // Uncommon but not technically impossible that both are true if (is_extension && is_dictionary) { - n_chars_last = snprintf( - out, n, "%.*s{dictionary(%s)<", (int)schema_view.extension_name.size_bytes, - schema_view.extension_name.data, ArrowTypeString(schema_view.storage_type)); + ArrowSchemaToStringSNPrintF(out, n_remaining, n_chars, "%.*s{dictionary(%s)<", + (int)schema_view.extension_name.size_bytes, + schema_view.extension_name.data, + ArrowTypeString(schema_view.storage_type)); } else if (is_extension) { - n_chars_last = snprintf(out, n, "%.*s{", (int)schema_view.extension_name.size_bytes, - schema_view.extension_name.data); + ArrowSchemaToStringSNPrintF(out, n_remaining, n_chars, "%.*s{", + (int)schema_view.extension_name.size_bytes, + schema_view.extension_name.data); } else if (is_dictionary) { - n_chars_last = - snprintf(out, n, "dictionary(%s)<", ArrowTypeString(schema_view.storage_type)); + ArrowSchemaToStringSNPrintF(out, n_remaining, n_chars, "dictionary(%s)<", + ArrowTypeString(schema_view.storage_type)); } - ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars); - if (!is_dictionary) { - n_chars_last = ArrowSchemaTypeToStringInternal(&schema_view, out, n); + ArrowSchemaTypeToStringInternal(&schema_view, out, n_remaining, n_chars); } else { - n_chars_last = ArrowSchemaToString(schema->dictionary, out, n, recursive); + ArrowSchemaToStringInternal(schema->dictionary, out, n_remaining, n_chars, recursive); } - ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars); - if (recursive && schema->format[0] == '+') { - n_chars_last = snprintf(out, n, "<"); - ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars); + ArrowSchemaToStringSNPrintF(out, n_remaining, n_chars, "<"); for (int64_t i = 0; i < schema->n_children; i++) { if (i > 0) { - n_chars_last = snprintf(out, n, ", "); - ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars); + ArrowSchemaToStringSNPrintF(out, n_remaining, n_chars, ", "); } // ArrowSchemaToStringInternal() will validate the child and print the error, // but we need the name first if (schema->children[i] != NULL && schema->children[i]->release != NULL && schema->children[i]->name != NULL) { - n_chars_last = snprintf(out, n, "%s: ", schema->children[i]->name); - ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars); + ArrowSchemaToStringSNPrintF(out, n_remaining, n_chars, + "%s: ", schema->children[i]->name); } - n_chars_last = ArrowSchemaToString(schema->children[i], out, n, recursive); - ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars); + ArrowSchemaToStringInternal(schema->children[i], out, n_remaining, n_chars, + recursive); } - n_chars_last = snprintf(out, n, ">"); - ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars); + ArrowSchemaToStringSNPrintF(out, n_remaining, n_chars, ">"); } if (is_extension && is_dictionary) { - n_chars += snprintf(out, n, ">}"); + ArrowSchemaToStringSNPrintF(out, n_remaining, n_chars, ">}"); } else if (is_extension) { - n_chars += snprintf(out, n, "}"); + ArrowSchemaToStringSNPrintF(out, n_remaining, n_chars, "}"); } else if (is_dictionary) { - n_chars += snprintf(out, n, ">"); + ArrowSchemaToStringSNPrintF(out, n_remaining, n_chars, ">"); } +} - // Ensure that we always return a positive result - if (n_chars > 0) { - return n_chars; - } else { - return 0; - } +int64_t ArrowSchemaToString(const struct ArrowSchema* schema, char* out, int64_t n, + char recursive) { + int64_t n_chars = 0; + ArrowSchemaToStringInternal(schema, &out, &n, &n_chars, recursive); + return n_chars; } ArrowErrorCode ArrowMetadataReaderInit(struct ArrowMetadataReader* reader, From 6511d09cdd84a61fd73a52c5773e1ebf2df9ccb8 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 3 Jun 2026 21:17:50 -0500 Subject: [PATCH 2/3] add a dcheck --- src/nanoarrow/common/schema.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/nanoarrow/common/schema.c b/src/nanoarrow/common/schema.c index e06ffc31d..f1b3d87ba 100644 --- a/src/nanoarrow/common/schema.c +++ b/src/nanoarrow/common/schema.c @@ -1534,6 +1534,8 @@ static void ArrowSchemaToStringInternal(const struct ArrowSchema* schema, char** int64_t ArrowSchemaToString(const struct ArrowSchema* schema, char* out, int64_t n, char recursive) { + NANOARROW_DCHECK(n > 0); + NANOARROW_DCHECK(out != NULL || n == 0); int64_t n_chars = 0; ArrowSchemaToStringInternal(schema, &out, &n, &n_chars, recursive); return n_chars; From 99618838a17efdbd1b387c186f6698a255673bae Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 3 Jun 2026 21:27:29 -0500 Subject: [PATCH 3/3] fix guard --- src/nanoarrow/common/schema.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nanoarrow/common/schema.c b/src/nanoarrow/common/schema.c index f1b3d87ba..76d79b01c 100644 --- a/src/nanoarrow/common/schema.c +++ b/src/nanoarrow/common/schema.c @@ -1534,7 +1534,7 @@ static void ArrowSchemaToStringInternal(const struct ArrowSchema* schema, char** int64_t ArrowSchemaToString(const struct ArrowSchema* schema, char* out, int64_t n, char recursive) { - NANOARROW_DCHECK(n > 0); + NANOARROW_DCHECK(n >= 0); NANOARROW_DCHECK(out != NULL || n == 0); int64_t n_chars = 0; ArrowSchemaToStringInternal(schema, &out, &n, &n_chars, recursive);