Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 24 additions & 3 deletions cpp/src/parquet/metadata.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@
#include "parquet/metadata.h"

#include <algorithm>
#include <cerrno>
#include <cinttypes>
#include <cstdlib>
#include <limits>
#include <memory>
#include <ostream>
#include <random>
Expand Down Expand Up @@ -1274,6 +1277,24 @@ ApplicationVersion::ApplicationVersion(std::string application, int major, int m
: application_(std::move(application)), version{major, minor, patch, "", "", ""} {}

namespace {
// Parse a digit-only ASCII string into an int, saturating to INT_MAX on
// overflow. The version components in a file's `created_by` string are
// attacker-controlled, and std::atoi has undefined behavior when the
// converted value falls outside the range of int (see [c.strtoint]).
int ParseUnsignedVersionComponent(const std::string& s) {
if (s.empty()) {
return 0;
}
errno = 0;
char* end = nullptr;
unsigned long value = std::strtoul(s.c_str(), &end, 10);
if (errno == ERANGE ||
value > static_cast<unsigned long>(std::numeric_limits<int>::max())) {
return std::numeric_limits<int>::max();
}
return static_cast<int>(value);
}

// Parse the application version format and set parsed values to
// ApplicationVersion.
//
Expand Down Expand Up @@ -1441,7 +1462,7 @@ class ApplicationVersionParser {
}
auto version_major_string = version_string_.substr(
version_major_start, version_major_end - version_major_start);
application_version_.version.major = atoi(version_major_string.c_str());
application_version_.version.major = ParseUnsignedVersionComponent(version_major_string);
return true;
}

Expand All @@ -1466,7 +1487,7 @@ class ApplicationVersionParser {
}
auto version_minor_string = version_string_.substr(
version_minor_start, version_minor_end - version_minor_start);
application_version_.version.minor = atoi(version_minor_string.c_str());
application_version_.version.minor = ParseUnsignedVersionComponent(version_minor_string);
return true;
}

Expand All @@ -1484,7 +1505,7 @@ class ApplicationVersionParser {
}
auto version_patch_string = version_string_.substr(
version_patch_start, version_patch_end - version_patch_start);
application_version_.version.patch = atoi(version_patch_string.c_str());
application_version_.version.patch = ParseUnsignedVersionComponent(version_patch_string);
version_parsing_position_ = version_patch_end;
return true;
}
Expand Down
28 changes: 28 additions & 0 deletions cpp/src/parquet/metadata_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

#include "parquet/metadata.h"

#include <limits>

#include <gtest/gtest.h>

#include "arrow/util/key_value_metadata.h"
Expand Down Expand Up @@ -731,6 +733,32 @@ TEST(ApplicationVersion, VersionNoUnknownBuildInfoPreRelease) {
ASSERT_EQ("cd-cdh5.5.0", version.version.build_info);
}

TEST(ApplicationVersion, VersionComponentOverflow) {
// Version components in `created_by` are attacker-controlled. std::atoi
// exhibits undefined behavior when the converted value overflows int, so
// the parser must clamp instead of calling atoi.
ApplicationVersion version(
"parquet-mr version 99999999999999999999.88888888888888888888."
"77777777777777777777 (build abcd)");

ASSERT_EQ("parquet-mr", version.application_);
ASSERT_EQ("abcd", version.build_);
ASSERT_EQ(std::numeric_limits<int>::max(), version.version.major);
ASSERT_EQ(std::numeric_limits<int>::max(), version.version.minor);
ASSERT_EQ(std::numeric_limits<int>::max(), version.version.patch);

// Boundary cases: INT_MAX is representable, INT_MAX+1 saturates.
ApplicationVersion at_max("parquet-mr version 2147483647.2147483647.2147483647");
ASSERT_EQ(std::numeric_limits<int>::max(), at_max.version.major);
ASSERT_EQ(std::numeric_limits<int>::max(), at_max.version.minor);
ASSERT_EQ(std::numeric_limits<int>::max(), at_max.version.patch);

ApplicationVersion just_over("parquet-mr version 2147483648.2147483648.2147483648");
ASSERT_EQ(std::numeric_limits<int>::max(), just_over.version.major);
Comment on lines +750 to +757
ASSERT_EQ(std::numeric_limits<int>::max(), just_over.version.minor);
ASSERT_EQ(std::numeric_limits<int>::max(), just_over.version.patch);
}

TEST(ApplicationVersion, FullWithSpaces) {
ApplicationVersion version(
" parquet-mr \t version \v 1.5.3ab-cdh5.5.0+cd \r (build \n abcd \f) ");
Expand Down
Loading