Skip to content

Commit 4470589

Browse files
Merge pull request #73 from GalaxP/main
Clickhouse - nullable types
2 parents cb6b392 + 6990f81 commit 4470589

6 files changed

Lines changed: 86 additions & 46 deletions

File tree

modules/clickhouse/src/config.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -110,15 +110,22 @@ static void parseColumns(const YAML::Node& columnsNode, Config& config)
110110
Config::Column column;
111111
size_t const spacePos = colValue.find(' ');
112112

113-
std::string const type = colValue.substr(0, spacePos);
113+
std::string type = colValue.substr(0, spacePos);
114114
std::string name = colValue.substr(spacePos + 1);
115115

116+
// Check for ! suffix indicating non-nullable column
117+
column.nullable = true;
118+
if (!type.empty() && type.back() == '!') {
119+
column.nullable = false;
120+
type.pop_back(); // Remove the '!' suffix
121+
}
122+
116123
try {
117124
column.type = g_string_to_columntype.at(type);
118125

119126
} catch (std::out_of_range& ex) {
120127
std::stringstream sstream;
121-
sstream << "Incorrect column type: " << colValue.substr(0, spacePos);
128+
sstream << "Incorrect column type: " << type;
122129
throw std::runtime_error(sstream.str());
123130
}
124131

modules/clickhouse/src/config.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ struct Config {
7979
std::string name; ///< column name
8080
ColumnType type; ///< column type
8181
ur_field_id_t fieldID; ///< column unirec id
82+
bool nullable = true; ///< whether the column accepts NULL values (default: true)
8283
};
8384

8485
/**

modules/clickhouse/src/datatype.cpp

Lines changed: 62 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -593,23 +593,31 @@ static std::shared_ptr<clickhouse::Column> makeArrColumn(ColumnType type)
593593
return column;
594594
}
595595

596-
static std::shared_ptr<clickhouse::Column> makeNonArrColumn(ColumnType type)
596+
static std::shared_ptr<clickhouse::Column> makeNonArrColumn(ColumnType type, bool nullable)
597597
{
598598
std::shared_ptr<clickhouse::Column> column;
599-
visit(type, [&](auto traits) {
600-
using ColType = clickhouse::ColumnNullableT<typename decltype(traits)::ColumnType>;
601-
column = std::make_shared<ColType>();
602-
});
599+
600+
if (nullable) {
601+
visit(type, [&](auto traits) {
602+
using ColType = clickhouse::ColumnNullableT<typename decltype(traits)::ColumnType>;
603+
column = std::make_shared<ColType>();
604+
});
605+
} else {
606+
visit(type, [&](auto traits) {
607+
using ColType = typename decltype(traits)::ColumnType;
608+
column = std::make_shared<ColType>();
609+
});
610+
}
603611

604612
return column;
605613
}
606614

607-
std::shared_ptr<clickhouse::Column> makeColumn(ColumnType type)
615+
std::shared_ptr<clickhouse::Column> makeColumn(ColumnType type, bool nullable)
608616
{
609617
if (isArr(type)) {
610618
return makeArrColumn(type);
611619
}
612-
return makeNonArrColumn(type);
620+
return makeNonArrColumn(type, nullable);
613621
}
614622

615623
GetterFn makeGetter(ColumnType type)
@@ -644,40 +652,68 @@ static ColumnWriterFn makeArrColumnwriter(ColumnType type)
644652
return columnwriter;
645653
}
646654

647-
static ColumnWriterFn makeNonArrColumnwriter(ColumnType type)
655+
static ColumnWriterFn makeNonArrColumnwriter(ColumnType type, bool nullable)
648656
{
649657
ColumnWriterFn columnwriter;
650658

651-
visitNonArr(type, [&](auto traits) {
652-
columnwriter = [](ValueVariant* value, clickhouse::Column& column) {
653-
using ColumnType = clickhouse::ColumnNullableT<typename decltype(traits)::ColumnType>;
654-
using ValueType = std::invoke_result_t<
655-
decltype(decltype(traits)::GETTER),
656-
Nemea::UnirecRecordView&,
657-
ur_field_type_t>;
658-
auto* col = dynamic_cast<ColumnType*>(&column);
659-
if (!value) {
660-
col->Append(std::nullopt);
661-
} else {
662-
col->Append(std::get<ValueType>(*value));
663-
}
664-
};
665-
});
659+
if (nullable) {
660+
visitNonArr(type, [&](auto traits) {
661+
columnwriter = [](ValueVariant* value, clickhouse::Column& column) {
662+
using ColumnType = clickhouse::ColumnNullableT<typename decltype(traits)::ColumnType>;
663+
using ValueType = std::invoke_result_t<
664+
decltype(decltype(traits)::GETTER),
665+
Nemea::UnirecRecordView&,
666+
ur_field_type_t>;
667+
auto* col = dynamic_cast<ColumnType*>(&column);
668+
if (!value) {
669+
col->Append(std::nullopt);
670+
} else {
671+
col->Append(std::get<ValueType>(*value));
672+
}
673+
};
674+
});
675+
} else {
676+
visitNonArr(type, [&](auto traits) {
677+
columnwriter = [](ValueVariant* value, clickhouse::Column& column) {
678+
using ColumnType = typename decltype(traits)::ColumnType;
679+
using ValueType = std::invoke_result_t<
680+
decltype(decltype(traits)::GETTER),
681+
Nemea::UnirecRecordView&,
682+
ur_field_type_t>;
683+
auto* col = dynamic_cast<ColumnType*>(&column);
684+
if (!value) {
685+
throw std::runtime_error("NULL value for non-nullable column");
686+
}
687+
// Cast to avoid sign conversion warnings for DateTime64
688+
if constexpr (std::is_same_v<ValueType, uint64_t> && std::is_same_v<ColumnType, ColumnDateTime64<g_TIME_PRECISION>>) {
689+
col->Append(static_cast<int64_t>(std::get<ValueType>(*value)));
690+
} else {
691+
col->Append(std::get<ValueType>(*value));
692+
}
693+
};
694+
});
695+
}
666696

667697
return columnwriter;
668698
}
669699

670-
ColumnWriterFn makeColumnwriter(ColumnType type)
700+
ColumnWriterFn makeColumnwriter(ColumnType type, bool nullable)
671701
{
672702
if (isArr(type)) {
673703
return makeArrColumnwriter(type);
674704
}
675-
return makeNonArrColumnwriter(type);
705+
return makeNonArrColumnwriter(type, nullable);
676706
}
677707

678-
std::string typeToClickhouse(ColumnType type)
708+
std::string typeToClickhouse(ColumnType type, bool nullable)
679709
{
680710
std::string result;
681711
visit(type, [&](auto traits) { result = traits.CLICKHOUSE_TYPE_NAME; });
712+
713+
// Don't wrap arrays in Nullable
714+
if (nullable && !isArr(type)) {
715+
result = "Nullable(" + result + ")";
716+
}
717+
682718
return result;
683719
}

modules/clickhouse/src/datatype.hpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ struct ColumnCtx {
8282
std::string name; ///< Column name
8383
ColumnType type; ///< Column type
8484
ur_field_id_t fieldID; ///< unirec template field id
85+
bool nullable = true; ///< whether the column is nullable
8586

8687
ColumnFactoryFn columnFactory = nullptr; ///< lambda for creating columns
8788
GetterFn getter = nullptr; ///< lambda for converting unirec data to clickhouse column
@@ -121,9 +122,10 @@ const int g_TIME_PRECISION = 9;
121122
* @brief Make a ClickHouse column that is able to store values of the supplied data type
122123
*
123124
* @param type The data type
125+
* @param nullable Whether the column should be nullable (ignored for array types)
124126
* @return The ClickHouse column object
125127
*/
126-
std::shared_ptr<clickhouse::Column> makeColumn(ColumnType type);
128+
std::shared_ptr<clickhouse::Column> makeColumn(ColumnType type, bool nullable = true);
127129

128130
/**
129131
* @brief Makes a function (lambda) which converts unirec column data into clickhouse column
@@ -137,14 +139,16 @@ GetterFn makeGetter(ColumnType type);
137139
* @brief Makes a function (lambda) which appends a value to a ClickHouse column of the given type
138140
*
139141
* @param type The data type
142+
* @param nullable Whether the column is nullable (ignored for array types)
140143
* @return The column writer function
141144
*/
142-
ColumnWriterFn makeColumnwriter(ColumnType type);
145+
ColumnWriterFn makeColumnwriter(ColumnType type, bool nullable = true);
143146

144147
/**
145148
* @brief Converts Columntype into clickhouse string specification of column
146149
*
147150
* @param type The data type
151+
* @param nullable Whether the column is nullable (array types are never wrapped in Nullable)
148152
* @return The ClickHouse type name, wrapped in Nullable(...) for nullable non-array types
149153
*/
150-
std::string typeToClickhouse(ColumnType type);
154+
std::string typeToClickhouse(ColumnType type, bool nullable = true);

modules/clickhouse/src/inserter.cpp

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ static void ensureSchema(
124124
sstream << "CREATE TABLE " << table << "(\n";
125125
size_t columnIndex = 0;
126126
for (const auto& column : columns) {
127-
const auto& clickhouseType = typeToClickhouse(columns[columnIndex].type);
127+
const auto& clickhouseType = typeToClickhouse(columns[columnIndex].type, columns[columnIndex].nullable);
128128
sstream << " \"" << column.name << "\" " << clickhouseType
129129
<< (columnIndex < columns.size() - 1 ? "," : "") << '\n';
130130
columnIndex++;
@@ -143,18 +143,9 @@ static void ensureSchema(
143143

144144
for (size_t i = 0; i < dbColumns.size(); i++) {
145145
const auto& expectedName = columns[i].name;
146-
const auto& expectedType = typeToClickhouse(columns[i].type);
146+
const auto& expectedType = typeToClickhouse(columns[i].type, columns[i].nullable);
147147
const auto& [actual_name, actual_type] = dbColumns[i];
148148

149-
// strip Nullable(...) wrapper for comparison
150-
std::string actualBaseType = actual_type;
151-
static const std::string nullablePrefix = "Nullable(";
152-
if (actual_type.rfind(nullablePrefix, 0) == 0 && actual_type.back() == ')') {
153-
actualBaseType = actual_type.substr(
154-
nullablePrefix.size(),
155-
actual_type.size() - nullablePrefix.size() - 1);
156-
}
157-
158149
if (expectedName != actual_name) {
159150
std::stringstream sstream;
160151
sstream << "Expected column #" << i << " in table \"" << table << "\" to be named \""
@@ -163,8 +154,8 @@ static void ensureSchema(
163154
throw std::runtime_error(sstream.str());
164155
}
165156

166-
// compare expected to stripped actual type
167-
if (expectedType != actualBaseType) {
157+
// Compare expected type with actual type (exact match required for nullable vs non-nullable)
158+
if (expectedType != actual_type) {
168159
std::stringstream sstream;
169160
sstream << "Expected column #" << i << " in table \"" << table << "\" to be of type \""
170161
<< expectedType << "\" but it is \"" << actual_type << "\"\n"

modules/clickhouse/src/manager.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,11 @@ static std::vector<ColumnCtx> prepareColumns(const std::vector<Config::Column>&
3131
column.name = columnCfg.name;
3232
column.type = columnCfg.type;
3333
column.fieldID = columnCfg.type;
34+
column.nullable = columnCfg.nullable;
3435

3536
column.getter = makeGetter(columnCfg.type);
36-
column.columnWriter = makeColumnwriter(columnCfg.type);
37-
column.columnFactory = [=]() { return makeColumn(columnCfg.type); };
37+
column.columnWriter = makeColumnwriter(columnCfg.type, columnCfg.nullable);
38+
column.columnFactory = [=]() { return makeColumn(columnCfg.type, columnCfg.nullable); };
3839

3940
columns.emplace_back(std::move(column));
4041
}

0 commit comments

Comments
 (0)