Skip to content

Commit 766e02b

Browse files
authored
Merge pull request #58 from CESNET/clickhouse
Clickhouse module
2 parents 098327d + 76d1cb5 commit 766e02b

21 files changed

Lines changed: 2348 additions & 2 deletions

README.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@ This repository contains basic modules of the [NEMEA
44
system](https://github.com/CESNET/Nemea). The modules and their
55
functionality/purposes are:
66

7-
* [ListDetector](modules/listdetector/): forwards records that match rules list.
7+
* [Clickhouse](modules/clickhouse/): converts unirec into clickhouse DB.
8+
* [Deduplicator](modules/deduplicator/): omit duplicate records.
9+
* [ListDetector](modules/listDetector/): forwards records that match rules list.
810
* [Sampler](modules/sampler/): sample records at the given rate.
911
* [Telemetry](modules/telemetry/): provides unirec telemetry of the input interface.
10-
* [Deduplicator](modules/deduplicator/): omit duplicate records.

common/external/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,5 @@ include(spdlog.cmake)
88
include(rapidcsv.cmake)
99
include(argparse.cmake)
1010
include(xxhash.cmake)
11+
include(clickhouse_cpp.cmake)
12+
include(yaml_cpp.cmake)
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# clickhouse-cpp library (C++ client for ClickHouse)
2+
include(FetchContent)
3+
4+
FetchContent_Declare(
5+
clickhouse_cpp
6+
GIT_REPOSITORY "https://github.com/SiskaPavel/clickhouse-cpp.git"
7+
GIT_TAG "65205a8"
8+
GIT_SHALLOW ON
9+
)
10+
11+
set(DEBUG_DEPENDENCIES OFF)
12+
set(CLICKHOUSE_INSTALL_TARGETS OFF)
13+
14+
add_compile_options(-Wno-pedantic -Wno-conversion -Wno-sign-conversion)
15+
16+
FetchContent_MakeAvailable(clickhouse_cpp)
17+
18+
add_library(clickhouse_cpp::client ALIAS clickhouse-cpp-lib)

common/external/yaml_cpp.cmake

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# Yaml-cpp library
2+
#
3+
# yaml-cpp is a YAML parser and emitter in C++ matching the YAML 1.2 spec.
4+
#
5+
# "yaml-cpp" is exposed to be used as a dependency in other CMake targets
6+
# example usage: target_link_libraries(my_target PRIVATE yaml-cpp)
7+
8+
include(FetchContent)
9+
10+
FetchContent_Declare(
11+
yaml-cpp
12+
GIT_REPOSITORY https://github.com/jbeder/yaml-cpp.git
13+
GIT_TAG f732014 # yaml-cpp-0.8.0
14+
)
15+
16+
# Make sure that subproject accepts predefined build options without warnings.
17+
set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
18+
19+
# Library does not compile with -Werror that we use in some builds
20+
string(REPLACE "-Werror " " " CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} ")
21+
string(REPLACE "-Werror " " " CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ")
22+
string(REPLACE "-Werror " " " CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ")
23+
string(REPLACE "-Wconversion " " " CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} ")
24+
string(REPLACE "-Wconversion " " " CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ")
25+
string(REPLACE "-Wconversion " " " CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ")
26+
set(YAML_CPP_BUILD_TESTS OFF)
27+
set(YAML_CPP_BUILD_TOOLS OFF)
28+
set(YAML_CPP_INSTALL OFF)
29+
30+
FetchContent_MakeAvailable(yaml-cpp)

modules/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,4 @@ add_subdirectory(listDetector)
22
add_subdirectory(sampler)
33
add_subdirectory(telemetry)
44
add_subdirectory(deduplicator)
5+
add_subdirectory(clickhouse)

modules/clickhouse/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
add_subdirectory(src)

modules/clickhouse/README.md

Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
# clickhouse output module
2+
Converts Unirec records into clickhouse format and stores them into database/s.
3+
- When multiple database endpoints are specified data is sent only to one of them.
4+
By default it is the first one and the others are used if the previous ones fail.
5+
6+
## Interfaces
7+
- Input: 1
8+
- Output: 0
9+
10+
## Parameters
11+
### Common TRAP parameters
12+
- `-h [trap,1]` Print help message for this module / for libtrap specific parameters.
13+
- `-i IFC_SPEC` Specification of interface types and their parameters.
14+
- `-v` Be verbose.
15+
- `-vv` Be more verbose.
16+
- `-vvv` Be even more verbose.
17+
18+
### Module specific parameters
19+
- `-c, --config <int>` YAML config specifying connections params and data columns
20+
21+
## Usage
22+
The module expects the ClickHouse database to already contain the table with
23+
appropriate schema corresponding to the configuration entered. The existence
24+
and schema of the table is checked after initiating connection to the database
25+
and an error is displayed if there is a mismatch. The table is not
26+
automatically created.
27+
28+
### Unirec to clickhouse type conversion
29+
| Unirec | Clickhouse | | Unirec | Clickhouse |
30+
|---------|---------------|-|----------|----------------------|
31+
| int8 | Int8 | | int8* | Array(Int8) |
32+
| int16 | Int16 | | int16* | Array(Int16) |
33+
| int32 | Int32 | | int32* | Array(Int32) |
34+
| int64 | Int64 | | int64* | Array(Int64) |
35+
| uint8 | UInt8 | | uint8* | Array(UInt8) |
36+
| uint16 | UInt16 | | uint16* | Array(UInt16) |
37+
| uint32 | UInt32 | | uint32* | Array(UInt32) |
38+
| uint64 | UInt64 | | uint64* | Array(UInt64) |
39+
| char | UInt8 | | char* | Array(UInt8) |
40+
| float | Float32 | | float* | Array(Float32) |
41+
| double | Float64 | | double* | Array(Float64) |
42+
| ipaddr | IPv6 | | ipaddr* | Array(IPv6) |
43+
| macaddr | Array(UInt8) | | macaddr* | Array(Array(UInt8)) |
44+
| time | DateTime64(9) | | time* | Array(DateTime64(9)) |
45+
| string | String | | | |
46+
| bytes | Array(UInt8) | | | |
47+
48+
### Clickhouse database and table creation example
49+
```SQL
50+
CREATE DATABASE IF NOT EXISTS clickhouse;
51+
CREATE TABLE clickhouse.flows(
52+
"DST_IP" IPv6,
53+
"SRC_IP" IPv6,
54+
"BYTES" UInt64,
55+
"BYTES_REV" UInt64,
56+
"LINK_BIT_FIELD" UInt64,
57+
"TIME_FIRST" DateTime64(9),
58+
"TIME_LAST" DateTime64(9),
59+
"PACKETS" UInt32,
60+
"PACKETS_REV" UInt32,
61+
"DST_PORT" UInt16,
62+
"SRC_PORT" UInt16,
63+
"FLOW_END_REASON" UInt8,
64+
"PROTOCOL" UInt8,
65+
"TCP_FLAGS" UInt8,
66+
"TCP_FLAGS_REV" UInt8,
67+
"IDP_CONTENT" Array(UInt8),
68+
"IDP_CONTENT_REV" Array(UInt8),
69+
"PPI_PKT_DIRECTIONS" Array(Int8),
70+
"PPI_PKT_FLAGS" Array(UInt8),
71+
"TLS_JA3_FINGERPRINT" Array(UInt8),
72+
"TLS_SNI" String,
73+
"PPI_PKT_LENGTHS" Array(UInt16),
74+
"DBI_BRST_BYTES" Array(UInt32),
75+
"DBI_BRST_PACKETS" Array(UInt32),
76+
"D_PHISTS_IPT" Array(UInt32),
77+
"D_PHISTS_SIZES" Array(UInt32),
78+
"SBI_BRST_BYTES" Array(UInt32),
79+
"SBI_BRST_PACKETS" Array(UInt32),
80+
"S_PHISTS_IPT" Array(UInt32),
81+
"S_PHISTS_SIZES" Array(UInt32),
82+
"DBI_BRST_TIME_START" Array(DateTime64(9)),
83+
"DBI_BRST_TIME_STOP" Array(DateTime64(9)),
84+
"PPI_PKT_TIMES" Array(DateTime64(9)),
85+
"SBI_BRST_TIME_START" Array(DateTime64(9)),
86+
"SBI_BRST_TIME_STOP" Array(DateTime64(9))
87+
)
88+
ENGINE = MergeTree
89+
ORDER BY TIME_FIRST
90+
```
91+
92+
## Configuration
93+
YAML config
94+
95+
### Config specification
96+
| Parameter | Description | Default |
97+
|-----------|-------------|---------|
98+
| **connection** | The database connection parameters. | |
99+
| connection.endpoints | The possible endpoints data can be sent to, i.e., all the replicas of a particular shard. In case one endpoint is unreachable, another one is used. | |
100+
| connection.endpoints.endpoint | Connection parameters of one endpoint. | |
101+
| connection.endpoints.endpoint.host | The ClickHouse database host as a domain name or an IP address. | |
102+
| connection.endpoints.endpoint.port | The port of the ClickHouse database. | 9000 |
103+
| connection.username | The database username. | |
104+
| connection.password | The database password. | |
105+
| connection.database | The database name where the specified table is present. | |
106+
| connection.table | The name of the table to insert the data into. | |
107+
| **blocks** | Number of data blocks in circulation. Each block is de-facto a memory buffer that the rows are written to before being sent out to the ClickHouse database. | 64 |
108+
| **inserterThreads** | Number of threads used for data insertion to ClickHouse. In other words, the number of ClickHouse connections that are concurrently used. | 8 |
109+
| **blockInsertThreshold** | Number of rows to be buffered into a block before the block is sent out to be inserted into the database. | 100000 |
110+
| **blockInsertMaxDelaySecs** | Maximum number of seconds to wait before a block gets sent out to be inserted into the database even if the threshold has not been reached yet. | 10 |
111+
| **columns** | List of fields which each row consists of. It is in unirec template format. ([TYPE] [NAME]) | |
112+
113+
114+
### Example configuration
115+
```YAML
116+
connection:
117+
endpoints:
118+
- host: localhost
119+
port: 9000
120+
username: clickhouse
121+
password: clickhouse
122+
database: clickhouse
123+
table: flows
124+
125+
inserterThreads: 32
126+
blocks: 1024
127+
blockInsertThreshold: 100000
128+
129+
columns:
130+
- ipaddr DST_IP
131+
- ipaddr SRC_IP
132+
- uint64 BYTES
133+
- uint64 BYTES_REV
134+
- uint64 LINK_BIT_FIELD
135+
- time TIME_FIRST
136+
- time TIME_LAST
137+
- uint32 PACKETS
138+
- uint32 PACKETS_REV
139+
- uint16 DST_PORT
140+
- uint16 SRC_PORT
141+
- uint8 FLOW_END_REASON
142+
- uint8 PROTOCOL
143+
- uint8 TCP_FLAGS
144+
- uint8 TCP_FLAGS_REV
145+
- bytes IDP_CONTENT
146+
- bytes IDP_CONTENT_REV
147+
- int8* PPI_PKT_DIRECTIONS
148+
- uint8* PPI_PKT_FLAGS
149+
- bytes TLS_JA3_FINGERPRINT
150+
- string TLS_SNI
151+
- uint16* PPI_PKT_LENGTHS
152+
- uint32* DBI_BRST_BYTES
153+
- uint32* DBI_BRST_PACKETS
154+
- uint32* D_PHISTS_IPT
155+
- uint32* D_PHISTS_SIZES
156+
- uint32* SBI_BRST_BYTES
157+
- uint32* SBI_BRST_PACKETS
158+
- uint32* S_PHISTS_IPT
159+
- uint32* S_PHISTS_SIZES
160+
- time* DBI_BRST_TIME_START
161+
- time* DBI_BRST_TIME_STOP
162+
- time* PPI_PKT_TIMES
163+
- time* SBI_BRST_TIME_START
164+
- time* SBI_BRST_TIME_STOP
165+
```
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
add_executable(clickhouse
2+
main.cpp
3+
config.cpp
4+
datatype.cpp
5+
inserter.cpp
6+
manager.cpp
7+
)
8+
9+
target_link_libraries(clickhouse PRIVATE
10+
clickhouse_cpp::client
11+
common
12+
unirec::unirec++
13+
unirec::unirec
14+
trap::trap
15+
argparse
16+
yaml-cpp
17+
)
18+
19+
install(TARGETS clickhouse DESTINATION ${INSTALL_DIR_BIN})
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
#pragma GCC diagnostic push
2+
#pragma GCC diagnostic ignored "-Wpedantic"
3+
#pragma GCC diagnostic ignored "-Wsign-conversion"
4+
#include <clickhouse/client.h>
5+
#pragma GCC diagnostic pop

0 commit comments

Comments
 (0)