# Dockerfile.clickhouse-auto-import
# Base image: Debian bookworm, slim variant.
FROM debian:bookworm-slim
# Build argument for multi-arch support. TARGETARCH is one of Docker's
# automatic platform arguments; the RUN steps below use it to pick the
# architecture-specific download artifacts (amd64 / arm64).
ARG TARGETARCH
# Install required OS packages, then the pinned ClickHouse static binary.
# - The apt package lists are removed in the same layer so they are not
#   persisted in the image.
# - TARGETARCH selects the release artifact; anything other than amd64/arm64
#   aborts the build with an explicit message.
# - curl runs with --fail (-f) so an HTTP error stops the build at the download
#   step instead of saving an error page that tar would then choke on.
# - The binary is moved to the absolute path /usr/local/bin/ so the step does
#   not depend on the build's current working directory being "/".
# - The tarball and the extracted tree are deleted in this same layer.
RUN apt-get update && apt-get install -y --no-install-recommends \
        bc \
        ca-certificates \
        curl \
        jq \
        python3 \
        python3-pip \
        python3-venv \
        sqlite3 \
        unzip \
    && rm -rf /var/lib/apt/lists/* \
    && case "${TARGETARCH}" in \
        amd64) ARCH_SUFFIX=amd64 ;; \
        arm64) ARCH_SUFFIX=arm64 ;; \
        *) echo "Unsupported architecture: ${TARGETARCH}" && exit 1 ;; \
    esac \
    && curl -fLO "https://github.com/ClickHouse/ClickHouse/releases/download/v26.3.9.8-lts/clickhouse-common-static-26.3.9.8-${ARCH_SUFFIX}.tgz" \
    && tar -xzf "clickhouse-common-static-26.3.9.8-${ARCH_SUFFIX}.tgz" \
    && mv ./clickhouse-common-static-26.3.9.8/usr/bin/clickhouse /usr/local/bin/ \
    && rm -r "clickhouse-common-static-26.3.9.8-${ARCH_SUFFIX}.tgz" ./clickhouse-common-static-26.3.9.8
# Install the pinned DuckDB CLI (v1.1.3) into /usr/local/bin.
# - DuckDB names its arm64 artifact "aarch64", hence the TARGETARCH mapping;
#   unsupported architectures abort the build with an explicit message.
# - curl runs with --fail (-f) so an HTTP error stops the build at the download
#   step instead of handing an error page to unzip.
# - The downloaded zip is removed in the same layer.
RUN case "${TARGETARCH}" in \
        amd64) DUCKDB_ARCH=amd64 ;; \
        arm64) DUCKDB_ARCH=aarch64 ;; \
        *) echo "Unsupported architecture: ${TARGETARCH}" && exit 1 ;; \
    esac \
    && curl -fLO "https://github.com/duckdb/duckdb/releases/download/v1.1.3/duckdb_cli-linux-${DUCKDB_ARCH}.zip" \
    && unzip "duckdb_cli-linux-${DUCKDB_ARCH}.zip" \
    && mv duckdb /usr/local/bin/ \
    && chmod +x /usr/local/bin/duckdb \
    && rm "duckdb_cli-linux-${DUCKDB_ARCH}.zip"
# Install PyIceberg for optional Iceberg metadata generation.
# A virtual environment at /app/venv is used to avoid conflicts with the
# system Python packages.
# Requirement specs that contain "[extras]" are quoted so the shell can never
# treat the brackets as a glob pattern and rewrite the argument.
# NOTE(review): every package except pyiceberg is unpinned, so rebuilds may
# pick up different versions; consider pinning them for reproducibility.
RUN python3 -m venv /app/venv \
    && /app/venv/bin/pip install --no-cache-dir \
        "pyiceberg[duckdb,pyarrow,sql]==0.9.1" \
        sqlalchemy \
        psycopg2-binary \
        pandas \
        "fsspec[http]" \
        fastavro \
        pyyaml
# Make /app the working directory for the remaining instructions and for the
# running container, and create the data/parquet/imported tree beneath it.
WORKDIR /app
RUN mkdir -p /app/data/parquet/imported
# Copy the ClickHouse schema files into the image.
COPY src/database/clickhouse/schema.sql \
     src/database/clickhouse/ttl-schema.sql \
     /app/src/database/clickhouse/

# Copy the entry-point and helper scripts, plus the shared script library.
COPY scripts/clickhouse-auto-import \
     scripts/clickhouse-import \
     scripts/clickhouse-load-ttl-rules.py \
     scripts/parquet-export \
     scripts/generate-iceberg-metadata \
     /app/scripts/
COPY scripts/lib /app/scripts/lib/

# Ensure every copied top-level script carries the executable bit.
RUN chmod +x \
        /app/scripts/clickhouse-auto-import \
        /app/scripts/clickhouse-import \
        /app/scripts/clickhouse-load-ttl-rules.py \
        /app/scripts/parquet-export \
        /app/scripts/generate-iceberg-metadata
# Runtime environment:
# - ADMIN_API_KEY defaults to an empty string (presumably meant to be supplied
#   when the container is started; confirm against the scripts).
# - The virtual environment's bin directory is placed first on PATH so its
#   python and pip take precedence over the system ones.
ENV ADMIN_API_KEY="" \
    PATH="/app/venv/bin:${PATH}"

# Default process: the auto-import script, in exec form.
CMD ["/app/scripts/clickhouse-auto-import"]