Skip to content

Commit 14ab9a9

Browse files
committed
docker update
1 parent 363a482 commit 14ab9a9

3 files changed

Lines changed: 23 additions & 8 deletions

File tree

Makefile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,6 @@ tf_apply:
1414

1515
bigquery_export_deploy:
1616
cd infra/bigquery-export && npm run buildpack
17+
18+
bigquery_export_spark_deploy:
19+
cd infra/bigquery_export_spark && gcloud builds submit --region=global --tag us-docker.pkg.dev/httparchive/bigquery-spark-procedures/firestore_export:latest

infra/bigquery_export_spark/Dockerfile

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1+
# Dataproc image example: https://cloud.google.com/dataproc-serverless/docs/guides/custom-containers
12
# Recommendation: Use Debian 12.
2-
FROM python:3.12-slim
3+
FROM debian:12-slim
4+
# python:3.12-slim
35

46
# Suppress interactive prompts
57
ENV DEBIAN_FRONTEND=noninteractive
@@ -10,13 +12,20 @@ RUN apt update && apt install -y procps tini libjemalloc2
1012
# Enable jemalloc2 as default memory allocator
1113
ENV LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libjemalloc.so.2
1214

13-
# Install packages.
14-
RUN pip install --no-cache-dir \
15-
google-cloud-bigquery \
16-
google-cloud-storage \
17-
google-cloud-firestore \
18-
grpcio \
19-
google-cloud-secret-manager
15+
# Install and configure Miniforge3 (the conda-forge installer).
16+
ENV CONDA_HOME=/opt/miniforge3
17+
ENV PYSPARK_PYTHON=${CONDA_HOME}/bin/python
18+
ENV PATH=${CONDA_HOME}/bin:${PATH}
19+
ADD https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-x86_64.sh .
20+
RUN bash Miniforge3-Linux-x86_64.sh -b -p /opt/miniforge3 \
21+
&& ${CONDA_HOME}/bin/conda config --system --set always_yes True \
22+
&& ${CONDA_HOME}/bin/conda config --system --set auto_update_conda False \
23+
&& ${CONDA_HOME}/bin/conda config --system --set channel_priority strict
24+
25+
COPY . .
26+
27+
# Install pip packages.
28+
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN ${PYSPARK_PYTHON} -m pip install --no-cache-dir -r requirements.txt
2029

2130
# Create the 'spark' group/user.
2231
# The GID and UID must be 1099. Home directory is required.
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
google-cloud-bigquery==3.23
2+
google-cloud-storage==2.16
3+
google-cloud-firestore==2.20.1

0 commit comments

Comments
 (0)