Skip to content

Commit 50b4296

Browse files
authored
Feat: Support Iceberg format in the local Airflow setup (SQLMesh#1755)
1 parent b82bb12 commit 50b4296

3 files changed

Lines changed: 13 additions & 8 deletions

File tree

examples/airflow/Dockerfile.template

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
FROM apache/spark-py:latest AS spark
1+
FROM apache/spark-py:v3.4.0 AS spark
22

33
FROM apache/airflow:$AIRFLOW_VERSION
44

@@ -22,8 +22,9 @@ RUN chown -R airflow /opt/spark
2222
ENV SPARK_HOME="/opt/spark"
2323
ENV PATH="$PATH:$SPARK_HOME/bin"
2424

25-
# Install Postgres driver for Spark
26-
RUN curl https://jdbc.postgresql.org/download/postgresql-42.5.0.jar -o /opt/spark/jars/postgresql-42.5.0.jar
25+
# Install Postgres driver and Iceberg for Spark
26+
RUN curl https://jdbc.postgresql.org/download/postgresql-42.5.0.jar -o /opt/spark/jars/postgresql-42.5.0.jar && \
27+
curl -L https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-3.4_2.12/1.4.2/iceberg-spark-runtime-3.4_2.12-1.4.2.jar -o /opt/spark/jars/iceberg-spark-runtime-3.4_2.12-1.4.2.jar
2728

2829
# Install Hadoop
2930
RUN curl https://dlcdn.apache.org/hadoop/common/hadoop-3.3.4/hadoop-3.3.4.tar.gz -o hadoop-3.3.4.tar.gz && \
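[File name not captured in this export; the hunk below is a Spark properties file (spark-defaults.conf format)]

Lines changed: 7 additions & 3 deletions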
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1-
spark.hadoop.hive.exec.dynamic.partition true
2-
spark.hadoop.hive.exec.dynamic.partition.mode nonstrict
3-
spark.sql.sources.partitionOverwriteMode dynamic
1+
spark.hadoop.hive.exec.dynamic.partition true
2+
spark.hadoop.hive.exec.dynamic.partition.mode nonstrict
3+
spark.sql.sources.partitionOverwriteMode dynamic
4+
5+
spark.sql.extensions org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions
6+
spark.sql.catalog.spark_catalog org.apache.iceberg.spark.SparkSessionCatalog
7+
spark.sql.catalog.spark_catalog.type hive

examples/sushi/config.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,15 +54,15 @@
5454
},
5555
)
5656
),
57-
model_defaults=ModelDefaultsConfig(dialect="duckdb"),
57+
model_defaults=ModelDefaultsConfig(dialect="duckdb", storage_format="iceberg"),
5858
)
5959

6060

6161
# Due to a mypy bug under Python 3.7 we ignore the type error on the next statement. Can remove once 3.7 support is dropped.
6262
airflow_config_docker = Config( # type: ignore
6363
default_scheduler=AirflowSchedulerConfig(airflow_url="http://airflow-webserver:8080/"),
6464
gateways=GatewayConfig(connection=SparkConnectionConfig()),
65-
model_defaults=ModelDefaultsConfig(dialect="duckdb"),
65+
model_defaults=ModelDefaultsConfig(dialect="duckdb", storage_format="iceberg"),
6666
)
6767

6868
# A DuckDB config with a physical schema map.

0 commit comments

Comments
 (0)