@@ -13,7 +13,7 @@ flags {
     enableFailOnError = ${?ENABLE_FAIL_ON_ERROR}
     enableUniqueCheck = true
     enableUniqueCheck = ${?ENABLE_UNIQUE_CHECK}
-    enableSinkMetadata = true
+    enableSinkMetadata = false
     enableSinkMetadata = ${?ENABLE_SINK_METADATA}
     enableSaveReports = true
     enableSaveReports = ${?ENABLE_SAVE_REPORTS}
@@ -23,6 +23,8 @@ flags {
     enableGenerateValidations = ${?ENABLE_GENERATE_VALIDATIONS}
     enableAlerts = false
     enableAlerts = ${?ENABLE_ALERTS}
+    enableUniqueCheckOnlyInBatch = false
+    enableUniqueCheckOnlyInBatch = ${?ENABLE_UNIQUE_CHECK_ONLY_IN_BATCH}
 }
 
 folders {
@@ -32,6 +34,8 @@ folders {
     planFilePath = ${?PLAN_FILE_PATH}
     taskFolderPath = "/opt/app/custom/task"
     taskFolderPath = ${?TASK_FOLDER_PATH}
+    validationFolderPath = "/opt/app/custom/validation/csv"
+    validationFolderPath = ${?VALIDATION_FOLDER_PATH}
     recordTrackingFolderPath = "/opt/app/custom/recordTracking"
     recordTrackingFolderPath = ${?RECORD_TRACKING_FOLDER_PATH}
     recordTrackingForValidationFolderPath = "/opt/app/custom/validation/recordTracking"
@@ -80,22 +84,28 @@ runtime {
     master = "local[*]"
     master = ${?DATA_CATERER_MASTER}
     config {
+        "spark.driver.memory" = "6g",
+        "spark.executor.memory" = "6g",
+        "spark.executor.memoryOverhead" = "512m",
+        "spark.memory.fraction" = "0.6",
+        "spark.memory.storageFraction" = "0.5",
+        "spark.memory.offHeap.size" = "1g",
+        "spark.sql.shuffle.partitions" = "10",
         "spark.sql.cbo.enabled": "true",
-        "spark.sql.adaptive.enabled": "true",
-        "spark.sql.cbo.planStats.enabled": "true",
-        "spark.sql.legacy.allowUntypedScalaUDF": "true",
-        "spark.sql.legacy.allowParameterlessCount": "true",
-        "spark.sql.statistics.histogram.enabled": "true",
-        "spark.sql.shuffle.partitions": "10",
-        "spark.sql.catalog.postgres": "",
-        "spark.sql.catalog.cassandra": "com.datastax.spark.connector.datasource.CassandraCatalog",
-        "spark.sql.catalog.iceberg": "org.apache.iceberg.spark.SparkCatalog",
-        "spark.sql.catalog.iceberg.type": "hadoop",
-        "spark.hadoop.fs.s3a.directory.marker.retention": "keep",
-        "spark.hadoop.fs.s3a.bucket.all.committer.magic.enabled": "true",
-        "spark.hadoop.fs.hdfs.impl": "org.apache.hadoop.hdfs.DistributedFileSystem",
-        "spark.hadoop.fs.file.impl": "com.globalmentor.apache.hadoop.fs.BareLocalFileSystem",
-        "spark.sql.extensions": "io.delta.sql.DeltaSparkSessionExtension,org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions"
+        "spark.sql.adaptive.enabled" = "true",
+        "spark.sql.cbo.planStats.enabled" = "true",
+        "spark.sql.legacy.allowUntypedScalaUDF" = "true",
+        "spark.sql.legacy.allowParameterlessCount" = "true",
+        "spark.sql.statistics.histogram.enabled" = "true",
+        "spark.sql.catalog.postgres" = "",
+        "spark.sql.catalog.cassandra" = "com.datastax.spark.connector.datasource.CassandraCatalog",
+        "spark.sql.catalog.iceberg" = "org.apache.iceberg.spark.SparkCatalog",
+        "spark.sql.catalog.iceberg.type" = "hadoop",
+        "spark.hadoop.fs.s3a.directory.marker.retention" = "keep",
+        "spark.hadoop.fs.s3a.bucket.all.committer.magic.enabled" = "true",
+        "spark.hadoop.fs.hdfs.impl" = "org.apache.hadoop.hdfs.DistributedFileSystem",
+        "spark.hadoop.fs.file.impl" = "com.globalmentor.apache.hadoop.fs.BareLocalFileSystem",
+        "spark.sql.extensions" = "io.delta.sql.DeltaSparkSessionExtension,org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions"
     }
 }
 
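Every entry added above follows the same HOCON pattern the rest of the file uses: a literal default, immediately followed by an optional environment-variable substitution (`${?VAR}`) that overrides the default only when the variable is set. A minimal sketch of how the new entries resolve, assuming the Typesafe (Lightbend) Config library that typically backs an application.conf like this; the object name is illustrative:

    // Sketch: optional substitutions keep the defaults above unless the
    // matching environment variables (ENABLE_UNIQUE_CHECK_ONLY_IN_BATCH,
    // VALIDATION_FOLDER_PATH) are set at runtime.
    import com.typesafe.config.ConfigFactory

    object ConfigResolutionSketch extends App {
      // Loads application.conf and resolves env-var substitutions.
      val config = ConfigFactory.load()

      // New flag from this change; false unless the env var overrides it.
      val uniqueCheckOnlyInBatch = config.getBoolean("flags.enableUniqueCheckOnlyInBatch")

      // New folder path; "/opt/app/custom/validation/csv" unless overridden.
      val validationFolderPath = config.getString("folders.validationFolderPath")

      println(s"enableUniqueCheckOnlyInBatch=$uniqueCheckOnlyInBatch, validationFolderPath=$validationFolderPath")
    }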
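The new `runtime.config` entries raise driver and executor memory to 6g, add off-heap and memory-fraction tuning, and move `spark.sql.shuffle.partitions = 10` up with the other resource settings. A rough sketch of how a key/value map like this could be folded into a SparkSession builder (assumed wiring for illustration, not necessarily how this application consumes the block); note that in local mode `spark.driver.memory` generally has to be set before the JVM starts, e.g. via spark-submit, rather than programmatically:

    // Sketch only: applies a few of the key/value pairs added above.
    import org.apache.spark.sql.SparkSession

    object RuntimeConfigSketch extends App {
      val runtimeConfig = Map(
        "spark.executor.memory"        -> "6g",
        "spark.sql.shuffle.partitions" -> "10",
        "spark.sql.cbo.enabled"        -> "true"
      )

      // Fold every pair into the builder before creating the session.
      val builder = runtimeConfig.foldLeft(SparkSession.builder().master("local[*]")) {
        case (b, (key, value)) => b.config(key, value)
      }
      val spark = builder.getOrCreate()

      println(spark.conf.get("spark.sql.shuffle.partitions"))  // "10"
      spark.stop()
    }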