|
96 | 96 | * # --- Recommended: configurable partition value format --- |
97 | 97 | * # iceberg.partition.value.datetime.format is a DateTimeFormatter pattern applied to the output |
98 | 98 | * # partition value used in the filter expression. |
99 | | - * # When CURRENT_DATE is used, the reference datetime is LocalDateTime.now(), so a pattern |
100 | | - * # with HH will embed the current hour automatically — no separate hour config needed. |
101 | 99 | * # When set, it supersedes iceberg.hourly.partition.enabled. |
| 100 | + * # |
| 101 | + * # CURRENT_DATE behaviour: |
| 102 | + * # - With this property set → LocalDateTime.now(), so HH embeds the live clock-hour. |
| 103 | + * # - Without this property (legacy) → LocalDate.now() at midnight, so the hour suffix stays -00 (backward compat). |
102 | 104 | * |
103 | 105 | * # Standard hourly partitions (yyyy-MM-dd-HH) — CURRENT_DATE picks up live hour |
104 | 106 | * iceberg.partition.value.datetime.format=yyyy-MM-dd-HH # → "2025-04-01-14" (current hour) |
@@ -152,10 +154,18 @@ public class IcebergSource extends FileBasedSource<String, FileAwareInputStream> |
152 | 154 | /** |
153 | 155 | * Optional {@link DateTimeFormatter} pattern controlling how the partition value is rendered. |
154 | 156 | * |
155 | | - * <p>When {@code iceberg.filter.date=CURRENT_DATE} the reference datetime is |
156 | | - * {@link java.time.LocalDateTime#now()}, so a pattern that includes {@code HH} will embed |
157 | | - * the current clock-hour automatically — no separate hour config is needed. |
158 | | - * For a specific date (e.g. {@code 2025-04-03}), the time defaults to midnight (00:00). |
| 157 | + * <p><b>CURRENT_DATE behaviour differs between the two paths:</b> |
| 158 | + * <ul> |
| 159 | + * <li>When this property <em>is</em> set, {@code CURRENT_DATE} resolves to |
| 160 | + * {@link java.time.LocalDateTime#now()}, so a pattern that includes {@code HH} embeds the |
| 161 | + * live clock-hour automatically — useful for truly hourly-partitioned tables.</li> |
| 162 | + * <li>When this property is <em>absent</em> (legacy path), {@code CURRENT_DATE} resolves to |
| 163 | + * {@link java.time.LocalDate#now()} at midnight (00:00), preserving the legacy behaviour
| 164 | + * where the hour suffix was always {@code -00}. This is the right choice for tables whose |
| 165 | + * partitions are daily but formatted as {@code yyyy-MM-dd-00}.</li> |
| 166 | + * </ul> |
| 167 | + * For a static date value (e.g. {@code 2025-04-03}), the time always defaults to midnight (00:00) |
| 168 | + * regardless of which path is used. |
159 | 169 | * |
160 | 170 | * <p>Examples: |
161 | 171 | * <ul> |
@@ -284,28 +294,28 @@ public Extractor<String, FileAwareInputStream> getExtractor(WorkUnitState state) |
284 | 294 | * (defaults to {@value #DEFAULT_DATE_PARTITION_COLUMN}). The date value is specified separately via |
285 | 295 | * {@code iceberg.filter.date} in standard format ({@code yyyy-MM-dd}). |
286 | 296 | * |
287 | | - * <p><b>Partition Value Format:</b> Both the input date ({@code iceberg.filter.date}) and the output |
288 | | - * partition value use the pattern specified by {@code iceberg.partition.value.datetime.format} |
289 | | - * (a standard {@link java.time.format.DateTimeFormatter} pattern). Use {@code CURRENT_DATE} as the |
290 | | - * date value to resolve the reference datetime to {@link java.time.LocalDateTime#now()} automatically, |
291 | | - * embedding the current hour when the pattern includes {@code HH}. Examples: |
| 297 | + * <p><b>Partition Value Format:</b> The output partition value format is controlled by |
| 298 | + * {@code iceberg.partition.value.datetime.format} (a standard {@link java.time.format.DateTimeFormatter} |
| 299 | + * pattern). When absent, the legacy {@code iceberg.hourly.partition.enabled} flag drives the format. |
| 300 | + * |
| 301 | + * <p><b>{@code CURRENT_DATE} resolution:</b> |
292 | 302 | * <ul> |
293 | | - * <li>{@code yyyy-MM-dd-HH} with date {@code 2025-04-01-05} → {@code 2025-04-01-05}</li> |
294 | | - * <li>{@code dd-MM-yyyy-HH} with date {@code 01-04-2025-00} → {@code 01-04-2025-00}</li> |
295 | | - * <li>{@code yyyyMMdd} with date {@code 20250401} → {@code 20250401} (compact daily)</li> |
| 303 | + * <li>With {@code iceberg.partition.value.datetime.format} set → {@link java.time.LocalDateTime#now()}, |
| 304 | + * so a pattern including {@code HH} embeds the live clock-hour (e.g. {@code 2025-04-08-14}).</li> |
| 305 | + * <li>Without that property (legacy) → {@link java.time.LocalDate#now()} at midnight, so the |
| 306 | + * hour is always {@code 00} (e.g. {@code 2025-04-08-00}). This preserves the legacy
| 307 | + * behaviour for tables that store daily data in {@code yyyy-MM-dd-00} partitions.</li> |
296 | 308 | * </ul> |
297 | | - * When {@code iceberg.partition.value.datetime.format} is set it supersedes |
298 | | - * {@code iceberg.hourly.partition.enabled}. When absent, the legacy |
299 | | - * {@code iceberg.hourly.partition.enabled} behaviour is preserved for backward compatibility. |
| 309 | + * Static date values always default to midnight regardless of which path is used. |
300 | 310 | * |
301 | 311 | * <p><b>Configuration Examples:</b> |
302 | 312 | * <ul> |
303 | 313 | * <li>Standard daily: {@code iceberg.partition.value.datetime.format=yyyy-MM-dd, iceberg.filter.date=2025-04-03, |
304 | 314 | * iceberg.lookback.days=3} → partitions: {@code 2025-04-03, 2025-04-02, 2025-04-01}</li> |
305 | | - * <li>Reversed-date hourly: {@code iceberg.partition.value.datetime.format=dd-MM-yyyy-HH, |
306 | | - * iceberg.filter.date=CURRENT_DATE} → {@code 03-04-2025-14, 02-04-2025-14, 01-04-2025-14}</li> |
307 | | - * <li>Dynamic daily: {@code iceberg.filter.date=CURRENT_DATE, iceberg.lookback.days=1} |
308 | | - * → today's partition only (resolved at runtime)</li> |
| 315 | + * <li>Truly-hourly (live hour): {@code iceberg.partition.value.datetime.format=yyyy-MM-dd-HH, |
| 316 | + * iceberg.filter.date=CURRENT_DATE} → {@code 2025-04-08-14, 2025-04-07-14, 2025-04-06-14}</li> |
| 317 | + * <li>Daily-at-midnight (legacy default): {@code iceberg.filter.date=CURRENT_DATE, iceberg.lookback.days=1} |
| 318 | + * → {@code 2025-04-08-00} (hour always 00, backward compat)</li> |
309 | 319 | * </ul> |
310 | 320 | * |
311 | 321 | * @param state source state containing filter configuration |
@@ -337,13 +347,27 @@ private List<IcebergTable.FilePathWithPartition> discoverPartitionFilePaths(Sour |
337 | 347 | DateTimeFormatter partitionFormatter = resolvePartitionFormatter(state); |
338 | 348 |
|
339 | 349 | // Resolve the reference datetime for the filter. |
340 | | - // CURRENT_DATE uses LocalDateTime.now() so a formatter pattern that includes HH will |
341 | | - // embed the current clock-hour automatically. For a specific date (yyyy-MM-dd) the time |
342 | | - // defaults to midnight (00:00). |
| 350 | + // For a specific date (yyyy-MM-dd) the time always defaults to midnight (00:00). |
| 351 | + // For CURRENT_DATE: |
| 352 | + // - Custom format path (iceberg.partition.value.datetime.format set): LocalDateTime.now() so |
| 353 | + // a pattern that includes HH will embed the live clock-hour automatically. |
| 354 | + // - Legacy path (no custom format): LocalDate.now().atStartOfDay() (midnight) to preserve the |
| 355 | + // previous behaviour where CURRENT_DATE always produced a -00 suffix. Users who genuinely need
| 356 | + // the live hour should migrate to iceberg.partition.value.datetime.format=yyyy-MM-dd-HH. |
343 | 357 | LocalDateTime startDateTime; |
344 | 358 | if (CURRENT_DATE_PLACEHOLDER.equalsIgnoreCase(dateValue)) { |
345 | | - startDateTime = LocalDateTime.now(); |
346 | | - log.info("Resolved {} placeholder to current datetime: {}", CURRENT_DATE_PLACEHOLDER, startDateTime); |
| 359 | + boolean isCustomFormat = state.contains(ICEBERG_PARTITION_VALUE_DATETIME_FORMAT); |
| 360 | + if (isCustomFormat) { |
| 361 | + startDateTime = LocalDateTime.now(); |
| 362 | + log.info("Resolved {} to current datetime with live hour (custom format='{}'): {}", |
| 363 | + CURRENT_DATE_PLACEHOLDER, state.getProp(ICEBERG_PARTITION_VALUE_DATETIME_FORMAT), startDateTime); |
| 364 | + } else { |
| 365 | + // Legacy backward-compat: always midnight so the yyyy-MM-dd-HH pattern keeps the old -00 suffix. |
| 366 | + startDateTime = LocalDate.now().atStartOfDay(); |
| 367 | + log.info("Resolved {} to current date at midnight (legacy mode, -00 preserved): {}. " |
| 368 | + + "Set {} to use the live hour.", |
| 369 | + CURRENT_DATE_PLACEHOLDER, startDateTime, ICEBERG_PARTITION_VALUE_DATETIME_FORMAT); |
| 370 | + } |
347 | 371 | } else { |
348 | 372 | // When iceberg.partition.value.datetime.format is explicitly set, the input date must match |
349 | 373 | // that pattern (consistent input/output format). Legacy path keeps accepting yyyy-MM-dd for |
|
0 commit comments