Skip to content

Commit 9e62b66

Browse files
authored
Merge pull request #117 from data-catering/feature/transformations
Update version to 0.17.2, introduce custom transformations for post-g…
2 parents 6b48377 + 83a243c commit 9e62b66

63 files changed

Lines changed: 5541 additions & 1093 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ example/build
5656
example/.gradle
5757
example/benchmark/build
5858
example/benchmark/jars
59+
.tmp_prev_class_name
5960

6061
*.class
6162
*.log

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ RUN addgroup -S app \
66
&& apk update --no-cache \
77
&& apk update --no-cache openssl \
88
&& apk add --no-cache libc6-compat bash \
9-
&& mkdir -p /opt/app /opt/DataCaterer/connection /opt/DataCaterer/plan /opt/DataCaterer/execution /opt/DataCaterer/report \
9+
&& mkdir -p /opt/app /opt/app/custom /opt/DataCaterer/connection /opt/DataCaterer/plan /opt/DataCaterer/execution /opt/DataCaterer/report \
1010
&& chown -R app:app /opt/app /opt/DataCaterer/connection /opt/DataCaterer/plan /opt/DataCaterer/execution /opt/DataCaterer/report
1111
COPY --chown=app:app misc/docker-image app/src/main/resources app/build/libs /opt/app/
1212

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ and deep dive into issues [from the generated report](https://data.catering/late
3838

3939
1. Docker
4040
```shell
41-
docker run -d -i -p 9898:9898 -e DEPLOY_MODE=standalone --name datacaterer datacatering/data-caterer:0.17.1
41+
docker run -d -i -p 9898:9898 -e DEPLOY_MODE=standalone --name datacaterer datacatering/data-caterer:0.17.2
4242
```
4343
[Open localhost:9898](http://localhost:9898).
4444
1. [Run Scala/Java examples](#run-scalajava-examples)

api/src/main/scala/io/github/datacatering/datacaterer/api/TaskBuilder.scala

Lines changed: 164 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ import io.github.datacatering.datacaterer.api.HttpMethodEnum.HttpMethodEnum
55
import io.github.datacatering.datacaterer.api.HttpQueryParameterStyleEnum.HttpQueryParameterStyleEnum
66
import io.github.datacatering.datacaterer.api.converter.Converters.{toScalaList, toScalaMap}
77
import io.github.datacatering.datacaterer.api.model.Constants._
8-
import io.github.datacatering.datacaterer.api.model.{ArrayType, Count, DataType, DoubleType, Field, HeaderType, PerFieldCount, Step, StringType, Task, TaskSummary}
8+
import io.github.datacatering.datacaterer.api.model.{ArrayType, Count, DataType, DoubleType, Field, HeaderType, PerFieldCount, Step, StringType, Task, TaskSummary, TransformationConfig}
99

1010
import scala.annotation.varargs
1111
import scala.collection.JavaConverters._
@@ -168,6 +168,88 @@ case class TaskBuilder(task: Task = Task()) {
168168
)
169169
this.modify(_.task.steps)(_ ++ List(placeholderStep))
170170
}
171+
172+
/**
 * Attach a custom transformer to every step of this task using the default
 * [[TransformationConfig]] settings (mode "whole-file", method name "transform").
 * Any transformation previously set on the task is replaced.
 * Transformations execute after files are written ("last mile" transformation), and a
 * step-level transformation overrides this task-level configuration.
 *
 * @param className Fully qualified class name of the transformer
 * @return TaskBuilder with the task-level transformation set
 */
def transformation(className: String): TaskBuilder = {
  val defaultConfig = TransformationConfig(className = className)
  this.modify(_.task.transformation).setTo(Some(defaultConfig))
}
182+
183+
/**
 * Attach a per-record transformer to every step of this task, so that each record/line is
 * transformed individually. Any transformation previously set on the task is replaced.
 *
 * @param className  Fully qualified class name of the transformer
 * @param methodName Method name to invoke (default: "transformRecord")
 * @return TaskBuilder whose task-level transformation uses mode "per-record"
 */
def transformationPerRecord(className: String, methodName: String = "transformRecord"): TaskBuilder = {
  val perRecordConfig = TransformationConfig(
    className = className,
    methodName = methodName,
    mode = "per-record"
  )
  this.modify(_.task.transformation).setTo(Some(perRecordConfig))
}
196+
197+
/**
 * Attach a whole-file transformer to every step of this task, so that each file is
 * transformed as a single unit. Any transformation previously set on the task is replaced.
 *
 * @param className  Fully qualified class name of the transformer
 * @param methodName Method name to invoke (default: "transformFile")
 * @return TaskBuilder whose task-level transformation uses mode "whole-file"
 */
def transformationWholeFile(className: String, methodName: String = "transformFile"): TaskBuilder = {
  val wholeFileConfig = TransformationConfig(
    className = className,
    methodName = methodName,
    mode = "whole-file"
  )
  this.modify(_.task.transformation).setTo(Some(wholeFileConfig))
}
210+
211+
/**
 * Set the output path for the task-level transformation. When specified, new files are
 * created instead of replacing the originals.
 *
 * This only updates an already configured transformation: if no transformation has been set
 * on the task yet, the builder is returned unchanged and the arguments are discarded.
 *
 * @param outputPath     Path where transformed output should be written
 * @param deleteOriginal Whether to delete original files after transformation (default: false)
 * @return TaskBuilder with the updated transformation, or this builder if none is configured
 */
def transformationOutput(outputPath: String, deleteOriginal: Boolean = false): TaskBuilder =
  this.task.transformation
    .map { existing =>
      val redirected = existing.copy(outputPath = Some(outputPath), deleteOriginal = deleteOriginal)
      this.modify(_.task.transformation).setTo(Some(redirected))
    }
    .getOrElse(this)
225+
226+
/**
 * Merge additional options into the task-level transformation configuration. Keys already
 * present are overwritten by the values supplied here.
 *
 * This only updates an already configured transformation: if no transformation has been set
 * on the task yet, the builder is returned unchanged and the options are discarded.
 *
 * @param options Options to pass to the transformer
 * @return TaskBuilder with the merged options, or this builder if none is configured
 */
def transformationOptions(options: Map[String, String]): TaskBuilder =
  this.task.transformation
    .map { existing =>
      val mergedOptions = existing.options ++ options
      this.modify(_.task.transformation).setTo(Some(existing.copy(options = mergedOptions)))
    }
    .getOrElse(this)
239+
240+
/**
 * Explicitly enable or disable the task-level transformation for all steps in this task.
 *
 * This only updates an already configured transformation: if no transformation has been set
 * on the task yet, the builder is returned unchanged.
 *
 * @param enabled Whether transformation is enabled
 * @return TaskBuilder with the updated flag, or this builder if none is configured
 */
def enableTransformation(enabled: Boolean): TaskBuilder =
  this.task.transformation
    .map { existing =>
      val toggled = existing.copy(enabled = enabled)
      this.modify(_.task.transformation).setTo(Some(toggled))
    }
    .getOrElse(this)
171253
}
172254

173255
case class StepBuilder(step: Step = Step(), optValidation: Option[DataSourceValidationBuilder] = None) {
@@ -490,6 +572,87 @@ case class StepBuilder(step: Step = Step(), optValidation: Option[DataSourceVali
490572
@varargs def excludeFieldPatterns(patterns: String*): StepBuilder =
491573
this.modify(_.step.options)(_ ++ Map(EXCLUDE_FIELD_PATTERNS -> patterns.mkString(",")))
492574

575+
/**
 * Attach a custom transformer to this step using the default [[TransformationConfig]]
 * settings (mode "whole-file", method name "transform"). Any transformation previously set
 * on the step is replaced.
 * Transformations execute after the file is written ("last mile" transformation).
 *
 * @param className Fully qualified class name of the transformer
 * @return StepBuilder with the step-level transformation set
 */
def transformation(className: String): StepBuilder = {
  val defaultConfig = TransformationConfig(className = className)
  this.modify(_.step.transformation).setTo(Some(defaultConfig))
}
584+
585+
/**
 * Attach a per-record transformer to this step, so that each record/line in the file is
 * transformed individually. Any transformation previously set on the step is replaced.
 *
 * @param className  Fully qualified class name of the transformer
 * @param methodName Method name to invoke (default: "transformRecord")
 * @return StepBuilder whose transformation uses mode "per-record"
 */
def transformationPerRecord(className: String, methodName: String = "transformRecord"): StepBuilder = {
  val perRecordConfig = TransformationConfig(
    className = className,
    methodName = methodName,
    mode = "per-record"
  )
  this.modify(_.step.transformation).setTo(Some(perRecordConfig))
}
598+
599+
/**
 * Attach a whole-file transformer to this step, so that the entire file is transformed as a
 * single unit. Any transformation previously set on the step is replaced.
 *
 * @param className  Fully qualified class name of the transformer
 * @param methodName Method name to invoke (default: "transformFile")
 * @return StepBuilder whose transformation uses mode "whole-file"
 */
def transformationWholeFile(className: String, methodName: String = "transformFile"): StepBuilder = {
  val wholeFileConfig = TransformationConfig(
    className = className,
    methodName = methodName,
    mode = "whole-file"
  )
  this.modify(_.step.transformation).setTo(Some(wholeFileConfig))
}
612+
613+
/**
 * Set the output path for this step's transformation. When specified, a new file is created
 * instead of replacing the original.
 *
 * This only updates an already configured transformation: if no transformation has been set
 * on the step yet, the builder is returned unchanged and the arguments are discarded.
 *
 * @param outputPath     Path where transformed output should be written
 * @param deleteOriginal Whether to delete the original file after transformation (default: false)
 * @return StepBuilder with the updated transformation, or this builder if none is configured
 */
def transformationOutput(outputPath: String, deleteOriginal: Boolean = false): StepBuilder =
  this.step.transformation.fold[StepBuilder](this) { current =>
    val redirected = current.copy(outputPath = Some(outputPath), deleteOriginal = deleteOriginal)
    this.modify(_.step.transformation).setTo(Some(redirected))
  }
627+
628+
/**
 * Merge additional options into this step's transformation configuration. Keys already
 * present are overwritten by the values supplied here.
 *
 * This only updates an already configured transformation: if no transformation has been set
 * on the step yet, the builder is returned unchanged and the options are discarded.
 *
 * @param options Options to pass to the transformer
 * @return StepBuilder with the merged options, or this builder if none is configured
 */
def transformationOptions(options: Map[String, String]): StepBuilder =
  this.step.transformation.fold[StepBuilder](this) { current =>
    val mergedOptions = current.options ++ options
    this.modify(_.step.transformation).setTo(Some(current.copy(options = mergedOptions)))
  }
641+
642+
/**
 * Explicitly enable or disable the transformation for this step.
 *
 * This only updates an already configured transformation: if no transformation has been set
 * on the step yet, the builder is returned unchanged.
 *
 * @param enabled Whether transformation is enabled
 * @return StepBuilder with the updated flag, or this builder if none is configured
 */
def enableTransformation(enabled: Boolean): StepBuilder =
  this.step.transformation.fold[StepBuilder](this) { current =>
    val toggled = current.copy(enabled = enabled)
    this.modify(_.step.transformation).setTo(Some(toggled))
  }
655+
493656
/** Validation builder attached to this step, or a new default `DataSourceValidationBuilder` when `optValidation` is empty. */
private def getValidation: DataSourceValidationBuilder = optValidation.getOrElse(DataSourceValidationBuilder())
494657
}
495658

api/src/main/scala/io/github/datacatering/datacaterer/api/connection/ConnectionBuilder.scala

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,76 @@ trait ConnectionTaskBuilder[T] {
294294
this
295295
}
296296

297+
/**
 * Configure a transformation to apply to the generated data.
 *
 * Delegates to the step-level `transformation`, which builds a [[TransformationConfig]] from
 * the class name alone. The resulting defaults are therefore whole-file mode with method
 * name "transform" (not "transformFile"). Use `transformationWholeFile` when the transformer
 * exposes a "transformFile" method, or pass the method name explicitly.
 *
 * @param className The fully qualified class name of the transformer
 * @return The connection task builder
 */
def transformation(className: String): ConnectionTaskBuilder[T] = {
  this.step = Some(getStep.transformation(className))
  this
}
308+
309+
/**
 * Configure a per-record transformation to apply to the generated data.
 *
 * The default method name matches the step-level and task-level `transformationPerRecord`
 * builders ("transformRecord"), so the same transformer class works regardless of which
 * builder level it is configured on. Because the method name is forwarded explicitly to the
 * step builder, the default declared here is the one that takes effect.
 *
 * @param className  The fully qualified class name of the transformer
 * @param methodName The method name to invoke (default: "transformRecord")
 * @return The connection task builder
 */
def transformationPerRecord(className: String, methodName: String = "transformRecord"): ConnectionTaskBuilder[T] = {
  this.step = Some(getStep.transformationPerRecord(className, methodName))
  this
}
320+
321+
/**
 * Configure a whole-file transformation to apply to the generated data by forwarding to the
 * step-level `transformationWholeFile` and storing the resulting step on this builder.
 *
 * @param className  The fully qualified class name of the transformer
 * @param methodName The method name to invoke (default: "transformFile")
 * @return The connection task builder
 */
def transformationWholeFile(className: String, methodName: String = "transformFile"): ConnectionTaskBuilder[T] = {
  val updatedStep = getStep.transformationWholeFile(className, methodName)
  this.step = Some(updatedStep)
  this
}
332+
333+
/**
 * Configure the output path and deletion behavior for the transformation by forwarding to
 * the step-level `transformationOutput` and storing the resulting step on this builder.
 *
 * The step-level call leaves the step unchanged when no transformation has been configured
 * yet, so configure the transformation (e.g. via `transformation`) before calling this.
 *
 * @param outputPath     The path where the transformed file should be written
 * @param deleteOriginal Whether to delete the original file after transformation (default: false)
 * @return The connection task builder
 */
def transformationOutput(outputPath: String, deleteOriginal: Boolean = false): ConnectionTaskBuilder[T] = {
  val updatedStep = getStep.transformationOutput(outputPath, deleteOriginal)
  this.step = Some(updatedStep)
  this
}
344+
345+
/**
 * Add options for the transformation by forwarding to the step-level `transformationOptions`
 * and storing the resulting step on this builder.
 *
 * The step-level call leaves the step unchanged when no transformation has been configured
 * yet, so configure the transformation (e.g. via `transformation`) before calling this.
 *
 * @param options Map of option key-value pairs
 * @return The connection task builder
 */
def transformationOptions(options: Map[String, String]): ConnectionTaskBuilder[T] = {
  val updatedStep = getStep.transformationOptions(options)
  this.step = Some(updatedStep)
  this
}
355+
356+
/**
 * Enable or disable the transformation by forwarding to the step-level `enableTransformation`
 * and storing the resulting step on this builder.
 *
 * The step-level call leaves the step unchanged when no transformation has been configured
 * yet, so configure the transformation (e.g. via `transformation`) before calling this.
 *
 * @param enabled Whether the transformation should be enabled
 * @return The connection task builder
 */
def enableTransformation(enabled: Boolean): ConnectionTaskBuilder[T] = {
  val updatedStep = getStep.enableTransformation(enabled)
  this.step = Some(updatedStep)
  this
}
366+
297367
def toTasksBuilder: Option[TasksBuilder] = {
298368
val dataSourceName = connectionConfigWithTaskBuilder.dataSourceName
299369
val format = connectionConfigWithTaskBuilder.options(FORMAT)

api/src/main/scala/io/github/datacatering/datacaterer/api/model/PlanModels.scala

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,8 @@ case class TaskSummary(
4747

4848
case class Task(
4949
name: String = DEFAULT_TASK_NAME,
50-
steps: List[Step] = List()
50+
steps: List[Step] = List(),
51+
transformation: Option[TransformationConfig] = None
5152
)
5253

5354
case class Step(
@@ -56,7 +57,8 @@ case class Step(
5657
count: Count = Count(),
5758
options: Map[String, String] = Map(),
5859
fields: List[Field] = List(),
59-
enabled: Boolean = DEFAULT_STEP_ENABLED
60+
enabled: Boolean = DEFAULT_STEP_ENABLED,
61+
transformation: Option[TransformationConfig] = None
6062
)
6163

6264
case class Count(
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
package io.github.datacatering.datacaterer.api.model
2+
3+
import com.fasterxml.jackson.annotation.JsonIgnoreProperties
4+
5+
/**
 * Configuration for custom transformation of generated data.
 * Transformations can operate in two modes:
 * - "per-record": Transform each record/line in the file individually
 * - "whole-file": Transform the entire file as a unit
 *
 * Both modes execute after the file has been written ("last mile" transformation).
 *
 * The mode is matched case-insensitively. A value that is neither "per-record" nor
 * "whole-file" (or a null value) makes both [[isPerRecord]] and [[isWholeFile]] return false.
 *
 * @param className      Fully qualified class name of the transformer; empty means not configured
 * @param methodName     Name of the method to invoke on the transformer
 * @param mode           "per-record" or "whole-file"
 * @param outputPath     Output path (if different from input)
 * @param deleteOriginal Delete original file after transformation
 * @param options        Options to pass to the transformer
 * @param enabled        Explicit on/off switch for the transformation
 */
@JsonIgnoreProperties(ignoreUnknown = true)
case class TransformationConfig(
  className: String = "",
  methodName: String = "transform",
  mode: String = "whole-file", // "per-record" or "whole-file"
  outputPath: Option[String] = None, // Output path (if different from input)
  deleteOriginal: Boolean = false, // Delete original file after transformation
  options: Map[String, String] = Map(),
  enabled: Boolean = true
) {
  /**
   * Check if transformation is configured and enabled.
   * A null className (e.g. an explicit null in a deserialized config) counts as not configured.
   */
  def isEnabled: Boolean = enabled && Option(className).exists(_.nonEmpty)

  /**
   * Check if mode is per-record (case-insensitive). Returns false for a null mode.
   */
  def isPerRecord: Boolean = "per-record".equalsIgnoreCase(mode)

  /**
   * Check if mode is whole-file (case-insensitive). Returns false for a null mode.
   */
  def isWholeFile: Boolean = "whole-file".equalsIgnoreCase(mode)
}

0 commit comments

Comments
 (0)