Skip to content

Commit a9a7941

Browse files
Merge pull request #29 from Kpler/feat/PTFM-7378/add-support-for-automatic-schema-generation
feat: add new hook to automatically generate kafka schemas
2 parents 779309a + ec0f9ec commit a9a7941

4 files changed

Lines changed: 197 additions & 0 deletions

File tree

.pre-commit-hooks.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,3 +34,11 @@
3434
stages: [commit]
3535
always_run: true
3636
pass_filenames: false
37+
38+
- id: kafka-check-schemas
39+
name: Check if local Kafka Schemas are up to date with code
40+
language: script
41+
entry: ./kafka/check-local-schemas.sh
42+
stages: [commit]
43+
files: ^(schemas/|src/main/scala/[^/]+/models/)
44+
pass_filenames: false

kafka/check-local-schemas.sh

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
#!/usr/bin/env bash
2+
3+
# Safety measures
4+
set -o errexit # Leave immediately if a command returns an error
5+
set -o nounset # Leave immediately if an unitialized value is used
6+
set -o pipefail # Leave immediately if a command fails in a pipe
7+
8+
[[ "${BASH_VERSION}" =~ ^(5|4\.[0-9]).* ]] && shopt -s inherit_errexit
9+
10+
SCRIPT_DIR="$(cd -P "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
11+
12+
#####################################################################
13+
# Helper functions
14+
#####################################################################
15+
16+
error() {
17+
local msg="$1" exit_code="${2:-1}"
18+
echo "ERROR: ${msg}">&2
19+
exit "${exit_code}"
20+
}
21+
22+
check_binary_exists() {
23+
local binary="$1"
24+
command -v "${binary}" &>/dev/null || error "${binary} is required but it's not installed"
25+
}
26+
27+
clean_temporary_folder() {
28+
[[ -z "${generator_source_folder:-}" ]] || rm -rf "${generator_source_folder}"
29+
}
30+
31+
is_git_tracked() {
32+
git ls-files --error-unmatch "$1" &> /dev/null || return 1
33+
}
34+
35+
get_md5sum() {
36+
local file="$1"
37+
md5sum "${file}" | awk '{ print $1}'
38+
}
39+
40+
find_schema_class_file() {
41+
# The schema class heuristic is a bit hacky for now, we try to find a file
42+
# where a class has been annotated with the schema annotation
43+
# Otherwise we fallback on finding the filename containing the schema code
44+
# to end with Schema or is named InputModel
45+
# We might want to improve this in the future
46+
schema_class_file="$(grep -lr "^@schema" src | head -n 1 || return 0)"
47+
48+
if [[ -z "${schema_class_file}" ]]; then
49+
schema_class_file="$(find src -name "*Schema.scala" -o -name "InputModel.scala" | head -n 1 || return 0)"
50+
fi
51+
52+
echo "${schema_class_file}"
53+
54+
}
55+
56+
find_schema_class() {
57+
local schema_class_file="$1"
58+
schema_class_name="$(basename "${schema_class_file}" .scala)"
59+
schema_package="$(awk ' $1 == "package" { print $2 }' "${schema_class_file}")"
60+
61+
echo "${schema_package}.${schema_class_name}"
62+
}
63+
64+
find_avro_library() {
65+
local schema_class_file="$1"
66+
67+
if grep -q "import com.sksamuel.avro4s" "${schema_class_file}"; then
68+
echo "avro4s"
69+
elif grep -q "import vulcan" "${schema_class_file}"; then
70+
echo "vulcan"
71+
else
72+
error "Could not find any avro library import in ${schema_class_file}"
73+
fi
74+
75+
}
76+
77+
generate_schema_generator_code() {
78+
local schema_class="$1" schema_library="$2"
79+
80+
schema_class_name="${schema_class##*.}"
81+
schema_package="${schema_class%.*}"
82+
83+
# only schema class using vulcan are supported for now
84+
# but we might add support for avro4s in the future
85+
sed \
86+
-e "s/__SCHEMA_CLASS_NAME__/${schema_class_name}/g" \
87+
-e "s/__SCHEMA_PACKAGE__/${schema_package}/g" \
88+
"${SCRIPT_DIR}/generators/${schema_library^}SchemaGenerator.tmpl.scala"
89+
}
90+
91+
run_schema_generator_code() {
92+
local generator_code_file="$1" target_schema_file="$2"
93+
94+
generator_source_folder="$(dirname "${generator_code_file}")"
95+
96+
sbt_command=""
97+
# When fork is enabled, it seems we can't avoid all sbt logs to be printed
98+
# so we just disable it
99+
sbt_command+="set fork := false;"
100+
# We tell sbt to look for our generator code in the temporary folder in addition
101+
# to the existing source code, so we can run our generator code alongside the existing code
102+
# We need that as the generator code import the schema class
103+
sbt_command+="set Compile / unmanagedSourceDirectories += file(\"${generator_source_folder}\");"
104+
sbt_command+="runMain kp_pre_commit_hooks.generateSchemaFile ${target_schema_file}"
105+
106+
sbt -batch -error "${sbt_command}"
107+
# Add a last linefeed to make pre-commit end-of-line fixer happy
108+
echo >> "${target_schema_file}"
109+
}
110+
111+
#####################################################################
112+
# Main code
113+
#####################################################################
114+
115+
trap clean_temporary_folder EXIT
116+
117+
check_binary_exists "sbt"
118+
119+
target_schema_file="schemas/schema.avsc"
120+
121+
generator_source_folder="$(mktemp -d)"
122+
generator_code_file="${generator_source_folder}/SchemaGenerator.scala"
123+
124+
[[ ! -f "${target_schema_file}" ]] || checksum_before="$(get_md5sum "${target_schema_file}")"
125+
126+
schema_class_file="$(find_schema_class_file)"
127+
[[ -n "${schema_class_file}" ]] || error "Could not find any schema class file"
128+
129+
schema_class="$(find_schema_class "${schema_class_file}")"
130+
schema_library="$(find_avro_library "${schema_class_file}")"
131+
132+
generate_schema_generator_code "${schema_class}" "${schema_library}" > "${generator_code_file}"
133+
run_schema_generator_code "${generator_code_file}" "${target_schema_file}"
134+
135+
if ! is_git_tracked "${target_schema_file}"; then
136+
error "Schema file \"${target_schema_file}\" is not tracked by git. Please commit it."
137+
fi
138+
139+
checksum_after="$(get_md5sum "${target_schema_file}")"
140+
if [[ "${checksum_after}" != "${checksum_before:-}" ]]; then
141+
error "Schema file \"${target_schema_file}\" was missing or not consistent with code. Please commit the updated version."
142+
fi
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
package kp_pre_commit_hooks
2+
3+
import org.apache.avro.Schema
4+
import __SCHEMA_PACKAGE__.__SCHEMA_CLASS_NAME__
5+
6+
import com.sksamuel.avro4s.AvroSchema
7+
8+
def schemaToString(schema: Schema): String = ujson.write(ujson.read(schema.toString()), indent = 4)
9+
10+
def writeSchema(schema: Schema, schemaFilename: String) = {
11+
Console.println(s"Writing ${schema.getName} schema to ${schemaFilename}")
12+
val schemaFilePath = os.Path(schemaFilename, os.pwd)
13+
os.write.over(schemaFilePath, schemaToString(schema), createFolders = true)
14+
}
15+
16+
@main def generateSchemaFile(schemaFilename: String) = {
17+
18+
val generatedSchema = AvroSchema[__SCHEMA_CLASS_NAME__]
19+
writeSchema(generatedSchema, schemaFilename)
20+
}
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
package kp_pre_commit_hooks
2+
3+
import org.apache.avro.Schema
4+
import __SCHEMA_PACKAGE__.__SCHEMA_CLASS_NAME__
5+
6+
import vulcan._
7+
8+
def schemaToString(schema: Schema): String = ujson.write(ujson.read(schema.toString()), indent = 4)
9+
10+
def writeSchema(schema: Schema, schemaFilename: String) = {
11+
Console.println(s"Writing ${schema.getName} schema to ${schemaFilename}")
12+
val schemaFilePath = os.Path(schemaFilename, os.pwd)
13+
os.write.over(schemaFilePath, schemaToString(schema), createFolders = true)
14+
}
15+
16+
@main def generateSchemaFile(schemaFilename: String) = {
17+
18+
val generatedSchema = summon[Codec[__SCHEMA_CLASS_NAME__]].schema
19+
20+
generatedSchema match {
21+
case Right(schema) => writeSchema(schema, schemaFilename)
22+
case Left(error) => {
23+
System.err.println(s"ERROR: Couldn't generate the schema because of the following error: ${error.toString}")
24+
System.exit(2)
25+
}
26+
}
27+
}

0 commit comments

Comments
 (0)