Skip to content

Commit 4103439

Browse files
committed
feat: add automatic discovery of the schema class
Plus some refactoring and improving of error messages.
1 parent 4160b00 commit 4103439

4 files changed

Lines changed: 137 additions & 32 deletions

File tree

.pre-commit-hooks.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
- id: kafka-check-schemas
3939
name: Check if local Kafka Schemas are up to date with code
4040
language: script
41-
entry: ./kafka-check-local-schemas.sh
41+
entry: ./kafka/check-local-schemas.sh
4242
stages: [commit]
4343
files: ^(schemas/|src/main/scala/[^/]+/models/)
4444
pass_filenames: false

kafka-check-local-schemas.sh

Lines changed: 0 additions & 31 deletions
This file was deleted.

kafka/check-local-schemas.sh

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
#!/usr/bin/env bash
2+
3+
# Safety measures
4+
set -o errexit # Leave immediately if a command returns an error
5+
set -o nounset # Leave immediately if an unitialized value is used
6+
set -o pipefail # Leave immediately if a command fails in a pipe
7+
8+
[[ "${BASH_VERSION}" =~ ^(5|4\.[0-9]).* ]] && shopt -s inherit_errexit
9+
10+
SCRIPT_DIR="$(cd -P "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
11+
12+
#####################################################################
13+
# Helper functions
14+
#####################################################################
15+
16+
error() {
17+
local msg="$1" exit_code="${2:-1}"
18+
echo "ERROR: ${msg}">&2
19+
exit "${exit_code}"
20+
}
21+
22+
check_binary_exists() {
23+
local binary="$1"
24+
command -v "${binary}" &>/dev/null || error "${binary} is required but it's not installed"
25+
}
26+
27+
clean_temporary_folder() {
28+
[[ -z "${generator_source_folder:-}" ]] || rm -rf "${generator_source_folder}"
29+
}
30+
31+
is_git_tracked() {
32+
git ls-files --error-unmatch "$1" &> /dev/null || return 1
33+
}
34+
35+
get_md5sum() {
36+
local file="$1"
37+
md5sum "${file}" | awk '{ print $1}'
38+
}
39+
40+
41+
find_schema_class() {
42+
# The schema class heuristic is a bit hacky for now, we just expect
43+
# the filename containing the schema code to end with Schema or is named InputModel
44+
# We might want to improve this in the future
45+
schema_class_file="$(find src -name "*Schema.scala" -o -name "InputModel.scala" | head -n 1)"
46+
schema_class_name="$(basename "${schema_class_file}" .scala)"
47+
schema_package="$(awk ' $1 == "package" { print $2 }' "${schema_class_file}")"
48+
49+
echo "${schema_package}.${schema_class_name}"
50+
}
51+
52+
generate_schema_generator_code() {
53+
local schema_class="$1"
54+
55+
schema_class_name="${schema_class##*.}"
56+
schema_package="${schema_class%.*}"
57+
58+
# only schema class using vulcan are supported for now
59+
# but we might add support for avro4s in the future
60+
sed \
61+
-e "s/__SCHEMA_CLASS_NAME__/${schema_class_name}/g" \
62+
-e "s/__SCHEMA_PACKAGE__/${schema_package}/g" \
63+
"${SCRIPT_DIR}/generators/VulcanSchemaGenerator.tmpl.scala"
64+
}
65+
66+
run_schema_generator_code() {
67+
local generator_code_file="$1" target_schema_file="$2"
68+
69+
generator_source_folder="$(dirname "${generator_code_file}")"
70+
71+
sbt_command=""
72+
# When fork is enabled, it seems we can't avoid all sbt logs to be printed
73+
# so we just disable it
74+
sbt_command+="set fork := false;"
75+
# We tell sbt to look for our generator code in the temporary folder in addition
76+
# to the existing source code, so we can run our generator code alongside the existing code
77+
# We need that as the generator code import the schema class
78+
sbt_command+="set Compile / unmanagedSourceDirectories += file(\"${generator_source_folder}\");"
79+
sbt_command+="runMain kp_pre_commit_hooks.generateSchemaFile ${target_schema_file}"
80+
81+
sbt -batch -error "${sbt_command}"
82+
}
83+
84+
#####################################################################
85+
# Main code
86+
#####################################################################
87+
88+
trap clean_temporary_folder EXIT
89+
90+
check_binary_exists "sbt"
91+
92+
target_schema_file="schemas/schema.avsc"
93+
94+
generator_source_folder="$(mktemp -d)"
95+
generator_code_file="${generator_source_folder}/SchemaGenerator.scala"
96+
97+
[[ ! -f "${target_schema_file}" ]] || checksum_before="$(get_md5sum "${target_schema_file}")"
98+
99+
generate_schema_generator_code "$(find_schema_class)" > "${generator_code_file}"
100+
run_schema_generator_code "${generator_code_file}" "${target_schema_file}"
101+
102+
if ! is_git_tracked "${target_schema_file}"; then
103+
error "Schema file \"${target_schema_file}\" is not tracked by git. Please commit it."
104+
fi
105+
106+
checksum_after="$(get_md5sum "${target_schema_file}")"
107+
if [[ "${checksum_after}" != "${checksum_before:-}" ]]; then
108+
error "Schema file \"${target_schema_file}\" was not consistent with code. Please commit the updated version."
109+
fi
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
package kp_pre_commit_hooks
2+
3+
import org.apache.avro.Schema
4+
import __SCHEMA_PACKAGE__.__SCHEMA_CLASS_NAME__
5+
6+
import vulcan._
7+
8+
def schemaToString(schema: Schema): String = ujson.write(ujson.read(schema.toString()), indent = 4)
9+
10+
def writeSchema(schema: Schema, schemaFilename: String) = {
11+
Console.println(s"Writing ${schema.getName} schema to ${schemaFilename}")
12+
val schemaFilePath = os.Path(schemaFilename, os.pwd)
13+
os.write.over(schemaFilePath, schemaToString(schema), createFolders = true)
14+
}
15+
16+
@main def generateSchemaFile(schemaFilename: String) = {
17+
18+
val generatedSchema = summon[Codec[__SCHEMA_CLASS_NAME__]].schema
19+
20+
generatedSchema match {
21+
case Right(schema) => writeSchema(schema, schemaFilename)
22+
case Left(error) => {
23+
System.err.println(s"ERROR: Couldn't generate the schema because of the following error: ${error.toString}")
24+
System.exit(2)
25+
}
26+
}
27+
}

0 commit comments

Comments
 (0)