@@ -5,20 +5,26 @@ set -o errexit # Leave immediately if a command returns an error
set -o nounset  # Leave immediately if an uninitialized value is used
set -o pipefail # Leave immediately if a command fails in a pipe

# Globs that match nothing expand to nothing instead of the literal pattern
shopt -s nullglob

# inherit_errexit makes command substitutions inherit errexit. The option only
# exists since bash 4.4: trying to enable it on 4.0-4.3 fails, so the version
# is compared numerically instead of pattern-matching on the version string.
if (( BASH_VERSINFO[0] > 4 || (BASH_VERSINFO[0] == 4 && BASH_VERSINFO[1] >= 4) )); then
  shopt -s inherit_errexit
fi
1112
1213# ####################################################################
1314# Helper functions
1415# ####################################################################
1516
# Print a fatal error message on stderr and terminate the script.
# Arguments:
#   $1 - message to display
#   $2 - exit code to terminate with (optional, defaults to 1)
fatal () {
  local msg="$1" exit_code="${2:-1}"
  # printf keeps the message intact even if it starts with a dash or
  # contains backslashes
  printf 'FATAL: %s\n' "${msg}" >&2
  exit "${exit_code}"
}
2122
# Print an error message on stderr without terminating the script.
# Arguments:
#   $1 - message to display
error () {
  local msg="$1"
  printf 'ERROR: %s\n' "${msg}" >&2
}
27+
2228check_binary_exists () {
2329 local binary=" $1 "
2430 command -v " ${binary} " & > /dev/null || error " ${binary} is required but it's not installed"
@@ -36,99 +42,65 @@ get_repository_url() {
3642 git remote get-url origin
3743}
3844
# Print the md5sum listing of the given files, one "<checksum>  <file>" line
# per file. Prints nothing when called without any file.
# Arguments:
#   $@ - files to checksum
md5sum_files () {
  # md5sum without arguments would block reading stdin, so bail out early
  if (( $# == 0 )); then
    return 0
  fi
  md5sum -- "$@"
}
4348
# Look up the checksum of a file in a md5sum listing.
# Arguments:
#   $1 - file name, exactly as it appears in the listing
#   $2 - listing made of "<checksum>  <file>" lines, as printed by md5sum
# Outputs:
#   the checksum of the file on stdout, or nothing when the file is not listed
get_md5sum () {
  local file="$1" checksums="$2"
  local line checksum name
  while IFS= read -r line; do
    [[ -n "${line}" ]] || continue
    checksum="${line%% *}"
    # After the checksum comes a space and a one-character mode marker
    # (space or '*'); everything after that is the file name, spaces included
    name="${line#* }"
    name="${name:1}"
    if [[ "${name}" == "${file}" ]]; then
      printf '%s\n' "${checksum}"
    fi
  done <<< "${checksums}"
  # A missing entry is not a failure: callers compare against an empty value
  return 0
}
5954
# Print the language of the project located in the current directory.
# A non-empty PROJECT_LANGUAGE environment variable takes precedence;
# otherwise a build.sbt file identifies a scala project.
# Outputs:
#   the language name on stdout, "unknown" when it cannot be determined
detect_current_project_language () {
  if [[ -n "${PROJECT_LANGUAGE:-}" ]]; then
    printf '%s\n' "${PROJECT_LANGUAGE}"
  elif [[ -f "build.sbt" ]]; then
    printf '%s\n' "scala"
  else
    printf '%s\n' "unknown"
  fi
}
6764
# Make sure a non-empty file ends with a linefeed, appending one if needed.
# Arguments:
#   $1 - file to fix in place
fix_end_of_file () {
  local file="$1"
  # Command substitution strips a trailing linefeed, so the last byte reads as
  # empty both when the file already ends with one and when the file is empty
  [[ -z "$(tail -c1 -- "${file}")" ]] || printf '\n' >> "${file}"
}
7069
# Ensure every schema file reported by find_schema_files ends with a linefeed.
fix_kafka_schemas_end_of_file () {
  local schema_file
  # Read one path per line so that file names containing spaces are not split
  while IFS= read -r schema_file; do
    [[ -n "${schema_file}" ]] || continue
    fix_end_of_file "${schema_file}"
  done < <(find_schema_files)
}
8275
# List the avro schema files (*.avsc) found under the "schemas" directory of
# the current directory, one path per line, sorted.
# Prints nothing when the directory does not exist.
find_schema_files () {
  # Without this guard find fails on a missing directory, which would abort
  # callers running under errexit before they can report the problem
  [[ -d schemas ]] || return 0
  # Byte-wise sort so the order does not depend on the user locale
  find schemas -type f -name '*.avsc' | LC_ALL=C sort
}
9579
# List the avro schema files under "schemas" that were not modified after the
# given date, i.e. files the schema generation did not (re)write.
# Arguments:
#   $1 - reference date, in a format understood by find -newermt
# Outputs:
#   one path per line on stdout; nothing when "schemas" does not exist
find_obsolete_schema_files () {
  local date="$1"
  # Same guard as for listing: a missing directory means nothing is obsolete
  [[ -d schemas ]] || return 0
  find schemas -type f -name '*.avsc' -not -newermt "${date}"
}
10984
# Generate the kafka schemas of a scala project by running its sbt
# generateKafkaSchemas task.
# Returns:
#   non-zero when the sbt tasks cannot be listed, when the project does not
#   define the task, or when the task itself fails
generate_kafka_schemas_for_scala () {
  local available_tasks

  # The task list is captured before being searched: piping sbt straight into
  # "grep -q" lets grep exit on the first match, which can kill sbt with
  # SIGPIPE and, under pipefail, make an existing task look missing
  if ! available_tasks="$(sbt "tasks -V")"; then
    error "Could not list the sbt tasks of the project"
    return 1
  fi

  if ! grep -qE "^ *generateKafkaSchemas " <<< "${available_tasks}"; then
    error "The project does not have a sbt generateKafkaSchemas task"
    # Do not attempt to run a task that is known to be missing
    return 1
  fi

  sbt -batch -error "set fork := false; generateKafkaSchemas"
}
12891
# Run the schema generation matching the language of the project.
# Arguments:
#   $1 - project language; only "scala" is supported
# Returns:
#   non-zero when the language is not supported or when a generation step fails
run_schema_generation_task () {
  local language="$1"
  case "${language}" in
    scala)
      check_binary_exists "sbt"
      generate_kafka_schemas_for_scala
      fix_kafka_schemas_end_of_file
      ;;
    *)
      error "Unsupported language: ${language}"
      # Report the failure to the caller: returning success here would let
      # the script carry on without having generated anything
      return 1
      ;;
  esac
}
133105
134106# ####################################################################
@@ -137,32 +109,42 @@ run_schema_generator_code() {
137109
trap clean_temporary_folder EXIT

language="$(detect_current_project_language)"

# Reference timestamp, taken with a one second margin before the generation:
# schema files not modified after it were not (re)generated
before_schema_generation="$(date --date='-1 second' +'%Y-%m-%d %H:%M:%S')"

# File lists are kept in arrays so that paths containing spaces stay intact
mapfile -t schema_files_before < <(find_schema_files)
schema_md5sum_before=""
if (( ${#schema_files_before[@]} > 0 )); then
  schema_md5sum_before="$(md5sum_files "${schema_files_before[@]}")"
fi

run_schema_generation_task "${language}"

mapfile -t schema_files_generated < <(find_schema_files)
(( ${#schema_files_generated[@]} > 0 )) || fatal "No schema files found were generated"

schema_md5sum_after="$(md5sum_files "${schema_files_generated[@]}")"

error_found="false"

for schema_file in "${schema_files_generated[@]}"; do
  if ! is_git_tracked "${schema_file}"; then
    error "Schema file \"${schema_file}\" is not tracked by git. Please commit it."
    error_found="true"
  fi

  # A file that did not exist before the generation has no previous checksum
  # and is therefore reported as inconsistent too
  checksum_after="$(get_md5sum "${schema_file}" "${schema_md5sum_after}")"
  checksum_before="$(get_md5sum "${schema_file}" "${schema_md5sum_before}")"
  if [[ "${checksum_after}" != "${checksum_before}" ]]; then
    error "Schema file \"${schema_file}\" is not consistent with code. Please commit the updated version."
    error_found="true"
  fi
done

obsolete_schemas_files="$(find_obsolete_schema_files "${before_schema_generation}")"
if [[ -n "${obsolete_schemas_files}" ]]; then
  error "The following schema files seem obsolete: ${obsolete_schemas_files}. Please delete them."
  error_found="true"
fi

# All problems are reported before failing, so they can be fixed in one go
[[ "${error_found}" == "false" ]] || exit 1
150+
0 commit comments