diff --git a/scripts/repository.lic b/scripts/repository.lic
index 6c5cc62c4..f7c4782f9 100644
--- a/scripts/repository.lic
+++ b/scripts/repository.lic
@@ -10,9 +10,12 @@
          game: any
          tags: core
      required: Lich > 5.0.1
-      version: 2.72
+      version: 2.73
 
   changelog:
+    2.73 (2026-05-12):
+      Validate scripts and map databases for non-ASCII characters before upload
+      Aborts upload with a per-character report (line, column, codepoint) when found
     2.72 (2026-04-11):
       Fix to have DRT mapdb auto-update
     2.71 (2026-02-02):
@@ -1048,6 +1051,8 @@ module RepositoryTillmen
     filename, file_path = find_file(file)
     return false unless filename && file_path
 
+    return false unless check_non_ascii(file_path)
+
     md5sum = Digest::MD5.file(file_path).to_s
     comments = File.open(file_path, 'rb') { |f| CommentParser.extract_comments(f.read(20_000)) }
 
@@ -1087,6 +1092,8 @@ module RepositoryTillmen
       return false
     end
 
+    return false unless check_non_ascii(filename)
+
     author = @options.author || Char.name
     password = @options.password || Settings["password:#{author.downcase.gsub(/[^a-z]/, '')}"]
 
@@ -1264,6 +1271,62 @@ module RepositoryTillmen
     end
   end
 
+  # Scans a file for non-ASCII characters. Returns an Array of Hashes:
+  #   { line: Integer, col: Integer, char: String, codepoint: Integer, invalid: Boolean }
+  # Reads line-by-line so only one line of a large file (e.g. map JSON) is
+  # held in memory at a time; pure-ASCII lines are skipped without decoding.
+  # col is always a 1-based character position. Bytes that are not valid UTF-8
+  # are reported one by one with invalid: true, char set to an escaped form
+  # such as "\xFF" and codepoint set to the raw byte value, so a stray byte
+  # neither crashes the check nor hides valid characters on the same line.
+  def non_ascii_violations(file_path)
+    violations = []
+    File.open(file_path, 'rb') do |f|
+      f.each_line.with_index(1) do |line, line_no|
+        next if line.ascii_only?
+
+        # String#each_char yields each malformed byte as its own one-byte
+        # String, so one pass covers valid and invalid UTF-8 alike.
+        line.force_encoding(Encoding::UTF_8).each_char.with_index(1) do |ch, col|
+          next if ch.ascii_only?
+
+          if ch.valid_encoding?
+            violations << { line: line_no, col: col, char: ch, codepoint: ch.ord, invalid: false }
+          else
+            byte = ch.getbyte(0)
+            violations << { line: line_no, col: col, char: format('\x%02X', byte), codepoint: byte, invalid: true }
+          end
+        end
+      end
+    end
+    violations
+  end
+
+  # Reports any non-ASCII characters found in file_path. Returns true when the
+  # file is clean (safe to upload), false when violations were found (caller
+  # should abort). At most report_limit violations are echoed individually so
+  # a heavily affected file cannot flood the client; the total count is
+  # always echoed.
+  def check_non_ascii(file_path, report_limit: 50)
+    violations = non_ascii_violations(file_path)
+    return true if violations.empty?
+
+    noun = violations.size == 1 ? 'character' : 'characters'
+    echo "error: non-ASCII characters detected in #{file_path}"
+    echo "found #{violations.size} non-ASCII #{noun}; upload aborted"
+    violations.first(report_limit).each do |v|
+      detail = if v[:invalid]
+                 format('invalid UTF-8 byte 0x%02X', v[:codepoint])
+               else
+                 format('%s (U+%04X)', v[:char], v[:codepoint])
+               end
+      echo format('  line %d, col %d: %s', v[:line], v[:col], detail)
+    end
+    omitted = violations.size - report_limit
+    echo "  ... and #{omitted} more not shown" if omitted.positive?
+    false
+  end
+
   def determine_default_game
     if XMLData.game =~ /^GS/
       'gs'