From f897396afab2ea6d86f284bc68168669ad8b95fe Mon Sep 17 00:00:00 2001 From: "Daniel (dB.) Doubrovkine" Date: Sun, 12 Apr 2026 10:28:01 -0400 Subject: [PATCH] Fix ArgumentError: invalid byte sequence in UTF-8 when processing non-UTF-8 files Files with non-UTF-8 encoding (e.g., GB2312/GB18030 commonly used in Chinese codebases) would cause an ArgumentError when fui tried to match import patterns against file contents. Read files in binary mode and transcode to UTF-8, replacing invalid byte sequences. This allows fui to gracefully handle files with any encoding while still finding #import references. Fixes #37. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- CHANGELOG.md | 1 + lib/fui/finder.rb | 29 +++++++++++++---------------- spec/fixtures/non_utf8/main.m | 3 +++ spec/fixtures/non_utf8/used_class.h | 8 ++++++++ spec/fui/finder_spec.rb | 15 +++++++++++++++ 5 files changed, 40 insertions(+), 16 deletions(-) create mode 100644 spec/fixtures/non_utf8/main.m create mode 100644 spec/fixtures/non_utf8/used_class.h diff --git a/CHANGELOG.md b/CHANGELOG.md index d82f97a..b2c2d91 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ ### 0.5.1 (Next) * [#45](https://github.com/dblock/fui/pull/45): Migrated from Travis CI to GitHub Actions with danger-pr-comment workflow - [@dblock](https://github.com/dblock). +* [#37](https://github.com/dblock/fui/issues/37): Fixed `ArgumentError: invalid byte sequence in UTF-8` when processing files with non-UTF-8 encoding - [@dblock](https://github.com/dblock). * Your contribution here. ### 0.5.0 (2018/12/19) diff --git a/lib/fui/finder.rb b/lib/fui/finder.rb index 41c009b..8a686ba 100644 --- a/lib/fui/finder.rb +++ b/lib/fui/finder.rb @@ -84,26 +84,23 @@ def global_imported(file_contents, header) end def process_code(references, path) - File.open(path) do |file| - yield path if block_given? - headers.each do |header| - filename_without_extension = File.basename(path, File.extname(path)) - file_contents = File.read(file) - global_import_exists = global_imported(file_contents, header) - local_import_exists = local_imported(file_contents, header) - references[header] << path if filename_without_extension != header.filename_without_extension && (local_import_exists || global_import_exists) - end + yield path if block_given? + file_contents = File.read(path, encoding: 'binary').encode('UTF-8', invalid: :replace, undef: :replace) + headers.each do |header| + filename_without_extension = File.basename(path, File.extname(path)) + global_import_exists = global_imported(file_contents, header) + local_import_exists = local_imported(file_contents, header) + references[header] << path if filename_without_extension != header.filename_without_extension && (local_import_exists || global_import_exists) end end def process_xml(references, path) - File.open(path) do |file| - yield path if block_given? - headers.each do |header| - filename_without_extension = File.basename(path, File.extname(path)) - check_xibs = !options['ignore-xib-files'] - references[header] << path if (check_xibs || filename_without_extension != header.filename_without_extension) && File.read(file).include?("customClass=\"#{header.filename_without_extension}\"") - end + yield path if block_given? + file_contents = File.read(path, encoding: 'binary').encode('UTF-8', invalid: :replace, undef: :replace) + headers.each do |header| + filename_without_extension = File.basename(path, File.extname(path)) + check_xibs = !options['ignore-xib-files'] + references[header] << path if (check_xibs || filename_without_extension != header.filename_without_extension) && file_contents.include?("customClass=\"#{header.filename_without_extension}\"") end end end diff --git a/spec/fixtures/non_utf8/main.m b/spec/fixtures/non_utf8/main.m new file mode 100644 index 0000000..11d5a54 --- /dev/null +++ b/spec/fixtures/non_utf8/main.m @@ -0,0 +1,3 @@ +#import "used_class.h" +// ÄãºÃ +- void main() {} diff --git a/spec/fixtures/non_utf8/used_class.h b/spec/fixtures/non_utf8/used_class.h new file mode 100644 index 0000000..5a77e0d --- /dev/null +++ b/spec/fixtures/non_utf8/used_class.h @@ -0,0 +1,8 @@ +// +// UsedClass.h +// + +#import + +@interface UsedClass +@end diff --git a/spec/fui/finder_spec.rb b/spec/fui/finder_spec.rb index 69e50c3..1ad66bf 100644 --- a/spec/fui/finder_spec.rb +++ b/spec/fui/finder_spec.rb @@ -179,4 +179,19 @@ end end end + context 'files with non-UTF-8 encoding' do + before :each do + @fixtures_dir = File.expand_path(File.join(__FILE__, '../../fixtures/non_utf8')) + end + describe '#references' do + it 'handles non-UTF-8 encoded files without raising an error' do + finder = Fui::Finder.new(@fixtures_dir) + expect { finder.references }.not_to raise_error + end + it 'finds references in files with non-UTF-8 encoding' do + finder = Fui::Finder.new(@fixtures_dir) + expect(Hash[finder.references.map { |k, v| [k.filename, v.count] }]).to eq('used_class.h' => 1) + end + end + end end