diff --git a/.github/workflows/_publish-code.yml b/.github/workflows/_publish-code.yml index 00ff4db8..31a65647 100644 --- a/.github/workflows/_publish-code.yml +++ b/.github/workflows/_publish-code.yml @@ -5,14 +5,13 @@ on: workflow_dispatch: jobs: - linting: + publish: name: Publish to Rubygems runs-on: ubuntu-latest steps: - uses: actions/checkout@v5 with: submodules: recursive - - name: Set up Ruby uses: ruby/setup-ruby@v1 with: @@ -28,10 +27,13 @@ jobs: echo -e "---\n:rubygems_api_key: ${RUBYGEMS_API_KEY}" > ~/.gem/credentials chmod 600 ~/.gem/credentials - - name: Build the Gem + - name: Build the Gems run: | - gem build --strict --output mindee.gem + gem build mindee.gemspec --strict --output mindee.gem + gem build mindee-lite.gemspec --strict --output mindee-lite.gem - - name: Publish the Gem + - name: Publish the Gems run: | + # Push both generated files to Rubygems gem push mindee.gem + gem push mindee-lite.gem diff --git a/.github/workflows/_test-integration-lite.yml b/.github/workflows/_test-integration-lite.yml new file mode 100644 index 00000000..f283269a --- /dev/null +++ b/.github/workflows/_test-integration-lite.yml @@ -0,0 +1,50 @@ +# +# Run integration tests. 
+# +name: Integration Tests - Minimal Dependencies + +on: + workflow_call: + workflow_dispatch: + +env: + MINDEE_API_KEY: ${{ secrets.MINDEE_API_KEY_SE_TESTS }} + WORKFLOW_ID: ${{ secrets.WORKFLOW_ID_SE_TESTS }} + MINDEE_V2_API_KEY: ${{ secrets.MINDEE_V2_SE_TESTS_API_KEY }} + MINDEE_V2_FINDOC_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID }} + MINDEE_V2_SE_TESTS_BLANK_PDF_URL: ${{ secrets.MINDEE_V2_SE_TESTS_BLANK_PDF_URL }} + MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID }} + MINDEE_V2_SE_TESTS_CLASSIFICATION_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_CLASSIFICATION_MODEL_ID }} + MINDEE_V2_SE_TESTS_CROP_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_CROP_MODEL_ID }} + MINDEE_V2_SE_TESTS_OCR_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_OCR_MODEL_ID }} + MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID }} + MINDEE_LOG_LEVEL: DEBUG + MINDEE_GEM_NAME: mindee-lite + +jobs: + integration-tests: + name: Run Integration Tests + timeout-minutes: 30 + runs-on: ${{ matrix.os }} + strategy: + max-parallel: 3 + matrix: + os: + - "ubuntu-24.04" + - "macos-latest" + ruby: + - "4.0" + steps: + - uses: actions/checkout@v5 + with: + submodules: recursive + + - name: set up Ruby ${{ matrix.ruby }} + uses: ruby/setup-ruby@v1 + with: + ruby-version: ${{ matrix.ruby }} + bundler-cache: true + + - name: Run Rspec for integration tests + run: | + bundle exec rake integration diff --git a/.github/workflows/_test-integrations.yml b/.github/workflows/_test-integration.yml similarity index 98% rename from .github/workflows/_test-integrations.yml rename to .github/workflows/_test-integration.yml index 0a75c361..32e06f69 100644 --- a/.github/workflows/_test-integrations.yml +++ b/.github/workflows/_test-integration.yml @@ -1,7 +1,7 @@ # # Run integration tests. 
# -name: Integration Tests +name: Integration Tests - All Dependencies on: workflow_call: diff --git a/.github/workflows/_test-units-lite.yml b/.github/workflows/_test-units-lite.yml new file mode 100644 index 00000000..80277c9c --- /dev/null +++ b/.github/workflows/_test-units-lite.yml @@ -0,0 +1,40 @@ +# +# Run lite unit tests. +# +name: Unit Tests - Minimal Dependencies + +on: + workflow_call: + +env: + MINDEE_GEM_NAME: mindee-lite + +jobs: + tests: + name: Run Unit Tests + timeout-minutes: 30 + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: + - "ubuntu-24.04" + - "macos-latest" + ruby: + - "3.2" + - "4.0" + steps: + - uses: actions/checkout@v5 + with: + submodules: recursive + + - name: set up Ruby ${{ matrix.ruby }} + uses: ruby/setup-ruby@v1 + with: + ruby-version: ${{ matrix.ruby }} + bundler-cache: true + + - name: Run Rspec + env: + MINDEE_LOG_LEVEL: DEBUG + run: | + bundle exec rake spec diff --git a/.github/workflows/_test-units.yml b/.github/workflows/_test-units.yml index 58811cf4..5769e6e1 100644 --- a/.github/workflows/_test-units.yml +++ b/.github/workflows/_test-units.yml @@ -1,7 +1,7 @@ # -# Run unit tests. +# Run full unit tests. 
# -name: Tests +name: Unit Tests - All Dependencies on: workflow_call: @@ -15,7 +15,6 @@ jobs: matrix: os: - "ubuntu-24.04" - - "ubuntu-22.04" - "macos-latest" ruby: - "3.2" diff --git a/.github/workflows/pull-request.yml b/.github/workflows/pull-request.yml index 656762ad..5f5edf04 100644 --- a/.github/workflows/pull-request.yml +++ b/.github/workflows/pull-request.yml @@ -10,19 +10,27 @@ permissions: jobs: static-analysis: uses: ./.github/workflows/_static-analysis.yml - test-units: + test-unit: uses: ./.github/workflows/_test-units.yml needs: static-analysis secrets: inherit - test-integrations: - uses: ./.github/workflows/_test-integrations.yml - needs: test-units + test-unit-lite: + uses: ./.github/workflows/_test-units-lite.yml + needs: static-analysis + secrets: inherit + test-integration: + uses: ./.github/workflows/_test-integration.yml + needs: test-unit + secrets: inherit + test-integration-lite: + uses: ./.github/workflows/_test-integration-lite.yml + needs: test-unit-lite secrets: inherit test-smoke: uses: ./.github/workflows/_test-smoke.yml - needs: test-units + needs: test-unit secrets: inherit test-cli: uses: ./.github/workflows/_test-cli.yml - needs: test-units + needs: test-unit secrets: inherit diff --git a/Gemfile b/Gemfile index 122f66f4..e7cc339d 100644 --- a/Gemfile +++ b/Gemfile @@ -3,4 +3,12 @@ source 'https://rubygems.org' # Specify your gem's dependencies in mindee.gemspec -gemspec + +gemspec name: ENV.fetch('MINDEE_GEM_NAME', 'mindee') + +group :development, :test do + gem 'openssl', '~> 4.0' + gem 'prism', '~> 1.3' + gem 'rake', '~> 13.3' + gem 'rspec', '~> 3.13' +end diff --git a/Rakefile b/Rakefile index d91990c6..6c296cca 100644 --- a/Rakefile +++ b/Rakefile @@ -2,33 +2,39 @@ require 'rake' require 'rspec/core/rake_task' -require 'yard' + +is_lite_mode = ENV.fetch('MINDEE_GEM_NAME', 'mindee') == 'mindee-lite' begin require 'bundler/setup' - Bundler::GemHelper.install_tasks + require 'bundler/gem_helper' + 
Bundler::GemHelper.install_tasks(name: ENV.fetch('MINDEE_GEM_NAME', 'mindee')) rescue LoadError puts 'although not required, bundler is recommended for running the tests' end task default: :spec +exclusion_opts = is_lite_mode ? ['--tag', '~all_deps'] : [] +RSpec::Core::RakeTask.new(:spec) do |t| + t.rspec_opts = exclusion_opts +end +unless is_lite_mode + require 'yard' + desc 'Generate documentation' + YARD::Rake::YardocTask.new(:doc) do |task| + task.files = ['lib/**/*.rb'] + end -RSpec::Core::RakeTask.new(:spec) - -desc 'Generate documentation' -YARD::Rake::YardocTask.new(:doc) do |task| - task.files = ['lib/**/*.rb'] + Rake::Task[:doc].enhance do + FileUtils.cp_r( + File.join('docs', 'code_samples'), + File.join('docs', '_build') + ) + end end desc 'Run integration tests' RSpec::Core::RakeTask.new(:integration) do |t| t.pattern = 'spec/**/*_integration.rb' - t.rspec_opts = ['--require', 'integration_helper'] -end - -Rake::Task[:doc].enhance do - FileUtils.cp_r( - File.join('docs', 'code_samples'), - File.join('docs', '_build') - ) + t.rspec_opts = ['--require', 'integration_helper'] + exclusion_opts end diff --git a/examples/auto_invoice_splitter_extraction.rb b/examples/auto_invoice_splitter_extraction.rb index 9056272c..d82bc601 100644 --- a/examples/auto_invoice_splitter_extraction.rb +++ b/examples/auto_invoice_splitter_extraction.rb @@ -6,7 +6,7 @@ def invoice_splitter_auto_extraction(file_path) mindee_client = Mindee::V1::Client.new(api_key: 'my-api-key') input_source = mindee_client.source_from_path(file_path) - if input_source.pdf? && input_source.count_pages > 1 + if input_source.pdf? 
&& input_source.page_count > 1 parse_multi_page(mindee_client, input_source) else parse_single_page(mindee_client, input_source) diff --git a/lib/mindee.rb b/lib/mindee.rb index bf8c6b3b..b6f6f969 100644 --- a/lib/mindee.rb +++ b/lib/mindee.rb @@ -6,6 +6,10 @@ require 'mindee/v2' module Mindee + # Dependency management + module Dependency + end + # Mindee internal error module. module Error end diff --git a/lib/mindee/dependency.rb b/lib/mindee/dependency.rb new file mode 100644 index 00000000..6b3ce528 --- /dev/null +++ b/lib/mindee/dependency.rb @@ -0,0 +1,29 @@ +# frozen_string_literal: true + +module Mindee + # Centralized check for optional heavy dependencies + module Dependency + def self.check_all_dependencies + require 'origami' + require 'mini_magick' + require 'pdf-reader' + true + rescue LoadError + false + end + + @all_deps_available = check_all_dependencies + + def self.all_deps_available? + check_all_dependencies + end + + def self.require_all_deps! + raise LoadError, MINDEE_DEPENDENCIES_LOAD_ERROR unless all_deps_available? + end + + MINDEE_DEPENDENCIES_LOAD_ERROR = 'Attempted to load Mindee PDF/Image tools without required dependencies. ' \ + "If you need to process local files, please replace the 'mindee-lite' gem " \ + "with the standard 'mindee' gem in your Gemfile." + end +end diff --git a/lib/mindee/image/image_extractor.rb b/lib/mindee/image/image_extractor.rb index 98f68537..e8231a36 100644 --- a/lib/mindee/image/image_extractor.rb +++ b/lib/mindee/image/image_extractor.rb @@ -1,5 +1,6 @@ # frozen_string_literal: true +Mindee::Dependency.require_all_deps! 
require 'mini_magick' require 'origami' require 'stringio' diff --git a/lib/mindee/input/sources/local_input_source.rb b/lib/mindee/input/sources/local_input_source.rb index 7e64c41a..ebaabf1b 100644 --- a/lib/mindee/input/sources/local_input_source.rb +++ b/lib/mindee/input/sources/local_input_source.rb @@ -4,8 +4,9 @@ require 'marcel' require 'fileutils' -require_relative '../../pdf' -require_relative '../../image' +require_relative '../../dependency' +require_relative '../../pdf' if Mindee::Dependency.all_deps_available? +require_relative '../../image' if Mindee::Dependency.all_deps_available? module Mindee module Input @@ -142,6 +143,11 @@ def write_to_file(path) # Defaults to one for images. # @return [Integer] def page_count return 1 unless pdf? + + # Only the PDF path needs the optional PDF toolchain; images always count as a single page. + unless Mindee::Dependency.all_deps_available? + raise NotImplementedError, Mindee::Dependency::MINDEE_DEPENDENCIES_LOAD_ERROR + end @io_stream.seek(0) @@ -149,14 +155,6 @@ def page_count pdf_processor.pages.size end - # Returns the page count for a document. - # Defaults to one for images. - # @return [Integer] - # @deprecated Use {#page_count} instead. - def count_pages - page_count - end - # Compresses the file, according to the provided info. # @param [Integer] quality Quality of the output file. # @param [Integer, nil] max_width Maximum width (Ignored for PDFs). @@ -167,6 +165,10 @@ def count_pages # @param [bool] disable_source_text If the PDF has source text, whether to re-apply it to the original or # not. Needs force_source_text to work. def compress!(quality: 85, max_width: nil, max_height: nil, force_source_text: false, disable_source_text: true) + unless Mindee::Dependency.all_deps_available? + raise NotImplementedError, Mindee::Dependency::MINDEE_DEPENDENCIES_LOAD_ERROR + end + buffer = if pdf? Mindee::PDF::PDFCompressor.compress_pdf( @io_stream, @@ -189,6 +191,10 @@ def compress!(quality: 85, max_width: nil, max_height: nil, force_source_text: f # Checks whether the file has source text if it is a pdf. 
`false` otherwise # @return [bool] `true` if the file is a PDF and has source text. def source_text? + unless Mindee::Dependency.all_deps_available? + raise NotImplementedError, Mindee::Dependency::MINDEE_DEPENDENCIES_LOAD_ERROR + end + Mindee::PDF::PDFTools.source_text?(@io_stream) end end diff --git a/lib/mindee/pdf/pdf_compressor.rb b/lib/mindee/pdf/pdf_compressor.rb index 33a105e0..c93f2369 100644 --- a/lib/mindee/pdf/pdf_compressor.rb +++ b/lib/mindee/pdf/pdf_compressor.rb @@ -1,6 +1,8 @@ # frozen_string_literal: true +Mindee::Dependency.require_all_deps! require 'pdf-reader' + # Shorthand for pdf-reader's PDF namespace, to avoid mixups with the local Origami fork. PDFReader = PDF diff --git a/lib/mindee/pdf/pdf_extractor.rb b/lib/mindee/pdf/pdf_extractor.rb index 3e7f760e..259bec80 100644 --- a/lib/mindee/pdf/pdf_extractor.rb +++ b/lib/mindee/pdf/pdf_extractor.rb @@ -7,6 +7,10 @@ module PDF class PDFExtractor # @param local_input [Mindee::Input::Source::LocalInputSource] def initialize(local_input) + unless Mindee::Dependency.all_deps_available? + raise NotImplementedError, Mindee::Dependency::MINDEE_DEPENDENCIES_LOAD_ERROR + end + @filename = local_input.filename if local_input.pdf? @source_pdf = local_input.io_stream diff --git a/lib/mindee/pdf/pdf_processor.rb b/lib/mindee/pdf/pdf_processor.rb index 77841ab8..a40a79fe 100644 --- a/lib/mindee/pdf/pdf_processor.rb +++ b/lib/mindee/pdf/pdf_processor.rb @@ -1,5 +1,6 @@ # frozen_string_literal: true +Mindee::Dependency.require_all_deps! require 'origami' require_relative 'pdf_tools' diff --git a/lib/mindee/pdf/pdf_tools.rb b/lib/mindee/pdf/pdf_tools.rb index e2ee8bf6..8a2d851e 100644 --- a/lib/mindee/pdf/pdf_tools.rb +++ b/lib/mindee/pdf/pdf_tools.rb @@ -1,5 +1,6 @@ # frozen_string_literal: true +Mindee::Dependency.require_all_deps! 
require 'origami' module Mindee diff --git a/lib/mindee/v1.rb b/lib/mindee/v1.rb index 2ffa5b17..9214cafb 100644 --- a/lib/mindee/v1.rb +++ b/lib/mindee/v1.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true require_relative 'v1/client' -require_relative 'v1/extraction' +require_relative 'v1/extraction' if Mindee::Dependency.all_deps_available? require_relative 'v1/http' require_relative 'v1/parsing' require_relative 'v1/product' diff --git a/lib/mindee/v1/extraction/multi_receipts_extractor.rb b/lib/mindee/v1/extraction/multi_receipts_extractor.rb index fa3dbb6b..fad71dbb 100644 --- a/lib/mindee/v1/extraction/multi_receipts_extractor.rb +++ b/lib/mindee/v1/extraction/multi_receipts_extractor.rb @@ -17,7 +17,7 @@ def self.extract_receipts(input_source, inference) 'No possible receipts candidates found for Multi-Receipts extraction.' end - (0...input_source.count_pages).each do |page_id| + (0...input_source.page_count).each do |page_id| receipt_positions = inference.pages[page_id].prediction.receipts.map(&:bounding_box) images.concat( Mindee::Image::ImageExtractor.extract_multiple_images_from_source(input_source, page_id + 1, diff --git a/lib/mindee/v2.rb b/lib/mindee/v2.rb index bdb3fcc3..94303ea9 100644 --- a/lib/mindee/v2.rb +++ b/lib/mindee/v2.rb @@ -2,6 +2,6 @@ require_relative 'v2/client' require_relative 'v2/http' -require_relative 'v2/file_operation' +require_relative 'v2/file_operation' if Mindee::Dependency.all_deps_available? require_relative 'v2/parsing' require_relative 'v2/product' diff --git a/lib/mindee/v2/client.rb b/lib/mindee/v2/client.rb index 12d6d2d6..24cbf4c1 100644 --- a/lib/mindee/v2/client.rb +++ b/lib/mindee/v2/client.rb @@ -10,12 +10,12 @@ module Mindee module V2 # Mindee V2 API Client. 
class Client - # @return [V2::HTTP::MindeeApi] + # @return [V2::HTTP::MindeeApiV2] private attr_reader :mindee_api # @param api_key [String] def initialize(api_key: '') - @mindee_api = Mindee::V2::HTTP::MindeeApi.new(api_key: api_key) + @mindee_api = Mindee::V2::HTTP::MindeeApiV2.new(api_key: api_key) end # Retrieves a result from a given queue or URL to the result. diff --git a/lib/mindee/v2/http.rb b/lib/mindee/v2/http.rb index 7627f044..45d7948e 100644 --- a/lib/mindee/v2/http.rb +++ b/lib/mindee/v2/http.rb @@ -1,4 +1,4 @@ # frozen_string_literal: true -require_relative 'http/api_settings' -require_relative 'http/mindee_api' +require_relative 'http/api_v2_settings' +require_relative 'http/mindee_api_v2' diff --git a/lib/mindee/v2/http/api_settings.rb b/lib/mindee/v2/http/api_v2_settings.rb similarity index 98% rename from lib/mindee/v2/http/api_settings.rb rename to lib/mindee/v2/http/api_v2_settings.rb index d689fd7c..1b3e4864 100644 --- a/lib/mindee/v2/http/api_settings.rb +++ b/lib/mindee/v2/http/api_v2_settings.rb @@ -6,7 +6,7 @@ module Mindee module V2 module HTTP # API client for version 2. - class ApiSettings + class ApiV2Settings # V2 API key's default environment key name. MINDEE_V2_API_KEY_ENV_NAME = 'MINDEE_V2_API_KEY' # V2 API key's default value. diff --git a/lib/mindee/v2/http/mindee_api.rb b/lib/mindee/v2/http/mindee_api_v2.rb similarity index 98% rename from lib/mindee/v2/http/mindee_api.rb rename to lib/mindee/v2/http/mindee_api_v2.rb index 9ccbed79..29631f18 100644 --- a/lib/mindee/v2/http/mindee_api.rb +++ b/lib/mindee/v2/http/mindee_api_v2.rb @@ -8,13 +8,13 @@ module Mindee module V2 module HTTP # API client for version 2. - class MindeeApi - # @return [ApiSettings] + class MindeeApiV2 + # @return [ApiV2Settings] attr_reader :settings # @param api_key [String, nil] def initialize(api_key: nil) - @settings = ApiSettings.new(api_key: api_key) + @settings = ApiV2Settings.new(api_key: api_key) end # Sends a file to the queue. 
diff --git a/lib/mindee/version.rb b/lib/mindee/version.rb index ab6545c5..16c9cec1 100644 --- a/lib/mindee/version.rb +++ b/lib/mindee/version.rb @@ -3,7 +3,7 @@ # Mindee module Mindee # Current version. - VERSION = '5.0.0-alpha.1' + VERSION = '5.0.0.alpha1' # Finds and return the current platform. # @return [Symbol, Hash[String | Symbol, Regexp], Nil?] diff --git a/mindee-lite.gemspec b/mindee-lite.gemspec new file mode 100644 index 00000000..5d894ea8 --- /dev/null +++ b/mindee-lite.gemspec @@ -0,0 +1,36 @@ +# frozen_string_literal: true + +require_relative 'lib/mindee/version' + +Gem::Specification.new do |spec| + spec.name = 'mindee-lite' + spec.version = Mindee::VERSION + spec.authors = ['Mindee, SA'] + spec.email = ['opensource@mindee.co'] + + spec.summary = 'Mindee API Helper Library for Ruby (Lite)' + spec.description = "Quickly and easily connect to Mindee's API services using Ruby. This lite version omits " \ + 'heavy image and PDF processing dependencies.' + spec.homepage = 'https://github.com/mindee/mindee-api-ruby' + spec.license = 'MIT' + + spec.metadata['homepage_uri'] = 'https://mindee.com/' + spec.metadata['source_code_uri'] = 'https://github.com/mindee/mindee-api-ruby' + spec.metadata['changelog_uri'] = 'https://github.com/mindee/mindee-api-ruby/blob/main/CHANGELOG.md' + spec.metadata['rubygems_mfa_required'] = 'true' + + spec.files = Dir.chdir(File.expand_path(__dir__)) do + `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(.github|spec|features)/}) } + end + spec.bindir = 'bin' + spec.executables = Dir.children('bin') + .select { |f| File.file?(File.join('bin', f)) } + .reject { |f| f == 'products.rb' } + spec.require_paths = ['lib'] + + spec.required_ruby_version = Gem::Requirement.new('>= 3.2') + + spec.add_dependency 'base64', '~> 0.3' + spec.add_dependency 'logger', '~> 1.7' + spec.add_dependency 'marcel', '~> 1.1' +end diff --git a/mindee.gemspec b/mindee.gemspec index 090f3005..ed46b531 100644 --- a/mindee.gemspec +++ 
b/mindee.gemspec @@ -30,17 +30,14 @@ Gem::Specification.new do |spec| spec.required_ruby_version = Gem::Requirement.new('>= 3.2') spec.add_dependency 'base64', '~> 0.3' - spec.add_dependency 'bundle-audit', '~> 0.2.0' + spec.add_dependency 'logger', '~> 1.7' spec.add_dependency 'marcel', '~> 1.1' spec.add_dependency 'mini_magick', '>= 4', '< 6' spec.add_dependency 'origamindee', '~> 4.0' spec.add_dependency 'pdf-reader', '~> 2.15' - spec.add_development_dependency 'openssl', '~> 4.0' - spec.add_development_dependency 'prism', '~> 1.3' - spec.add_development_dependency 'rake', '~> 13.3' + spec.add_development_dependency 'bundle-audit', '~> 0.2.0' spec.add_development_dependency 'rbs', '~> 3.10' - spec.add_development_dependency 'rspec', '~> 3.13' spec.add_development_dependency 'rubocop', '~> 1.86' spec.add_development_dependency 'steep', '~> 1.10' spec.add_development_dependency 'yard', '~> 0.9' diff --git a/sig/mindee/dependency.rbs b/sig/mindee/dependency.rbs new file mode 100644 index 00000000..cb1da820 --- /dev/null +++ b/sig/mindee/dependency.rbs @@ -0,0 +1,13 @@ +module Mindee + module Dependency + MINDEE_DEPENDENCIES_LOAD_ERROR: String + + self.@all_deps_available: bool + + def self.check_all_dependencies: -> bool + + def self.all_deps_available?: -> bool + def self.require_all_deps!: -> void + + end +end diff --git a/sig/mindee/input/sources/local_input_source.rbs b/sig/mindee/input/sources/local_input_source.rbs index 76ea2854..103ae5d8 100644 --- a/sig/mindee/input/sources/local_input_source.rbs +++ b/sig/mindee/input/sources/local_input_source.rbs @@ -20,7 +20,6 @@ module Mindee def process_pdf: (PageOptions) -> StringIO? def read_contents: (?close: bool) -> [String?, Hash[:filename, String]] def page_count: -> Integer - def count_pages: -> Integer def write_to_file: (String?) -> void def compress!: (?quality: Integer, ?max_width: Integer?, ?max_height: Integer?, ?force_source_text: bool, ?disable_source_text: bool) -> Integer def source_text?: -> bool? 
diff --git a/sig/mindee/v2/client.rbs b/sig/mindee/v2/client.rbs index d4dd683b..143e185f 100644 --- a/sig/mindee/v2/client.rbs +++ b/sig/mindee/v2/client.rbs @@ -5,7 +5,7 @@ OTS_OWNER: String module Mindee module V2 class Client - private attr_reader mindee_api: V2::HTTP::MindeeApi + private attr_reader mindee_api: V2::HTTP::MindeeApiV2 def logger: () -> Logger diff --git a/sig/mindee/v2/http/api_settings.rbs b/sig/mindee/v2/http/api_v2_settings.rbs similarity index 90% rename from sig/mindee/v2/http/api_settings.rbs rename to sig/mindee/v2/http/api_v2_settings.rbs index 2c4cc2b7..e55580ae 100644 --- a/sig/mindee/v2/http/api_settings.rbs +++ b/sig/mindee/v2/http/api_v2_settings.rbs @@ -1,8 +1,8 @@ -# lib/mindee/v2/http/api_settings.rb +# lib/mindee/v2/http/api_v2_settings.rb module Mindee module V2 module HTTP - class ApiSettings + class ApiV2Settings MINDEE_V2_API_KEY_ENV_NAME: String MINDEE_V2_API_KEY_DEFAULT: String? MINDEE_V2_BASE_URL_ENV_NAME: String diff --git a/sig/mindee/v2/http/mindee_api.rbs b/sig/mindee/v2/http/mindee_api_v2.rbs similarity index 93% rename from sig/mindee/v2/http/mindee_api.rbs rename to sig/mindee/v2/http/mindee_api_v2.rbs index e6d77cec..9cbbfe71 100644 --- a/sig/mindee/v2/http/mindee_api.rbs +++ b/sig/mindee/v2/http/mindee_api_v2.rbs @@ -1,4 +1,4 @@ -# lib/mindee/v2/http/mindee_api.rbs +# lib/mindee/v2/http/mindee_api_v2.rbs module Mindee module V2 module HTTP @@ -14,8 +14,8 @@ module Mindee def params_type: () -> singleton(Input::BaseParameters) end - class MindeeApi - attr_reader settings: ApiSettings + class MindeeApiV2 + attr_reader settings: ApiV2Settings def initialize: (?api_key: String?) 
-> void diff --git a/spec/dependency_spec.rb b/spec/dependency_spec.rb new file mode 100644 index 00000000..6f80cbdd --- /dev/null +++ b/spec/dependency_spec.rb @@ -0,0 +1,78 @@ +# frozen_string_literal: true + +require 'mindee' +describe Mindee::Dependency do + before(:each) do + if Mindee::Dependency.instance_variable_defined?(:@all_deps_available) + Mindee::Dependency.remove_instance_variable(:@all_deps_available) + end + end + + describe '.all_deps_available?' do + context 'when evaluating the full mindee gem' do + before do + allow(Mindee::Dependency).to receive(:require).and_return(true) + + Mindee::Dependency.instance_variable_set(:@all_deps_available, Mindee::Dependency.check_all_dependencies) + end + + it 'returns true' do + expect(Mindee::Dependency.all_deps_available?).to be true + end + end + + context 'when evaluating the mindee-lite gem' do + before do + allow(Mindee::Dependency).to receive(:require).and_raise(LoadError) + + Mindee::Dependency.instance_variable_set(:@all_deps_available, Mindee::Dependency.check_all_dependencies) + end + + it 'returns false' do + expect(Mindee::Dependency.all_deps_available?).to be false + end + end + end +end + +describe 'Mindee PDF Module Loading' do + let(:pdf_tools_module_path) { File.expand_path('../lib/mindee/pdf/pdf_tools.rb', __dir__) } + + context 'when initialized in a mindee-lite environment' do + before do + allow(Mindee::Dependency).to receive(:all_deps_available?).and_return(false) + end + + it 'raises a LoadError with the lite exception message' do + expect do + load pdf_tools_module_path + end.to raise_error(LoadError, Mindee::Dependency::MINDEE_DEPENDENCIES_LOAD_ERROR) + end + end + + context 'when initialized in a full mindee environment' do + around do |example| + original_require = Kernel.instance_method(:require) + + Kernel.define_method(:require) do |name| + ['origami', 'mini_magick', 'pdf-reader'].include?(name) || original_require.bind_call(self, name) + end + + begin + example.run + ensure + 
Kernel.define_method(:require, original_require) # Restore original require + end + end + + before do + allow(Mindee::Dependency).to receive(:all_deps_available?).and_return(true) + end + + it 'loads the module successfully without raising errors' do + expect do + load pdf_tools_module_path + end.not_to raise_error + end + end +end diff --git a/spec/image/extracted_image_spec.rb b/spec/image/extracted_image_spec.rb index 1fbba185..82ddbda3 100644 --- a/spec/image/extracted_image_spec.rb +++ b/spec/image/extracted_image_spec.rb @@ -3,10 +3,16 @@ require 'mindee' require 'pathname' require 'fileutils' -require 'mini_magick' require_relative '../data' -describe Mindee::Image::ExtractedImage do +describe 'Mindee::Image::ExtractedImage', :all_deps do + require 'mini_magick' if Mindee::Dependency.all_deps_available? + # Workaround for mindee-lite + if Mindee::Dependency.all_deps_available? + let(:described_class) do + Mindee::Image::ExtractedImage + end + end let(:file_path) do File.join(V1_DATA_DIR, 'products', 'invoices', 'default_sample.jpg') end diff --git a/spec/image/image_compressor_spec.rb b/spec/image/image_compressor_spec.rb index 342dff08..dfb4ad55 100644 --- a/spec/image/image_compressor_spec.rb +++ b/spec/image/image_compressor_spec.rb @@ -4,7 +4,7 @@ require_relative '../data' -describe Mindee::Image::ImageCompressor do +describe Mindee::Image::ImageCompressor, :all_deps do describe 'Image Quality Compression' do let(:input_receipt_path) { "#{FILE_TYPES_DIR}/receipt.jpg" } let(:output_dir) { "#{ROOT_DATA_DIR}/output/" } diff --git a/spec/image/image_extractor_spec.rb b/spec/image/image_extractor_spec.rb index 98b2f5e7..a22030a6 100644 --- a/spec/image/image_extractor_spec.rb +++ b/spec/image/image_extractor_spec.rb @@ -4,7 +4,7 @@ require 'mindee/input/sources' require_relative '../data' -describe Mindee::Image do +describe Mindee::Image, :all_deps do include Mindee::Image let(:barcode_path) do diff --git a/spec/image/image_utils_spec.rb 
b/spec/image/image_utils_spec.rb index 7304a4a3..1f2f94a0 100644 --- a/spec/image/image_utils_spec.rb +++ b/spec/image/image_utils_spec.rb @@ -1,11 +1,11 @@ # frozen_string_literal: true # spec/image_utils_spec.rb -require 'mini_magick' require 'stringio' require 'mindee' -describe Mindee::Image::ImageUtils do +describe 'Mindee::Image::ImageUtils', :all_deps do + require 'mini_magick' if Mindee::Dependency.all_deps_available? let(:sample_image_path) { "#{FILE_TYPES_DIR}/receipt.jpg" } let(:sample_image) { MiniMagick::Image.open(sample_image_path) } diff --git a/spec/input/sources/sources_spec.rb b/spec/input/sources/sources_spec.rb index 777cb9f3..caf00697 100644 --- a/spec/input/sources/sources_spec.rb +++ b/spec/input/sources/sources_spec.rb @@ -3,7 +3,7 @@ require 'mindee' require 'mindee/input/sources' require 'mindee/error' -require 'pdf-reader' +require 'pdf-reader' if Mindee::Dependency.all_deps_available? require_relative '../../data' @@ -15,7 +15,7 @@ ) expect(input_source.file_mimetype).to eq('image/jpeg') expect(input_source.filename).to eq('receipt.jpg') - expect(input_source.page_count).to eq(1) + expect(input_source.page_count).to eq(1) if Mindee::Dependency.all_deps_available? expect(input_source.pdf?).to eq(false) input_source = Mindee::Input::Source::PathInputSource.new( @@ -23,7 +23,7 @@ ) expect(input_source.file_mimetype).to eq('image/jpeg') expect(input_source.filename).to eq('receipt.jpga') - expect(input_source.page_count).to eq(1) + expect(input_source.page_count).to eq(1) if Mindee::Dependency.all_deps_available? expect(input_source.pdf?).to eq(false) end @@ -33,7 +33,7 @@ ) expect(input_source.file_mimetype).to eq('image/tiff') expect(input_source.filename).to eq('receipt.tif') - expect(input_source.page_count).to eq(1) + expect(input_source.page_count).to eq(1) if Mindee::Dependency.all_deps_available? 
expect(input_source.pdf?).to eq(false) input_source = Mindee::Input::Source::PathInputSource.new( @@ -41,11 +41,11 @@ ) expect(input_source.file_mimetype).to eq('image/tiff') expect(input_source.filename).to eq('receipt.tiff') - expect(input_source.page_count).to eq(1) + expect(input_source.page_count).to eq(1) if Mindee::Dependency.all_deps_available? expect(input_source.pdf?).to eq(false) end - it 'should load a HEIC from a path' do + it 'should load a HEIC from a path', :all_deps do input_source = Mindee::Input::Source::PathInputSource.new( File.join(FILE_TYPES_DIR, 'receipt.heic') ) @@ -56,7 +56,7 @@ end end - context 'A PDF input file' do + context 'A PDF input file', :all_deps do it 'should load a multi-page PDF from a path' do input_source = Mindee::Input::Source::PathInputSource.new( File.join(V1_DATA_DIR, 'products/invoices/invoice.pdf') diff --git a/spec/pdf/extracted_pdf_spec.rb b/spec/pdf/extracted_pdf_spec.rb index 9f8e4d99..97d2122c 100644 --- a/spec/pdf/extracted_pdf_spec.rb +++ b/spec/pdf/extracted_pdf_spec.rb @@ -2,7 +2,13 @@ require 'mindee' -describe Mindee::PDF::ExtractedPDF do +describe 'Mindee::PDF::ExtractedPDF', :all_deps do + # Workaround for mindee-lite + if Mindee::Dependency.all_deps_available? + let(:described_class) do + Mindee::PDF::ExtractedPDF + end + end let(:output_dir) { File.join(V1_DATA_DIR, 'output') } let(:valid_pdf_path) { "#{V1_PRODUCT_DATA_DIR}/invoices/invoice.pdf" } let(:invalid_pdf_path) { "#{FILE_TYPES_DIR}/receipt.txt" } diff --git a/spec/pdf/pdf_compressor_spec.rb b/spec/pdf/pdf_compressor_spec.rb index 12119ab4..4136c5a6 100644 --- a/spec/pdf/pdf_compressor_spec.rb +++ b/spec/pdf/pdf_compressor_spec.rb @@ -4,7 +4,7 @@ require_relative '../data' -describe Mindee::PDF::PDFCompressor do +describe 'Mindee::PDF::PDFCompressor', :all_deps do describe 'The PDF text detection method' do it 'should detect text pdf in a PDF file.' 
do text_input = Mindee::Input::Source::PathInputSource.new("#{FILE_TYPES_DIR}/pdf/multipage.pdf") diff --git a/spec/pdf/pdf_extractor_spec.rb b/spec/pdf/pdf_extractor_spec.rb index 418d666d..cdae8a23 100644 --- a/spec/pdf/pdf_extractor_spec.rb +++ b/spec/pdf/pdf_extractor_spec.rb @@ -2,7 +2,7 @@ require 'mindee' -describe 'Invoice extraction' do +describe 'Invoice extraction', :all_deps do let(:invoice_default_sample_path) { File.join(V1_PRODUCT_DATA_DIR, 'invoices', 'default_sample.jpg') } let(:invoice_splitter_5p_path) { File.join(V1_PRODUCT_DATA_DIR, 'invoice_splitter', 'invoice_5p.pdf') } let(:loaded_prediction_path) { File.join(V1_PRODUCT_DATA_DIR, 'invoice_splitter', 'response_v1', 'complete.json') } diff --git a/spec/pdf/pdf_processor_spec.rb b/spec/pdf/pdf_processor_spec.rb index bf2b4c1a..f07dc0c5 100644 --- a/spec/pdf/pdf_processor_spec.rb +++ b/spec/pdf/pdf_processor_spec.rb @@ -4,7 +4,7 @@ require_relative '../data' -describe Mindee::PDF do +describe 'Mindee::PDF', :all_deps do def open_pdf(io_stream) pdf_parser = Origami::PDF::LinearParser.new({}) io_stream.seek(0) diff --git a/spec/v1/extraction/invoice_splitter_extraction_integration.rb b/spec/v1/extraction/invoice_splitter_extraction_integration.rb index 0b8fd773..78d536e4 100644 --- a/spec/v1/extraction/invoice_splitter_extraction_integration.rb +++ b/spec/v1/extraction/invoice_splitter_extraction_integration.rb @@ -4,47 +4,49 @@ require_relative '../../data' require_relative '../../test_utilities' -describe 'PDF Invoice Extraction (Strict Mode)' do - let(:invoice_splitter_5p_path) { File.join(V1_PRODUCT_DATA_DIR, 'invoice_splitter', 'invoice_5p.pdf') } - - def prepare_invoice_return(rst_file_path, invoice_prediction) - rst_content = File.read(rst_file_path) - parsing_version = invoice_prediction.inference.product.version - parsing_id = invoice_prediction.id - - rst_content.gsub!(Mindee::TestUtilities.get_version(rst_content), parsing_version) - 
rst_content.gsub!(Mindee::TestUtilities.get_id(rst_content), parsing_id) - - rst_content - end - - it 'should extract invoices from a PDF (strict mode)' do - client = Mindee::V1::Client.new - invoice_splitter_input = Mindee::Input::Source::PathInputSource.new( - File.join(V1_PRODUCT_DATA_DIR, 'invoice_splitter', 'default_sample.pdf') - ) - response = client.parse( - invoice_splitter_input, Mindee::V1::Product::InvoiceSplitter::InvoiceSplitterV1, options: { close_file: false } - ) - inference = response.document.inference - - pdf_extractor = Mindee::PDF::PDFExtractor.new(invoice_splitter_input) - expect(pdf_extractor.page_count).to eq(2) - - extracted_pdfs_strict = pdf_extractor.extract_invoices(inference.prediction.invoice_page_groups, strict: true) - - expect(extracted_pdfs_strict.length).to eq(2) - expect(extracted_pdfs_strict[0].filename).to eq('default_sample_001-001.pdf') - expect(extracted_pdfs_strict[1].filename).to eq('default_sample_002-002.pdf') - - invoice0 = client.parse(extracted_pdfs_strict[0].as_input_source, Mindee::V1::Product::Invoice::InvoiceV4) - - test_string_rst_invoice0 = prepare_invoice_return( - File.join(V1_PRODUCT_DATA_DIR, 'invoices', 'response_v4', 'summary_full_invoice_p1.rst'), - invoice0.document - ) - - ratio = Mindee::TestUtilities.levenshtein_ratio(invoice0.document.to_s, test_string_rst_invoice0.chomp) - expect(ratio).to be >= 0.90 +if Mindee::Dependency.all_deps_available? # Can't be bypassed by tag as otherwise it will try to load the PDF module. 
+ describe 'PDF Invoice Extraction (Strict Mode)', :all_deps do + let(:invoice_splitter_5p_path) { File.join(V1_PRODUCT_DATA_DIR, 'invoice_splitter', 'invoice_5p.pdf') } + + def prepare_invoice_return(rst_file_path, invoice_prediction) + rst_content = File.read(rst_file_path) + parsing_version = invoice_prediction.inference.product.version + parsing_id = invoice_prediction.id + + rst_content.gsub!(Mindee::TestUtilities.get_version(rst_content), parsing_version) + rst_content.gsub!(Mindee::TestUtilities.get_id(rst_content), parsing_id) + + rst_content + end + + it 'should extract invoices from a PDF (strict mode)' do + client = Mindee::V1::Client.new + invoice_splitter_input = Mindee::Input::Source::PathInputSource.new( + File.join(V1_PRODUCT_DATA_DIR, 'invoice_splitter', 'default_sample.pdf') + ) + response = client.parse( + invoice_splitter_input, Mindee::V1::Product::InvoiceSplitter::InvoiceSplitterV1, options: { close_file: false } + ) + inference = response.document.inference + + pdf_extractor = Mindee::PDF::PDFExtractor.new(invoice_splitter_input) + expect(pdf_extractor.page_count).to eq(2) + + extracted_pdfs_strict = pdf_extractor.extract_invoices(inference.prediction.invoice_page_groups, strict: true) + + expect(extracted_pdfs_strict.length).to eq(2) + expect(extracted_pdfs_strict[0].filename).to eq('default_sample_001-001.pdf') + expect(extracted_pdfs_strict[1].filename).to eq('default_sample_002-002.pdf') + + invoice0 = client.parse(extracted_pdfs_strict[0].as_input_source, Mindee::V1::Product::Invoice::InvoiceV4) + + test_string_rst_invoice0 = prepare_invoice_return( + File.join(V1_PRODUCT_DATA_DIR, 'invoices', 'response_v4', 'summary_full_invoice_p1.rst'), + invoice0.document + ) + + ratio = Mindee::TestUtilities.levenshtein_ratio(invoice0.document.to_s, test_string_rst_invoice0.chomp) + expect(ratio).to be >= 0.90 + end end end diff --git a/spec/v1/extraction/multi_receipts_extractor_spec.rb b/spec/v1/extraction/multi_receipts_extractor_spec.rb index 
d966f1c5..0c5337fc 100644 --- a/spec/v1/extraction/multi_receipts_extractor_spec.rb +++ b/spec/v1/extraction/multi_receipts_extractor_spec.rb @@ -5,7 +5,7 @@ require 'mindee/v1/extraction' require_relative '../../data' -describe 'multi-receipts extraction' do +describe 'multi-receipts extraction', :all_deps do let(:empty_inference) do double('Inference', prediction: double('Prediction', receipts: nil), pages: []) end @@ -15,7 +15,7 @@ end let(:empty_input_source) do - double('InputSource', count_pages: 0) + double('InputSource', page_count: 0) end let(:multi_receipts_single_page_path) do File.join(V1_DATA_DIR, 'products', 'multi_receipts_detector', 'default_sample.jpg') diff --git a/spec/v1/input/sources/url_input_source_integration.rb b/spec/v1/input/sources/url_input_source_integration.rb index 27e33a8a..cf2f4847 100644 --- a/spec/v1/input/sources/url_input_source_integration.rb +++ b/spec/v1/input/sources/url_input_source_integration.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true require 'mindee' +require 'tempfile' describe Mindee::Input::Source::URLInputSource do let(:client) { Mindee::V1::Client.new(api_key: ENV.fetch('MINDEE_API_KEY')) } diff --git a/spec/v2/client_v2_spec.rb b/spec/v2/client_v2_spec.rb index b5daabe0..67236dcb 100644 --- a/spec/v2/client_v2_spec.rb +++ b/spec/v2/client_v2_spec.rb @@ -34,7 +34,7 @@ def build_mock_http_response(hash, status_code = 400, status_msg = 'Bad Request' def stub_next_request_with(method, hash:, status_code: 0) fake_response = build_mock_http_response(hash, status_code) - allow_any_instance_of(Mindee::V2::HTTP::MindeeApi) + allow_any_instance_of(Mindee::V2::HTTP::MindeeApiV2) .to receive(method) .and_return(fake_response) end diff --git a/spec/v2/file_operation/crop_operation_integration.rb b/spec/v2/file_operation/crop_operation_integration.rb index 4d176e7b..f531a026 100644 --- a/spec/v2/file_operation/crop_operation_integration.rb +++ b/spec/v2/file_operation/crop_operation_integration.rb @@ -4,7 +4,7 @@ require 
'mindee/v2/file_operation' require 'mindee/v2/product' -describe Mindee::V2::FileOperation::Crop, :integration, :v2 do +describe Mindee::V2::FileOperation::Crop, :integration, :v2, :all_deps do let(:crop_sample) do File.join(V2_PRODUCT_DATA_DIR, 'crop', 'default_sample.jpg') end diff --git a/spec/v2/file_operation/crop_operation_spec.rb b/spec/v2/file_operation/crop_operation_spec.rb index ca8d86fe..e97277c9 100644 --- a/spec/v2/file_operation/crop_operation_spec.rb +++ b/spec/v2/file_operation/crop_operation_spec.rb @@ -1,12 +1,12 @@ # frozen_string_literal: true require 'json' -require 'mini_magick' +require 'mini_magick' if Mindee::Dependency.all_deps_available? require 'mindee' require 'mindee/v2/file_operation' require 'mindee/v2/product' -describe Mindee::V2::FileOperation::Crop, :v2 do +describe Mindee::V2::FileOperation::Crop, :v2, :all_deps do let(:crops_single_page_path) do File.join(V2_PRODUCT_DATA_DIR, 'crop', 'default_sample.jpg') end diff --git a/spec/v2/file_operation/split_operation_integration.rb b/spec/v2/file_operation/split_operation_integration.rb index 40e699b5..dad47b1d 100644 --- a/spec/v2/file_operation/split_operation_integration.rb +++ b/spec/v2/file_operation/split_operation_integration.rb @@ -5,7 +5,7 @@ require 'mindee/v2/product' require 'fileutils' -describe Mindee::V2::Product::Split::Split, :integration, :v2 do +describe Mindee::V2::Product::Split::Split, :integration, :v2, :all_deps do let(:split_sample) do File.join(V2_PRODUCT_DATA_DIR, 'split', 'default_sample.pdf') end diff --git a/spec/v2/file_operation/split_operation_spec.rb b/spec/v2/file_operation/split_operation_spec.rb index a8977ff4..c0d87fb4 100644 --- a/spec/v2/file_operation/split_operation_spec.rb +++ b/spec/v2/file_operation/split_operation_spec.rb @@ -4,7 +4,7 @@ require 'mindee' require 'mindee/v2/product' -describe Mindee::V2::Product::Split::SplitResponse, :v2 do +describe Mindee::V2::Product::Split::SplitResponse, :v2, :all_deps do let(:splits_default) do 
File.join(V2_PRODUCT_DATA_DIR, 'extraction', 'financial_document', 'default_sample.jpg') end