-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathauto_invoice_splitter_extraction.rb
More file actions
48 lines (40 loc) · 1.43 KB
/
auto_invoice_splitter_extraction.rb
File metadata and controls
48 lines (40 loc) · 1.43 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# frozen_string_literal: true
require 'mindee'
# Loads the file at +file_path+ through a Mindee V1 client and dispatches it:
# PDFs with more than one page go to +parse_multi_page+, everything else
# goes to +parse_single_page+.
#
# NOTE(review): the client is built with the literal key 'my-api-key', which
# looks like a placeholder to be replaced before real use.
#
# @param file_path [String] path passed to the client's +source_from_path+.
# @return whatever the selected +parse_*+ helper returns.
def invoice_splitter_auto_extraction(file_path)
  client = Mindee::V1::Client.new(api_key: 'my-api-key')
  source = client.source_from_path(file_path)

  # +page_count+ is only consulted when the source reports itself as a PDF.
  needs_splitting = source.pdf? && source.page_count > 1
  return parse_multi_page(client, source) if needs_splitting

  parse_single_page(client, source)
end
# Parses +input_source+ as a single invoice with the InvoiceV4 product and
# prints the resulting document to standard output.
#
# @param mindee_client [#parse] client used to run the parse call.
# @param input_source input handed to +mindee_client.parse+ unchanged.
# @return [nil] the return value of +puts+.
def parse_single_page(mindee_client, input_source)
  invoice_product = Mindee::V1::Product::Invoice::InvoiceV4
  parsed_invoice = mindee_client.parse(input_source, invoice_product)
  puts parsed_invoice.document
end
# Splits a multi-page input into individual invoices and parses each one.
#
# Steps, in order:
# 1. build a PDFExtractor over +input_source+;
# 2. run the InvoiceSplitterV1 product on +input_source+ to obtain the
#    invoice page groups;
# 3. extract one PDF per page group (with +strict: false+);
# 4. parse every extracted PDF with InvoiceV4 and print its document.
#
# NOTE(review): +close_file: false+ presumably keeps the underlying file open
# so the same source can be reused after parsing — confirm against the SDK.
#
# @param mindee_client [#parse] client used for both the splitter and invoice calls.
# @param input_source input handed to the extractor and to the splitter parse call.
# @return the result of calling +each+ on the extracted PDFs.
def parse_multi_page(mindee_client, input_source)
  # The extractor is created before the splitter request, as in the original flow.
  extractor = Mindee::PDF::PDFExtractor.new(input_source)

  splitter_response = mindee_client.parse(
    input_source,
    Mindee::V1::Product::InvoiceSplitter::InvoiceSplitterV1,
    options: { close_file: false }
  )
  splitter_prediction = splitter_response.document.inference.prediction

  extractor.extract_invoices(splitter_prediction.invoice_page_groups, strict: false).each do |invoice_pdf|
    # Optional: Save the files locally
    # invoice_pdf.write_to_file("output/path")
    parsed_invoice = mindee_client.parse(
      invoice_pdf.as_input_source,
      Mindee::V1::Product::Invoice::InvoiceV4,
      options: { close_file: false }
    )
    puts parsed_invoice.document
  end
end
# Script entry point: runs the extraction on a placeholder path.
# Replace the path below with the location of a real document before running.
sample_document_path = '/path/to/the/file.ext'
invoice_splitter_auto_extraction(sample_document_path)