1+ import os
2+ from openpyxl import Workbook
3+ import pdfplumber
4+ import re
5+ from datetime import datetime
6+ import mysql .connector
7+
def execute_insert(cursor, invoice_number, invoice_date, file_name, status):
    """Insert one row into ``invoice_records`` through ``cursor``.

    The statement is parameterised: the four values are handed to
    ``cursor.execute`` separately from the SQL text. Nothing is committed
    here; the caller decides when to commit.

    Args:
        cursor: Object exposing ``execute(sql, params)``.
        invoice_number: Value for the ``invoice_number`` column.
        invoice_date: Value for the ``invoice_date`` column.
        file_name: Value for the ``file_name`` column.
        status: Value for the ``status`` column.
    """
    row = (invoice_number, invoice_date, file_name, status)
    cursor.execute(
        "INSERT INTO invoice_records (invoice_number, invoice_date, file_name,status) "
        "VALUES (%s, %s, %s, %s)",
        row,
    )
12+
def _report_file_name(moment):
    """Return the report file name for ``moment``, with second precision."""
    # strftime never emits fractional seconds, so unlike slicing
    # str(datetime.now()) at "." this cannot raise ValueError when the
    # microsecond field happens to be 0 (str() omits the fraction then).
    return "Invoices - {}.xlsx".format(moment.strftime("%Y-%m-%d %H-%M-%S"))


def _process_invoice_files(db, cursor, directory):
    """Record every file in ``directory`` in the database and in a workbook."""
    # Get files from directory
    files = os.listdir(directory)
    if not files:
        raise Exception("No files found in the directory")

    # Create Excel file
    wb = Workbook()
    ws = wb.active
    ws.title = 'Invoice Imports'

    ws['A1'] = 'Invoice #'
    ws['B1'] = 'Date'
    ws['C1'] = 'File Name'
    ws['D1'] = 'Status'

    # Find the first row whose Status cell is still empty (the row right
    # below the header on a fresh workbook).
    next_row = 1
    while ws[f"D{next_row}"].value is not None:
        next_row += 1

    # The patterns do not change between files, so compile them once.
    number_re = re.compile(r'INVOICE #(\d+)')
    date_re = re.compile(r'DATE (\d{2}/\d{2}/\d{4})')

    try:
        # WORK
        for file in files:
            try:
                with pdfplumber.open(os.path.join(directory, file)) as pdf:
                    # Only the first page is inspected. If no text comes back
                    # (None), fall back to "" so the failure is reported as a
                    # missing invoice number rather than a TypeError from re.
                    pdf_text = pdf.pages[0].extract_text() or ""

                match_number = number_re.search(pdf_text)
                match_date = date_re.search(pdf_text)

                if not match_number:
                    raise Exception("Couldn't find invoice number")
                ws[f'A{next_row}'] = match_number.group(1)

                if not match_date:
                    raise Exception("Couldn't find invoice date")
                ws[f'B{next_row}'] = match_date.group(1)

                ws[f'C{next_row}'] = file
                ws[f'D{next_row}'] = "Completed"

                execute_insert(cursor, match_number.group(1), match_date.group(1), file, "Completed")
                db.commit()

            except Exception as e:
                # Per-file boundary: any failure is recorded against the file
                # and the run continues with the next one.
                print(f"Error processing file: {e} ")

                status = "Exception: {}".format(e)
                ws[f'C{next_row}'] = file
                ws[f'D{next_row}'] = status

                try:
                    # Discard anything left pending by the failed attempt
                    # before storing the failure record.
                    db.rollback()
                    execute_insert(cursor, "N/A", "N/A", file, status)
                    db.commit()
                except Exception as db_error:
                    # The workbook row above already documents the failure;
                    # a database problem here must not abort the whole run.
                    print(f"Could not record failure for {file}: {db_error}")

            next_row += 1
    finally:
        # Save whatever was collected, even if the loop was interrupted.
        wb.save(_report_file_name(datetime.now()))


def main():
    """Import invoice PDFs into the database and an Excel report.

    Connects to the local ``process_invoices`` MySQL database, reads the
    first page of every file in ``pdf_invoices``, extracts the invoice number
    and date with regular expressions, and records one row per file both in
    the database (through ``execute_insert``) and in a workbook saved as
    ``Invoices - <timestamp>.xlsx``. Files that cannot be processed are
    recorded with an ``Exception: ...`` status instead of stopping the run.

    The cursor and the connection are always closed, including when the
    directory is empty or processing is interrupted.

    Raises:
        Exception: If the ``pdf_invoices`` directory contains no files.
    """
    # STARTUP

    # Database Connection
    db = mysql.connector.connect(
        host="localhost",
        user="root",
        password="",
        database="process_invoices",
    )
    try:
        cursor = db.cursor()
        try:
            print("--- Successfully connected to database... ---")
            _process_invoice_files(db, cursor, 'pdf_invoices')
        finally:
            cursor.close()
    finally:
        db.close()
97+
# Run the import only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()
0 commit comments