Skip to content

Commit a671df5

Browse files
Davi Arndt
authored and committed
Initial Commit
0 parents  commit a671df5

5 files changed

Lines changed: 99 additions & 0 deletions

File tree

invoices.py

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
import os
import re
from contextlib import closing
from datetime import datetime

import mysql.connector
import pdfplumber
from openpyxl import Workbook
7+
8+
def execute_insert(cursor, invoice_number, invoice_date, file_name, status):
    """Insert one row into the ``invoice_records`` table.

    The four values are handed to ``cursor.execute`` as bound parameters
    rather than being formatted into the statement text. Nothing is
    committed here; committing is left to the caller.

    Args:
        cursor: Object exposing ``execute(sql, params)``.
        invoice_number: Value for the ``invoice_number`` column.
        invoice_date: Value for the ``invoice_date`` column.
        file_name: Value for the ``file_name`` column.
        status: Value for the ``status`` column.
    """
    cursor.execute(
        "INSERT INTO invoice_records (invoice_number, invoice_date, file_name,status) VALUES (%s, %s, %s, %s)",
        (invoice_number, invoice_date, file_name, status),
    )
12+
13+
# Patterns locating the invoice number and date in the first page's text.
# Compiled once here so the per-file loop does not repeat the lookup.
_INVOICE_NUMBER_RE = re.compile(r'INVOICE #(\d+)')
_INVOICE_DATE_RE = re.compile(r'DATE (\d{2}/\d{2}/\d{4})')


def _read_invoice_fields(pdf_path):
    """Return ``(invoice_number, invoice_date)`` read from a PDF's first page.

    Both values are returned as the strings captured by the patterns above.

    Raises:
        Exception: If the invoice number or the invoice date cannot be found
            in the extracted text. Errors from opening or reading the PDF
            (including a PDF with no pages) propagate unchanged.
    """
    with pdfplumber.open(pdf_path) as pdf:
        # NOTE(review): extract_text() may yield None/empty for a page with
        # no text layer (depends on the pdfplumber version) -- fall back to
        # "" so the failure is reported as a missing invoice number rather
        # than an opaque TypeError from the regex search.
        pdf_text = pdf.pages[0].extract_text() or ""

    match_number = _INVOICE_NUMBER_RE.search(pdf_text)
    if not match_number:
        raise Exception("Couldn't find invoice number")

    match_date = _INVOICE_DATE_RE.search(pdf_text)
    if not match_date:
        raise Exception("Couldn't find invoice date")

    return match_number.group(1), match_date.group(1)


def main():
    """Import every file in ``pdf_invoices`` into MySQL and an Excel report.

    For each file, the invoice number and date are read from the first PDF
    page and stored through ``execute_insert``; one row per file (success
    or failure) is also written to a worksheet that is saved as
    ``Invoices - <timestamp>.xlsx`` in the current directory.

    The cursor and connection are always closed, and once the workbook has
    been created it is always saved, even when processing stops early.

    Raises:
        Exception: If the ``pdf_invoices`` directory contains no entries.
    """
    directory = 'pdf_invoices'

    # NOTE(review): connection settings (root user, empty password) are
    # hard-coded; consider moving them to configuration.
    connection = mysql.connector.connect(
        host="localhost",
        user="root",
        password="",
        database="process_invoices",
    )
    # closing() guarantees cursor.close() then db.close() on every exit path.
    with closing(connection) as db, closing(db.cursor()) as cursor:
        print("--- Successfully connected to database... ---")

        # Sorted so the report order is deterministic; os.listdir() makes no
        # ordering promise.
        files = sorted(os.listdir(directory))
        if not files:
            raise Exception("No files found in the directory")

        # Create Excel file
        wb = Workbook()
        ws = wb.active
        ws.title = 'Invoice Imports'

        ws['A1'] = 'Invoice #'
        ws['B1'] = 'Date'
        ws['C1'] = 'File Name'
        ws['D1'] = 'Status'

        next_row = 2  # row 1 holds the headers written above

        try:
            for file in files:
                try:
                    invoice_number, invoice_date = _read_invoice_fields(
                        os.path.join(directory, file)
                    )
                    execute_insert(
                        cursor, invoice_number, invoice_date, file, "Completed"
                    )
                    db.commit()
                except Exception as e:
                    print(f"Error processing file: {e}")
                    status = "Exception: {}".format(e)

                    # Failed rows carry only the file name and the error, so
                    # no half-parsed number/date sits next to an error status.
                    # The sheet is updated before the DB insert so the row is
                    # kept in the report even if that insert itself fails.
                    ws['C{}'.format(next_row)] = file
                    ws['D{}'.format(next_row)] = status

                    # NOTE(review): "N/A" is stored in invoice_date; if that
                    # column is a DATE type this insert may fail -- confirm
                    # against the table schema.
                    execute_insert(cursor, "N/A", "N/A", file, status)
                    db.commit()
                else:
                    # Written only after the insert and commit succeeded.
                    ws['A{}'.format(next_row)] = invoice_number
                    ws['B{}'.format(next_row)] = invoice_date
                    ws['C{}'.format(next_row)] = file
                    ws['D{}'.format(next_row)] = "Completed"

                next_row += 1
        finally:
            # strftime always yields "YYYY-MM-DD HH-MM-SS"; slicing
            # str(datetime.now()) at "." breaks when microseconds are zero
            # because the fractional part is then omitted.
            now = datetime.now().strftime("%Y-%m-%d %H-%M-%S")
            wb.save("Invoices - {}.xlsx".format(now))
97+
98+
if __name__ == "__main__":
    # Run the import only when this file is executed as a script.
    main()

pdf_invoices/Invoice1.pdf

92.9 KB
Binary file not shown.

pdf_invoices/Invoice2.pdf

93.1 KB
Binary file not shown.

pdf_invoices/Invoice3.pdf

93.1 KB
Binary file not shown.

pdf_invoices/Invoice4.pdf

93.1 KB
Binary file not shown.

0 commit comments

Comments
 (0)