Skip to content

Commit 9c67f82

Browse files
authored
Merge pull request #41 from PPeitsch/feature/file-import
[Feature] Implement automatic time data import from PDF and Excel files
2 parents 49302ce + a1fd8f9 commit 9c67f82

19 files changed

Lines changed: 920 additions & 3 deletions

.gitignore

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -163,4 +163,6 @@ cython_debug/
163163
.idea/
164164

165165
# Database migrations
166-
migrations/
166+
# migrations/
167+
timetrack.db
168+
uploads/

app/__init__.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from flask import Flask, redirect, url_for
2-
from flask_migrate import Migrate
2+
from flask_migrate import Migrate # type: ignore
33

44
from app.db.database import db, init_db
55
from app.routes.main import main
@@ -24,4 +24,8 @@ def create_app(config_object):
2424
app.register_blueprint(monthly_log_bp)
2525
app.register_blueprint(settings_bp)
2626

27+
from app.routes.import_log import import_log_bp
28+
29+
app.register_blueprint(import_log_bp)
30+
2731
return app

app/models/models.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ class ScheduleEntry(db.Model): # type: ignore
2727
date = Column(SQLADate, nullable=False)
2828
entries = Column(JSON, nullable=False)
2929
absence_code = Column(String, nullable=True)
30+
observation = Column(String, nullable=True)
3031

3132
employee: Mapped["Employee"] = relationship(
3233
"Employee", back_populates="schedule_entries"

app/routes/import_log.py

Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
import dataclasses
2+
import json
3+
import logging
4+
import os
5+
import pathlib
6+
import shutil
7+
import tempfile
8+
import uuid
9+
from datetime import datetime
10+
from typing import List
11+
12+
from flask import (
13+
Blueprint,
14+
current_app,
15+
flash,
16+
redirect,
17+
render_template,
18+
request,
19+
url_for,
20+
)
21+
from werkzeug.utils import secure_filename
22+
23+
from app.db.database import db
24+
from app.models.models import Employee, ScheduleEntry
25+
from app.services.importer.factory import ImporterFactory
26+
from app.services.importer.protocol import ImportResult
27+
from app.utils.time_calculator import calculate_daily_hours
28+
29+
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
30+
31+
32+
# Blueprint for the import workflow; every route below is served under /import.
import_log_bp = Blueprint("import_log", __name__, url_prefix="/import")
33+
34+
# Configure upload folder
# NOTE: the path is resolved against the process working directory at import
# time, so it depends on where the application is started from.
35+
UPLOAD_FOLDER = os.path.join(os.getcwd(), "uploads")
36+
# Created eagerly as an import-time side effect; exist_ok makes re-imports safe.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
37+
38+
39+
@import_log_bp.route("/", methods=["GET", "POST"])
def upload_file():
    """Render the upload form (GET) or store an uploaded file (POST).

    On POST the uploaded file is validated by extension, saved in
    ``UPLOAD_FOLDER`` under a freshly generated UUID (the client-supplied
    name is only used to derive the extension), and the user is redirected
    to the preview page for that upload.

    Returns:
        The rendered upload template, or a redirect response.
    """
    if request.method != "POST":
        return render_template("import_upload.html")

    if "file" not in request.files:
        flash("No file part", "error")
        return redirect(request.url)

    file = request.files["file"]
    # Browsers submit an empty filename when no file was chosen; a missing
    # filename is treated the same way instead of silently re-rendering.
    if not file.filename:
        flash("No selected file", "error")
        return redirect(request.url)

    filename = secure_filename(file.filename)
    # splitext yields "" when the name has no dot, so a file literally named
    # "pdf" (or ".pdf", which secure_filename reduces to "pdf") is rejected
    # rather than being mistaken for a PDF.
    file_ext = os.path.splitext(filename)[1].lstrip(".").lower()

    if file_ext not in ("pdf", "xlsx", "xls"):
        flash("Unsupported file type", "error")
        return redirect(request.url)

    # Generate unique ID for this upload
    upload_id = str(uuid.uuid4())
    filepath = os.path.join(UPLOAD_FOLDER, f"{upload_id}.{file_ext}")
    file.save(filepath)

    return redirect(url_for("import_log.preview", upload_id=upload_id))
68+
69+
70+
@import_log_bp.route("/preview/<upload_id>", methods=["GET"])
def preview(upload_id):
    """Parse a previously uploaded file and show its records for review.

    Args:
        upload_id: Identifier returned by the upload step.

    Returns:
        The rendered preview template, or a redirect back to the upload
        form when the file is missing or cannot be parsed.
    """
    filepath = _get_filepath(upload_id)
    if not filepath:
        flash("File not found or expired", "error")
        return redirect(url_for("import_log.upload_file"))

    try:
        importer = ImporterFactory.get_importer(filepath)
        with open(filepath, "rb") as f:
            content = f.read()
        result = importer.parse(content)

        return render_template(
            "import_preview.html", result=result, upload_id=upload_id
        )
    except Exception as e:
        # Keep the traceback in the server log; the user only sees the
        # short message below.
        logger.exception("Failed to build preview for upload %s", upload_id)
        flash(f"Error parsing file: {str(e)}", "error")
        return redirect(url_for("import_log.upload_file"))
90+
91+
92+
@import_log_bp.route("/confirm/<upload_id>", methods=["POST"])
def confirm(upload_id):
    """Persist the valid records of a previously uploaded file.

    The file is parsed again, every valid record is written as a
    ``ScheduleEntry`` (updating the entry when one already exists for the
    same employee and date), and the uploaded file is deleted afterwards.

    Args:
        upload_id: Identifier returned by the upload step.

    Returns:
        A redirect to the monthly log on success, to the preview page when
        the import fails, or to the upload form when the file is missing.
    """
    filepath = _get_filepath(upload_id)
    if not filepath:
        flash("File not found or expired", "error")
        return redirect(url_for("import_log.upload_file"))

    # TODO: let the user pick the employee during upload/preview; until then
    # every import is attributed to employee 1.
    employee_id = 1

    try:
        importer = ImporterFactory.get_importer(filepath)
        with open(filepath, "rb") as f:
            content = f.read()
        result = importer.parse(content)

        # Import valid records
        count = 0
        for record in result.records:
            if not record.is_valid:
                continue

            entry_date = datetime.strptime(record.date, "%Y-%m-%d").date()
            existing = ScheduleEntry.query.filter_by(
                employee_id=employee_id, date=entry_date
            ).first()

            entries_data = []
            if record.entry_time and record.exit_time:
                entries_data.append(
                    {"entry": record.entry_time, "exit": record.exit_time}
                )

            if existing:
                # Imported data overwrites what was stored for that day.
                existing.entries = entries_data
                existing.observation = record.observation
                # Actual worked hours imply a normal work day, so any
                # previously recorded absence is cleared.
                if entries_data:
                    existing.absence_code = None
            else:
                db.session.add(
                    ScheduleEntry(
                        employee_id=employee_id,
                        date=entry_date,
                        entries=entries_data,
                        observation=record.observation,
                    )
                )
            count += 1

        db.session.commit()
    except Exception as e:
        db.session.rollback()
        logger.exception("Failed to import upload %s", upload_id)
        flash(f"Error importing data: {str(e)}", "error")
        return redirect(url_for("import_log.preview", upload_id=upload_id))

    # Cleanup is best-effort: the data is already committed, so a failure to
    # delete the temporary file must not be reported as a failed import.
    try:
        os.remove(filepath)
    except OSError:
        logger.warning(
            "Could not remove imported file %s", filepath, exc_info=True
        )

    flash(f"Successfully imported {count} records", "success")
    return redirect(url_for("monthly_log.view_monthly_log"))
156+
157+
158+
@import_log_bp.route("/cancel/<upload_id>", methods=["POST"])
def cancel(upload_id):
    """Discard a pending upload and return to the upload form.

    Deleting the file is best-effort: a file that is already gone or cannot
    be removed does not prevent the redirect.

    Args:
        upload_id: Identifier returned by the upload step.

    Returns:
        A redirect to the upload form.
    """
    filepath = _get_filepath(upload_id)
    if filepath:
        try:
            os.remove(filepath)
        except OSError:
            # Only filesystem errors are expected here; anything else (and
            # KeyboardInterrupt/SystemExit) should not be swallowed.
            logger.warning(
                "Could not remove cancelled upload %s", filepath, exc_info=True
            )
    return redirect(url_for("import_log.upload_file"))
167+
168+
169+
def _get_filepath(upload_id):
    """Return the path of the stored upload with this id, or ``None``.

    ``upload_id`` comes straight from the URL, so it is only accepted when
    it is a canonical UUID string (the form produced by ``str(uuid.uuid4())``
    at upload time) and it must equal the stored file's name without
    extension. Prefix matching would let a short value such as ``"1"``
    resolve to somebody else's upload.

    Args:
        upload_id: Identifier taken from the request path.

    Returns:
        The full path of the matching file inside ``UPLOAD_FOLDER``, or
        ``None`` when the id is malformed or no such file exists.
    """
    try:
        if str(uuid.UUID(upload_id)) != upload_id:
            return None
    except (ValueError, TypeError, AttributeError):
        return None

    try:
        candidates = os.listdir(UPLOAD_FOLDER)
    except FileNotFoundError:
        # The folder was removed after startup: nothing can be pending.
        return None

    for name in candidates:
        if os.path.splitext(name)[0] == upload_id:
            return os.path.join(UPLOAD_FOLDER, name)
    return None
app/services/importer/excel_importer.py

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
import io
2+
from typing import Any, List, Optional
3+
4+
import pandas as pd # type: ignore
5+
6+
from app.services.importer.protocol import (
7+
ImporterProtocol,
8+
ImportResult,
9+
TimeEntryRecord,
10+
)
11+
from app.utils.validators import validate_date, validate_time_format
12+
13+
14+
class ExcelImporter(ImporterProtocol):
    """Importer that reads time entries from an Excel workbook."""

    def parse(self, file_content: Any) -> ImportResult:
        """Parse the raw bytes of an Excel file into time entry records.

        Columns are located by header name (Spanish or English). Rows with
        an empty date are skipped; every other row becomes a record that is
        flagged invalid when its date or one of its times fails validation.

        Args:
            file_content: Raw bytes of the workbook.

        Returns:
            An ``ImportResult`` with the parsed records, the total and valid
            record counts, and any file-level error messages.
        """
        records: List[TimeEntryRecord] = []
        errors: List[str] = []

        try:
            df = pd.read_excel(io.BytesIO(file_content))

            # Normalize headers so matching is case/whitespace insensitive.
            df.columns = df.columns.astype(str).str.lower().str.strip()
            col_map = self._map_columns(df.columns)

            if "date" not in col_map:
                errors.append("Could not find 'Fecha' or 'Date' column")
                return ImportResult([], 0, 0, errors)

            for _, row in df.iterrows():
                date_val = row[col_map["date"]]
                if pd.isna(date_val):
                    continue

                date_str = self._format_date(date_val)

                entry_val = row[col_map["entry"]] if "entry" in col_map else None
                exit_val = row[col_map["exit"]] if "exit" in col_map else None
                obs_val = row[col_map["obs"]] if "obs" in col_map else None

                entry_str = self._format_time(entry_val)
                exit_str = self._format_time(exit_val)

                # Only the first failing check is reported for a row.
                is_valid = True
                error_msg = None

                if not validate_date(date_str):
                    is_valid = False
                    error_msg = f"Invalid date format: {date_str}"
                elif entry_str and not validate_time_format(entry_str):
                    is_valid = False
                    error_msg = f"Invalid entry time: {entry_str}"
                elif exit_str and not validate_time_format(exit_str):
                    is_valid = False
                    error_msg = f"Invalid exit time: {exit_str}"

                records.append(
                    TimeEntryRecord(
                        date=date_str,
                        entry_time=entry_str,
                        exit_time=exit_str,
                        observation=str(obs_val) if pd.notna(obs_val) else None,
                        is_valid=is_valid,
                        error_message=error_msg,
                    )
                )

        except Exception as e:
            # Rows parsed before the failure are still returned alongside
            # the error message.
            errors.append(f"Error parsing Excel: {str(e)}")

        valid_records = sum(1 for r in records if r.is_valid)
        return ImportResult(records, len(records), valid_records, errors)

    @staticmethod
    def _map_columns(columns: Any) -> dict:
        """Map logical field names to the normalized headers that hold them.

        The short English keywords ``in`` and ``out`` must appear as whole
        words ("time in", "check-out"); matching them as substrings would
        wrongly pick up headers such as "hora fin" or "minutes". When several
        headers match the same field, the last one wins.
        """
        col_map = {}
        for col in columns:
            # Split on anything that is not a letter or digit to get words.
            words = "".join(ch if ch.isalnum() else " " for ch in col).split()
            if "fecha" in col or "date" in col:
                col_map["date"] = col
            elif "entrada" in col or "in" in words:
                col_map["entry"] = col
            elif "salida" in col or "out" in words:
                col_map["exit"] = col
            elif "observ" in col or "note" in col:
                col_map["obs"] = col
        return col_map

    @staticmethod
    def _format_date(val: Any) -> str:
        """Return *val* as ``YYYY-MM-DD`` when it is date-like, else as text."""
        # pd.Timestamp, datetime.datetime and datetime.date all provide
        # strftime; plain str() of a datetime would include the time part.
        if hasattr(val, "strftime"):
            return str(val.strftime("%Y-%m-%d"))
        return str(val).strip()

    def _format_time(self, val: Any) -> Optional[str]:
        """Return *val* as an ``HH:MM`` string, or ``None`` when it is empty.

        Time-like objects are formatted directly. Text of the form
        ``H:MM`` or ``HH:MM:SS`` is normalized to ``HH:MM`` so it matches
        the format produced for time objects. Any other text is returned
        stripped and unchanged so the caller's validation can report it.
        """
        if pd.isna(val):
            return None

        # Covers pd.Timestamp, datetime.datetime and datetime.time.
        if hasattr(val, "strftime"):
            return str(val.strftime("%H:%M"))

        text = str(val).strip()
        if not text:
            return None

        parts = text.split(":")
        if len(parts) in (2, 3) and all(p.isdecimal() for p in parts):
            numbers = [int(p) for p in parts]
            if all(n < limit for n, limit in zip(numbers, (24, 60, 60))):
                return f"{numbers[0]:02d}:{numbers[1]:02d}"
        return text

app/services/importer/factory.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
from typing import Dict, Type
2+
3+
from app.services.importer.excel_importer import ExcelImporter
4+
from app.services.importer.pdf_importer import PDFImporter
5+
from app.services.importer.protocol import ImporterProtocol
6+
7+
8+
class ImporterFactory:
    """Select the importer implementation that matches a file's extension."""

    # Lower-case extension (without the dot) -> importer class.
    _importers: Dict[str, Type[ImporterProtocol]] = {
        "pdf": PDFImporter,
        "xlsx": ExcelImporter,
        "xls": ExcelImporter,
    }

    @classmethod
    def get_importer(cls, filename: str) -> ImporterProtocol:
        """Return a new importer instance for *filename*.

        Args:
            filename: File name or path; only its extension is inspected,
                case-insensitively.

        Returns:
            An instance of the importer registered for the extension.

        Raises:
            ValueError: If the extension is missing or not supported.
        """
        import os

        # splitext only looks at the final path component, so a dot in a
        # directory name ("/srv/app.v2/uploads/abc") is not mistaken for an
        # extension the way split(".")[-1] would.
        ext = os.path.splitext(filename)[1].lstrip(".").lower()
        importer_class = cls._importers.get(ext)
        if not importer_class:
            raise ValueError(f"Unsupported file extension: {ext}")
        return importer_class()

0 commit comments

Comments
 (0)