Skip to content

Commit f5d2b42

Browse files
author
Jonathan Sprauel
committed
continued improving mail tool
1 parent 721192b commit f5d2b42

1 file changed

Lines changed: 37 additions & 9 deletions

File tree

tools.py

Lines changed: 37 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -428,15 +428,23 @@ def import_outlook_emails(sandbox_id: str, query: str = None, received_after: st
428428

429429
# Parse date if provided
430430
filter_date = None
431+
outlook_date_filter = ""
431432
if received_after:
432433
try:
433434
# Parse YYYY-MM-DD
434435
dt = datetime.strptime(received_after, "%Y-%m-%d")
435-
# Make it timezone-aware (UTC) to compare with Outlook's timezone-aware datetimes
436-
# Or simpler: remove timezone info from Outlook date for comparison
437436
filter_date = dt
437+
# Outlook Restrict filter format: [ReceivedTime] >= 'MM/DD/YYYY 00:00 AM'
438+
outlook_date_filter = f"[ReceivedTime] >= '{dt.strftime('%m/%d/%Y')} 00:00 AM'"
438439
except ValueError:
439440
return "Error: received_after must be in YYYY-MM-DD format."
441+
442+
from file_preprocessor import convert_pdf_to_text
443+
444+
def sanitize_filename(name: str) -> str:
    """Return *name* made safe for use as a single Windows path component.

    The value is converted with ``str()``, surrounding whitespace is removed,
    every character Windows forbids in a file name is replaced with an
    underscore, and the result is cut to at most 50 characters.

    Args:
        name: Arbitrary text (for example a sender name or a mail subject).
            Non-string values are stringified first.

    Returns:
        The sanitized text, at most 50 characters long. May be empty if
        *name* is empty or whitespace only.
    """
    # Strip before substituting so leading/trailing whitespace (including
    # newlines and tabs) is dropped instead of being turned into underscores.
    text = str(name).strip()
    # Besides < > : " / \ | ? *, Windows also rejects the ASCII control
    # characters 0x00-0x1F; an embedded tab or line break would otherwise
    # make the folder impossible to create.
    return re.sub(r'[<>:"/\\|?*\x00-\x1f]', '_', text)[:50]  # Limit length
440448

441449
try:
442450
outlook = win32com.client.Dispatch('Outlook.Application').GetNamespace('MAPI')
@@ -449,9 +457,15 @@ def process_folder(folder):
449457
for subfolder in folder.Folders:
450458
process_folder(subfolder)
451459

452-
# Outlook items are not always sorted, checking all
453-
# Optimization: could restrict folder types? For now, checking all.
454-
for message in folder.Items:
460+
# Access items, applying filter if present
461+
items = folder.Items
462+
if outlook_date_filter:
463+
try:
464+
items = items.Restrict(outlook_date_filter)
465+
except Exception:
466+
pass
467+
468+
for message in items:
455469
try:
456470
# Filter by date first
457471
if filter_date:
@@ -489,7 +503,12 @@ def process_folder(folder):
489503
except Exception:
490504
continue
491505

492-
save_folder = os.path.join(sandbox_path, "memory", f"memory_{unique_id}")
506+
# Correct Naming: Sender_Subject_ID
507+
sender_name = getattr(message, 'SenderName', 'Unknown')
508+
subject_text = getattr(message, 'Subject', 'No Subject')
509+
folder_name = f"{sanitize_filename(sender_name)}_{sanitize_filename(subject_text)}_{unique_id[:8]}"
510+
511+
save_folder = os.path.join(sandbox_path, "mail", folder_name)
493512
if os.path.exists(save_folder):
494513
continue
495514

@@ -498,12 +517,12 @@ def process_folder(folder):
498517
# Save metadata/content
499518
meta = {
500519
"id": unique_id,
501-
"Subject": getattr(message, 'Subject', 'No Subject'),
520+
"Subject": subject_text,
502521
"Body": getattr(message, 'Body', ''),
503522
"ReceivedTime": str(getattr(message, 'ReceivedTime', '')),
504-
"Sender": getattr(message, 'SenderName', ''),
523+
"Sender": sender_name,
505524
"To": getattr(message, 'To', ''),
506-
"Memory": os.path.relpath(save_folder, sandbox_path)
525+
"FolderName": folder.Name
507526
}
508527

509528
with open(os.path.join(save_folder, "email_data.json"), 'w', encoding='utf-8') as f:
@@ -516,6 +535,15 @@ def process_folder(folder):
516535
file_path = os.path.join(save_folder, attachment.FileName)
517536
attachment.SaveAsFile(file_path)
518537

538+
if attachment.FileName.lower().endswith('.pdf'):
539+
try:
540+
content, format_type = convert_pdf_to_text(file_path)
541+
ext = ".txt" if format_type == "text" else ".json"
542+
with open(file_path + ext, 'w', encoding='utf-8') as f:
543+
f.write(content)
544+
except Exception:
545+
pass
546+
519547
if attachment.FileName.lower().endswith('.zip'):
520548
try:
521549
with zipfile.ZipFile(file_path, 'r') as zip_ref:

0 commit comments

Comments
 (0)