@@ -428,15 +428,23 @@ def import_outlook_emails(sandbox_id: str, query: str = None, received_after: st
428428
429429 # Parse date if provided
430430 filter_date = None
431+ outlook_date_filter = ""
431432 if received_after :
432433 try :
433434 # Parse YYYY-MM-DD
434435 dt = datetime .strptime (received_after , "%Y-%m-%d" )
435- # Make it timezone-aware (UTC) to compare with Outlook's timezone-aware datetimes
436- # Or simpler: remove timezone info from Outlook date for comparison
437436 filter_date = dt
437+ # Outlook Restrict filter format: [ReceivedTime] >= 'MM/DD/YYYY 00:00 AM'
438+ outlook_date_filter = f"[ReceivedTime] >= '{ dt .strftime ('%m/%d/%Y' )} 00:00 AM'"
438439 except ValueError :
439440 return "Error: received_after must be in YYYY-MM-DD format."
441+
442+ from file_preprocessor import convert_pdf_to_text
443+
def sanitize_filename(name: str, max_length: int = 50) -> str:
    """Return *name* converted into a string usable as a file or folder name.

    The value is coerced with ``str()``, so non-string inputs (for example
    ``None`` or an int) are accepted. Surrounding whitespace is removed,
    every character that Windows forbids in a file name is replaced by an
    underscore, and the result is capped at *max_length* characters.

    Args:
        name: Value to convert; coerced to ``str`` before processing.
        max_length: Maximum number of characters kept in the result.

    Returns:
        The sanitized name. It may be empty if *name* is empty or consists
        only of whitespace.
    """
    text = str(name).strip()
    # Besides < > : " / \ | ? *, ASCII control characters (tabs or line
    # breaks embedded in a subject line) are also invalid in file names.
    cleaned = re.sub(r'[<>:"/\\|?*\x00-\x1f]', '_', text)
    # Strip again after truncating: the cut can land right after a space,
    # which would leave trailing whitespace on the path component.
    return cleaned[:max_length].rstrip()
440448
441449 try :
442450 outlook = win32com .client .Dispatch ('Outlook.Application' ).GetNamespace ('MAPI' )
@@ -449,9 +457,15 @@ def process_folder(folder):
449457 for subfolder in folder .Folders :
450458 process_folder (subfolder )
451459
452- # Outlook items are not always sorted, checking all
453- # Optimization: could restrict folder types? For now, checking all.
454- for message in folder .Items :
460+ # Access items, applying filter if present
461+ items = folder .Items
462+ if outlook_date_filter :
463+ try :
464+ items = items .Restrict (outlook_date_filter )
465+ except Exception :
466+ pass
467+
468+ for message in items :
455469 try :
456470 # Filter by date first
457471 if filter_date :
@@ -489,7 +503,12 @@ def process_folder(folder):
489503 except Exception :
490504 continue
491505
492- save_folder = os .path .join (sandbox_path , "memory" , f"memory_{ unique_id } " )
506+ # Correct Naming: Sender_Subject_ID
507+ sender_name = getattr (message , 'SenderName' , 'Unknown' )
508+ subject_text = getattr (message , 'Subject' , 'No Subject' )
509+ folder_name = f"{ sanitize_filename (sender_name )} _{ sanitize_filename (subject_text )} _{ unique_id [:8 ]} "
510+
511+ save_folder = os .path .join (sandbox_path , "mail" , folder_name )
493512 if os .path .exists (save_folder ):
494513 continue
495514
@@ -498,12 +517,12 @@ def process_folder(folder):
498517 # Save metadata/content
499518 meta = {
500519 "id" : unique_id ,
501- "Subject" : getattr ( message , 'Subject' , 'No Subject' ) ,
520+ "Subject" : subject_text ,
502521 "Body" : getattr (message , 'Body' , '' ),
503522 "ReceivedTime" : str (getattr (message , 'ReceivedTime' , '' )),
504- "Sender" : getattr ( message , 'SenderName' , '' ) ,
523+ "Sender" : sender_name ,
505524 "To" : getattr (message , 'To' , '' ),
506- "Memory " : os . path . relpath ( save_folder , sandbox_path )
525+ "FolderName " : folder . Name
507526 }
508527
509528 with open (os .path .join (save_folder , "email_data.json" ), 'w' , encoding = 'utf-8' ) as f :
@@ -516,6 +535,15 @@ def process_folder(folder):
516535 file_path = os .path .join (save_folder , attachment .FileName )
517536 attachment .SaveAsFile (file_path )
518537
538+ if attachment .FileName .lower ().endswith ('.pdf' ):
539+ try :
540+ content , format_type = convert_pdf_to_text (file_path )
541+ ext = ".txt" if format_type == "text" else ".json"
542+ with open (file_path + ext , 'w' , encoding = 'utf-8' ) as f :
543+ f .write (content )
544+ except Exception :
545+ pass
546+
519547 if attachment .FileName .lower ().endswith ('.zip' ):
520548 try :
521549 with zipfile .ZipFile (file_path , 'r' ) as zip_ref :
0 commit comments