1- import os
2- import json
3- import base64
4- import difflib
5- import re
6- import hashlib
7- import zipfile
8- import io
1+ import logging
92from typing import Any , Dict , List , Union
103from datetime import datetime
114
5+ logger = logging .getLogger (__name__ )
6+
127try :
138 import docx
149except ImportError :
@@ -609,13 +604,16 @@ def import_outlook_emails(sandbox_id: str, query: str = None, received_after: st
609604 Import emails from Outlook based on a grep-like query and optional time boundary.
610605 Only works on Windows.
611606 """
607+ logger .info (f"Starting import_outlook_emails: query='{ query } ', received_after='{ received_after } '" )
612608 try :
613609 import win32com .client
614610 except ImportError :
611+ logger .error ("pywin32 not installed, Outlook import unavailable." )
615612 return "Error: Outlook import tool is only available on Windows systems with pywin32 installed."
616613
617614 sandbox_path = get_sandbox_path (sandbox_id )
618615 if not os .path .exists (sandbox_path ):
616+ logger .error (f"Sandbox path does not exist: { sandbox_path } " )
619617 return "Error: Sandbox directory does not exist."
620618
621619 processed_count = 0
@@ -631,7 +629,9 @@ def import_outlook_emails(sandbox_id: str, query: str = None, received_after: st
631629 filter_date = dt
632630 # Outlook Restrict filter format: [ReceivedTime] >= 'MM/DD/YYYY 00:00 AM'
633631 outlook_date_filter = f"[ReceivedTime] >= '{ dt .strftime ('%m/%d/%Y' )} 00:00 AM'"
632+ logger .info (f"Using date filter: { outlook_date_filter } " )
634633 except ValueError :
634+ logger .warning (f"Invalid date format: { received_after } " )
635635 return "Error: received_after must be in YYYY-MM-DD format."
636636
637637 from file_preprocessor import convert_pdf_to_text
@@ -643,6 +643,7 @@ def sanitize_filename(name: str) -> str:
643643 def _save_outlook_message (message , folder_name_source : str ) -> str :
644644 """Helper to save an Outlook message and its attachments."""
645645 nonlocal processed_count
646+ save_folder = None
646647 try :
647648 # Filter for emails specifically if possible, but handle others
648649 # olMail=43, olAppointment=26, olMeetingRequest=53
@@ -665,14 +666,17 @@ def _save_outlook_message(message, folder_name_source: str) -> str:
665666 sender_name = getattr (message , 'SenderName' , '' ) or getattr (message , 'SenderEmailAddress' , 'Unknown' )
666667 elif item_class == 26 : # AppointmentItem
667668 sender_name = getattr (message , 'Organizer' , 'Unknown' )
668- except : pass
669+ except Exception as e :
670+ logger .debug (f"Could not get sender name: { e } " )
669671
670672 folder_name = f"{ sanitize_filename (sender_name )} _{ sanitize_filename (subj )} _{ unique_id [:8 ]} "
671673 save_folder = os .path .join (sandbox_path , "mail" , folder_name )
672674
673675 if os .path .exists (save_folder ):
676+ logger .debug (f"Email folder already exists, skipping: { folder_name } " )
674677 return None
675678
679+ logger .info (f"Saving email: { subj } from { sender_name } " )
676680 os .makedirs (save_folder , exist_ok = True )
677681
678682 meta = {
@@ -681,48 +685,66 @@ def _save_outlook_message(message, folder_name_source: str) -> str:
681685 "Subject" : subj ,
682686 "Body" : body ,
683687 "HTMLBody" : html_body [:5000 ] if html_body else "" , # Truncate HTML body for meta
684- "ReceivedTime" : str (getattr (message , 'ReceivedTime' , '' )),
688+ "ReceivedTime" : str (getattr (message , 'ReceivedTime' , 'Unknown ' )),
685689 "Sender" : sender_name ,
686690 "To" : getattr (message , 'To' , '' ),
687691 "FolderName" : folder_name_source ,
688692 "ItemClass" : item_class
689693 }
690694
691- with open (os .path .join (save_folder , "email_data.json" ), 'w' , encoding = 'utf-8' ) as f :
695+ data_file = os .path .join (save_folder , "email_data.json" )
696+ with open (data_file , 'w' , encoding = 'utf-8' ) as f :
692697 json .dump (meta , f , indent = 2 )
698+ logger .debug (f"Saved email metadata to { data_file } " )
693699
694700 if hasattr (message , 'Attachments' ):
695- for attachment in message .Attachments :
701+ for i , attachment in enumerate ( message .Attachments ) :
696702 try :
697- file_path = os .path .join (save_folder , attachment .FileName )
703+ att_name = getattr (attachment , 'FileName' , f'attachment_{ i } ' )
704+ file_path = os .path .join (save_folder , att_name )
698705 attachment .SaveAsFile (file_path )
706+ logger .debug (f"Saved attachment: { att_name } " )
699707
700- if attachment . FileName .lower ().endswith ('.pdf' ):
708+ if att_name .lower ().endswith ('.pdf' ):
701709 try :
710+ logger .debug (f"Converting PDF attachment to text: { att_name } " )
702711 content , format_type = convert_pdf_to_text (file_path )
703712 ext = ".txt" if format_type == "text" else ".json"
704713 with open (file_path + ext , 'w' , encoding = 'utf-8' ) as f :
705714 f .write (content )
706- except Exception : pass
715+ except Exception as pdf_e :
716+ logger .warning (f"Failed to convert PDF { att_name } : { pdf_e } " )
707717
708- if attachment . FileName .lower ().endswith ('.zip' ):
718+ if att_name .lower ().endswith ('.zip' ):
709719 try :
720+ logger .debug (f"Extracting ZIP attachment: { att_name } " )
710721 with zipfile .ZipFile (file_path , 'r' ) as zip_ref :
711722 zip_ref .extractall (save_folder )
712- except Exception : pass
713- except Exception : pass
723+ except Exception as zip_e :
724+ logger .warning (f"Failed to extract ZIP { att_name } : { zip_e } " )
725+ except Exception as att_e :
726+ logger .warning (f"Error processing attachment { i } : { att_e } " )
714727
715728 processed_count += 1
716729 return os .path .relpath (save_folder , sandbox_path )
717- except Exception :
730+ except Exception as e :
731+ logger .error (f"Error saving outlook message '{ getattr (message , 'Subject' , 'Unknown' )} ': { e } " , exc_info = True )
732+ # Cleanup empty folder if we failed early
733+ if save_folder and os .path .exists (save_folder ) and not os .listdir (save_folder ):
734+ try :
735+ os .rmdir (save_folder )
736+ logger .debug (f"Cleaned up empty folder: { save_folder } " )
737+ except : pass
718738 return None
719739
720740 try :
741+ logger .info ("Connecting to Outlook.Application..." )
721742 outlook_app = win32com .client .Dispatch ('Outlook.Application' )
722743 namespace = outlook_app .GetNamespace ('MAPI' )
723744
724745 # 1. Strategy: Windows Search Index (ADODB) - Fast!
725746 if query :
747+ logger .info (f"Attempting Windows Search Index for query: { query } " )
726748 try :
727749 conn = win32com .client .Dispatch ("ADODB.Connection" )
728750 conn .Open ("Provider=Search.CollatorDSO;Extended Properties='Application=Windows';" )
@@ -739,6 +761,7 @@ def _save_outlook_message(message, folder_name_source: str) -> str:
739761 sql += " ORDER BY System.DateModified DESC"
740762
741763 rs = conn .Execute (sql )[0 ]
764+ idx_found = 0
742765 while not rs .EOF :
743766 if processed_count >= 50 : break
744767 item_url = rs .Fields .Item (0 ).Value # e.g. outlook:00000000...
@@ -748,24 +771,31 @@ def _save_outlook_message(message, folder_name_source: str) -> str:
748771 entry_id = item_url .split ("outlook:" )[1 ]
749772 message = namespace .GetItemFromID (entry_id )
750773 path = _save_outlook_message (message , "WindowsIndex" )
751- if path : saved_paths .append (path )
752- except : pass
774+ if path :
775+ saved_paths .append (path )
776+ idx_found += 1
777+ except Exception as e :
778+ logger .debug (f"Failed to retrieve item from ID { item_url } : { e } " )
753779 rs .MoveNext ()
754780 rs .Close ()
755781 conn .Close ()
756- except Exception :
757- pass
782+ logger .info (f"Windows Search Index strategy found { idx_found } items." )
783+ except Exception as e :
784+ logger .warning (f"Windows Search Index strategy failed: { e } " )
758785
759786 # 2. Strategy: DASL Filter (Outlook Native Search) - Medium Speed
760787 if processed_count < 10 : # Only if ADODB didn't yield much
788+ logger .info ("Attempting DASL strategy (Outlook Native Search)..." )
761789 targets = [namespace .GetDefaultFolder (6 ), namespace .GetDefaultFolder (5 )]
762790
763791 dasl_query = ""
764792 if query :
793+ # Basic DASL Escaping for simple queries
794+ clean_query = query .replace ("'" , "''" )
765795 dasl_query = (
766- f"@SQL=\" urn:schemas:httpmail:subject\" LIKE '%{ query } %' OR "
767- f"\" urn:schemas:httpmail:textdescription\" LIKE '%{ query } %' OR "
768- f"\" urn:schemas:httpmail:fromname\" LIKE '%{ query } %'"
796+ f"@SQL=\" urn:schemas:httpmail:subject\" LIKE '%{ clean_query } %' OR "
797+ f"\" urn:schemas:httpmail:textdescription\" LIKE '%{ clean_query } %' OR "
798+ f"\" urn:schemas:httpmail:fromname\" LIKE '%{ clean_query } %'"
769799 )
770800
771801 if received_after :
@@ -776,59 +806,74 @@ def _save_outlook_message(message, folder_name_source: str) -> str:
776806 dasl_query = f"@SQL=({ dasl_query .replace ('@SQL=' , '' )} ) AND { date_part } "
777807 else :
778808 dasl_query = f"@SQL={ date_part } "
779- except : pass
809+ except Exception as e :
810+ logger .warning (f"Failed to construct DASL date part: { e } " )
780811
781812 for folder in targets :
782813 if processed_count >= 50 : break
783814 try :
815+ logger .info (f"Searching in folder: { folder .Name } " )
784816 items = folder .Items
785817 if dasl_query :
818+ logger .debug (f"Applying DASL Filter: { dasl_query } " )
786819 items = items .Restrict (dasl_query )
787820 items .Sort ("[ReceivedTime]" , True )
788821
789822 for message in items :
790823 if processed_count >= 50 : break
791824 path = _save_outlook_message (message , folder .Name )
792825 if path : saved_paths .append (path )
793- except Exception : continue
826+ except Exception as e :
827+ logger .warning (f"DASL search in folder { getattr (folder , 'Name' , 'Unknown' )} failed: { e } " )
828+ continue
794829
795830 # 3. Strategy: Full Recursive Fallback (Current) - Slowest
796831 if processed_count == 0 :
832+ logger .info ("Attempting Recursive Fallback strategy..." )
def process_folder_recursive(folder):
    """Depth-first fallback scan of an Outlook folder tree.

    Sub-folders are visited before the folder's own items. Every item that
    passes the optional ``query`` check is handed to
    ``_save_outlook_message``; each path it returns is appended to
    ``saved_paths``. The walk stops as soon as ``processed_count`` reaches 50.

    Reads ``query``, ``received_after``, ``processed_count``, ``saved_paths``
    and ``_save_outlook_message`` from the enclosing function's scope.

    Args:
        folder: Outlook folder object exposing ``Folders``, ``Items`` and
            ``Name`` (an account root or any nested folder).
    """
    for sub in folder.Folders:
        if processed_count >= 50:
            return
        process_folder_recursive(sub)

    # Compile the query once per folder. The other search strategies treat the
    # query as plain text, so a query that is not a valid regular expression
    # (e.g. "C++") is matched literally instead of failing on every message.
    pattern = None
    if query:
        try:
            pattern = re.compile(query, re.I)
        except re.error as e:
            logger.warning(f"Query is not a valid regex ({e}); matching it literally.")
            pattern = re.compile(re.escape(query), re.I)

    try:
        items = folder.Items
        # Use crude restriction for dates if possible
        if received_after:
            try:
                dt_obj = datetime.strptime(received_after, "%Y-%m-%d")
                local_filter = f"[ReceivedTime] >= '{dt_obj.strftime('%m/%d/%Y')} 00:00 AM'"
                items = items.Restrict(local_filter)
            except Exception as e:
                # Best effort: keep scanning the unrestricted items, but
                # record why the date filter could not be applied.
                logger.debug(
                    f"Could not apply date restriction in folder "
                    f"{getattr(folder, 'Name', 'Unknown')}: {e}"
                )

        for msg in items:
            if processed_count >= 50:
                return
            try:
                if pattern is not None:
                    subj = getattr(msg, 'Subject', '') or ''
                    body = getattr(msg, 'Body', '') or ''
                    if not pattern.search(f"{subj} {body}"):
                        continue

                path = _save_outlook_message(msg, folder.Name)
                if path:
                    saved_paths.append(path)
            except Exception as e:
                # One unreadable item must not abort the rest of the folder.
                logger.debug(f"Error checking message in recursive strategy: {e}")
    except Exception as e:
        logger.debug(f"Error accessing items in folder {getattr(folder, 'Name', 'Unknown')}: {e}")
820862
821863 for account in namespace .Folders :
822864 if processed_count >= 50 : break
865+ logger .info (f"Recursively processing account: { account .Name } " )
823866 process_folder_recursive (account )
824867
825868 except Exception as e :
869+ logger .error (f"Critical error connecting to Outlook: { e } " , exc_info = True )
826870 return f"Error connecting to Outlook: { e } "
827871
872+ logger .info (f"Finished import_outlook_emails. Total processed: { processed_count } " )
828873 if processed_count == 0 :
829874 return "No new emails found matching criteria."
830875
831- return f"Imported { processed_count } emails. Saved to: { ', ' .join (saved_paths [:5 ])} .. ."
876+ return f"Imported { processed_count } emails. Saved to: { ', ' .join (saved_paths [:5 ])} { ' and more' if len ( saved_paths ) > 5 else '' } ."
832877
833878
834879# --- Tool Registry ---
0 commit comments