Skip to content

Commit 8c97f48

Browse files
author
Jonathan Sprauel
committed
continuing mail tool
1 parent d38a61a commit 8c97f48

1 file changed

Lines changed: 91 additions & 46 deletions

File tree

tools.py

Lines changed: 91 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,9 @@
1-
import os
2-
import json
3-
import base64
4-
import difflib
5-
import re
6-
import hashlib
7-
import zipfile
8-
import io
1+
import logging
92
from typing import Any, Dict, List, Union
103
from datetime import datetime
114

5+
logger = logging.getLogger(__name__)
6+
127
try:
138
import docx
149
except ImportError:
@@ -609,13 +604,16 @@ def import_outlook_emails(sandbox_id: str, query: str = None, received_after: st
609604
Import emails from Outlook based on a grep-like query and optional time boundary.
610605
Only works on Windows.
611606
"""
607+
logger.info(f"Starting import_outlook_emails: query='{query}', received_after='{received_after}'")
612608
try:
613609
import win32com.client
614610
except ImportError:
611+
logger.error("pywin32 not installed, Outlook import unavailable.")
615612
return "Error: Outlook import tool is only available on Windows systems with pywin32 installed."
616613

617614
sandbox_path = get_sandbox_path(sandbox_id)
618615
if not os.path.exists(sandbox_path):
616+
logger.error(f"Sandbox path does not exist: {sandbox_path}")
619617
return "Error: Sandbox directory does not exist."
620618

621619
processed_count = 0
@@ -631,7 +629,9 @@ def import_outlook_emails(sandbox_id: str, query: str = None, received_after: st
631629
filter_date = dt
632630
# Outlook Restrict filter format: [ReceivedTime] >= 'MM/DD/YYYY 00:00 AM'
633631
outlook_date_filter = f"[ReceivedTime] >= '{dt.strftime('%m/%d/%Y')} 00:00 AM'"
632+
logger.info(f"Using date filter: {outlook_date_filter}")
634633
except ValueError:
634+
logger.warning(f"Invalid date format: {received_after}")
635635
return "Error: received_after must be in YYYY-MM-DD format."
636636

637637
from file_preprocessor import convert_pdf_to_text
@@ -643,6 +643,7 @@ def sanitize_filename(name: str) -> str:
643643
def _save_outlook_message(message, folder_name_source: str) -> str:
644644
"""Helper to save an Outlook message and its attachments."""
645645
nonlocal processed_count
646+
save_folder = None
646647
try:
647648
# Filter for emails specifically if possible, but handle others
648649
# olMail=43, olAppointment=26, olMeetingRequest=53
@@ -665,14 +666,17 @@ def _save_outlook_message(message, folder_name_source: str) -> str:
665666
sender_name = getattr(message, 'SenderName', '') or getattr(message, 'SenderEmailAddress', 'Unknown')
666667
elif item_class == 26: # AppointmentItem
667668
sender_name = getattr(message, 'Organizer', 'Unknown')
668-
except: pass
669+
except Exception as e:
670+
logger.debug(f"Could not get sender name: {e}")
669671

670672
folder_name = f"{sanitize_filename(sender_name)}_{sanitize_filename(subj)}_{unique_id[:8]}"
671673
save_folder = os.path.join(sandbox_path, "mail", folder_name)
672674

673675
if os.path.exists(save_folder):
676+
logger.debug(f"Email folder already exists, skipping: {folder_name}")
674677
return None
675678

679+
logger.info(f"Saving email: {subj} from {sender_name}")
676680
os.makedirs(save_folder, exist_ok=True)
677681

678682
meta = {
@@ -681,48 +685,66 @@ def _save_outlook_message(message, folder_name_source: str) -> str:
681685
"Subject": subj,
682686
"Body": body,
683687
"HTMLBody": html_body[:5000] if html_body else "", # Truncate HTML body for meta
684-
"ReceivedTime": str(getattr(message, 'ReceivedTime', '')),
688+
"ReceivedTime": str(getattr(message, 'ReceivedTime', 'Unknown')),
685689
"Sender": sender_name,
686690
"To": getattr(message, 'To', ''),
687691
"FolderName": folder_name_source,
688692
"ItemClass": item_class
689693
}
690694

691-
with open(os.path.join(save_folder, "email_data.json"), 'w', encoding='utf-8') as f:
695+
data_file = os.path.join(save_folder, "email_data.json")
696+
with open(data_file, 'w', encoding='utf-8') as f:
692697
json.dump(meta, f, indent=2)
698+
logger.debug(f"Saved email metadata to {data_file}")
693699

694700
if hasattr(message, 'Attachments'):
695-
for attachment in message.Attachments:
701+
for i, attachment in enumerate(message.Attachments):
696702
try:
697-
file_path = os.path.join(save_folder, attachment.FileName)
703+
att_name = getattr(attachment, 'FileName', f'attachment_{i}')
704+
file_path = os.path.join(save_folder, att_name)
698705
attachment.SaveAsFile(file_path)
706+
logger.debug(f"Saved attachment: {att_name}")
699707

700-
if attachment.FileName.lower().endswith('.pdf'):
708+
if att_name.lower().endswith('.pdf'):
701709
try:
710+
logger.debug(f"Converting PDF attachment to text: {att_name}")
702711
content, format_type = convert_pdf_to_text(file_path)
703712
ext = ".txt" if format_type == "text" else ".json"
704713
with open(file_path + ext, 'w', encoding='utf-8') as f:
705714
f.write(content)
706-
except Exception: pass
715+
except Exception as pdf_e:
716+
logger.warning(f"Failed to convert PDF {att_name}: {pdf_e}")
707717

708-
if attachment.FileName.lower().endswith('.zip'):
718+
if att_name.lower().endswith('.zip'):
709719
try:
720+
logger.debug(f"Extracting ZIP attachment: {att_name}")
710721
with zipfile.ZipFile(file_path, 'r') as zip_ref:
711722
zip_ref.extractall(save_folder)
712-
except Exception: pass
713-
except Exception: pass
723+
except Exception as zip_e:
724+
logger.warning(f"Failed to extract ZIP {att_name}: {zip_e}")
725+
except Exception as att_e:
726+
logger.warning(f"Error processing attachment {i}: {att_e}")
714727

715728
processed_count += 1
716729
return os.path.relpath(save_folder, sandbox_path)
717-
except Exception:
730+
except Exception as e:
731+
logger.error(f"Error saving outlook message '{getattr(message, 'Subject', 'Unknown')}': {e}", exc_info=True)
732+
# Clean up the empty folder if we failed early
733+
if save_folder and os.path.exists(save_folder) and not os.listdir(save_folder):
734+
try:
735+
os.rmdir(save_folder)
736+
logger.debug(f"Cleaned up empty folder: {save_folder}")
737+
except: pass
718738
return None
719739

720740
try:
741+
logger.info("Connecting to Outlook.Application...")
721742
outlook_app = win32com.client.Dispatch('Outlook.Application')
722743
namespace = outlook_app.GetNamespace('MAPI')
723744

724745
# 1. Strategy: Windows Search Index (ADODB) - Fast!
725746
if query:
747+
logger.info(f"Attempting Windows Search Index for query: {query}")
726748
try:
727749
conn = win32com.client.Dispatch("ADODB.Connection")
728750
conn.Open("Provider=Search.CollatorDSO;Extended Properties='Application=Windows';")
@@ -739,6 +761,7 @@ def _save_outlook_message(message, folder_name_source: str) -> str:
739761
sql += " ORDER BY System.DateModified DESC"
740762

741763
rs = conn.Execute(sql)[0]
764+
idx_found = 0
742765
while not rs.EOF:
743766
if processed_count >= 50: break
744767
item_url = rs.Fields.Item(0).Value # e.g. outlook:00000000...
@@ -748,24 +771,31 @@ def _save_outlook_message(message, folder_name_source: str) -> str:
748771
entry_id = item_url.split("outlook:")[1]
749772
message = namespace.GetItemFromID(entry_id)
750773
path = _save_outlook_message(message, "WindowsIndex")
751-
if path: saved_paths.append(path)
752-
except: pass
774+
if path:
775+
saved_paths.append(path)
776+
idx_found += 1
777+
except Exception as e:
778+
logger.debug(f"Failed to retrieve item from ID {item_url}: {e}")
753779
rs.MoveNext()
754780
rs.Close()
755781
conn.Close()
756-
except Exception:
757-
pass
782+
logger.info(f"Windows Search Index strategy found {idx_found} items.")
783+
except Exception as e:
784+
logger.warning(f"Windows Search Index strategy failed: {e}")
758785

759786
# 2. Strategy: DASL Filter (Outlook Native Search) - Medium Speed
760787
if processed_count < 10: # Only if ADODB didn't yield much
788+
logger.info("Attempting DASL strategy (Outlook Native Search)...")
761789
targets = [namespace.GetDefaultFolder(6), namespace.GetDefaultFolder(5)]
762790

763791
dasl_query = ""
764792
if query:
793+
# Basic DASL Escaping for simple queries
794+
clean_query = query.replace("'", "''")
765795
dasl_query = (
766-
f"@SQL=\"urn:schemas:httpmail:subject\" LIKE '%{query}%' OR "
767-
f"\"urn:schemas:httpmail:textdescription\" LIKE '%{query}%' OR "
768-
f"\"urn:schemas:httpmail:fromname\" LIKE '%{query}%'"
796+
f"@SQL=\"urn:schemas:httpmail:subject\" LIKE '%{clean_query}%' OR "
797+
f"\"urn:schemas:httpmail:textdescription\" LIKE '%{clean_query}%' OR "
798+
f"\"urn:schemas:httpmail:fromname\" LIKE '%{clean_query}%'"
769799
)
770800

771801
if received_after:
@@ -776,59 +806,74 @@ def _save_outlook_message(message, folder_name_source: str) -> str:
776806
dasl_query = f"@SQL=({dasl_query.replace('@SQL=', '')}) AND {date_part}"
777807
else:
778808
dasl_query = f"@SQL={date_part}"
779-
except: pass
809+
except Exception as e:
810+
logger.warning(f"Failed to construct DASL date part: {e}")
780811

781812
for folder in targets:
782813
if processed_count >= 50: break
783814
try:
815+
logger.info(f"Searching in folder: {folder.Name}")
784816
items = folder.Items
785817
if dasl_query:
818+
logger.debug(f"Applying DASL Filter: {dasl_query}")
786819
items = items.Restrict(dasl_query)
787820
items.Sort("[ReceivedTime]", True)
788821

789822
for message in items:
790823
if processed_count >= 50: break
791824
path = _save_outlook_message(message, folder.Name)
792825
if path: saved_paths.append(path)
793-
except Exception: continue
826+
except Exception as e:
827+
logger.warning(f"DASL search in folder {getattr(folder, 'Name', 'Unknown')} failed: {e}")
828+
continue
794829

795830
# 3. Strategy: Full Recursive Fallback (Current) - Slowest
796831
if processed_count == 0:
832+
logger.info("Attempting Recursive Fallback strategy...")
797833
def process_folder_recursive(folder):
798834
for sub in folder.Folders:
799835
if processed_count >= 50: return
800836
process_folder_recursive(sub)
801837

802-
items = folder.Items
803-
# Use crude restriction for dates if possible
804-
if received_after:
805-
try:
806-
dt_obj = datetime.strptime(received_after, "%Y-%m-%d")
807-
local_filter = f"[ReceivedTime] >= '{dt_obj.strftime('%m/%d/%Y')} 00:00 AM'"
808-
items = items.Restrict(local_filter)
809-
except: pass
810-
811-
for msg in items:
812-
if processed_count >= 50: return
813-
if query:
814-
subj = getattr(msg, 'Subject', '') or ''
815-
body = getattr(msg, 'Body', '') or ''
816-
if not re.search(query, f"{subj} {body}", re.I): continue
838+
try:
839+
items = folder.Items
840+
# Use crude restriction for dates if possible
841+
if received_after:
842+
try:
843+
dt_obj = datetime.strptime(received_after, "%Y-%m-%d")
844+
local_filter = f"[ReceivedTime] >= '{dt_obj.strftime('%m/%d/%Y')} 00:00 AM'"
845+
items = items.Restrict(local_filter)
846+
except: pass
817847

818-
path = _save_outlook_message(msg, folder.Name)
819-
if path: saved_paths.append(path)
848+
for msg in items:
849+
if processed_count >= 50: return
850+
try:
851+
if query:
852+
subj = getattr(msg, 'Subject', '') or ''
853+
body = getattr(msg, 'Body', '') or ''
854+
if not re.search(query, f"{subj} {body}", re.I): continue
855+
856+
path = _save_outlook_message(msg, folder.Name)
857+
if path: saved_paths.append(path)
858+
except Exception as e:
859+
logger.debug(f"Error checking message in recursive strategy: {e}")
860+
except Exception as e:
861+
logger.debug(f"Error accessing items in folder {getattr(folder, 'Name', 'Unknown')}: {e}")
820862

821863
for account in namespace.Folders:
822864
if processed_count >= 50: break
865+
logger.info(f"Recursively processing account: {account.Name}")
823866
process_folder_recursive(account)
824867

825868
except Exception as e:
869+
logger.error(f"Critical error connecting to Outlook: {e}", exc_info=True)
826870
return f"Error connecting to Outlook: {e}"
827871

872+
logger.info(f"Finished import_outlook_emails. Total processed: {processed_count}")
828873
if processed_count == 0:
829874
return "No new emails found matching criteria."
830875

831-
return f"Imported {processed_count} emails. Saved to: {', '.join(saved_paths[:5])}..."
876+
return f"Imported {processed_count} emails. Saved to: {', '.join(saved_paths[:5])}{' and more' if len(saved_paths) > 5 else ''}."
832877

833878

834879
# --- Tool Registry ---

0 commit comments

Comments
 (0)