Skip to content

Commit 1cbb95f

Browse files
authored
Merge pull request #83 from ZHOUKAILIAN/fix/worker-mail-otp-extraction
fix: avoid reading six-digit email domains as OTPs
2 parents 99a6d6c + f219750 commit 1cbb95f

4 files changed

Lines changed: 140 additions & 18 deletions

File tree

src/services/base.py

Lines changed: 28 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,12 +5,13 @@
55

66
import abc
77
import logging
8+
import re
89
import time
910
from dataclasses import dataclass
1011
from typing import Optional, Dict, Any, List
1112
from enum import Enum
1213

13-
from ..config.constants import EmailServiceType
14+
from ..config.constants import EmailServiceType, OTP_CODE_PATTERN, OTP_CODE_SEMANTIC_PATTERN
1415

1516

1617
logger = logging.getLogger(__name__)
@@ -146,6 +147,8 @@ def __init__(self, service_type: EmailServiceType, name: str = None):
146147
self._last_error = None
147148
self._provider_backoff = reset_adaptive_backoff()
148149

150+
# Matches an email address: local part, "@", dotted domain, alphabetic TLD of 2+ letters.
# _strip_email_addresses substitutes every match with a space so that digits inside an
# address (e.g. a numeric domain) are not visible to the fallback OTP search.
_EMAIL_ADDRESS_PATTERN = re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}")
151+
149152
@property
150153
def status(self) -> EmailServiceStatus:
151154
"""获取服务状态"""
@@ -272,6 +275,30 @@ def get_email_info(self, email_id: str) -> Optional[Dict[str, Any]]:
272275
return email_info
273276
return None
274277

278+
def _strip_email_addresses(self, text: str) -> str:
279+
"""Replace every email address in ``text`` with a single space.

This keeps digits that are part of an address's domain from being
mistaken for a verification code. A falsy ``text`` (``None`` or ``""``)
is treated as the empty string.
"""
280+
return self._EMAIL_ADDRESS_PATTERN.sub(" ", text or "")
281+
282+
def _extract_otp_from_text(self, text: str, pattern: Optional[str] = None) -> Optional[str]:
283+
"""
284+
Extract a verification code from text.
285+
286+
Tries the semantic pattern first; if that fails, falls back to a plain
pattern search on the text with email addresses removed.

Args:
    text: Message text to search. Falsy input yields ``None``.
    pattern: Optional regex overriding ``OTP_CODE_PATTERN`` for the
        fallback search. Capture group 1 is returned, so the pattern
        must define one.

Returns:
    Capture group 1 of the first successful match, or ``None`` when
    neither search matches.
287+
"""
288+
if not text:
289+
return None
290+
291+
# The semantic search runs on the raw text (addresses are NOT stripped here)
# and is case-insensitive; when it matches, its result wins over the fallback.
semantic_match = re.search(OTP_CODE_SEMANTIC_PATTERN, text, re.IGNORECASE)
292+
if semantic_match:
293+
return semantic_match.group(1)
294+
295+
# A caller-supplied pattern takes precedence over the default; an empty
# string counts as "not supplied" because of the ``or``.
fallback_pattern = pattern or OTP_CODE_PATTERN
296+
# Only the fallback search sees the address-stripped text.
simple_match = re.search(fallback_pattern, self._strip_email_addresses(text))
297+
if simple_match:
298+
return simple_match.group(1)
299+
300+
return None
301+
275302
def wait_for_email(
276303
self,
277304
email: str,

src/services/freemail.py

Lines changed: 11 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -237,34 +237,31 @@ def get_verification_code(
237237
if "openai" not in content.lower():
238238
continue
239239

240-
# 尝试直接使用 Freemail 提取的验证码
241-
v_code = mail.get("verification_code")
242-
if v_code:
243-
logger.info(f"从 Freemail 邮箱 {email} 找到验证码: {v_code}")
244-
self.update_status(True)
245-
return v_code
246-
247-
# 如果没有直接提供,通过正则匹配 preview
248-
match = re.search(pattern, content)
249-
if match:
250-
code = match.group(1)
240+
code = self._extract_otp_from_text(content, pattern)
241+
if code:
251242
logger.info(f"从 Freemail 邮箱 {email} 找到验证码: {code}")
252243
self.update_status(True)
253244
return code
254245

246+
v_code = str(mail.get("verification_code") or "").strip()
247+
255248
# 如果依然未找到,获取邮件详情进行匹配
256249
try:
257250
detail = self._make_request("GET", f"/api/email/{mail_id}")
258251
full_content = str(detail.get("content", "")) + "\n" + str(detail.get("html_content", ""))
259-
match = re.search(pattern, full_content)
260-
if match:
261-
code = match.group(1)
252+
code = self._extract_otp_from_text(full_content, pattern)
253+
if code:
262254
logger.info(f"从 Freemail 邮箱 {email} 找到验证码: {code}")
263255
self.update_status(True)
264256
return code
265257
except Exception as e:
266258
logger.debug(f"获取 Freemail 邮件详情失败: {e}")
267259

260+
if re.fullmatch(r"\d{6}", v_code):
261+
logger.info(f"从 Freemail 邮箱 {email} 找到验证码: {v_code}")
262+
self.update_status(True)
263+
return v_code
264+
268265
except Exception as e:
269266
logger.debug(f"检查 Freemail 邮件时出错: {e}")
270267

src/services/temp_mail.py

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -353,9 +353,8 @@ def get_verification_code(
353353
if "openai" not in sender and "openai" not in content.lower():
354354
continue
355355

356-
match = re.search(pattern, content)
357-
if match:
358-
code = match.group(1)
356+
code = self._extract_otp_from_text(content, pattern)
357+
if code:
359358
logger.info(f"从 TempMail 邮箱 {email} 找到验证码: {code}")
360359
self.update_status(True)
361360
return code
Lines changed: 99 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,99 @@
1+
from src.services.freemail import FreemailService
2+
from src.services.temp_mail import TempMailService
3+
4+
5+
class FakeResponse:
# Minimal stand-in for an HTTP response used by the tests in this file.
# Exposes status_code, text, headers (always an empty dict) and json().
6+
def __init__(self, status_code=200, payload=None, text=""):
7+
self.status_code = status_code
8+
self._payload = payload
9+
self.text = text
10+
self.headers = {}
11+
12+
def json(self):
# Return the canned payload; raise ValueError when none was provided.
13+
if self._payload is None:
14+
raise ValueError("no json payload")
15+
return self._payload
16+
17+
18+
class FakeHTTPClient:
# Scripted HTTP client: hands out pre-queued responses in order and records
# every request it receives in self.calls.
19+
def __init__(self, responses):
20+
# Copy the iterable so popping does not mutate the caller's sequence.
self.responses = list(responses)
21+
self.calls = []
22+
23+
def request(self, method, url, **kwargs):
# Record the call, then return the next queued response (FIFO).
24+
self.calls.append({
25+
"method": method,
26+
"url": url,
27+
"kwargs": kwargs,
28+
})
29+
# Fail loudly when the code under test makes more requests than were scripted.
if not self.responses:
30+
raise AssertionError(f"未准备响应: {method} {url}")
31+
return self.responses.pop(0)
32+
33+
34+
def test_temp_mail_ignores_six_digit_domain_when_extracting_code():
# Regression test: the mailbox domain is itself six digits ("123456.com") and
# the recipient address appears in the body before the real code. The service
# must return the real code, not the domain digits.
35+
service = TempMailService({
36+
"base_url": "https://mail.example.com",
37+
"admin_password": "admin-secret",
38+
"domain": "123456.com",
39+
})
40+
# Single scripted response: one message whose body contains both the
# six-digit domain and the actual verification code.
service.http_client = FakeHTTPClient([
41+
FakeResponse(
42+
payload={
43+
"results": [
44+
{
45+
"id": "msg-1",
46+
"source": "OpenAI <noreply@openai.com>",
47+
"subject": "Your OpenAI verification code",
48+
"body": (
49+
"Email sent to tester@123456.com.\n"
50+
"Your OpenAI verification code is 654321"
51+
),
52+
}
53+
]
54+
}
55+
)
56+
])
57+
58+
code = service.get_verification_code(
59+
email="tester@123456.com",
60+
timeout=1,
61+
)
62+
63+
# 654321 is the real code; 123456 would mean the domain was misread as an OTP.
assert code == "654321"
64+
65+
66+
def test_freemail_prefers_real_code_over_worker_extracted_domain_digits():
# Regression test: the list entry carries a "verification_code" of "123456",
# which equals the digits of the recipient's domain, and its preview contains
# no real code. The service must return the code found in the mail detail.
67+
service = FreemailService({
68+
"base_url": "https://mail.example.com",
69+
"admin_token": "jwt-token",
70+
})
71+
# Two scripted responses, consumed in order. The first is the mail list;
# the second is presumably served to the per-mail detail request
# (NOTE(review): confirm _make_request goes through http_client).
service.http_client = FakeHTTPClient([
72+
FakeResponse(
73+
payload=[
74+
{
75+
"id": "msg-1",
76+
"sender": "noreply@openai.com",
77+
"subject": "Your OpenAI verification code",
78+
"preview": "Verification email sent to tester@123456.com",
79+
"verification_code": "123456",
80+
}
81+
]
82+
),
83+
FakeResponse(
84+
payload={
85+
"content": (
86+
"To: tester@123456.com\n"
87+
"Your OpenAI verification code is 654321"
88+
),
89+
"html_content": "",
90+
}
91+
),
92+
])
93+
94+
code = service.get_verification_code(
95+
email="tester@123456.com",
96+
timeout=1,
97+
)
98+
99+
# The code from the detail body must win over the list entry's "123456".
assert code == "654321"

0 commit comments

Comments
 (0)