Skip to content

Commit 24eeb81

Browse files
authored
Merge pull request #74 from maxmind/greg/hash-email
Add support for client-side email hashing
2 parents e03897a + a3b82cf commit 24eeb81

7 files changed

Lines changed: 407 additions & 54 deletions

File tree

HISTORY.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,12 @@ History
1010
the ``proxy`` parameter to the ``AsyncClient`` or ``Client`` constructor.
1111
* Added ``apple_pay`` and ``aps_payments`` to the ``/payment/processor``
1212
validation.
13+
* You may now enable client-side email hashing by setting the keyword argument
14+
``hash_email`` to ``True`` in the web-service client request methods (i.e.,
15+
``score``, ``insights``, ``factors``). When set, this normalizes the email
16+
address and sends an MD5 hash of it to the web service rather than the
17+
plain-text address. Note that the email domain will still be sent in plain
18+
text.
1319

1420
2.2.0 (2020-10-13)
1521
++++++++++++++++++

minfraud/request.py

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
"""This is an internal module used for preparing the minFraud request.
2+
3+
This code is only intended for internal use and is subject to change in ways
4+
that may break any direct use of it.
5+
6+
"""
7+
8+
import hashlib
9+
from typing import Any, Dict
10+
from voluptuous import MultipleInvalid
11+
12+
from .errors import InvalidRequestError
13+
from .validation import validate_report, validate_transaction
14+
15+
# Maps known misspelled email domains to the domain they are corrected to
# before the address is hashed (looked up by ``_clean_domain``).
_TYPO_DOMAINS = {
    # gmail.com
    **dict.fromkeys(
        (
            "35gmai.com",
            "636gmail.com",
            "gamil.com",
            "gmail.comu",
            "gmial.com",
            "gmil.com",
            "yahoogmail.com",
        ),
        "gmail.com",
    ),
    # outlook.com
    "putlook.com": "outlook.com",
}
27+
28+
29+
def prepare_report(request: Dict[str, Any], validate: bool):
    """Build the minFraud report payload from ``request``.

    The request is copied with all ``None`` values removed. When ``validate``
    is true, the copy is checked with ``validate_report``.

    :param request: The report data supplied by the caller; it is not
      modified.
    :param validate: Whether to validate the cleaned copy.
    :returns: The cleaned copy of ``request``.
    :raises InvalidRequestError: If validation is enabled and
      ``validate_report`` raises ``MultipleInvalid``.
    """
    report = _copy_and_clean(request)
    if not validate:
        return report

    try:
        validate_report(report)
    except MultipleInvalid as ex:
        # Re-raise as the package's own error type, keeping the original
        # exception as the cause.
        raise InvalidRequestError(f"Invalid report data: {ex}") from ex
    return report
38+
39+
40+
def prepare_transaction(
    request: Dict[str, Any],
    validate: bool,
    hash_email: bool,
):
    """Build the minFraud transaction payload from ``request``.

    The request is copied with all ``None`` values removed. The copy is then
    optionally validated and, after that, optionally has its email address
    replaced by a hash.

    :param request: The transaction data supplied by the caller; it is not
      modified.
    :param validate: Whether to check the cleaned copy with
      ``validate_transaction``.
    :param hash_email: Whether to run ``maybe_hash_email`` on the cleaned
      copy.
    :returns: The cleaned (and possibly hashed) copy of ``request``.
    :raises InvalidRequestError: If validation is enabled and
      ``validate_transaction`` raises ``MultipleInvalid``.
    """
    transaction = _copy_and_clean(request)

    if validate:
        try:
            validate_transaction(transaction)
        except MultipleInvalid as error:
            message = f"Invalid transaction data: {error}"
            raise InvalidRequestError(message) from error

    # Hashing happens after validation, so the validator sees the plain-text
    # address rather than the digest.
    if hash_email:
        maybe_hash_email(transaction)

    return transaction
57+
58+
59+
def _copy_and_clean(data: Any) -> Any:
    """Return a recursive copy of ``data`` with every ``None`` dropped.

    * A dict becomes a new dict without its ``None``-valued entries.
    * A list, set, or tuple becomes a new list without its ``None`` items.
    * Any other value is returned unchanged (not copied).
    """
    if isinstance(data, (list, set, tuple)):
        return [_copy_and_clean(item) for item in data if item is not None]
    if isinstance(data, dict):
        return {
            key: _copy_and_clean(value)
            for key, value in data.items()
            if value is not None
        }
    return data
66+
67+
68+
def maybe_hash_email(transaction):
    """Hash the email address in ``transaction`` in place, if present.

    The address is lower-cased and stripped of surrounding whitespace, then
    split at its last ``@``. The domain part is cleaned with
    ``_clean_domain`` and ``email["address"]`` is replaced by the digest
    returned from ``_hash_email``. If the cleaned domain is non-empty and the
    email entry has no ``domain`` key yet, the cleaned domain is stored under
    that key.

    This is best-effort. The transaction is left untouched when:

    * there is no ``email`` entry or it has no ``address`` key,
    * the ``email`` entry cannot be looked up by key (e.g. it is ``None``),
    * the address is not a string (including ``None``), or
    * the address contains no ``@``.

    :param transaction: The transaction data; modified in place.
    :returns: ``None``.
    """
    try:
        email = transaction["email"]
        address = email["address"]
    except (KeyError, TypeError):
        # KeyError: the key is absent. TypeError: the container is ``None``
        # or another value that does not support key lookup.
        return

    # Covers ``None`` as well as any other non-string value, which cannot be
    # normalized and is therefore passed through unchanged.
    if not isinstance(address, str):
        return

    address = address.lower().strip()

    # Split at the LAST "@" so everything before it counts as the local part.
    local_part, at_sign, raw_domain = address.rpartition("@")
    if not at_sign:
        return

    domain = _clean_domain(raw_domain)

    # Never overwrite a domain the caller supplied explicitly.
    if domain != "" and "domain" not in email:
        email["domain"] = domain

    email["address"] = _hash_email(local_part, domain)
92+
93+
94+
def _clean_domain(domain):
    """Normalize an email domain and correct known typos.

    Surrounding whitespace and trailing dots are removed, the result is
    converted to its ASCII form with the ``idna`` codec, and the outcome is
    replaced by its correction if it appears in ``_TYPO_DOMAINS``.

    NOTE(review): ``str.encode("idna")`` may raise ``UnicodeError`` for
    domains it cannot encode; that error is not handled here.
    """
    trimmed = domain.strip().rstrip(".")
    ascii_domain = trimmed.encode("idna").decode("ASCII")
    if ascii_domain in _TYPO_DOMAINS:
        return _TYPO_DOMAINS[ascii_domain]
    return ascii_domain
97+
98+
99+
def _hash_email(local_part, domain):
    """Return the hex MD5 digest of ``local_part@domain`` without its alias.

    The alias is everything from the first divider onwards in the local part.
    The divider is ``-`` for ``yahoo.com`` and ``+`` for every other domain.
    The alias is only removed when something precedes the divider, so the
    local part never becomes empty through stripping.
    """
    divider = "-" if domain == "yahoo.com" else "+"

    # Strip off aliased part of email address
    base, found, _alias = local_part.partition(divider)
    if found and base:
        local_part = base

    normalized = f"{local_part}@{domain}"
    return hashlib.md5(normalized.encode("UTF-8")).hexdigest()

0 commit comments

Comments
 (0)