Skip to content

Commit e0a1715

Browse files
authored
Merge pull request #95 from bugout-dev/key_blacklist
Support of blacklisted keys for feature reports
2 parents 57607cd + aa1e62c commit e0a1715

6 files changed

Lines changed: 188 additions & 9 deletions

File tree

python/README.md

Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -126,11 +126,49 @@ set `MY_APP_NO_CONSENT=1`, then again no reports will get sent back.
126126
On the other hand, if the user has set `MY_APP_CONSENT=true` and left `MY_APP_NO_CONSENT` unset or
127127
set to a value other than `1`, Humbug will send you any reports you have configured.
128128

129-
### Example: activeloopai/Hub
129+
### Blacklisting parameters in feature reports
130130

131-
[This pull request](https://github.com/activeloopai/Hub/pull/624) shows how
131+
Arguments to functions and other callables can sometimes contain sensitive information which you may
132+
not want to include in Humbug reports.
133+
134+
Blacklist functions allow you to specify which parameters from an argument list to filter out of your
135+
feature reports.
136+
137+
#### `blacklist.generate_filter_parameters_by_key_fn`
138+
139+
If you would just like to filter out all parameters with a given name, you can use `blacklist.generate_filter_parameters_by_key_fn`.
140+
141+
For example, to ignore all parameters named `token` (case insensitive), you would instantiate your
142+
`HumbugReporter` as follows:
143+
144+
```python
145+
reporter = HumbugReporter(
146+
...,
147+
blacklist_fn=blacklist.generate_filter_parameters_by_key_fn(["token"]),
148+
)
149+
```
150+
151+
#### Custom blacklist functions
152+
153+
You could also implement a custom blacklist function to remove all parameters whose names contain the substring
154+
`token` (case insensitive):
155+
156+
```python
157+
def blacklist_token_parameters_fn(params: Dict[str, Any]) -> Dict[str, Any]:
    """
    Return a copy of params without any parameter whose name contains "token".

    The parameter name is lower-cased before the substring check, so names such as
    `API_TOKEN` or `accessToken` are removed as well.
    """
    admissible_params = {
        k: v for k, v in params.items() if "token" not in k.lower()
    }
    return admissible_params
160+
161+
reporter = HumbugReporter(
162+
...,
163+
blacklist_fn=blacklist_token_parameters_fn
164+
)
165+
```
166+
167+
### Case study: activeloopai/deeplake
168+
169+
[This pull request](https://github.com/activeloopai/deeplake/pull/624) shows how
132170
[Activeloop](https://www.activeloop.ai/) integrated Humbug into their popular
133-
[`Hub`](https://github.com/activeloopai/Hub) tool.
171+
[`deeplake`](https://github.com/activeloopai/deeplake) tool.
134172

135173
This example shows how to use Humbug to record consent in a configuration file that the user
136174
can modify at will. It also shows how to add custom tags to your Humbug reports.

python/humbug/blacklist.py

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
"""
2+
Various implementations of the blacklist functionality.
3+
"""
4+
from typing import Any, Callable, Dict, List, Optional
5+
6+
7+
def generate_filter_parameters_by_key_fn(
    blacklist_keys: List[str],
) -> Callable[[Dict[str, Any]], Dict[str, Any]]:
    """
    Generates a parameter filter function which filters out the parameters whose names are in the given
    list of blacklist_keys.

    The comparison to blacklist_keys is case insensitive.

    The generated function returns a new dictionary (the input is not modified) in which every
    remaining value has been converted with str().
    """
    # A bare string would otherwise be iterated character by character, which would silently fail
    # to filter the intended key. Treat it as a single blacklisted key instead.
    if isinstance(blacklist_keys, str):
        blacklist_keys = [blacklist_keys]

    # Built once; a frozenset gives constant-time membership checks for every parameter.
    lowercase_blacklist_keys = frozenset(key.lower() for key in blacklist_keys)

    def filter_parameters_by_key(
        parameters: Dict[str, Any],
    ) -> Dict[str, Any]:
        return {
            k: str(v)
            for k, v in parameters.items()
            # str(k) keeps the filter from raising if a caller passes a non-string key.
            if str(k).lower() not in lowercase_blacklist_keys
        }

    return filter_parameters_by_key
29+
30+
31+
def generate_filter_parameters_by_key_inner_fn(
    blacklist_keys: List[str],
) -> Callable[[Dict[str, Any]], Dict[str, Any]]:
    """
    Generates a parameter filter function which filters out the parameters whose names are in the given
    list of blacklist_keys for 1st and 2nd layer of dictionary. Expands pydantic model to dictionary.

    The comparison to blacklist_keys is case insensitive.

    In the result, a value that could be expanded becomes a dictionary of its non-blacklisted
    entries with each entry converted by str(); any other value is converted by str() as a whole.
    """
    # A bare string would otherwise be iterated character by character, which would silently fail
    # to filter the intended key. Treat it as a single blacklisted key instead.
    if isinstance(blacklist_keys, str):
        blacklist_keys = [blacklist_keys]

    # Built once; a frozenset gives constant-time membership checks for every key.
    lowercase_blacklist_keys = frozenset(key.lower() for key in blacklist_keys)

    def _is_blacklisted(key: Any) -> bool:
        # str() so that non-string keys (e.g. integers) are compared instead of raising.
        return str(key).lower() in lowercase_blacklist_keys

    def _expand(value: Any) -> Optional[Any]:
        """
        Returns a dictionary-like view of value, or None if value is not dictionary-like.
        """
        if isinstance(value, dict):
            return value
        # Pydantic models support: `model_dump` is the Pydantic v2 name, `dict` the v1 name.
        for method_name in ("model_dump", "dict"):
            method = getattr(value, method_name, None)
            if callable(method):
                return method()
        if hasattr(value, "keys"):
            return value
        return None

    def _filter_inner(value: Any) -> Optional[Dict[Any, str]]:
        """
        Returns the stringified, non-blacklisted entries of value, or None if value is not
        dictionary-like.
        """
        expanded = _expand(value)
        if expanded is None:
            return None
        return {
            inner_key: str(expanded[inner_key])
            for inner_key in expanded.keys()
            if not _is_blacklisted(inner_key)
        }

    def filter_parameters_by_key_inner(
        parameters: Dict[str, Any],
    ) -> Dict[str, Any]:
        """
        Applies blacklist filter to provided parameters and to 2nd layer of
        inner dictionary parameter if exists.
        """
        whitelisted_parameters: Dict[str, Any] = {}

        for key, value in parameters.items():
            if _is_blacklisted(key):
                continue

            try:
                inner_parameters = _filter_inner(value)
            except Exception:
                # Best effort: expanding an arbitrary object (for example calling its `dict`
                # method) can fail in ways this module cannot predict, and reporting must not
                # raise because of it. Fall back to the plain string form of the value.
                inner_parameters = None

            if inner_parameters is None:
                whitelisted_parameters[key] = str(value)
            else:
                whitelisted_parameters[key] = inner_parameters

        return whitelisted_parameters

    return filter_parameters_by_key_inner

python/humbug/report.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ def __init__(
6060
mode: Modes = Modes.DEFAULT,
6161
url: Optional[str] = None,
6262
tags: Optional[List[str]] = None,
63+
blacklist_fn: Optional[Callable[[Dict[str, Any]], Dict[str, Any]]] = None,
6364
):
6465
if url is None:
6566
url = DEFAULT_URL
@@ -93,6 +94,8 @@ def __init__(
9394
if tags is not None:
9495
self.tags = tags
9596

97+
self.blacklist_fn = blacklist_fn
98+
9699
def wait(self) -> None:
97100
concurrent.futures.wait(
98101
self.report_futures, timeout=float(self.timeout_seconds)
@@ -382,13 +385,17 @@ def logging_report(
382385
def feature_report(
383386
self,
384387
feature_name: str,
385-
parameters: Dict[str, str],
388+
parameters: Dict[str, Any],
386389
tags: Optional[List[str]] = None,
387390
publish: bool = True,
388391
wait: bool = False,
392+
apply_blacklist: bool = True,
389393
) -> Report:
390394
title = "Feature used: {name}".format(name=feature_name)
391395

396+
if apply_blacklist and self.blacklist_fn is not None:
397+
parameters = self.blacklist_fn(parameters)
398+
392399
parameters_content = "\n".join(
393400
[
394401
"- `{parameter_name}` = `{parameter_value}`".format(
@@ -438,7 +445,7 @@ def record_call(
438445
def wrapped_callable(*args, **kwargs):
439446
parameters = {**kwargs}
440447
for i, arg in enumerate(args):
441-
parameters["arg.{}".format(i)] = str(arg)
448+
parameters["arg.{}".format(i)] = arg
442449

443450
self.feature_report(callable.__name__, parameters)
444451

python/humbug/test_blacklist.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
from typing import Any, Dict
2+
import unittest
3+
4+
from . import blacklist
5+
6+
7+
class TestGenerateFilterParametersByKeyFunction(unittest.TestCase):
    """
    Unit tests covering blacklist.generate_filter_parameters_by_key_fn.
    """

    def test_exact_matches(self):
        # A parameter named exactly like a blacklist entry is dropped; the remaining value
        # comes back stringified.
        key_filter = blacklist.generate_filter_parameters_by_key_fn(["bad"])
        raw_params: Dict[str, Any] = {"good": 1, "bad": "lol"}
        self.assertDictEqual(key_filter(raw_params), {"good": "1"})

    def test_case_insensitive_matches(self):
        # The blacklist entry and the parameter name differ only in letter case and must
        # still match.
        key_filter = blacklist.generate_filter_parameters_by_key_fn(["Bad"])
        raw_params: Dict[str, Any] = {"good": 1, "BAD": "lol"}
        self.assertDictEqual(key_filter(raw_params), {"good": "1"})

python/humbug/test_report.py

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,19 @@
11
import unittest
22
from unittest.mock import MagicMock
33

4-
from . import consent, report
4+
from . import consent, report, blacklist
55

66

77
class TestReporter(unittest.TestCase):
88
def setUp(self):
99
self.consent = consent.HumbugConsent(True)
1010
self.reporter = report.HumbugReporter(
11-
name="TestReporter", consent=self.consent, tags=["humbug-unit-test"]
11+
name="TestReporter",
12+
consent=self.consent,
13+
tags=["humbug-unit-test"],
14+
blacklist_fn=blacklist.generate_filter_parameters_by_key_inner_fn(
15+
["private"]
16+
),
1217
)
1318
self.reporter.publish = MagicMock()
1419

@@ -72,11 +77,37 @@ def test_post_body(self):
7277

7378
def test_feature_report(self):
7479
report = self.reporter.feature_report(
75-
"test_feature", {"population": "A", "version": "2"}, publish=False
80+
"test_feature",
81+
{
82+
"population": "A",
83+
"version": "2",
84+
"private": "confidential",
85+
"inner": {"private": "confidential"},
86+
},
87+
publish=False,
7688
)
7789
self.assertTrue("feature:{}".format("test_feature") in report.tags)
7890
self.assertTrue("parameter:{}={}".format("population", "A") in report.tags)
7991
self.assertTrue("parameter:{}={}".format("version", "2") in report.tags)
92+
self.assertTrue(
93+
"parameter:{}={}".format("private", "confidential") not in report.tags
94+
)
95+
self.assertTrue("parameter:{}={{}}".format("inner") in report.tags)
96+
97+
def test_feature_report_not_apply_blacklist(self):
98+
report = self.reporter.feature_report(
99+
"test_feature_not_apply_blacklist",
100+
{
101+
"private": "confidential",
102+
"inner": {"private": "confidential"},
103+
},
104+
publish=False,
105+
apply_blacklist=False,
106+
)
107+
self.assertTrue(
108+
"parameter:{}={}".format("private", "confidential") in report.tags
109+
)
110+
self.assertTrue("parameter:{}={{}}".format("inner") not in report.tags)
80111

81112
def test_record_call(self):
82113
@self.reporter.record_call

python/setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
setup(
88
name="humbug",
9-
version="0.2.7",
9+
version="0.2.8",
1010
packages=find_packages(),
1111
package_data={"humbug": ["py.typed"]},
1212
install_requires=[

0 commit comments

Comments
 (0)