Skip to content

Commit 4a00b51

Browse files
authored
Merged Pull Request '#282 feat/share-usage->main: FEAT: ShareUsage example'
1 parent e4abb1d commit 4a00b51

1 file changed

Lines changed: 192 additions & 0 deletions

File tree

  • fiftyone_devicedetection_examples/src/fiftyone_devicedetection_examples/onpremise
Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,192 @@
1+
# *********************************************************************
2+
# This Original Work is copyright of 51 Degrees Mobile Experts Limited.
3+
# Copyright 2026 51 Degrees Mobile Experts Limited, Davidson House,
4+
# Forbury Square, Reading, Berkshire, United Kingdom RG1 3EU.
5+
#
6+
# This Original Work is licensed under the European Union Public Licence
7+
# (EUPL) v.1.2 and is subject to its terms as set out below.
8+
#
9+
# If a copy of the EUPL was not distributed with this file, You can obtain
10+
# one at https://opensource.org/licenses/EUPL-1.2.
11+
#
12+
# The 'Compatible Licences' set out in the Appendix to the EUPL (as may be
13+
# amended by the European Commission) shall be deemed incompatible for
14+
# the purposes of the Work and the provisions of the compatibility
15+
# clause in Article 5 of the EUPL shall not apply.
16+
#
17+
# If using the Work as, or as part of, a network application, by
18+
# including the attribution notice(s) required under Article 5 of the EUPL
19+
# in the end user terms of the application under an appropriate heading,
20+
# such notice(s) shall fulfill the requirements of that article.
21+
# *********************************************************************
22+
23+
## @example onpremise/shareusage.py
24+
#
25+
# Demonstrates how to use the ShareUsage element directly to send evidence data
26+
# to 51Degrees for usage sharing. This example shows how to:
27+
#
28+
# - Build a pipeline with only a ShareUsage element (no device detection)
29+
# - Configure batch size (minimum entries per message)
30+
# - Configure sampling rate (share percentage)
31+
# - Process evidence from a YAML file
32+
# - Add custom identifiers to track the data source
33+
#
34+
# This is useful for scenarios where you want to share usage data without
35+
# performing device detection, or when you want fine-grained control over
36+
# the share usage settings.
37+
#
38+
# **Note on Client IP Address:** The client IP address is included in the shared
39+
# evidence solely for deduplication purposes. This allows 51Degrees' machine learning
40+
# algorithms to properly weight evidence coming from different sources versus repeated
41+
# evidence from the same source. Without this, the training data could be skewed by
42+
# over-representing certain device configurations.
43+
#
44+
# **Identifying the Data Source (usage-from):** To help 51Degrees identify which
45+
# customer or partner is sending usage data, you can add a custom "usage-from" header
46+
# to the evidence. This is done by adding evidence with key `header.usage-from`
47+
# and your company/application name as the value. In the XML packet sent to 51Degrees,
48+
# this appears as: `<header Name="usage-from">YourCompanyName</header>`.
49+
# Replace "YourCompanyName" in the `USAGE_FROM_VALUE` constant with your actual
50+
# identifier before running this example.
51+
#
52+
# @include{doc} example-require-datafile.txt
53+
#
54+
# Required PyPI Dependencies:
55+
# - [fiftyone_pipeline_engines_fiftyone](https://pypi.org/project/fiftyone-pipeline-engines-fiftyone/)
56+
# - [ruamel.yaml](https://pypi.org/project/ruamel.yaml/)
57+
58+
import random
59+
import sys
60+
import time
61+
62+
from fiftyone_pipeline_core.pipelinebuilder import PipelineBuilder
63+
from fiftyone_pipeline_engines_fiftyone.share_usage import ShareUsage
64+
from fiftyone_devicedetection_examples.example_utils import ExampleUtils
65+
from fiftyone_devicedetection_shared.example_constants import EVIDENCE_FILE_NAME
66+
from ruamel.yaml import YAML
67+
68+
# Disable SSL verification for testing
69+
import urllib3
70+
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
71+
import requests
72+
_original_post = requests.post
73+
requests.post = lambda *args, **kwargs: _original_post(*args, **{**kwargs, 'verify': False})
74+
75+
# Configurable settings

# Send after 10 entries (default is 10)
REQUESTED_PACKAGE_SIZE = 10

# Share 100% of data (1 = 100%)
SHARE_PERCENTAGE = 1

# Number of records to process
RECORDS_TO_PROCESS = 100

# Evidence key under which the client IP address is stored. The address is
# only used for evidence deduplication (see the note in the file header).
EVIDENCE_CLIENTIP_KEY = "server.client-ip"

# Evidence key used to identify where the shared usage data comes from.
# It results in a <header Name="usage-from">YourCompanyName</header> element
# in the XML packet, which lets 51Degrees tell which customer/partner sent
# the data.
EVIDENCE_USAGE_FROM_KEY = "header.usage-from"

# Replace "YourCompanyName" with your actual company or application identifier.
USAGE_FROM_VALUE = "YourCompanyName"
89+
90+
91+
def generate_random_ip():
    """Return a random dotted-quad IPv4 address string for demonstration.

    Each of the four parts is drawn independently from 0-255 inclusive.
    In a real application the address would come from the HTTP request."""
    octets = [str(random.randint(0, 255)) for _ in range(4)]
    return ".".join(octets)
95+
96+
97+
def filter_evidence(evidence, prefix):
    """Return a new dict with only the evidence entries whose key starts with prefix.

    The input mapping is not modified and the original entry order is kept."""

    def has_prefix(entry):
        key, _ = entry
        return key.startswith(prefix)

    return dict(filter(has_prefix, evidence.items()))
100+
101+
102+
class ShareUsageExample():
    def run(self, evidence_yaml):
        """!
        Pass evidence records read from YAML through a pipeline whose only
        element is ShareUsage.

        At most RECORDS_TO_PROCESS records are processed. Once the loop has
        finished the method sleeps for five seconds before returning.

        @param evidence_yaml: File containing the yaml representation of the evidence to process
        """

        print("Starting ShareUsage example")
        settings_summary = (
            f"Settings: requested_package_size={REQUESTED_PACKAGE_SIZE}, "
            f"share_percentage={SHARE_PERCENTAGE * 100}%, "
            f"records_to_process={RECORDS_TO_PROCESS}"
        )
        print(settings_summary)

        # Configure the ShareUsage element with the settings defined at the
        # top of this file.
        usage_sharer = ShareUsage(
            # Minimum number of entries collected before a batch is sent
            requested_package_size=REQUESTED_PACKAGE_SIZE,
            # Share 100% of data (1 = 100%)
            share_percentage=SHARE_PERCENTAGE,
            # Disable repeat evidence filtering for this demo (share all evidence)
            interval=0,
            # Optional: set a custom URL (default is 51Degrees endpoint)
            # endpoint="https://your-custom-endpoint.com/usage",
            # Optional: block specific headers from being shared
            # header_blacklist=["authorization"],
            # Optional: include specific query string parameters
            # query_whitelist=["campaign"],
        )

        # ShareUsage is the only element added - no device detection engine.
        builder = PipelineBuilder()
        pipeline = builder.add(usage_sharer).build()

        print("Pipeline built successfully with ShareUsage element")

        # The evidence file holds a stream of YAML documents, one per record.
        documents = YAML().load_all(evidence_yaml)

        processed = 0
        for document in documents:
            # Stop once the configured number of records has been handled.
            if processed >= RECORDS_TO_PROCESS:
                break

            # Keep only the header.* entries and store every value as a string.
            headers_only = filter_evidence(document, "header.")
            record = {key: str(value) for key, value in headers_only.items()}

            # Attach a client IP address. It is shared solely so that repeated
            # evidence from one source can be told apart from evidence coming
            # from different sources. A real web application would take it
            # from the request (e.g., REMOTE_ADDR or X-Forwarded-For header);
            # this demo makes one up.
            record[EVIDENCE_CLIENTIP_KEY] = generate_random_ip()

            # Tag the record with the usage-from identifier so 51Degrees knows
            # who sent it. It shows up in the XML as
            # <header Name="usage-from">YourCompanyName</header>.
            record[EVIDENCE_USAGE_FROM_KEY] = USAGE_FROM_VALUE

            # Run the record through the pipeline.
            flow_data = pipeline.create_flowdata()
            flow_data.evidence.add_from_dict(record)
            flow_data.process()

            processed += 1
            # Progress report every ten records.
            if processed % 10 == 0:
                print(f"Processed {processed} records")

        print(f"Finished processing {processed} records")
        print("Waiting for share usage to complete sending...")

        # Give the background thread time to send any remaining data
        time.sleep(5)

        print("Done!")
175+
176+
177+
def main(argv):
    """!
    Locate the evidence file and run the ShareUsage example against it.

    @param argv: Command line arguments without the program name. When at
    least one argument is given, the first is used as the path of the
    evidence file; otherwise the default evidence file is searched for.
    """
    # This file contains the 20,000 most commonly seen combinations of header values
    # that are relevant to device detection. For example, User-Agent and UA-CH headers.
    evidence_file = argv[0] if argv else ExampleUtils.find_file(EVIDENCE_FILE_NAME)

    if evidence_file is None:
        print("ERROR: Failed to find the evidence file. Make sure the "
              "device-detection-data submodule has been updated by running "
              "`git submodule update --recursive`.")
        return

    # Open with an explicit encoding so decoding does not depend on the
    # platform's default locale, and avoid shadowing the builtin `input`.
    with open(evidence_file, "r", encoding="utf-8") as evidence_stream:
        ShareUsageExample().run(evidence_stream)
189+
190+
191+
if __name__ == "__main__":
    # Pass on everything after the script name.
    cli_arguments = sys.argv[1:]
    main(cli_arguments)

0 commit comments

Comments
 (0)