|
| 1 | +# ********************************************************************* |
| 2 | +# This Original Work is copyright of 51 Degrees Mobile Experts Limited. |
| 3 | +# Copyright 2026 51 Degrees Mobile Experts Limited, Davidson House, |
| 4 | +# Forbury Square, Reading, Berkshire, United Kingdom RG1 3EU. |
| 5 | +# |
| 6 | +# This Original Work is licensed under the European Union Public Licence |
| 7 | +# (EUPL) v.1.2 and is subject to its terms as set out below. |
| 8 | +# |
| 9 | +# If a copy of the EUPL was not distributed with this file, You can obtain |
| 10 | +# one at https://opensource.org/licenses/EUPL-1.2. |
| 11 | +# |
| 12 | +# The 'Compatible Licences' set out in the Appendix to the EUPL (as may be |
| 13 | +# amended by the European Commission) shall be deemed incompatible for |
| 14 | +# the purposes of the Work and the provisions of the compatibility |
| 15 | +# clause in Article 5 of the EUPL shall not apply. |
| 16 | +# |
| 17 | +# If using the Work as, or as part of, a network application, by |
| 18 | +# including the attribution notice(s) required under Article 5 of the EUPL |
| 19 | +# in the end user terms of the application under an appropriate heading, |
| 20 | +# such notice(s) shall fulfill the requirements of that article. |
| 21 | +# ********************************************************************* |
| 22 | + |
| 23 | +## @example onpremise/shareusage.py |
| 24 | +# |
| 25 | +# Demonstrates how to use the ShareUsage element directly to send evidence data |
| 26 | +# to 51Degrees for usage sharing. This example shows how to: |
| 27 | +# |
| 28 | +# - Build a pipeline with only a ShareUsage element (no device detection) |
| 29 | +# - Configure batch size (minimum entries per message) |
| 30 | +# - Configure sampling rate (share percentage) |
| 31 | +# - Process evidence from a YAML file |
| 32 | +# - Add custom identifiers to track the data source |
| 33 | +# |
| 34 | +# This is useful for scenarios where you want to share usage data without |
| 35 | +# performing device detection, or when you want fine-grained control over |
| 36 | +# the share usage settings. |
| 37 | +# |
| 38 | +# **Note on Client IP Address:** The client IP address is included in the shared |
| 39 | +# evidence solely for deduplication purposes. This allows 51Degrees' machine learning |
| 40 | +# algorithms to properly weight evidence coming from different sources versus repeated |
| 41 | +# evidence from the same source. Without this, the training data could be skewed by |
| 42 | +# over-representing certain device configurations. |
| 43 | +# |
| 44 | +# **Identifying the Data Source (usage-from):** To help 51Degrees identify which |
| 45 | +# customer or partner is sending usage data, you can add a custom "usage-from" header |
| 46 | +# to the evidence. This is done by adding evidence with key `header.usage-from` |
| 47 | +# and your company/application name as the value. In the XML packet sent to 51Degrees, |
| 48 | +# this appears as: `<header Name="usage-from">YourCompanyName</header>`. |
| 49 | +# Replace "YourCompanyName" in the `USAGE_FROM_VALUE` constant with your actual |
| 50 | +# identifier before running this example. |
| 51 | +# |
| 52 | +# @include{doc} example-require-datafile.txt |
| 53 | +# |
# Required PyPI dependencies:
| 55 | +# - [fiftyone_pipeline_engines_fiftyone](https://pypi.org/project/fiftyone-pipeline-engines-fiftyone/) |
| 56 | +# - [ruamel.yaml](https://pypi.org/project/ruamel.yaml/) |
| 57 | + |
| 58 | +import random |
| 59 | +import sys |
| 60 | +import time |
| 61 | + |
| 62 | +from fiftyone_pipeline_core.pipelinebuilder import PipelineBuilder |
| 63 | +from fiftyone_pipeline_engines_fiftyone.share_usage import ShareUsage |
| 64 | +from fiftyone_devicedetection_examples.example_utils import ExampleUtils |
| 65 | +from fiftyone_devicedetection_shared.example_constants import EVIDENCE_FILE_NAME |
| 66 | +from ruamel.yaml import YAML |
| 67 | + |
# TLS certificate verification is left ON by default: the evidence shared by
# this example includes client IP addresses and HTTP header values, so it
# should not be sent over a connection whose peer has not been verified.
#
# For local testing only (for example against an endpoint that uses a
# self-signed certificate) verification can be switched off by setting the
# environment variable SHAREUSAGE_EXAMPLE_DISABLE_SSL_VERIFY=1 before running.
import os

import requests
import urllib3

# Keep a handle on the real implementation so the wrapper below can call it.
_original_post = requests.post

if os.environ.get("SHAREUSAGE_EXAMPLE_DISABLE_SSL_VERIFY") == "1":
    # Silence the warning urllib3 emits for each unverified HTTPS request.
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

    def _post_without_verification(*args, **kwargs):
        """Call the real ``requests.post`` with certificate checks disabled."""
        kwargs["verify"] = False
        return _original_post(*args, **kwargs)

    # NOTE: this replaces requests.post for the whole process, not only for
    # calls made on behalf of the ShareUsage element.
    requests.post = _post_without_verification
| 74 | + |
# ---------------------------------------------------------------------
# Settings used by this example
# ---------------------------------------------------------------------

# Number of entries to collect before a batch is sent (default is 10).
REQUESTED_PACKAGE_SIZE = 10

# Proportion of the data to share, where 1 means 100%.
SHARE_PERCENTAGE = 1

# Upper limit on the number of evidence records this example processes.
RECORDS_TO_PROCESS = 100

# Evidence key under which the client IP address is stored. The address is
# included in the shared evidence solely so that repeated evidence from the
# same source can be de-duplicated.
EVIDENCE_CLIENTIP_KEY = "server.client-ip"

# Evidence key and value that identify where the share usage data comes from.
# They produce a <header Name="usage-from">YourCompanyName</header> element in
# the XML packet, which lets 51Degrees tell which customer/partner sent it.
# Change "YourCompanyName" to your own company or application identifier.
EVIDENCE_USAGE_FROM_KEY = "header.usage-from"
USAGE_FROM_VALUE = "YourCompanyName"
| 89 | + |
| 90 | + |
def generate_random_ip():
    """Return a random dotted-quad IPv4 address string for demonstration.

    Each of the four octets is drawn independently from the range 0-255.
    In a real application the address would come from the HTTP request.
    """
    octets = []
    for _ in range(4):
        octets.append(str(random.randint(0, 255)))
    return ".".join(octets)
| 95 | + |
| 96 | + |
def filter_evidence(evidence, prefix):
    """Return a new dict holding only the entries whose key starts with prefix.

    The input mapping is not modified and the original entry order is kept.
    """
    selected = {}
    for key, value in evidence.items():
        if not key.startswith(prefix):
            continue
        selected[key] = value
    return selected
| 100 | + |
| 101 | + |
class ShareUsageExample():
    """Share evidence through a pipeline that contains only a ShareUsage element."""

    # A progress line is printed each time this many records have been processed.
    _PROGRESS_INTERVAL = 10

    def run(self, evidence_yaml):
        """!
        Process a YAML representation of evidence and share it via the ShareUsage element.

        Reads YAML documents from the stream, keeps only their `header.*`
        entries, adds a client IP address and the usage-from identifier, and
        passes each record through the pipeline. Processing stops after
        RECORDS_TO_PROCESS records. Documents that are not mappings (for
        example the empty document produced by a stray `---`) are skipped
        instead of aborting the run.

        @param evidence_yaml: File containing the yaml representation of the evidence to process
        """
        # Imported locally so this block does not rely on a file-level import.
        from collections.abc import Mapping

        print("Starting ShareUsage example")
        print(f"Settings: requested_package_size={REQUESTED_PACKAGE_SIZE}, "
              f"share_percentage={SHARE_PERCENTAGE * 100}%, "
              f"records_to_process={RECORDS_TO_PROCESS}")

        # Build the ShareUsage element with custom settings
        share_usage_element = ShareUsage(
            # Set the minimum number of entries before sending a batch
            requested_package_size=REQUESTED_PACKAGE_SIZE,
            # Share 100% of data (1 = 100%)
            share_percentage=SHARE_PERCENTAGE,
            # Disable repeat evidence filtering for this demo (share all evidence)
            interval=0,
            # Optional: set a custom URL (default is 51Degrees endpoint)
            # endpoint="https://your-custom-endpoint.com/usage",
            # Optional: block specific headers from being shared
            # header_blacklist=["authorization"],
            # Optional: include specific query string parameters
            # query_whitelist=["campaign"],
        )

        # Build a pipeline with only the ShareUsage element
        pipeline = PipelineBuilder().add(share_usage_element).build()

        print("Pipeline built successfully with ShareUsage element")

        # Load YAML evidence lazily, one document at a time
        yaml = YAML()
        yaml_data = yaml.load_all(evidence_yaml)

        # Process evidence records
        count = 0
        skipped = 0
        for evidence in yaml_data:
            if count >= RECORDS_TO_PROCESS:
                break

            # An empty or scalar YAML document has no key/value pairs to
            # share; calling .items() on it would raise AttributeError.
            if not isinstance(evidence, Mapping):
                skipped += 1
                continue

            # Filter to only header.* entries
            data = filter_evidence(
                {k: str(v) for k, v in evidence.items()},
                "header.")

            # Add a client IP address to the evidence for deduplication purposes
            # (see the note on client IP address at the top of this file). In a
            # real web application, this would come from the request
            # (e.g., REMOTE_ADDR or X-Forwarded-For header)
            data[EVIDENCE_CLIENTIP_KEY] = generate_random_ip()

            # Add the usage-from identifier so 51Degrees knows the source of this data.
            # This appears as <header Name="usage-from">YourCompanyName</header> in the XML.
            data[EVIDENCE_USAGE_FROM_KEY] = USAGE_FROM_VALUE

            # Create flow data and process
            flow_data = pipeline.create_flowdata()
            flow_data.evidence.add_from_dict(data)
            flow_data.process()

            count += 1
            if count % self._PROGRESS_INTERVAL == 0:
                print(f"Processed {count} records")

        print(f"Finished processing {count} records")
        if skipped:
            print(f"Skipped {skipped} YAML documents that were not mappings")
        print("Waiting for share usage to complete sending...")

        # Give the background thread time to send any remaining data
        time.sleep(5)

        print("Done!")
| 175 | + |
| 176 | + |
def main(argv):
    """!
    Locate the evidence file and run the ShareUsage example on it.

    Prints an error message and returns without running the example when no
    evidence file can be found.

    @param argv: Command line arguments excluding the program name. When
    present, the first argument is used as the path of the evidence YAML file;
    otherwise the file is looked up with ExampleUtils.find_file.
    """
    # This file contains the 20,000 most commonly seen combinations of header values
    # that are relevant to device detection. For example, User-Agent and UA-CH headers.
    evidence_file = argv[0] if argv else ExampleUtils.find_file(EVIDENCE_FILE_NAME)

    if evidence_file is None:
        print("ERROR: Failed to find the evidence file. Make sure the "
              "device-detection-data submodule has been updated by running "
              "`git submodule update --recursive`.")
        return

    # Decode explicitly as UTF-8 so reading the YAML does not depend on the
    # platform's default locale encoding.
    with open(evidence_file, "r", encoding="utf-8") as evidence_stream:
        ShareUsageExample().run(evidence_stream)


if __name__ == "__main__":
    main(sys.argv[1:])
0 commit comments