From fda34e06843986ae23c1e1249d4ccd60a6002074 Mon Sep 17 00:00:00 2001 From: Bibiana Cristofol Amat Date: Wed, 3 Jun 2026 14:05:42 +0100 Subject: [PATCH 1/2] additional_data: sources: add licence info https://github.com/ThreeSixtyGiving/datastore/issues/213 --- .../additional_data/sources/codelist_code.py | 5 +++ .../sources/find_that_charity.py | 5 +++ .../additional_data/sources/geo_lookup.py | 7 +++ datastore/additional_data/sources/nspl.py | 6 +++ .../test_additional_data_codelist_code.py | 3 +- .../tests/test_additional_data_license.py | 44 +++++++++++++++++++ datastore/tests/test_additional_data_nspl.py | 6 ++- 7 files changed, 73 insertions(+), 3 deletions(-) diff --git a/datastore/additional_data/sources/codelist_code.py b/datastore/additional_data/sources/codelist_code.py index 492bd2cb..0263adad 100644 --- a/datastore/additional_data/sources/codelist_code.py +++ b/datastore/additional_data/sources/codelist_code.py @@ -23,6 +23,9 @@ class CodeListSource(object): responsible for field: codeListLookup """ + ADDITIONAL_DATA_KEY = "codeListLookup" + LICENCE = "https://creativecommons.org/licenses/by/4.0/" + def import_codelists(self): with transaction.atomic(): CodelistCode.objects.all().delete() @@ -136,3 +139,5 @@ def update_additional_data(self, grant, source_file, additional_data): "recipientOrg_location_countryCode": recipientOrg_location_countryCode, "fundingOrg_location_countryCode": fundingOrg_location_countryCode, } + + additional_data[f"{self.ADDITIONAL_DATA_KEY}_LICENCE"] = self.LICENCE diff --git a/datastore/additional_data/sources/find_that_charity.py b/datastore/additional_data/sources/find_that_charity.py index 3d202f97..94d7de5f 100644 --- a/datastore/additional_data/sources/find_that_charity.py +++ b/datastore/additional_data/sources/find_that_charity.py @@ -74,6 +74,9 @@ class FindThatCharitySource(object): FindThatCharity (FTC) organisation info data sources""" ADDITIONAL_DATA_KEY = "recipientOrgInfos" + LICENCE = ( + "https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/" + ) def __init__(self, *args, **kwargs): # A basic internal memory cache to avoid hitting the db on duplicate @@ -83,6 +86,8 @@ def __init__(self, *args, **kwargs): self._cache = {} def update_additional_data(self, grant, source_file, additional_data): + additional_data[f"{self.ADDITIONAL_DATA_KEY}_LICENCE"] = self.LICENCE + # We can't do anything if this grant doesn't have a recipientOrganization if not grant.get("recipientOrganization"): return diff --git a/datastore/additional_data/sources/geo_lookup.py b/datastore/additional_data/sources/geo_lookup.py index cd81f13d..970cc0a0 100644 --- a/datastore/additional_data/sources/geo_lookup.py +++ b/datastore/additional_data/sources/geo_lookup.py @@ -13,6 +13,11 @@ class GeoLookupSource(object): """Imports geographical lookups from https://github.com/drkane/geo-lookups/ These allow for looking up from lower-level geography like Ward to a standard local authority, region, etc""" + ADDITIONAL_DATA_KEY = "locationLookup" + LICENCE = ( + "https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/" + ) + # Source URLS SOURCE_URLS = { "lsoa": { @@ -143,3 +148,5 @@ def update_additional_data(self, grant, source_file, additional_data): area["source"] = "recipientOrganizationPostcode" area["sourceCode"] = lsoa additional_data["locationLookup"].append(area) + + additional_data[f"{self.ADDITIONAL_DATA_KEY}_LICENCE"] = self.LICENCE diff --git a/datastore/additional_data/sources/nspl.py b/datastore/additional_data/sources/nspl.py index 64e598c9..9609da0f 100644 --- a/datastore/additional_data/sources/nspl.py +++ b/datastore/additional_data/sources/nspl.py @@ -24,6 +24,10 @@ class NSPLSource(object): # Search for the most recent "National Statistics Postcode Lookup 2021 Cencus" # https://geoportal.statistics.gov.uk/search?collection=Dataset&q=National%20Statistics%20Postcode%20Lookup%20-%202021%20Census&sort=-modified&source=office%20for%20national%20statistics&tags=national%20statistics%20postcode%20lookup%2C2021_cencus&type=csv%20collection NSPL_URL = "https://www.arcgis.com/sharing/rest/content/items/204e40244d4d4903ba1861d492f47d29/data" + ADDITIONAL_DATA_KEY = "recipientOrganizationLocation" + LICENCE = ( + "https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/" + ) def __init__(self): self._nspl_cache = {} @@ -204,3 +208,5 @@ def update_additional_data(self, grant, source_file, additional_data): self.update_location_data_code_names(location_data) additional_data["recipientOrganizationLocation"] = location_data break + + additional_data[f"{self.ADDITIONAL_DATA_KEY}_LICENCE"] = self.LICENCE diff --git a/datastore/tests/test_additional_data_codelist_code.py b/datastore/tests/test_additional_data_codelist_code.py index 01caabab..e4ca1981 100644 --- a/datastore/tests/test_additional_data_codelist_code.py +++ b/datastore/tests/test_additional_data_codelist_code.py @@ -37,7 +37,8 @@ def test_code_list(self): "beneficiaryLocation_countryCode": "Australia", "fundingOrg_location_countryCode": "France", "recipientOrg_location_countryCode": "United Kingdom of Great Britain and Northern Ireland", - } + }, + "codeListLookup_LICENCE": "https://creativecommons.org/licenses/by/4.0/", } source.update_additional_data(grant, source_file, additional_data_in) diff --git a/datastore/tests/test_additional_data_license.py b/datastore/tests/test_additional_data_license.py index b844af79..b682ab7e 100644 --- a/datastore/tests/test_additional_data_license.py +++ b/datastore/tests/test_additional_data_license.py @@ -1,8 +1,15 @@ from django.test import TestCase from additional_data.sources.grant_metadata import GrantMetadataSource +from additional_data.sources.find_that_charity import FindThatCharitySource +from additional_data.sources.geo_lookup import GeoLookupSource +from additional_data.sources.nspl import NSPLSource +from additional_data.sources.codelist_code import CodeListSource from db.models import Grant +OGL_V3 = "https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/" +CC_BY_4 = "https://creativecommons.org/licenses/by/4.0/" + class TestAdditionalDataLicense(TestCase): fixtures = ["test_data.json"] @@ -35,3 +42,40 @@ def test_publisher_license(self): expected_additional_data, "The additional license data added is not correct.", ) + + +class TestAdditionalDataSourceLicences(TestCase): + fixtures = ["test_data.json"] + + def _assert_licence(self, source, expected_licence): + grant = Grant.objects.first() + additional_data = {} + + source.update_additional_data( + grant.data, grant.source_file.data, additional_data + ) + + licence_key = f"{source.ADDITIONAL_DATA_KEY}_LICENCE" + + self.assertIn( + licence_key, + additional_data, + f"{licence_key} was not added by {source.__class__.__name__}.", + ) + self.assertEqual( + additional_data[licence_key], + expected_licence, + f"{source.__class__.__name__} set the wrong licence value.", + ) + + def test_find_that_charity_licence(self): + self._assert_licence(FindThatCharitySource(), OGL_V3) + + def test_geo_lookup_licence(self): + self._assert_licence(GeoLookupSource(), OGL_V3) + + def test_nspl_licence(self): + self._assert_licence(NSPLSource(), OGL_V3) + + def test_codelist_lookup_licence(self): + self._assert_licence(CodeListSource(), CC_BY_4) diff --git a/datastore/tests/test_additional_data_nspl.py b/datastore/tests/test_additional_data_nspl.py index c32a39ef..0696ed98 100644 --- a/datastore/tests/test_additional_data_nspl.py +++ b/datastore/tests/test_additional_data_nspl.py @@ -200,7 +200,8 @@ def test_nspl_update_additional_data_with_not_existing_postcode(self): nspl.update_additional_data(grant.data, grant.source_file.data, additional_data) self.assertNotIn("recipientOrganizationLocation", additional_data) - self.assertEqual(len(additional_data), 0) + self.assertEqual(len(additional_data), 1) + self.assertIn("recipientOrganizationLocation_LICENCE", additional_data) def test_nspl_update_additional_data_without_postcode(self): self.save_nspl_mock_data() @@ -214,7 +215,8 @@ def test_nspl_update_additional_data_without_postcode(self): nspl.update_additional_data(grant.data, grant.source_file.data, additional_data) self.assertNotIn("recipientOrganizationLocation", additional_data) - self.assertEqual(len(additional_data), 0) + self.assertEqual(len(additional_data), 1) + self.assertIn("recipientOrganizationLocation_LICENCE", additional_data) def test_nspl_update_additional_data_breaks_after_updating(self): # Once we have one recipient org location, the loop should break. From 7fa3b9647e73536f1199fed6b77fb0c06b53f554 Mon Sep 17 00:00:00 2001 From: Bibiana Cristofol Amat Date: Thu, 11 Jun 2026 10:19:42 +0100 Subject: [PATCH 2/2] additional_data: sources: refactor add licence info --- datastore/additional_data/generator.py | 18 +++- .../additional_data/sources/codelist_code.py | 2 - .../sources/find_that_charity.py | 2 - .../additional_data/sources/geo_lookup.py | 2 - .../additional_data/sources/grant_metadata.py | 19 ++++- datastore/additional_data/sources/nspl.py | 2 - .../test_additional_data_codelist_code.py | 3 +- .../tests/test_additional_data_license.py | 85 ++++++++++++++----- datastore/tests/test_additional_data_nspl.py | 6 +- 9 files changed, 102 insertions(+), 37 deletions(-) diff --git a/datastore/additional_data/generator.py b/datastore/additional_data/generator.py index c32c5c8c..552d25c2 100644 --- a/datastore/additional_data/generator.py +++ b/datastore/additional_data/generator.py @@ -48,9 +48,21 @@ def create(self, grant, source_file, additional_data_sources=DATA_SOURCES): for additional_data_source in additional_data_sources: try: - getattr(self, additional_data_source).update_additional_data( - grant, source_file, additional_data - ) + source_instance = getattr(self, additional_data_source) + if additional_data_source == "grant_metadata": + # Build sources dict excluding grant_metadata itself + sources_dict = { + key: getattr(self, key) + for key in additional_data_sources + if key != "grant_metadata" + } + source_instance.update_additional_data( + grant, source_file, additional_data, sources=sources_dict + ) + else: + source_instance.update_additional_data( + grant, source_file, additional_data + ) except AttributeError: raise Exception( f"Data source {additional_data_source} is not a known additional data source." diff --git a/datastore/additional_data/sources/codelist_code.py b/datastore/additional_data/sources/codelist_code.py index 0263adad..de525883 100644 --- a/datastore/additional_data/sources/codelist_code.py +++ b/datastore/additional_data/sources/codelist_code.py @@ -139,5 +139,3 @@ def update_additional_data(self, grant, source_file, additional_data): "recipientOrg_location_countryCode": recipientOrg_location_countryCode, "fundingOrg_location_countryCode": fundingOrg_location_countryCode, } - - additional_data[f"{self.ADDITIONAL_DATA_KEY}_LICENCE"] = self.LICENCE diff --git a/datastore/additional_data/sources/find_that_charity.py b/datastore/additional_data/sources/find_that_charity.py index 94d7de5f..40a7efa4 100644 --- a/datastore/additional_data/sources/find_that_charity.py +++ b/datastore/additional_data/sources/find_that_charity.py @@ -86,8 +86,6 @@ def __init__(self, *args, **kwargs): self._cache = {} def update_additional_data(self, grant, source_file, additional_data): - additional_data[f"{self.ADDITIONAL_DATA_KEY}_LICENCE"] = self.LICENCE - # We can't do anything if this grant doesn't have a recipientOrganization if not grant.get("recipientOrganization"): return diff --git a/datastore/additional_data/sources/geo_lookup.py b/datastore/additional_data/sources/geo_lookup.py index 970cc0a0..ff6a457a 100644 --- a/datastore/additional_data/sources/geo_lookup.py +++ b/datastore/additional_data/sources/geo_lookup.py @@ -148,5 +148,3 @@ def update_additional_data(self, grant, source_file, additional_data): area["source"] = "recipientOrganizationPostcode" area["sourceCode"] = lsoa additional_data["locationLookup"].append(area) - - additional_data[f"{self.ADDITIONAL_DATA_KEY}_LICENCE"] = self.LICENCE diff --git a/datastore/additional_data/sources/grant_metadata.py b/datastore/additional_data/sources/grant_metadata.py index 54b90d18..671b3d8f 100644 --- a/datastore/additional_data/sources/grant_metadata.py +++ b/datastore/additional_data/sources/grant_metadata.py @@ -2,11 +2,12 @@ class GrantMetadataSource(object): """Adds metadata to a grant: * metadata/source_license * metadata/source_license_name + * metadata/sources_metadata """ ADDITIONAL_DATA_KEY = "metadata" - def update_additional_data(self, grant, source_file, additional_data): + def update_additional_data(self, grant, source_file, additional_data, sources=None): additional_data[self.ADDITIONAL_DATA_KEY] = {} @@ -20,3 +21,19 @@ def update_additional_data(self, grant, source_file, additional_data): additional_data[self.ADDITIONAL_DATA_KEY][ "source_license" ] = source_file.get("license") + + # Aggregate licenses from all additional_data sources + sources_metadata = {} + if sources: + for source_name, source_instance in sources.items(): + if hasattr(source_instance, "LICENCE") and hasattr( + source_instance, "ADDITIONAL_DATA_KEY" + ): + sources_metadata[source_instance.ADDITIONAL_DATA_KEY] = { + "license": source_instance.LICENCE + } + + if sources_metadata: + additional_data[self.ADDITIONAL_DATA_KEY][ + "sources_metadata" + ] = sources_metadata diff --git a/datastore/additional_data/sources/nspl.py b/datastore/additional_data/sources/nspl.py index 9609da0f..45927e54 100644 --- a/datastore/additional_data/sources/nspl.py +++ b/datastore/additional_data/sources/nspl.py @@ -208,5 +208,3 @@ def update_additional_data(self, grant, source_file, additional_data): self.update_location_data_code_names(location_data) additional_data["recipientOrganizationLocation"] = location_data break - - additional_data[f"{self.ADDITIONAL_DATA_KEY}_LICENCE"] = self.LICENCE diff --git a/datastore/tests/test_additional_data_codelist_code.py b/datastore/tests/test_additional_data_codelist_code.py index e4ca1981..01caabab 100644 --- a/datastore/tests/test_additional_data_codelist_code.py +++ b/datastore/tests/test_additional_data_codelist_code.py @@ -37,8 +37,7 @@ def test_code_list(self): "beneficiaryLocation_countryCode": "Australia", "fundingOrg_location_countryCode": "France", "recipientOrg_location_countryCode": "United Kingdom of Great Britain and Northern Ireland", - }, - "codeListLookup_LICENCE": "https://creativecommons.org/licenses/by/4.0/", + } } source.update_additional_data(grant, source_file, additional_data_in) diff --git a/datastore/tests/test_additional_data_license.py b/datastore/tests/test_additional_data_license.py index b682ab7e..a81a9fed 100644 --- a/datastore/tests/test_additional_data_license.py +++ b/datastore/tests/test_additional_data_license.py @@ -47,35 +47,82 @@ def test_publisher_license(self): class TestAdditionalDataSourceLicences(TestCase): fixtures = ["test_data.json"] - def _assert_licence(self, source, expected_licence): + def test_source_licenses_aggregation(self): + """Test that GrantMetadataSource aggregates licenses from all sources""" grant = Grant.objects.first() additional_data = {} - source.update_additional_data( - grant.data, grant.source_file.data, additional_data - ) + # Create source instances + sources = { + "find_that_charity_source": FindThatCharitySource(), + "geo_lookup": GeoLookupSource(), + "nspl_source": NSPLSource(), + "code_lists": CodeListSource(), + } - licence_key = f"{source.ADDITIONAL_DATA_KEY}_LICENCE" + grant_metadata_source = GrantMetadataSource() + + # Call with sources parameter + grant_metadata_source.update_additional_data( + grant.data, grant.source_file.data, additional_data, sources=sources + ) + # Verify the structure self.assertIn( - licence_key, + "metadata", additional_data, - f"{licence_key} was not added by {source.__class__.__name__}.", + "metadata key was not added.", ) - self.assertEqual( - additional_data[licence_key], - expected_licence, - f"{source.__class__.__name__} set the wrong licence value.", + + self.assertIn( + "sources_metadata", + additional_data["metadata"], + "sources_metadata was not aggregated by GrantMetadataSource.", ) - def test_find_that_charity_licence(self): - self._assert_licence(FindThatCharitySource(), OGL_V3) + sources_metadata = additional_data["metadata"]["sources_metadata"] - def test_geo_lookup_licence(self): - self._assert_licence(GeoLookupSource(), OGL_V3) + # Verify each source's license is present + self.assertIn( + "recipientOrgInfos", + sources_metadata, + "FindThatCharitySource license not aggregated.", + ) + self.assertEqual( + sources_metadata["recipientOrgInfos"]["license"], + OGL_V3, + "FindThatCharitySource has wrong license.", + ) - def test_nspl_licence(self): - self._assert_licence(NSPLSource(), OGL_V3) + self.assertIn( + "locationLookup", + sources_metadata, + "GeoLookupSource license not aggregated.", + ) + self.assertEqual( + sources_metadata["locationLookup"]["license"], + OGL_V3, + "GeoLookupSource has wrong license.", + ) - def test_codelist_lookup_licence(self): - self._assert_licence(CodeListSource(), CC_BY_4) + self.assertIn( + "recipientOrganizationLocation", + sources_metadata, + "NSPLSource license not aggregated.", + ) + self.assertEqual( + sources_metadata["recipientOrganizationLocation"]["license"], + OGL_V3, + "NSPLSource has wrong license.", + ) + + self.assertIn( + "codeListLookup", + sources_metadata, + "CodeListSource license not aggregated.", + ) + self.assertEqual( + sources_metadata["codeListLookup"]["license"], + CC_BY_4, + "CodeListSource has wrong license.", + ) diff --git a/datastore/tests/test_additional_data_nspl.py b/datastore/tests/test_additional_data_nspl.py index 0696ed98..c32a39ef 100644 --- a/datastore/tests/test_additional_data_nspl.py +++ b/datastore/tests/test_additional_data_nspl.py @@ -200,8 +200,7 @@ def test_nspl_update_additional_data_with_not_existing_postcode(self): nspl.update_additional_data(grant.data, grant.source_file.data, additional_data) self.assertNotIn("recipientOrganizationLocation", additional_data) - self.assertEqual(len(additional_data), 1) - self.assertIn("recipientOrganizationLocation_LICENCE", additional_data) + self.assertEqual(len(additional_data), 0) def test_nspl_update_additional_data_without_postcode(self): self.save_nspl_mock_data() @@ -215,8 +214,7 @@ def test_nspl_update_additional_data_without_postcode(self): nspl.update_additional_data(grant.data, grant.source_file.data, additional_data) self.assertNotIn("recipientOrganizationLocation", additional_data) - self.assertEqual(len(additional_data), 1) - self.assertIn("recipientOrganizationLocation_LICENCE", additional_data) + self.assertEqual(len(additional_data), 0) def test_nspl_update_additional_data_breaks_after_updating(self): # Once we have one recipient org location, the loop should break.