diff --git a/datastore/additional_data/generator.py b/datastore/additional_data/generator.py index c32c5c8c..552d25c2 100644 --- a/datastore/additional_data/generator.py +++ b/datastore/additional_data/generator.py @@ -48,9 +48,21 @@ def create(self, grant, source_file, additional_data_sources=DATA_SOURCES): for additional_data_source in additional_data_sources: try: - getattr(self, additional_data_source).update_additional_data( - grant, source_file, additional_data - ) + source_instance = getattr(self, additional_data_source) + if additional_data_source == "grant_metadata": + # Build sources dict excluding grant_metadata itself + sources_dict = { + key: getattr(self, key) + for key in additional_data_sources + if key != "grant_metadata" + } + source_instance.update_additional_data( + grant, source_file, additional_data, sources=sources_dict + ) + else: + source_instance.update_additional_data( + grant, source_file, additional_data + ) except AttributeError: raise Exception( f"Data source {additional_data_source} is not a known additional data source." 
diff --git a/datastore/additional_data/sources/codelist_code.py b/datastore/additional_data/sources/codelist_code.py index 492bd2cb..de525883 100644 --- a/datastore/additional_data/sources/codelist_code.py +++ b/datastore/additional_data/sources/codelist_code.py @@ -23,6 +23,9 @@ class CodeListSource(object): responsible for field: codeListLookup """ + ADDITIONAL_DATA_KEY = "codeListLookup" + LICENCE = "https://creativecommons.org/licenses/by/4.0/" + def import_codelists(self): with transaction.atomic(): CodelistCode.objects.all().delete() diff --git a/datastore/additional_data/sources/find_that_charity.py b/datastore/additional_data/sources/find_that_charity.py index 3d202f97..40a7efa4 100644 --- a/datastore/additional_data/sources/find_that_charity.py +++ b/datastore/additional_data/sources/find_that_charity.py @@ -74,6 +74,9 @@ class FindThatCharitySource(object): FindThatCharity (FTC) organisation info data sources""" ADDITIONAL_DATA_KEY = "recipientOrgInfos" + LICENCE = ( + "https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/" + ) def __init__(self, *args, **kwargs): # A basic internal memory cache to avoid hitting the db on duplicate diff --git a/datastore/additional_data/sources/geo_lookup.py b/datastore/additional_data/sources/geo_lookup.py index cd81f13d..ff6a457a 100644 --- a/datastore/additional_data/sources/geo_lookup.py +++ b/datastore/additional_data/sources/geo_lookup.py @@ -13,6 +13,11 @@ class GeoLookupSource(object): """Imports geographical lookups from https://github.com/drkane/geo-lookups/ These allow for looking up from lower-level geography like Ward to a standard local authority, region, etc""" + ADDITIONAL_DATA_KEY = "locationLookup" + LICENCE = ( + "https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/" + ) + # Source URLS SOURCE_URLS = { "lsoa": { diff --git a/datastore/additional_data/sources/grant_metadata.py b/datastore/additional_data/sources/grant_metadata.py index 54b90d18..671b3d8f 100644 
--- a/datastore/additional_data/sources/grant_metadata.py +++ b/datastore/additional_data/sources/grant_metadata.py @@ -2,11 +2,12 @@ class GrantMetadataSource(object): """Adds metadata to a grant: * metadata/source_license * metadata/source_license_name + * metadata/sources_metadata """ ADDITIONAL_DATA_KEY = "metadata" - def update_additional_data(self, grant, source_file, additional_data): + def update_additional_data(self, grant, source_file, additional_data, sources=None): additional_data[self.ADDITIONAL_DATA_KEY] = {} @@ -20,3 +21,19 @@ def update_additional_data(self, grant, source_file, additional_data): additional_data[self.ADDITIONAL_DATA_KEY][ "source_license" ] = source_file.get("license") + + # Aggregate licenses from all additional_data sources + sources_metadata = {} + if sources: + for source_instance in sources.values(): + if hasattr(source_instance, "LICENCE") and hasattr( + source_instance, "ADDITIONAL_DATA_KEY" + ): + sources_metadata[source_instance.ADDITIONAL_DATA_KEY] = { + "license": source_instance.LICENCE + } + + if sources_metadata: + additional_data[self.ADDITIONAL_DATA_KEY][ + "sources_metadata" + ] = sources_metadata diff --git a/datastore/additional_data/sources/nspl.py b/datastore/additional_data/sources/nspl.py index 64e598c9..45927e54 100644 --- a/datastore/additional_data/sources/nspl.py +++ b/datastore/additional_data/sources/nspl.py @@ -24,6 +24,10 @@ class NSPLSource(object): # Search for the most recent "National Statistics Postcode Lookup 2021 Cencus" # https://geoportal.statistics.gov.uk/search?collection=Dataset&q=National%20Statistics%20Postcode%20Lookup%20-%202021%20Census&sort=-modified&source=office%20for%20national%20statistics&tags=national%20statistics%20postcode%20lookup%2C2021_cencus&type=csv%20collection NSPL_URL = "https://www.arcgis.com/sharing/rest/content/items/204e40244d4d4903ba1861d492f47d29/data" + ADDITIONAL_DATA_KEY = "recipientOrganizationLocation" + LICENCE = ( + 
"https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/" + ) def __init__(self): self._nspl_cache = {} diff --git a/datastore/tests/test_additional_data_license.py b/datastore/tests/test_additional_data_license.py index b844af79..a81a9fed 100644 --- a/datastore/tests/test_additional_data_license.py +++ b/datastore/tests/test_additional_data_license.py @@ -1,8 +1,15 @@ from django.test import TestCase from additional_data.sources.grant_metadata import GrantMetadataSource +from additional_data.sources.find_that_charity import FindThatCharitySource +from additional_data.sources.geo_lookup import GeoLookupSource +from additional_data.sources.nspl import NSPLSource +from additional_data.sources.codelist_code import CodeListSource from db.models import Grant +OGL_V3 = "https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/" +CC_BY_4 = "https://creativecommons.org/licenses/by/4.0/" + class TestAdditionalDataLicense(TestCase): fixtures = ["test_data.json"] @@ -35,3 +42,87 @@ def test_publisher_license(self): expected_additional_data, "The additional license data added is not correct.", ) + + +class TestAdditionalDataSourceLicences(TestCase): + fixtures = ["test_data.json"] + + def test_source_licenses_aggregation(self): + """Test that GrantMetadataSource aggregates licenses from all sources""" + grant = Grant.objects.first() + additional_data = {} + + # Create source instances + sources = { + "find_that_charity_source": FindThatCharitySource(), + "geo_lookup": GeoLookupSource(), + "nspl_source": NSPLSource(), + "code_lists": CodeListSource(), + } + + grant_metadata_source = GrantMetadataSource() + + # Call with sources parameter + grant_metadata_source.update_additional_data( + grant.data, grant.source_file.data, additional_data, sources=sources + ) + + # Verify the structure + self.assertIn( + "metadata", + additional_data, + "metadata key was not added.", + ) + + self.assertIn( + "sources_metadata", + additional_data["metadata"], + 
"sources_metadata was not aggregated by GrantMetadataSource.", + ) + + sources_metadata = additional_data["metadata"]["sources_metadata"] + + # Verify each source's license is present + self.assertIn( + "recipientOrgInfos", + sources_metadata, + "FindThatCharitySource license not aggregated.", + ) + self.assertEqual( + sources_metadata["recipientOrgInfos"]["license"], + OGL_V3, + "FindThatCharitySource has wrong license.", + ) + + self.assertIn( + "locationLookup", + sources_metadata, + "GeoLookupSource license not aggregated.", + ) + self.assertEqual( + sources_metadata["locationLookup"]["license"], + OGL_V3, + "GeoLookupSource has wrong license.", + ) + + self.assertIn( + "recipientOrganizationLocation", + sources_metadata, + "NSPLSource license not aggregated.", + ) + self.assertEqual( + sources_metadata["recipientOrganizationLocation"]["license"], + OGL_V3, + "NSPLSource has wrong license.", + ) + + self.assertIn( + "codeListLookup", + sources_metadata, + "CodeListSource license not aggregated.", + ) + self.assertEqual( + sources_metadata["codeListLookup"]["license"], + CC_BY_4, + "CodeListSource has wrong license.", + )