Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 15 additions & 3 deletions datastore/additional_data/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,21 @@ def create(self, grant, source_file, additional_data_sources=DATA_SOURCES):

for additional_data_source in additional_data_sources:
try:
getattr(self, additional_data_source).update_additional_data(
grant, source_file, additional_data
)
source_instance = getattr(self, additional_data_source)
if additional_data_source == "grant_metadata":
# Build sources dict excluding grant_metadata itself
sources_dict = {
key: getattr(self, key)
for key in additional_data_sources
if key != "grant_metadata"
}
source_instance.update_additional_data(
grant, source_file, additional_data, sources=sources_dict
)
else:
source_instance.update_additional_data(
grant, source_file, additional_data
)
except AttributeError:
raise Exception(
f"Data source {additional_data_source} is not a known additional data source."
Expand Down
3 changes: 3 additions & 0 deletions datastore/additional_data/sources/codelist_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ class CodeListSource(object):
responsible for field: codeListLookup
"""

ADDITIONAL_DATA_KEY = "codeListLookup"
LICENCE = "https://creativecommons.org/licenses/by/4.0/"

def import_codelists(self):
with transaction.atomic():
CodelistCode.objects.all().delete()
Expand Down
3 changes: 3 additions & 0 deletions datastore/additional_data/sources/find_that_charity.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,9 @@ class FindThatCharitySource(object):
FindThatCharity (FTC) organisation info data sources"""

ADDITIONAL_DATA_KEY = "recipientOrgInfos"
LICENCE = (
"https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/"
)

def __init__(self, *args, **kwargs):
# A basic internal memory cache to avoid hitting the db on duplicate
Expand Down
5 changes: 5 additions & 0 deletions datastore/additional_data/sources/geo_lookup.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@ class GeoLookupSource(object):
"""Imports geographical lookups from https://github.com/drkane/geo-lookups/
These allow for looking up from lower-level geography like Ward to a standard local authority, region, etc"""

ADDITIONAL_DATA_KEY = "locationLookup"
LICENCE = (
"https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/"
)

# Source URLS
SOURCE_URLS = {
"lsoa": {
Expand Down
19 changes: 18 additions & 1 deletion datastore/additional_data/sources/grant_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@ class GrantMetadataSource(object):
"""Adds metadata to a grant:
* metadata/source_license
* metadata/source_license_name
* metadata/sources_metadata
"""

ADDITIONAL_DATA_KEY = "metadata"

def update_additional_data(self, grant, source_file, additional_data):
def update_additional_data(self, grant, source_file, additional_data, sources=None):

additional_data[self.ADDITIONAL_DATA_KEY] = {}

Expand All @@ -20,3 +21,19 @@ def update_additional_data(self, grant, source_file, additional_data):
additional_data[self.ADDITIONAL_DATA_KEY][
"source_license"
] = source_file.get("license")

# Aggregate licenses from all additional_data sources
sources_metadata = {}
if sources:
for source_name, source_instance in sources.items():
if hasattr(source_instance, "LICENCE") and hasattr(
source_instance, "ADDITIONAL_DATA_KEY"
):
sources_metadata[source_instance.ADDITIONAL_DATA_KEY] = {
"license": source_instance.LICENCE
}

if sources_metadata:
additional_data[self.ADDITIONAL_DATA_KEY][
"sources_metadata"
] = sources_metadata
4 changes: 4 additions & 0 deletions datastore/additional_data/sources/nspl.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ class NSPLSource(object):
# Search for the most recent "National Statistics Postcode Lookup 2021 Census"
# https://geoportal.statistics.gov.uk/search?collection=Dataset&q=National%20Statistics%20Postcode%20Lookup%20-%202021%20Census&sort=-modified&source=office%20for%20national%20statistics&tags=national%20statistics%20postcode%20lookup%2C2021_cencus&type=csv%20collection
NSPL_URL = "https://www.arcgis.com/sharing/rest/content/items/204e40244d4d4903ba1861d492f47d29/data"
ADDITIONAL_DATA_KEY = "recipientOrganizationLocation"
LICENCE = (
"https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/"
)

def __init__(self):
self._nspl_cache = {}
Expand Down
91 changes: 91 additions & 0 deletions datastore/tests/test_additional_data_license.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,15 @@
from django.test import TestCase

from additional_data.sources.grant_metadata import GrantMetadataSource
from additional_data.sources.find_that_charity import FindThatCharitySource
from additional_data.sources.geo_lookup import GeoLookupSource
from additional_data.sources.nspl import NSPLSource
from additional_data.sources.codelist_code import CodeListSource
from db.models import Grant

OGL_V3 = "https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/"
CC_BY_4 = "https://creativecommons.org/licenses/by/4.0/"


class TestAdditionalDataLicense(TestCase):
fixtures = ["test_data.json"]
Expand Down Expand Up @@ -35,3 +42,87 @@ def test_publisher_license(self):
expected_additional_data,
"The additional license data added is not correct.",
)


class TestAdditionalDataSourceLicences(TestCase):
    """Checks the per-source licence block written under metadata/sources_metadata."""

    fixtures = ["test_data.json"]

    def test_source_licenses_aggregation(self):
        """GrantMetadataSource should record the licence of every source passed to it."""
        first_grant = Grant.objects.first()
        enriched = {}

        # Source instances keyed by name, handed to the metadata source below.
        source_instances = {
            "find_that_charity_source": FindThatCharitySource(),
            "geo_lookup": GeoLookupSource(),
            "nspl_source": NSPLSource(),
            "code_lists": CodeListSource(),
        }

        metadata_source = GrantMetadataSource()

        # Exercise the optional ``sources`` keyword argument.
        metadata_source.update_additional_data(
            first_grant.data,
            first_grant.source_file.data,
            enriched,
            sources=source_instances,
        )

        # The top-level container must exist before anything inside it is inspected.
        self.assertIn("metadata", enriched, "metadata key was not added.")
        self.assertIn(
            "sources_metadata",
            enriched["metadata"],
            "sources_metadata was not aggregated by GrantMetadataSource.",
        )

        per_source = enriched["metadata"]["sources_metadata"]

        # One row per source:
        # (additional data key, expected licence, message if absent, message if wrong).
        expectations = (
            (
                "recipientOrgInfos",
                OGL_V3,
                "FindThatCharitySource license not aggregated.",
                "FindThatCharitySource has wrong license.",
            ),
            (
                "locationLookup",
                OGL_V3,
                "GeoLookupSource license not aggregated.",
                "GeoLookupSource has wrong license.",
            ),
            (
                "recipientOrganizationLocation",
                OGL_V3,
                "NSPLSource license not aggregated.",
                "NSPLSource has wrong license.",
            ),
            (
                "codeListLookup",
                CC_BY_4,
                "CodeListSource license not aggregated.",
                "CodeListSource has wrong license.",
            ),
        )

        # Presence is asserted before the value so a missing key fails with the
        # "not aggregated" message rather than a KeyError.
        for data_key, expected_licence, absent_msg, mismatch_msg in expectations:
            self.assertIn(data_key, per_source, absent_msg)
            self.assertEqual(
                per_source[data_key]["license"],
                expected_licence,
                mismatch_msg,
            )
Loading