Skip to content

Commit 496ffa2

Browse files
authored
Merge pull request #143 from oslokommune/validate-download-links
Validate S3 download URLs
2 parents 0f77586 + c69e93a commit 496ffa2

5 files changed

Lines changed: 23 additions & 3 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
## ?.?.? - Unreleased
22

33
* Removed support for file system credentials caching.
4+
* S3 download URLs are now validated.
45

56
## 4.0.0 - 2026-01-22
67

okdata/sdk/config.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818
"keycloakServerUrl": "https://login-test.oslo.kommune.no/auth",
1919
"pipelineUrl": "https://api.data-dev.oslo.systems/pipeline",
2020
"s3BucketUrl": "https://s3.eu-west-1.amazonaws.com/ok-origo-dataplatform-dev",
21+
"s3DownloadBaseUrl": "https://ok-origo-dataplatform-dev.s3.amazonaws.com",
2122
"tokenService": "https://api.data-dev.oslo.systems/token-service/token",
2223
"uploadUrl": "https://api.data-dev.oslo.systems/data-uploader",
2324
"statusApiUrl": "https://api.data-dev.oslo.systems/status-api/status",
@@ -33,6 +34,7 @@
3334
"keycloakServerUrl": "https://login.oslo.kommune.no/auth",
3435
"pipelineUrl": "https://api.data.oslo.systems/pipeline",
3536
"s3BucketUrl": "https://s3.eu-west-1.amazonaws.com/ok-origo-dataplatform-prod",
37+
"s3DownloadBaseUrl": "https://ok-origo-dataplatform-prod.s3.amazonaws.com",
3638
"tokenService": "https://api.data.oslo.systems/token-service/token",
3739
"uploadUrl": "https://api.data.oslo.systems/data-uploader",
3840
"statusApiUrl": "https://api.data.oslo.systems/status-api/status",

okdata/sdk/data/download.py

Lines changed: 15 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,14 @@
77
log = logging.getLogger()
88

99

10+
class DownloadURLAssertionError(Exception):
    """Raised when a download URL does not start with the expected prefix.

    Attributes:
        url: The download URL that was rejected.
        expected_prefix: The prefix the URL was required to start with.
    """

    def __init__(self, url, expected_prefix):
        # Keep the raw values so callers can inspect them without having to
        # parse the human-readable message.
        self.url = url
        self.expected_prefix = expected_prefix
        super().__init__(
            "Aborting download because of an unexpected S3 download URL "
            f"(should start with {expected_prefix}): {url}"
        )
16+
17+
1018
class Download(SDK):
1119
def __init__(self, config=None, auth=None, env=None):
1220
self.__name__ = "download"
@@ -27,7 +35,13 @@ def download(self, dataset_id, version, edition, output_path, retries=0):
2735
downloaded_files = []
2836
for file in self.get_files(dataset_id, version, edition, retries=retries):
2937
file_name = file["key"].split("/")[-1]
30-
file_content_response = requests.get(file["url"], stream=True)
38+
file_url = file["url"]
39+
base_url = self.config.get("s3DownloadBaseUrl")
40+
41+
if not file_url.startswith(base_url):
42+
raise DownloadURLAssertionError(file_url, base_url)
43+
44+
file_content_response = requests.get(file_url, stream=True)
3145
file_content_response.raise_for_status()
3246

3347
write_file_content(file_name, output_path, file_content_response.raw.read())

okdata/sdk/sdk.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99
log = logging.getLogger()
1010

1111

12-
class SDK(object):
12+
class SDK:
1313
def __init__(self, config=None, auth=None, env=None):
1414
self.config = config
1515
if self.config is None:

tests/data/download_test.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
1+
import json
12
import os
3+
24
import pytest
3-
import json
45

56
from okdata.sdk.data.download import Download
67

@@ -17,6 +18,7 @@
1718

1819
def test_download(mock_home_dir, mock_http_calls):
1920
data_downloader = Download()
21+
data_downloader.config.config["s3DownloadBaseUrl"] = download_url
2022
output_path = f"{os.environ['HOME']}/my/path"
2123
result = data_downloader.download(
2224
dataset_id, version, edition, output_path=output_path
@@ -29,6 +31,7 @@ def test_download(mock_home_dir, mock_http_calls):
2931

3032
def test_download_public(mock_home_dir, mock_http_calls_public):
3133
data_downloader = Download()
34+
data_downloader.config.config["s3DownloadBaseUrl"] = download_url
3235
data_downloader.auth.token_provider = None
3336
output_path = f"{os.environ['HOME']}/my/path"
3437
result = data_downloader.download(

0 commit comments

Comments
 (0)