diff --git a/pyproject.toml b/pyproject.toml index 4419c47f5..de72b5f64 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -91,6 +91,7 @@ googledrive = "waterbutler.providers.googledrive:GoogleDriveProvider" onedrive = "waterbutler.providers.onedrive:OneDriveProvider" googlecloud = "waterbutler.providers.googlecloud:GoogleCloudProvider" azureblobstorage = "waterbutler.providers.azureblobstorage:AzureBlobStorageProvider" +oraclecloud = "waterbutler.providers.oraclecloud:OracleCloudProvider" [tool.pytest.ini_options] asyncio_default_fixture_loop_scope = "function" diff --git a/setup.py b/setup.py index 3067af411..d362486be 100644 --- a/setup.py +++ b/setup.py @@ -50,6 +50,7 @@ 'onedrive = waterbutler.providers.onedrive:OneDriveProvider', 'googlecloud = waterbutler.providers.googlecloud:GoogleCloudProvider', 'azureblobstorage = waterbutler.providers.azureblobstorage:AzureBlobStorageProvider', + 'oraclecloud = waterbutler.providers.oraclecloud:OracleCloudProvider', ], }, ) diff --git a/tests/providers/oraclecloud/__init__.py b/tests/providers/oraclecloud/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/providers/oraclecloud/fixtures/__init__.py b/tests/providers/oraclecloud/fixtures/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/providers/oraclecloud/fixtures/providers.py b/tests/providers/oraclecloud/fixtures/providers.py new file mode 100644 index 000000000..ffb691535 --- /dev/null +++ b/tests/providers/oraclecloud/fixtures/providers.py @@ -0,0 +1,27 @@ +import pytest + + +@pytest.fixture() +def mock_auth(): + return {"name": "Test User", "email": "test@osf.io"} + + +@pytest.fixture() +def mock_creds(): + return { + "json_creds": { + "s3compat": { + "access_key": "fake-access-key-id", + "secret_key": "fake-secret-access-key", + "region": "us-ashburn-1", + }, + } + } + + +@pytest.fixture() +def mock_settings(): + return { + "bucket": "test-bucket", + "namespace": "test-namespace", + } diff --git a/tests/providers/oraclecloud/test_metadata.py b/tests/providers/oraclecloud/test_metadata.py new file mode 100644 index 000000000..02c5dfea9 --- /dev/null +++ b/tests/providers/oraclecloud/test_metadata.py @@ -0,0 +1,136 @@ +import pytest + +from waterbutler.providers.oraclecloud.metadata import ( + BaseOracleCloudMetadata, + OracleCloudFileMetadata, + OracleCloudFolderMetadata, +) + + +class TestOracleCloudFileMetadata: + + def test_file_metadata_from_dict(self): + + raw = { + "object_name": "path/to/file.txt", + "content_type": "text/plain", + "last_modified": "Thu, 01 Mar 2025 19:04:45 GMT", + "size": 1024, + "etag": "abc123def456", + "extra": { + "md5": "abc123def456", + }, + "time_created": "2025-03-01T19:00:00+00:00", + } + metadata = OracleCloudFileMetadata(raw) + + assert isinstance(metadata, BaseOracleCloudMetadata) + assert metadata.provider == "oraclecloud" + assert metadata.kind == "file" + assert metadata.name == "file.txt" + assert metadata.path == "/path/to/file.txt" + assert metadata.content_type == "text/plain" + assert metadata.modified == "Thu, 01 Mar 2025 19:04:45 GMT" + assert metadata.size == 1024 + assert metadata.size_as_int == 1024 + assert metadata.etag == "abc123def456" + assert metadata.extra["md5"] == "abc123def456" + assert metadata.created_utc == "2025-03-01T19:00:00+00:00" + + def test_file_metadata_missing_optional_fields(self): + + raw = { + "object_name": "simple.txt", + "content_type": None, + "last_modified": None, + "size": None, + "etag": None, + "extra": {}, + } + metadata = OracleCloudFileMetadata(raw) + + assert metadata.name == "simple.txt" + assert metadata.content_type is None + assert metadata.modified is None + assert metadata.size is None + assert metadata.etag is None + assert metadata.extra == {} + assert metadata.created_utc is None + + def test_file_metadata_from_head_response(self): + + headers = { + "Content-Type": "application/pdf", + "Content-Length": "2048", + "Last-Modified": "Fri, 14 Mar 2025 12:00:00 GMT", + "ETag": '"deadbeef"', + } + + metadata = OracleCloudFileMetadata.new_from_head_response( + "folder/report.pdf", headers + ) + + assert metadata.provider == "oraclecloud" + assert metadata.name == "report.pdf" + assert metadata.path == "/folder/report.pdf" + assert metadata.content_type == "application/pdf" + assert metadata.size == 2048 + assert metadata.etag == "deadbeef" + assert metadata.extra["md5"] == "deadbeef" + + def test_file_metadata_from_s3_list_entry(self): + + entry = { + "Key": "data/results.csv", + "LastModified": "2025-03-10T08:00:00.000Z", + "ETag": '"etag123"', + "Size": "4096", + "StorageClass": "STANDARD", + } + + metadata = OracleCloudFileMetadata.new_from_s3_list_entry(entry) + + assert metadata.name == "results.csv" + assert metadata.path == "/data/results.csv" + assert metadata.size == 4096 + assert metadata.etag == "etag123" + assert metadata.extra["md5"] == "etag123" + assert metadata.modified == "2025-03-10T08:00:00.000Z" + assert metadata.created_utc is None + + def test_file_metadata_from_s3_list_entry_no_quotes(self): + + entry = { + "Key": "file.bin", + "LastModified": "2025-01-01T00:00:00.000Z", + "ETag": "notag", + "Size": "0", + } + + metadata = OracleCloudFileMetadata.new_from_s3_list_entry(entry) + + assert metadata.name == "file.bin" + assert metadata.etag == "notag" + assert metadata.size == 0 + + +class TestOracleCloudFolderMetadata: + + def test_folder_metadata(self): + + raw = {"object_name": "path/to/folder/"} + metadata = OracleCloudFolderMetadata(raw) + + assert isinstance(metadata, BaseOracleCloudMetadata) + assert metadata.provider == "oraclecloud" + assert metadata.kind == "folder" + assert metadata.name == "folder" + assert metadata.path == "/path/to/folder/" + + def test_folder_metadata_nested(self): + + raw = {"object_name": "a/b/c/"} + metadata = OracleCloudFolderMetadata(raw) + + assert metadata.name == "c" + assert metadata.path == "/a/b/c/" diff --git a/tests/providers/oraclecloud/test_provider.py b/tests/providers/oraclecloud/test_provider.py new file mode 100644 index 000000000..3114bf662 --- /dev/null +++ b/tests/providers/oraclecloud/test_provider.py @@ -0,0 +1,522 @@ +import hashlib +from http import HTTPStatus +from unittest import mock + +import pytest + +from tests.providers.oraclecloud.fixtures.providers import ( + mock_auth, + mock_creds, + mock_settings, +) +from waterbutler.core import exceptions +from waterbutler.core.path import WaterButlerPath +from waterbutler.core.streams import ResponseStreamReader, StringStream +from waterbutler.providers.oraclecloud import OracleCloudProvider +from waterbutler.providers.oraclecloud.metadata import OracleCloudFileMetadata + + +@pytest.fixture() +def mock_provider(mock_auth, mock_creds, mock_settings): + return OracleCloudProvider(mock_auth, mock_creds, mock_settings) + + +@pytest.fixture() +def file_wb_path(): + return WaterButlerPath("/folder-1/text-file-1.txt") + + +@pytest.fixture() +def folder_wb_path(): + return WaterButlerPath("/folder-1/") + + +@pytest.fixture() +def file_content(): + return b"file content for testing upload and download" + + +def _mock_response(status=200, headers=None, body=b"", text=""): + """Build a mock aiohttp-style response for ``make_request``.""" + resp = mock.AsyncMock() + resp.status = status + resp.headers = headers or {} + resp.read = mock.AsyncMock(return_value=body) + resp.text = mock.AsyncMock( + return_value=text or body.decode("utf-8", errors="replace") + ) + resp.release = mock.AsyncMock() + resp.close = mock.Mock() + # ResponseStreamReader reads from response.content + content = mock.AsyncMock() + content.read = mock.AsyncMock(return_value=body) + resp.content = content + return resp + + +def _request_sequence(items): + """``make_request`` side effect that drains any streamed body, then yields each + queued response in order (an item may be an exception to raise instead). + """ + queue = list(items) + + async def _side_effect(method, url, *args, **kwargs): + data = kwargs.get("data") + if data is not None and hasattr(data, "read"): + # Drain in positive-size chunks the way aiohttp does, so tee'd hash + # writers see the body exactly once. + while await data.read(8192): + pass + item = queue.pop(0) + if isinstance(item, Exception): + raise item + return item + + return _side_effect + + +class TestProviderInit: + + def test_provider_init(self, mock_provider, mock_settings): + + assert mock_provider is not None + assert mock_provider.NAME == "oraclecloud" + assert mock_provider.bucket == mock_settings["bucket"] + assert "compat.objectstorage" in mock_provider.BASE_URL + assert "us-ashburn-1" in mock_provider.BASE_URL + assert "test-namespace" in mock_provider.BASE_URL + + def test_provider_init_missing_bucket(self, mock_auth, mock_creds): + + with pytest.raises(exceptions.InvalidProviderConfigError): + OracleCloudProvider(mock_auth, mock_creds, {"namespace": "ns"}) + + def test_provider_init_missing_namespace(self, mock_auth, mock_creds): + + with pytest.raises(exceptions.InvalidProviderConfigError): + OracleCloudProvider(mock_auth, mock_creds, {"bucket": "bkt"}) + + def test_provider_init_missing_access_key(self, mock_auth, mock_settings): + + creds = {"json_creds": {"s3compat": {"secret_key": "sk", "region": "us-ashburn-1"}}} + with pytest.raises(exceptions.InvalidProviderConfigError): + OracleCloudProvider(mock_auth, creds, mock_settings) + + def test_provider_init_missing_secret_key(self, mock_auth, mock_settings): + + creds = {"json_creds": {"s3compat": {"access_key": "ak", "region": "us-ashburn-1"}}} + with pytest.raises(exceptions.InvalidProviderConfigError): + OracleCloudProvider(mock_auth, creds, mock_settings) + + def test_provider_init_missing_region(self, mock_auth, mock_settings): + + creds = {"json_creds": {"s3compat": {"access_key": "ak", "secret_key": "sk"}}} + with pytest.raises(exceptions.InvalidProviderConfigError): + OracleCloudProvider(mock_auth, creds, mock_settings) + + +class TestValidatePath: + + @pytest.mark.asyncio + async def test_validate_v1_path_file(self, mock_provider, file_wb_path): + wb_path = await mock_provider.validate_v1_path("/folder-1/text-file-1.txt") + assert wb_path == file_wb_path + + @pytest.mark.asyncio + async def test_validate_v1_path_folder(self, mock_provider, folder_wb_path): + wb_path = await mock_provider.validate_v1_path("/folder-1/") + assert wb_path == folder_wb_path + + @pytest.mark.asyncio + async def test_validate_path_file(self, mock_provider, file_wb_path): + wb_path = await mock_provider.validate_path("/folder-1/text-file-1.txt") + assert wb_path == file_wb_path + + +class TestOperations: + + def test_can_duplicate_names(self, mock_provider): + assert mock_provider.can_duplicate_names() + + def test_can_intra_copy_file(self, mock_provider, file_wb_path): + assert mock_provider.can_intra_copy(mock_provider, file_wb_path) + + def test_can_intra_copy_folder(self, mock_provider, folder_wb_path): + assert not mock_provider.can_intra_copy(mock_provider, folder_wb_path) + + def test_can_intra_move_file(self, mock_provider, file_wb_path): + assert mock_provider.can_intra_move(mock_provider, file_wb_path) + + def test_can_intra_move_folder(self, mock_provider, folder_wb_path): + assert not mock_provider.can_intra_move(mock_provider, folder_wb_path) + + def test_can_intra_copy_other_provider(self, mock_provider, file_wb_path): + assert not mock_provider.can_intra_copy(mock.Mock(), file_wb_path) + + def test_can_intra_copy_different_endpoint( + self, mock_provider, mock_auth, mock_creds, mock_settings, file_wb_path + ): + other = OracleCloudProvider( + mock_auth, mock_creds, {**mock_settings, "namespace": "other-ns"} + ) + assert not mock_provider.can_intra_copy(other, file_wb_path) + + +class TestMetadata: + + @pytest.mark.asyncio + async def test_metadata_file(self, mock_provider, file_wb_path): + + head_resp = _mock_response( + status=200, + headers={ + "Content-Type": "text/plain", + "Content-Length": "1024", + "ETag": '"abc123"', + "Last-Modified": "Thu, 01 Mar 2025 19:04:45 GMT", + }, + ) + + with mock.patch.object( + mock_provider, + "make_request", + new_callable=mock.AsyncMock, + return_value=head_resp, + ): + metadata = await mock_provider.metadata(file_wb_path) + + assert isinstance(metadata, OracleCloudFileMetadata) + assert metadata.name == "text-file-1.txt" + assert metadata.size == 1024 + assert metadata.etag == "abc123" + + @pytest.mark.asyncio + async def test_metadata_file_not_found(self, mock_provider, file_wb_path): + + with mock.patch.object( + mock_provider, + "make_request", + new_callable=mock.AsyncMock, + side_effect=exceptions.MetadataError("not found", code=HTTPStatus.NOT_FOUND), + ): + with pytest.raises(exceptions.MetadataError): + await mock_provider.metadata(file_wb_path) + + @pytest.mark.asyncio + async def test_metadata_folder_raises(self, mock_provider, folder_wb_path): + + with pytest.raises(exceptions.MetadataError): + await mock_provider.metadata(folder_wb_path) + + +class TestCRUD: + + @pytest.mark.asyncio + async def test_download_file(self, mock_provider, file_wb_path, file_content): + + get_resp = _mock_response( + status=200, + body=file_content, + headers={"Content-Length": str(len(file_content))}, + ) + + with mock.patch.object( + mock_provider, + "make_request", + new_callable=mock.AsyncMock, + return_value=get_resp, + ): + stream = await mock_provider.download(file_wb_path) + + assert isinstance(stream, ResponseStreamReader) + + @pytest.mark.asyncio + async def test_download_folder_raises(self, mock_provider, folder_wb_path): + + with pytest.raises(exceptions.DownloadError): + await mock_provider.download(folder_wb_path) + + @pytest.mark.asyncio + async def test_download_not_found(self, mock_provider, file_wb_path): + + get_resp = _mock_response(status=404) + + with mock.patch.object( + mock_provider, + "make_request", + new_callable=mock.AsyncMock, + return_value=get_resp, + ): + with pytest.raises(exceptions.DownloadError) as exc: + await mock_provider.download(file_wb_path) + assert exc.value.code == HTTPStatus.NOT_FOUND + + @pytest.mark.asyncio + async def test_download_with_range(self, mock_provider, file_wb_path): + + get_resp = _mock_response( + status=206, + body=b"partial", + headers={"Content-Length": "7"}, + ) + + with mock.patch.object( + mock_provider, + "make_request", + new_callable=mock.AsyncMock, + return_value=get_resp, + ) as mocked: + stream = await mock_provider.download(file_wb_path, range=(0, 6)) + + assert isinstance(stream, ResponseStreamReader) + # Verify Range header was included in the signed headers + call_kwargs = mocked.call_args + assert call_kwargs.kwargs["headers"]["Range"] == "bytes=0-6" + + @pytest.mark.asyncio + async def test_download_open_ended_range(self, mock_provider, file_wb_path): + + get_resp = _mock_response( + status=206, body=b"tail", headers={"Content-Length": "4"} + ) + + with mock.patch.object( + mock_provider, + "make_request", + new_callable=mock.AsyncMock, + return_value=get_resp, + ) as mocked: + await mock_provider.download(file_wb_path, range=(5, None)) + + assert mocked.call_args.kwargs["headers"]["Range"] == "bytes=5-" + + @pytest.mark.asyncio + async def test_upload_file(self, mock_provider, file_wb_path, file_content): + + expected_md5 = hashlib.md5(file_content).hexdigest() + put_resp = _mock_response(status=200, headers={"ETag": f'"{expected_md5}"'}) + head_200 = _mock_response( + status=200, + headers={ + "Content-Type": "application/octet-stream", + "Content-Length": str(len(file_content)), + "ETag": f'"{expected_md5}"', + "Last-Modified": "Thu, 01 Mar 2025 19:04:45 GMT", + }, + ) + # exists() HEAD raises 404, PUT succeeds, post-upload HEAD returns metadata + sequence = _request_sequence([ + exceptions.MetadataError("not found", code=HTTPStatus.NOT_FOUND), + put_resp, + head_200, + ]) + + with mock.patch.object(mock_provider, "make_request", side_effect=sequence): + metadata, created = await mock_provider.upload( + StringStream(file_content), file_wb_path + ) + + assert created is True + assert isinstance(metadata, OracleCloudFileMetadata) + assert metadata.name == "text-file-1.txt" + assert metadata.etag == expected_md5 + + @pytest.mark.asyncio + async def test_upload_existing_file(self, mock_provider, file_wb_path, file_content): + + expected_md5 = hashlib.md5(file_content).hexdigest() + head_exists = _mock_response( + status=200, + headers={ + "Content-Type": "text/plain", + "Content-Length": "100", + "ETag": '"oldmd5"', + "Last-Modified": "Thu, 01 Mar 2025 00:00:00 GMT", + }, + ) + put_resp = _mock_response(status=200, headers={"ETag": f'"{expected_md5}"'}) + head_200 = _mock_response( + status=200, + headers={ + "Content-Type": "application/octet-stream", + "Content-Length": str(len(file_content)), + "ETag": f'"{expected_md5}"', + "Last-Modified": "Thu, 01 Mar 2025 19:04:45 GMT", + }, + ) + # exists() HEAD finds the object, so created is False + sequence = _request_sequence([head_exists, put_resp, head_200]) + + with mock.patch.object(mock_provider, "make_request", side_effect=sequence): + metadata, created = await mock_provider.upload( + StringStream(file_content), file_wb_path + ) + + assert created is False + assert isinstance(metadata, OracleCloudFileMetadata) + + @pytest.mark.asyncio + async def test_upload_no_etag_skips_verification( + self, mock_provider, file_wb_path, file_content + ): + # No ETag on the PUT response -> integrity check is skipped, upload still succeeds + put_resp = _mock_response(status=200, headers={}) + head_200 = _mock_response( + status=200, + headers={ + "Content-Type": "application/octet-stream", + "Content-Length": str(len(file_content)), + "ETag": '"abc123"', + }, + ) + sequence = _request_sequence([ + exceptions.MetadataError("not found", code=HTTPStatus.NOT_FOUND), + put_resp, + head_200, + ]) + + with mock.patch.object(mock_provider, "make_request", side_effect=sequence): + metadata, created = await mock_provider.upload( + StringStream(file_content), file_wb_path + ) + + assert created is True + assert isinstance(metadata, OracleCloudFileMetadata) + + @pytest.mark.asyncio + async def test_upload_checksum_mismatch( + self, mock_provider, file_wb_path, file_content + ): + + # PUT returns 200 with an ETag that won't match the uploaded body + put_resp = _mock_response(status=200, headers={"ETag": '"deadbeef"'}) + sequence = _request_sequence([ + exceptions.MetadataError("not found", code=HTTPStatus.NOT_FOUND), + put_resp, + ]) + + with mock.patch.object(mock_provider, "make_request", side_effect=sequence): + with pytest.raises(exceptions.UploadChecksumMismatchError): + await mock_provider.upload(StringStream(file_content), file_wb_path) + + @pytest.mark.asyncio + async def test_download_accept_url(self, mock_provider, file_wb_path): + + signed_url = await mock_provider.download(file_wb_path, accept_url=True) + + assert isinstance(signed_url, str) + assert "X-Amz-Algorithm=AWS4-HMAC-SHA256" in signed_url + assert "X-Amz-Credential=fake-access-key-id" in signed_url + assert "X-Amz-Signature=" in signed_url + assert "response-content-disposition" in signed_url + + @pytest.mark.asyncio + async def test_delete_file(self, mock_provider, file_wb_path): + + del_resp = _mock_response(status=204) + + with mock.patch.object( + mock_provider, + "make_request", + new_callable=mock.AsyncMock, + return_value=del_resp, + ): + await mock_provider.delete(file_wb_path) + + @pytest.mark.asyncio + async def test_delete_folder_raises(self, mock_provider, folder_wb_path): + + with pytest.raises(exceptions.DeleteError): + await mock_provider.delete(folder_wb_path) + + @pytest.mark.asyncio + async def test_delete_not_found(self, mock_provider, file_wb_path): + + with mock.patch.object( + mock_provider, + "make_request", + new_callable=mock.AsyncMock, + side_effect=exceptions.DeleteError("not found", code=HTTPStatus.NOT_FOUND), + ): + with pytest.raises(exceptions.DeleteError): + await mock_provider.delete(file_wb_path) + + +class TestIntraCopy: + + @pytest.mark.asyncio + async def test_intra_copy_file(self, mock_provider, file_wb_path): + + dest_path = WaterButlerPath("/folder-2/copy.txt") + copy_resp = _mock_response(status=200, body=b"") + head_200 = _mock_response( + status=200, + headers={ + "Content-Type": "application/octet-stream", + "Content-Length": "44", + "ETag": '"abc123"', + }, + ) + # exists(dest) HEAD raises 404, copy PUT succeeds, then HEAD for metadata + sequence = _request_sequence([ + exceptions.MetadataError("not found", code=HTTPStatus.NOT_FOUND), + copy_resp, + head_200, + ]) + + with mock.patch.object(mock_provider, "make_request", side_effect=sequence): + metadata, created = await mock_provider.intra_copy( + mock_provider, file_wb_path, dest_path + ) + + assert created is True + assert isinstance(metadata, OracleCloudFileMetadata) + assert metadata.path == "/folder-2/copy.txt" + + @pytest.mark.asyncio + async def test_intra_copy_folder_raises(self, mock_provider, folder_wb_path): + + with pytest.raises(exceptions.CopyError): + await mock_provider.intra_copy( + mock_provider, folder_wb_path, folder_wb_path + ) + + @pytest.mark.asyncio + async def test_intra_copy_file_folder_mismatch_raises( + self, mock_provider, file_wb_path, folder_wb_path + ): + with pytest.raises(exceptions.CopyError): + await mock_provider.intra_copy( + mock_provider, file_wb_path, folder_wb_path + ) + + +class TestURLBuilding: + + def test_object_url(self, mock_provider): + url = mock_provider._object_url("folder/file.txt") + assert url == ( + "https://test-namespace.compat.objectstorage" + ".us-ashburn-1.oraclecloud.com/test-bucket/folder/file.txt" + ) + + def test_object_url_encodes_special_chars(self, mock_provider): + url = mock_provider._object_url("folder/file name.txt") + assert "file%20name.txt" in url + + def test_bucket_url_no_query(self, mock_provider): + url = mock_provider._bucket_url() + assert url.endswith("/test-bucket") + + def test_bucket_url_with_query(self, mock_provider): + url = mock_provider._bucket_url(**{"list-type": "2", "delimiter": "/"}) + assert "list-type=2" in url + assert "delimiter=%2F" in url + + def test_get_obj_name(self, mock_provider): + path = WaterButlerPath("/folder/file.txt") + assert mock_provider._get_obj_name(path) == "folder/file.txt" + + def test_get_obj_name_root(self, mock_provider): + path = WaterButlerPath("/") + assert mock_provider._get_obj_name(path) == "" diff --git a/tests/providers/oraclecloud/test_signing.py b/tests/providers/oraclecloud/test_signing.py new file mode 100644 index 000000000..bcc4b123e --- /dev/null +++ b/tests/providers/oraclecloud/test_signing.py @@ -0,0 +1,192 @@ +import datetime as dt_mod +from unittest import mock + +import pytest + +from waterbutler.providers.oraclecloud.signing import ( + ALGORITHM, + EMPTY_SHA256, + UNSIGNED_PAYLOAD, + SigV4Signer, + _derive_signing_key, +) + + +@pytest.fixture() +def signer(): + return SigV4Signer( + access_key="AKEXAMPLE", + secret_key="SECRET", + region="us-ashburn-1", + ) + + +@pytest.fixture() +def frozen_now(): + return dt_mod.datetime(2025, 3, 1, 12, 0, 0, tzinfo=dt_mod.timezone.utc) + + +@pytest.fixture() +def frozen_signer(signer, frozen_now): + """Signer with frozen time for deterministic output.""" + with mock.patch("waterbutler.providers.oraclecloud.signing.datetime") as mock_dt: + mock_dt.datetime.now.return_value = frozen_now + mock_dt.timezone = dt_mod.timezone + yield signer + + +class TestDeriveSigningKey: + + def test_returns_32_bytes(self): + key = _derive_signing_key("secret", "20250301", "us-ashburn-1", "s3") + assert isinstance(key, bytes) + assert len(key) == 32 + + def test_deterministic(self): + k1 = _derive_signing_key("secret", "20250301", "us-ashburn-1", "s3") + k2 = _derive_signing_key("secret", "20250301", "us-ashburn-1", "s3") + assert k1 == k2 + + def test_varies_with_date(self): + k1 = _derive_signing_key("secret", "20250301", "us-ashburn-1", "s3") + k2 = _derive_signing_key("secret", "20250302", "us-ashburn-1", "s3") + assert k1 != k2 + + def test_varies_with_region(self): + k1 = _derive_signing_key("secret", "20250301", "us-ashburn-1", "s3") + k2 = _derive_signing_key("secret", "20250301", "eu-frankfurt-1", "s3") + assert k1 != k2 + + def test_varies_with_secret(self): + k1 = _derive_signing_key("secret-a", "20250301", "us-ashburn-1", "s3") + k2 = _derive_signing_key("secret-b", "20250301", "us-ashburn-1", "s3") + assert k1 != k2 + + +class TestSigV4Signer: + + def test_authorization_header_format(self, frozen_signer): + headers = frozen_signer.sign_request( + "GET", + "https://ns.compat.objectstorage.us-ashburn-1.oraclecloud.com/bkt/key", + ) + auth = headers["Authorization"] + assert auth.startswith( + f"{ALGORITHM} Credential=AKEXAMPLE/20250301/us-ashburn-1/s3/aws4_request" + ) + assert "SignedHeaders=" in auth + assert "Signature=" in auth + + def test_amz_date_header(self, frozen_signer): + headers = frozen_signer.sign_request("GET", "https://host/bkt/key") + assert headers["x-amz-date"] == "20250301T120000Z" + + def test_content_sha256_defaults_to_empty(self, frozen_signer): + headers = frozen_signer.sign_request("GET", "https://host/bkt/key") + assert headers["x-amz-content-sha256"] == EMPTY_SHA256 + + def test_content_sha256_with_unsigned_payload(self, frozen_signer): + headers = frozen_signer.sign_request( + "PUT", "https://host/bkt/key", payload_hash=UNSIGNED_PAYLOAD + ) + assert headers["x-amz-content-sha256"] == UNSIGNED_PAYLOAD + + def test_preserves_extra_headers(self, frozen_signer): + headers = frozen_signer.sign_request( + "PUT", + "https://host/bkt/key", + headers={"Content-Type": "text/plain", "Content-Length": "42"}, + ) + assert headers["Content-Type"] == "text/plain" + assert headers["Content-Length"] == "42" + + def test_host_not_in_output(self, frozen_signer): + """``host`` is used for signing but not returned (aiohttp sets it).""" + headers = frozen_signer.sign_request( + "GET", "https://host.example.com/bkt/key" + ) + assert "host" not in headers + assert "Host" not in headers + + def test_signed_headers_sorted(self, frozen_signer): + headers = frozen_signer.sign_request( + "PUT", + "https://host/bkt/key", + headers={"Zebra": "z", "Alpha": "a"}, + ) + auth = headers["Authorization"] + sh_part = [p for p in auth.split(", ") if p.startswith("SignedHeaders=")][0] + signed = sh_part.split("=", 1)[1].split(";") + assert signed == sorted(signed) + + def test_host_included_in_signed_headers(self, frozen_signer): + headers = frozen_signer.sign_request("GET", "https://host/bkt/key") + auth = headers["Authorization"] + sh_part = [p for p in auth.split(", ") if p.startswith("SignedHeaders=")][0] + signed_names = sh_part.split("=", 1)[1].split(";") + assert "host" in signed_names + + def test_different_methods_different_signatures(self, frozen_signer): + url = "https://host/bkt/key" + h_get = frozen_signer.sign_request("GET", url) + h_put = frozen_signer.sign_request("PUT", url) + assert h_get["Authorization"] != h_put["Authorization"] + + def test_different_urls_different_signatures(self, frozen_signer): + h1 = frozen_signer.sign_request("GET", "https://host/bkt/key1") + h2 = frozen_signer.sign_request("GET", "https://host/bkt/key2") + assert h1["Authorization"] != h2["Authorization"] + + def test_query_string_parameters_signed(self, frozen_signer): + headers = frozen_signer.sign_request( + "GET", + "https://host/bkt?list-type=2&prefix=f%2F&delimiter=%2F", + ) + assert "Authorization" in headers + + def test_deterministic_with_same_inputs(self, frozen_signer): + url = "https://host/bkt/key" + h1 = frozen_signer.sign_request("GET", url) + h2 = frozen_signer.sign_request("GET", url) + assert h1["Authorization"] == h2["Authorization"] + + def test_credential_scope_contains_region_and_service(self, frozen_signer): + headers = frozen_signer.sign_request("GET", "https://host/bkt/key") + auth = headers["Authorization"] + assert "us-ashburn-1/s3/aws4_request" in auth + + def test_different_payload_hashes_different_signatures(self, frozen_signer): + url = "https://host/bkt/key" + h1 = frozen_signer.sign_request("PUT", url, payload_hash=EMPTY_SHA256) + h2 = frozen_signer.sign_request("PUT", url, payload_hash="abc123") + assert h1["Authorization"] != h2["Authorization"] + + +class TestSignRequestQuery: + + def test_returns_presigned_url(self, frozen_signer): + url = frozen_signer.sign_request_query( + "GET", + "https://host/bkt/key?response-content-disposition=attachment", + ) + assert isinstance(url, str) + assert "X-Amz-Algorithm=AWS4-HMAC-SHA256" in url + assert "X-Amz-Credential=AKEXAMPLE" in url + assert "X-Amz-Date=20250301T120000Z" in url + assert "X-Amz-Expires=3600" in url + assert "X-Amz-SignedHeaders=host" in url + assert "X-Amz-Signature=" in url + + def test_deterministic(self, frozen_signer): + url = "https://host/bkt/key?response-content-disposition=attachment" + assert ( + frozen_signer.sign_request_query("GET", url) + == frozen_signer.sign_request_query("GET", url) + ) + + def test_preserves_existing_query(self, frozen_signer): + url = frozen_signer.sign_request_query( + "GET", + "https://host/bkt/key?response-content-disposition=attachment", + ) + assert "response-content-disposition=attachment" in url diff --git a/waterbutler/providers/oraclecloud/__init__.py b/waterbutler/providers/oraclecloud/__init__.py new file mode 100644 index 000000000..083bc9bb4 --- /dev/null +++ b/waterbutler/providers/oraclecloud/__init__.py @@ -0,0 +1 @@ +from .provider import OracleCloudProvider # noqa diff --git a/waterbutler/providers/oraclecloud/metadata.py b/waterbutler/providers/oraclecloud/metadata.py new file mode 100644 index 000000000..170d96795 --- /dev/null +++ b/waterbutler/providers/oraclecloud/metadata.py @@ -0,0 +1,114 @@ +import logging +import os + +from waterbutler.core import metadata + +logger = logging.getLogger(__name__) + + +class BaseOracleCloudMetadata(metadata.BaseMetadata): + """Base metadata for the OracleCloud provider (S3-compatible API). + + This is an abstract class; it does not implement all abstract methods and + properties in :class:`.BaseMetadata`. + """ + + @property + def provider(self) -> str: + return "oraclecloud" + + @property + def path(self) -> str: + return self.build_path(self.raw.get("object_name", "")) + + +class OracleCloudFileMetadata(BaseOracleCloudMetadata, metadata.BaseFileMetadata): + """File metadata for the OracleCloud provider (S3-compatible API).""" + + @property + def name(self) -> str: + return os.path.split(self.path)[1] + + @property + def content_type(self) -> str | None: + return self.raw.get("content_type", None) + + @property + def modified(self) -> str | None: + return self.raw.get("last_modified", None) + + @property + def created_utc(self) -> str | None: + return self.raw.get("time_created", None) + + @property + def size(self) -> int | None: + size = self.raw.get("size", None) + return int(size) if size is not None else None + + @property + def etag(self) -> str | None: + return self.raw.get("etag", None) + + @property + def extra(self) -> dict: + return self.raw.get("extra", {}) + + @classmethod + def new_from_s3_list_entry(cls, entry: dict) -> "OracleCloudFileMetadata": + """Construct from a parsed S3 ``ListBucketResult/Contents`` element. + + :param entry: a dict produced by ``xmltodict`` from the ```` element + :rtype: :class:`.OracleCloudFileMetadata` + """ + etag = entry.get("ETag", "").strip('"') + return cls( + { + "object_name": entry["Key"], + "content_type": None, + "last_modified": entry.get("LastModified"), + "size": int(entry.get("Size", 0)), + "etag": etag, + "extra": { + "md5": etag, + }, + } + ) + + @classmethod + def new_from_head_response( + cls, obj_name: str, headers + ) -> "OracleCloudFileMetadata": + """Construct from the response headers of an S3-compatible ``HEAD`` request. + + Works with both ``CIMultiDict`` (aiohttp) and plain ``dict`` headers. + + :param str obj_name: the object key (no leading ``/``) + :param headers: the response headers + :rtype: :class:`.OracleCloudFileMetadata` + """ + etag = headers.get("ETag", "").strip('"') + return cls( + { + "object_name": obj_name, + "content_type": headers.get("Content-Type"), + "last_modified": headers.get("Last-Modified"), + "size": int(headers.get("Content-Length", 0)), + "etag": etag, + "extra": { + "md5": etag, + }, + } + ) + + +class OracleCloudFolderMetadata(BaseOracleCloudMetadata, metadata.BaseFolderMetadata): + """Folder metadata for the OracleCloud provider. + + OCI Object Storage uses a flat namespace; folders are represented by common + prefixes returned by the S3-compatible ``ListObjectsV2`` operation. + """ + + @property + def name(self) -> str: + return os.path.split(self.path.rstrip("/"))[1] diff --git a/waterbutler/providers/oraclecloud/provider.py b/waterbutler/providers/oraclecloud/provider.py new file mode 100644 index 000000000..663d5b5bf --- /dev/null +++ b/waterbutler/providers/oraclecloud/provider.py @@ -0,0 +1,478 @@ +import base64 +import hashlib +import logging +from http import HTTPStatus +from urllib.parse import parse_qsl, quote, urlencode, urlparse, urlunparse +from xml.sax.saxutils import escape as xml_escape + +import xmltodict + +from waterbutler.core.exceptions import ( + CopyError, + DeleteError, + DownloadError, + IntraCopyError, + InvalidProviderConfigError, + MetadataError, + NotFoundError, + UploadChecksumMismatchError, + UploadError, +) +from waterbutler.core.path import WaterButlerPath +from waterbutler.core.provider import BaseProvider +from waterbutler.core.streams import BaseStream, HashStreamWriter, ResponseStreamReader +from waterbutler.core.utils import make_disposition +from waterbutler.providers.oraclecloud.metadata import ( + BaseOracleCloudMetadata, + OracleCloudFileMetadata, + OracleCloudFolderMetadata, +) +from waterbutler.providers.oraclecloud.signing import EMPTY_SHA256, UNSIGNED_PAYLOAD, SigV4Signer + +logger = logging.getLogger(__name__) + + +class OracleCloudProvider(BaseProvider): + """Provider for Oracle Cloud Infrastructure Object Storage via S3-compatible API. + + Uses OCI's Amazon S3 Compatibility API with manual AWS SigV4 signing (no boto + dependency). All requests go through :meth:`BaseProvider.make_request` backed + by aiohttp. + + S3 Compat docs: https://docs.oracle.com/en-us/iaas/Content/Object/Tasks/s3compatibleapi.htm + + Quirks: + + * OCI's S3-compatible endpoint uses **path-style** addressing: + ``https://.compat.objectstorage..oraclecloud.com//`` + * ...and **vhost-style** addressing: + ``https://.vhcompat.objectstorage..oci.customer-oci.com`` + # ) + * Authentication requires an OCI *Customer Secret Key* (access key + secret key) + which is separate from the native OCI API signing key. + + S3-compatible conversion notes (ENG-10671): + + Successfully converted to S3-compatible API: + + * **Upload** (PUT Object) -- full body upload with SHA-256 signed payload and + ETag-based MD5 integrity verification. + * **Download** (GET Object) -- streaming via ResponseStreamReader, range support. + * **Delete** -- single-object via ``DELETE Object``; folder via list + batch + ``POST ?delete`` (DeleteObjects, 1000 keys/batch, paginated). + * **Metadata** (HEAD Object) -- zero-egress metadata retrieval. + * **Folder listing** (ListObjectsV2) -- common-prefix based virtual folders. + * **Intra-copy** (ENG-10659) -- single-file ``PUT Object - Copy`` via the + ``x-amz-copy-source`` header for same-namespace, same-region copies. + * **SigV4 signing** -- fully custom, no boto/botocore dependency. + + Not yet implemented but feasible via S3-compatible API: + + * **Multipart uploads** -- S3 multipart upload API is supported by OCI S3 compat. + Would be needed for files > ~5 GB. + * **Folder intra-copy** -- the current ``intra_copy`` implementation only + handles individual files; recursive folder copy would walk the prefix and + issue one ``PUT Object - Copy`` per key. NOT NEEDED FOR LIMITED PROVIDER + * **Metadata pagination** -- ``_metadata_folder`` reads only the first page + of ListObjectsV2 results; ``_list_keys_under_prefix`` (used by folder + delete) already paginates correctly and can serve as the template. NOT + NEEDED FOR LIMITED PROVIDER + + Cannot be done via S3-compatible API (OCI-native only): + + * **Work Request polling** -- OCI-specific async operation tracking for long-running + tasks. S3 compat does not expose this. Multipart upload is the S3 equivalent + for large operations. NOT NEEDED FOR LIMITED PROVIDER + * **Storage tier management** -- OCI storage tiers (Standard, InfrequentAccess, + Archive) and archival-state transitions require the native OCI API. NOT NEEDED + FOR LIMITED PROVIDER + * **Object lifecycle policies** -- native OCI API only. NOT NEEDED FOR LIMITED + PROVIDER + * **Namespace/compartment management** -- native OCI API only. NOT NEEDED FOR + LIMITED PROVIDER + """ + + NAME = "oraclecloud" + + def __init__(self, auth: dict, credentials: dict, settings: dict, **kwargs) -> None: + """Initialize a provider instance. + + Example credential / settings layout used by ``OSFStorageProvider``:: + + WATERBUTLER_CREDENTIALS = { + "storage": { + "json_creds": { + "s3compat": { + "access_key": "", + "secret_key": "", + "region": "us-ashburn-1" + } + } + } + } + + WATERBUTLER_SETTINGS = { + "storage": { + "provider": "oraclecloud", + "bucket": "my-bucket", + "namespace": "my-namespace", + } + } + """ + super().__init__(auth, credentials, settings, **kwargs) + + self.bucket = settings.get("bucket") + if not self.bucket: + raise InvalidProviderConfigError( + self.NAME, + message="Missing Object Storage bucket name from OSF", + ) + + namespace = settings.get("namespace") + if not namespace: + raise InvalidProviderConfigError( + self.NAME, + message="Missing Object Storage namespace from OSF", + ) + + s3_creds = credentials['json_creds']['s3compat'] + + s3_region = s3_creds.get("region") + if not s3_region: + raise InvalidProviderConfigError( + self.NAME, + message="Missing required credential: region", + ) + + s3_access_key = s3_creds.get("access_key") + if not s3_access_key: + raise InvalidProviderConfigError( + self.NAME, + message="Missing required credential: access_key", + ) + + s3_secret_key = s3_creds.get("secret_key") + if not s3_secret_key: + raise InvalidProviderConfigError( + self.NAME, + message="Missing required credential: secret_key", + ) + + # path style base url + self.BASE_URL = ( + f"https://{namespace}.compat.objectstorage.{s3_region}.oraclecloud.com" + ) + + # # vhost style base url + # self.BASE_URL = ( + # f"https://{self.bucket}.vhcompat.objectstorage.{region}.oci.customer-oci.com" + # ) + + self._s3_signer = SigV4Signer(s3_access_key, s3_secret_key, s3_region) + + # ------------------------------------------------------------------ + # URL helpers + # ------------------------------------------------------------------ + + def _object_url(self, obj_name: str) -> str: + """Full URL for a single-object operation (HEAD / GET / PUT / DELETE).""" + return f"{self.BASE_URL}/{self.bucket}/{quote(obj_name, safe='/')}" + + def _bucket_url(self, **query: str) -> str: + """Bucket URL with optional query-string parameters. + + Parameters are sorted and URI-encoded for SigV4 consistency. + """ + base = f"{self.BASE_URL}/{self.bucket}" + if not query: + return base + parts = [ + f"{quote(str(k), safe='')}={quote(str(v), safe='')}" + for k, v in sorted(query.items()) + ] + return f"{base}?{'&'.join(parts)}" + + # ------------------------------------------------------------------ + # Signing helpers + # ------------------------------------------------------------------ + + def _signed_headers( + self, + method: str, + url: str, + extra_headers: dict[str, str] | None = None, + payload_hash: str = UNSIGNED_PAYLOAD, + ) -> dict[str, str]: + """Return a signed-headers dict ready for ``make_request``.""" + return self._s3_signer.sign_request( + method, url, headers=extra_headers, payload_hash=payload_hash + ) + + # ------------------------------------------------------------------ + # Required provider interface + # ------------------------------------------------------------------ + + async def validate_v1_path(self, path: str, **kwargs) -> WaterButlerPath: + return await self.validate_path(path) + + async def validate_path(self, path: str, **kwargs) -> WaterButlerPath: + return WaterButlerPath(path) + + async def metadata( + self, path: WaterButlerPath, **kwargs + ) -> OracleCloudFileMetadata | list[BaseOracleCloudMetadata]: + """Get metadata about the object or folder at *path*. + + .. note:: + + This limited version only supports metadata for file objects. There are no technical + blockers. The only reason is that OSFStorage never performs any action on folders for + this inner storage provider. We prefer not to have dead or unreachable code. + + :param path: the WaterButlerPath to the file or folder + :rtype: :class:`.OracleCloudFileMetadata` | list[:class:`.BaseOracleCloudMetadata`] + """ + if path.is_folder: + raise MetadataError('This limited provider does not support folder metadata.') + return await self._metadata_file(path) + + async def upload( + self, stream: BaseStream, path: WaterButlerPath, *args, **kwargs + ) -> tuple[OracleCloudFileMetadata, bool]: + """Upload a file stream to the given path. + + """ + created = not await self.exists(path) + + await self._s3_upload(stream, path, *args, **kwargs) + + file_metadata = await self._metadata_file(path) + return file_metadata, created + + async def download( + self, + path: WaterButlerPath, + accept_url=False, + range=None, # type: ignore[assignment] + **kwargs, + ) -> ResponseStreamReader: + """Download the object at the given path. + + S3 Compat API: ``GET //`` + """ + if path.is_folder: + raise DownloadError("Cannot download folders", code=HTTPStatus.BAD_REQUEST) + + obj_name = self._get_obj_name(path) + url = self._object_url(obj_name) + + if accept_url: + display_name = kwargs.get('display_name') or path.name + disp_query = {'response-content-disposition': make_disposition(display_name)} + + url_parts = list(urlparse(url)) + query = dict(parse_qsl(url_parts[4])) + query.update(disp_query) + url_parts[4] = urlencode(query) + + # There is no need to delay URL building and signing + signed_url = self._s3_signer.sign_request_query( + 'GET', + urlunparse(url_parts), + {}, + ) + return signed_url + + extra: dict[str, str] = {} + if range is not None: + start, end = range + range_str = f"bytes={start}-" + if end is not None: + range_str = f"bytes={start}-{end}" + extra["Range"] = range_str + + headers = self._signed_headers("GET", url, extra_headers=extra) + + resp = await self.make_request( + "GET", + url, + headers=headers, + expects=(200, 206, 404), + throws=DownloadError, + ) + + if resp.status == HTTPStatus.NOT_FOUND: + resp.close() + raise DownloadError( + f"Object not found: {path}", code=HTTPStatus.NOT_FOUND + ) + + return ResponseStreamReader(resp) + + async def delete(self, path: WaterButlerPath, *args, **kwargs) -> None: # type: ignore[override] + r"""Delete the file object at the given path. + + .. note:: + + This limited version only supports deletion for file objects because + ``OSFStorageProvider`` does not need it for folders. + + Files use S3 Compat ``DELETE //``. + """ + if path.is_folder: + raise DeleteError('This limited provider does not support folder deletion.') + + obj_name = self._get_obj_name(path) + url = self._object_url(obj_name) + headers = self._signed_headers("DELETE", url) + + resp = await self.make_request( + "DELETE", + url, + headers=headers, + expects=(200, 204), + throws=DeleteError, + ) + await resp.release() + + async def intra_copy( + self, + dest_provider: BaseProvider, + source_path: WaterButlerPath, + dest_path: WaterButlerPath, + ) -> tuple[OracleCloudFileMetadata, bool]: + """Server-side copy a single file via S3 ``PUT Object - Copy``. + + .. note:: + + This limited version only supports intra-copy for file objects, because + ``OSFStorageProvider`` does not need it. + + Uses the ``x-amz-copy-source`` header. Only same-namespace, same-region + copies between two ``OracleCloudProvider`` instances are supported -- see + :meth:`can_intra_copy`. + """ + if source_path.is_folder and dest_path.is_folder: + raise CopyError('This limited provider does not support folder intra-copy.') + + if source_path.is_folder or dest_path.is_folder: # actually an xor + raise CopyError('Cannot copy between a file and a folder') + + exists = await dest_provider.exists(dest_path) + + src_obj = self._get_obj_name(source_path) + dest_obj = dest_provider._get_obj_name(dest_path) + dest_url = dest_provider._object_url(dest_obj) + + # `x-amz-copy-source` must be URL-encoded and prefixed with the source + # bucket; SigV4 covers it via the canonical-headers list. + copy_source = f"/{self.bucket}/{quote(src_obj, safe='/')}" + + headers = dest_provider._signed_headers( + "PUT", + dest_url, + extra_headers={ + "x-amz-copy-source": copy_source, + "Content-Length": "0", + }, + ) + + resp = await dest_provider.make_request( + "PUT", + dest_url, + headers=headers, + expects=(200,), + throws=CopyError, + ) + _ = await resp.text() # awaiting the response waits for it to finish + await resp.release() + + return await dest_provider._metadata_file(dest_path), not exists + + def can_intra_copy(self, other: BaseProvider, path: WaterButlerPath = None) -> bool: + """True for file-level copies between two OracleCloudProvider instances + sharing the same namespace and region (single SigV4 endpoint). + """ + if path is not None and getattr(path, "is_folder", False): + return False + if not isinstance(other, OracleCloudProvider): + return False + return self.BASE_URL == other.BASE_URL + + def can_intra_move(self, other: BaseProvider, path: WaterButlerPath = None) -> bool: + return self.can_intra_copy(other, path) + + def can_duplicate_names(self): + return True + + # ------------------------------------------------------------------ + # Internal helpers + # ------------------------------------------------------------------ + + async def _metadata_file(self, path: WaterButlerPath) -> OracleCloudFileMetadata: + """Fetch file metadata via ``HEAD //``. + + Uses HEAD to avoid egress charges (no body transferred). + """ + obj_name = self._get_obj_name(path) + url = self._object_url(obj_name) + headers = self._signed_headers("HEAD", url) + + resp = await self.make_request( + "HEAD", + url, + headers=headers, + expects=(200, ), + throws=MetadataError, + ) + + return OracleCloudFileMetadata.new_from_head_response(obj_name, resp.headers) + + @staticmethod + def _get_obj_name(path: WaterButlerPath) -> str: + """Convert a WaterButlerPath to an S3-compatible object key (no leading ``/``).""" + return path.path.lstrip("/") + + async def _s3_upload( + self, stream: BaseStream, path: WaterButlerPath, *args, **kwargs + ): + """Upload a file stream to the given path. + + S3 Compat API: ``PUT //`` + + After a successful upload, the ETag (hex-encoded MD5 for non-multipart + uploads) is verified against a locally computed digest. An extra HEAD + request is made to retrieve full metadata for the response. + """ + obj_name = self._get_obj_name(path) + url = self._object_url(obj_name) + + stream.add_writer('sha256', HashStreamWriter(hashlib.sha256)) + stream.add_writer('md5', HashStreamWriter(hashlib.md5)) + + headers = self._signed_headers( + "PUT", + url, + extra_headers={ + "Content-Length": str(stream.size), + "Content-Type": "application/octet-stream", + }, + ) + + resp = await self.make_request( + "PUT", + url, + data=stream, + headers=headers, + skip_auto_headers={"Content-Type"}, + expects=(200,), + throws=UploadError, + ) + await resp.release() + + # Verify upload integrity via ETag (hex MD5 for non-multipart uploads) + resp_etag = resp.headers.get("ETag", "").strip('"') + if resp_etag: + expected_md5 = stream.writers['md5'].hexdigest + if resp_etag != expected_md5: + raise UploadChecksumMismatchError() diff --git a/waterbutler/providers/oraclecloud/settings.py b/waterbutler/providers/oraclecloud/settings.py new file mode 100644 index 000000000..6577c2bf5 --- /dev/null +++ b/waterbutler/providers/oraclecloud/settings.py @@ -0,0 +1,3 @@ +from waterbutler import settings + +config = settings.child("ORACLECLOUD_PROVIDER_SETTINGS") diff --git a/waterbutler/providers/oraclecloud/signing.py b/waterbutler/providers/oraclecloud/signing.py new file mode 100644 index 000000000..e5db5420f --- /dev/null +++ b/waterbutler/providers/oraclecloud/signing.py @@ -0,0 +1,266 @@ +"""AWS Signature Version 4 signing for OCI S3-compatible API. + +Implements SigV4 header-based authentication without any boto/botocore dependency. +Used by :class:`~waterbutler.providers.oraclecloud.provider.OracleCloudProvider` to sign +requests to Oracle Cloud Infrastructure's Amazon S3 Compatibility API. + +Reference: https://docs.aws.amazon.com/AmazonS3/latest/API/sig-v4-header-based-auth.html +""" + +import datetime +import hashlib +import hmac +import logging +from urllib.parse import parse_qsl, quote, urlparse + +logger = logging.getLogger(__name__) + +ALGORITHM = "AWS4-HMAC-SHA256" +EMPTY_SHA256 = hashlib.sha256(b"").hexdigest() +UNSIGNED_PAYLOAD = "UNSIGNED-PAYLOAD" + + +def _hmac_sha256(key: bytes, msg: str) -> bytes: + """HMAC-SHA256 of a UTF-8 string keyed with the given bytes.""" + return hmac.new(key, msg.encode("utf-8"), hashlib.sha256).digest() + + +def _hmac_sha256_hex(key: bytes, msg: bytes) -> str: + """HMAC-SHA256 of a UTF-8 string keyed with the given bytes, in hex.""" + return hmac.new(key, msg.encode("utf-8"), hashlib.sha256).hexdigest() + + +def _sha256_hex(data: bytes) -> str: + """Hex-encoded SHA-256 digest.""" + return hashlib.sha256(data).hexdigest() + + +def _derive_signing_key( + secret_key: str, date_stamp: str, region: str, service: str +) -> bytes: + """Derive the four-level HMAC signing key used by SigV4. + + :: + + kDate = HMAC("AWS4" + secret, date) + kRegion = HMAC(kDate, region) + kService = HMAC(kRegion, service) + kSigning = HMAC(kService, "aws4_request") + """ + k_date = _hmac_sha256(("AWS4" + secret_key).encode("utf-8"), date_stamp) + k_region = _hmac_sha256(k_date, region) + k_service = _hmac_sha256(k_region, service) + return _hmac_sha256(k_service, "aws4_request") + + +class SigV4Signer: + """Signs HTTP requests using AWS Signature Version 4. + + Produces the ``Authorization``, ``x-amz-date``, and ``x-amz-content-sha256`` + headers required by OCI's Amazon S3 Compatibility API. + + Reference: https://docs.oracle.com/en-us/iaas/Content/Object/Tasks/s3compatibleapi.htm + """ + + def __init__( + self, + access_key: str, + secret_key: str, + region: str, + service: str = "s3", + ) -> None: + self.access_key = access_key + self.secret_key = secret_key + self.region = region + self.service = service + + def sign_request( + self, + method: str, + url: str, + headers: dict[str, str] | None = None, + payload_hash: str = EMPTY_SHA256, + ) -> dict[str, str]: + """Add SigV4 signing headers to *headers* and return them. + + The ``host`` value is derived from *url* and included in the canonical + request for signing, but is **not** returned in the result dict because + aiohttp sets it automatically from the URL. + + :param method: HTTP verb (``GET``, ``PUT``, ``HEAD``, ``DELETE``, ...) + :param url: Fully-qualified URL with scheme, host, path, and query + :param headers: Extra request headers to include in the signature + :param payload_hash: Hex SHA-256 of the body, or :data:`UNSIGNED_PAYLOAD` + :returns: Headers dict ready to pass to ``make_request`` + """ + if headers is None: + headers = {} + + parsed = urlparse(url) + host = parsed.netloc + canonical_uri = parsed.path or "/" + + # Canonical query string: parse, sort, and re-encode per SigV4 rules. + qs_pairs = parse_qsl(parsed.query, keep_blank_values=True) + qs_pairs.sort(key=lambda kv: (kv[0], kv[1])) + canonical_querystring = "&".join( + f"{quote(k, safe='')}={quote(v, safe='')}" for k, v in qs_pairs + ) + + now = datetime.datetime.now(datetime.timezone.utc) + amz_date = now.strftime("%Y%m%dT%H%M%SZ") + date_stamp = now.strftime("%Y%m%d") + + # Collect all headers for signing (lowercased keys, trimmed values). + headers_for_signing: dict[str, str] = { + k.lower(): v.strip() for k, v in headers.items() + } + headers_for_signing["host"] = host + headers_for_signing["x-amz-date"] = amz_date + headers_for_signing["x-amz-content-sha256"] = payload_hash + + signed_header_names = sorted(headers_for_signing) + canonical_headers = "".join( + f"{k}:{headers_for_signing[k]}\n" for k in signed_header_names + ) + signed_headers_str = ";".join(signed_header_names) + + # Step 1 -- Canonical request + canonical_request = "\n".join( + [ + method.upper(), + canonical_uri, + canonical_querystring, + canonical_headers, + signed_headers_str, + payload_hash, + ] + ) + + # Step 2 -- String to sign + credential_scope = ( + f"{date_stamp}/{self.region}/{self.service}/aws4_request" + ) + string_to_sign = "\n".join( + [ + ALGORITHM, + amz_date, + credential_scope, + _sha256_hex(canonical_request.encode("utf-8")), + ] + ) + + # Step 3 -- Signature + signing_key = _derive_signing_key( + self.secret_key, date_stamp, self.region, self.service + ) + signature = hmac.new( + signing_key, string_to_sign.encode("utf-8"), hashlib.sha256 + ).hexdigest() + + # Step 4 -- Assemble output headers (host excluded; aiohttp sets it) + result = dict(headers) + result["x-amz-date"] = amz_date + result["x-amz-content-sha256"] = payload_hash + result["Authorization"] = ( + f"{ALGORITHM} " + f"Credential={self.access_key}/{credential_scope}, " + f"SignedHeaders={signed_headers_str}, " + f"Signature={signature}" + ) + return result + + def sign_request_query( + self, + method: str, + url: str, + headers: dict[str, str] | None = None, + payload_hash: str = UNSIGNED_PAYLOAD, + ) -> dict[str, str]: + """Add SigV4 signing headers to *headers* and return them. + + The ``host`` value is derived from *url* and included in the canonical + request for signing, but is **not** returned in the result dict because + aiohttp sets it automatically from the URL. + + :param method: HTTP verb (``GET``, ``PUT``, ``HEAD``, ``DELETE``, ...) + :param url: Fully-qualified URL with scheme, host, path, and query + :param headers: Extra request headers to include in the signature + :param payload_hash: Hex SHA-256 of the body, or :data:`UNSIGNED_PAYLOAD` + :returns: url ready to pass to ``make_request`` + """ + if headers is None: + headers = {} + + now = datetime.datetime.now(datetime.timezone.utc) + amz_date = now.strftime("%Y%m%dT%H%M%SZ") + date_stamp = now.strftime("%Y%m%d") + + magic_str = 'aws4_request' + # TODO: need smarter query assemblage here, currently assumes content-disposition is set + url = url + '&X-Amz-Algorithm=AWS4-HMAC-SHA256' + url = url + f'&X-Amz-Credential={self.access_key}%2F{date_stamp}%2F{self.region}%2F{self.service}%2F{magic_str}' + url = url + f'&X-Amz-Date={amz_date}' + url = url + '&X-Amz-Expires=3600' + url = url + '&X-Amz-SignedHeaders=host' + + parsed = urlparse(url) + host = parsed.netloc + canonical_uri = parsed.path or "/" + + # Canonical query string: parse, sort, and re-encode per SigV4 rules. + qs_pairs = parse_qsl(parsed.query, keep_blank_values=True) + qs_pairs.sort(key=lambda kv: (kv[0], kv[1])) + canonical_querystring = "&".join( + f"{quote(k, safe='')}={quote(v, safe='')}" for k, v in qs_pairs + ) + + # Collect all headers for signing (lowercased keys, trimmed values). + headers_for_signing: dict[str, str] = { + k.lower(): v.strip() for k, v in headers.items() + } + headers_for_signing["host"] = host + signed_header_names = sorted(headers_for_signing) + canonical_headers = "".join( + f"{k}:{headers_for_signing[k]}\n" for k in signed_header_names + ) + signed_headers_str = ";".join(signed_header_names) + + # Step 1 -- Canonical request + canonical_request = "\n".join( + [ + method.upper(), + canonical_uri, + canonical_querystring, + canonical_headers, + signed_headers_str, + payload_hash, + ] + ) + logger.debug(f'canonical_request:({canonical_request})') + + # Step 2 -- String to sign + credential_scope = ( + f"{date_stamp}/{self.region}/{self.service}/{magic_str}" + ) + string_to_sign = "\n".join( + [ + ALGORITHM, + amz_date, + credential_scope, + _sha256_hex(canonical_request.encode("utf-8")), + ] + ) + logger.debug(f'string_to_sign:({string_to_sign})') + + # Step 3 -- Signature + signing_key = _derive_signing_key( + self.secret_key, date_stamp, self.region, self.service + ) + signature = hmac.new( + signing_key, string_to_sign.encode("utf-8"), hashlib.sha256 + ).hexdigest() + + signed_url = url + f'&X-Amz-Signature={signature}' + logger.debug(f'signed_url:({signed_url})') + return signed_url