From f91164789e529f0a45c139dc3008a3ee734f1d80 Mon Sep 17 00:00:00 2001 From: ErykKul Date: Thu, 4 Jun 2026 15:09:18 +0200 Subject: [PATCH 1/7] Fix URL signing api for URLs containing special characters --- .../fix-url-signing-special-characters.md | 25 +++++++ .../source/installation/config.rst | 7 +- docker-compose-dev.yml | 1 + .../edu/harvard/iq/dataverse/api/Admin.java | 9 ++- .../iq/dataverse/util/UrlSignerUtil.java | 54 ++++++++++----- .../iq/dataverse/util/UrlSignerUtilTest.java | 66 +++++++++++++++++++ 6 files changed, 143 insertions(+), 19 deletions(-) create mode 100644 doc/release-notes/fix-url-signing-special-characters.md diff --git a/doc/release-notes/fix-url-signing-special-characters.md b/doc/release-notes/fix-url-signing-special-characters.md new file mode 100644 index 00000000000..48fff1ae79d --- /dev/null +++ b/doc/release-notes/fix-url-signing-special-characters.md @@ -0,0 +1,25 @@ +### Signed URLs work again for URLs with special characters + +Requesting a signed URL (e.g. via `/api/admin/requestSignedUrl`, used by external tools, the Globus +integration and third-party integrations) was broken for URLs whose query contained special +characters; most notably persistent IDs such as `doi:10.5072/FK2/ABC` (which contain `:` and +`/`), as well as spaces, percent-encoded values and non-ASCII characters. The signing logic had +started normalizing/re-encoding the URL before signing it, while the signature is a byte-exact MAC +over the URL string; the re-encoded bytes no longer matched what callers presented back, so +validation failed with "signature does not match"/authentication errors. + +Signing is now byte-exact again: the base URL is preserved exactly as provided (reserved signing +parameters are still stripped, but without re-encoding the rest of the URL). Clients must continue to +present the signed URL exactly as Dataverse returned it. + +### A signing secret is now required to request signed URLs + +The `/api/admin/requestSignedUrl` endpoint now requires a non-empty signing secret +(`dataverse.api.signing-secret`) to be configured. Previously an unset secret silently fell back to +using only the user's API token as the signing key, which is too weak. If the secret is not +configured, the endpoint now returns an error instead of issuing a weakly-signed URL. + +**Upgrade note:** installations that use signed URLs through this endpoint (including the +`rdm-integration` connector) must set `dataverse.api.signing-secret`. See the +[Configuration Guide](https://guides.dataverse.org/en/latest/installation/config.html#dataverse-api-signing-secret). +Treat the value like a password. diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index f2a6fdfa324..ef28b54c651 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -3349,9 +3349,10 @@ are time limited and only allow the action of the API call in the URL. See :ref: :ref:`api-native-signed-url` for more details. The key used to sign a URL is created from the API token of the creating user plus a signing-secret provided by an administrator. -**Using a signing-secret is highly recommended.** This setting defaults to an empty string. Using a non-empty -signing-secret makes it impossible for someone who knows an API token from forging signed URLs and provides extra security by -making the overall signing key longer. +**A non-empty signing-secret is required to request signed URLs through the API.** If it is not configured, the +``/api/admin/requestSignedUrl`` endpoint (see :ref:`api-native-signed-url`) returns an error instead of issuing a +weakly-signed URL. (The setting otherwise defaults to an empty string.) A non-empty signing-secret makes it impossible for +someone who only knows an API token to forge signed URLs, and provides extra security by making the overall signing key longer. **WARNING**: *Since the signing-secret is sensitive, you should treat it like a password.* diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index b24bf0ed6f6..86aff92e523 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -60,6 +60,7 @@ services: -Ddataverse.pid.fake.label=FakeDOIProvider -Ddataverse.pid.fake.authority=10.5072 -Ddataverse.pid.fake.shoulder=FK2/ + -Ddataverse.api.signing-secret=dev-only-signing-secret-change-me -Ddataverse.cors.origin=* \ -Ddataverse.cors.methods=GET,POST,PUT,DELETE,OPTIONS \ -Ddataverse.cors.headers.allow=range,content-type,x-dataverse-key,accept \ diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index 1eecdd4b171..031a8a6845e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -2453,7 +2453,14 @@ public Response getSignedUrl(@Context ContainerRequestContext crc, JsonObject ur if (superuser == null || !superuser.isSuperuser()) { return error(Response.Status.FORBIDDEN, "Requesting signed URLs is restricted to superusers."); } - + + // A signing secret must be configured. Without it the signing key would fall back to just + // the user's API token, which is too weak. Fail fast rather than hand out weakly-signed URLs. + if (JvmSettings.API_SIGNING_SECRET.lookupOptional().orElse("").isEmpty()) { + return error(Response.Status.BAD_REQUEST, + "Requesting signed URLs requires a signing secret to be configured. Please set the dataverse.api.signing-secret JVM option."); + } + String userId = urlInfo.getString("user"); String key=null; if (userId != null) { diff --git a/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java index 222d265a0c3..49ae548c9d0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java @@ -2,12 +2,10 @@ import org.apache.commons.codec.digest.DigestUtils; import org.apache.http.NameValuePair; -import org.apache.http.client.utils.URIBuilder; import org.apache.http.client.utils.URLEncodedUtils; import org.joda.time.LocalDateTime; import java.net.MalformedURLException; -import java.net.URISyntaxException; import java.net.URL; import java.nio.charset.StandardCharsets; import java.util.List; @@ -45,19 +43,13 @@ public class UrlSignerUtil { */ public static String signUrl(String baseUrl, Integer timeout, String user, String method, String key) { - // check for reserved parameter names ("until","user", "method", or "token") - String[] urlQP = baseUrl.split("\\?"); - if (urlQP.length > 1) { - try { - URIBuilder uriBuilder = new URIBuilder(baseUrl); - List params = uriBuilder.getQueryParams(); - params.removeIf(pair -> reservedParameters.contains(pair.getName())); - uriBuilder.setParameters(params); - baseUrl = uriBuilder.build().toString(); - } catch (URISyntaxException e) { - logger.severe("Invalid URL for signing: " + baseUrl + " " + e.getMessage()); - } - } + // Remove any reserved signing parameters ("until", "user", "method", "token", "key", + // "signed") that may already be present in the base URL, so they cannot be spoofed and so + // re-signing an already-signed URL does not accumulate them. This is done with exact string + // surgery (not URIBuilder) because the signature is a byte-exact MAC computed over the URL + // string: normalizing/re-encoding the URL here (e.g. percent-encoding ':' and '/' in DOIs, + // or handling spaces/unicode) would change the bytes that get hashed and break validation. + baseUrl = stripReservedParameters(baseUrl); boolean firstParam = !baseUrl.contains("?"); StringBuilder signedUrlBuilder = new StringBuilder(baseUrl); @@ -87,6 +79,38 @@ public static String signUrl(String baseUrl, Integer timeout, String user, Strin return signedUrl; } + /** + * Removes any reserved signing parameters ("until", "user", "method", "token", "key", + * "signed") from the query string of the given URL while preserving the exact byte + * representation of the path and of every remaining query parameter. Unlike URIBuilder, this + * does not decode/re-encode the URL, which is required because {@link #signUrl} and + * {@link #isValidUrl} compute a byte-exact MAC over the URL string. + * + * @param baseUrl the URL to clean (may or may not contain a query string) + * @return the URL with reserved parameters removed, otherwise unchanged byte-for-byte + */ + static String stripReservedParameters(String baseUrl) { + int queryStart = baseUrl.indexOf('?'); + if (queryStart < 0) { + return baseUrl; + } + String path = baseUrl.substring(0, queryStart); + String query = baseUrl.substring(queryStart + 1); + StringBuilder kept = new StringBuilder(); + for (String pair : query.split("&")) { + int equals = pair.indexOf('='); + String name = (equals < 0) ? pair : pair.substring(0, equals); + if (reservedParameters.contains(name)) { + continue; + } + if (kept.length() > 0) { + kept.append('&'); + } + kept.append(pair); + } + return kept.length() > 0 ? path + "?" + kept : path; + } + /** * This method will only return true if the URL and parameters except the * "token" are unchanged from the original/match the values sent to this method, diff --git a/src/test/java/edu/harvard/iq/dataverse/util/UrlSignerUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/util/UrlSignerUtilTest.java index d92f8822e59..36e05a80b70 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/UrlSignerUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/UrlSignerUtilTest.java @@ -8,6 +8,7 @@ import java.util.logging.Level; import java.util.logging.Logger; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -86,4 +87,69 @@ public void testSignAndValidateWithParams() { System.out.println(signedUrl3); assertTrue(signedUrl3.contains("&p2&")); // Show that this works with params that have no value } + + @Test + public void testStripReservedParametersPreservesSpecialCharacters() { + // The signature is a byte-exact MAC over the URL string, so removing the reserved signing + // params must not re-encode anything else: a DOI's ':' and '/' must survive unchanged. + String doiUrl = "http://localhost:8080/api/v1/datasets/:persistentId?persistentId=doi:10.5072/FK2/ABC123&foo=bar"; + assertEquals(doiUrl, UrlSignerUtil.stripReservedParameters(doiUrl)); + + // Reserved params (here token/user/signed) are removed; everything else is left byte-for-byte. + String withReserved = "http://localhost:8080/api/v1/datasets/:persistentId?persistentId=doi:10.5072/FK2/ABC123&token=spoofed&user=Mallory&signed=true&foo=bar"; + assertEquals("http://localhost:8080/api/v1/datasets/:persistentId?persistentId=doi:10.5072/FK2/ABC123&foo=bar", + UrlSignerUtil.stripReservedParameters(withReserved)); + + // A URL with no query string is returned unchanged. + String noQuery = "http://localhost:8080/api/v1/datasets/:persistentId"; + assertEquals(noQuery, UrlSignerUtil.stripReservedParameters(noQuery)); + } + + @Test + public void testSignAndValidateSpecialCharacters() { + final int longTimeout = 1000; + final String user = "Alice"; + final String method = "GET"; + final String key = "abracadabara open sesame"; + + // DOIs (':' and '/'), pre-encoded values, spaces, unicode and embedded URLs must all sign + // and then validate when the signed URL is presented back verbatim (as the server returns it). + String[] baseUrls = new String[] { + "http://localhost:8080/api/v1/datasets/:persistentId?persistentId=doi:10.5072/FK2/ABC123&foo=bar", + "http://localhost:8080/api/v1/datasets/:persistentId?persistentId=doi%3A10.5072%2FFK2%2FABC123", + "http://localhost:8080/api/v1/search?q=hello%20world&persistentId=doi:10.1/2", + "http://localhost:8080/api/v1/search?q=hello world&pid=doi:10.1/2", + "http://localhost:8080/api/v1/search?q=café&name=測試", + "http://localhost:8080/api/v1/redirect?url=http%3A%2F%2Fexample.com%2Ff%3Fa%3D1%26b%3D2" + }; + for (String baseUrl : baseUrls) { + String signedUrl = UrlSignerUtil.signUrl(baseUrl, longTimeout, user, method, key); + // The base URL is preserved byte-for-byte in the signed URL (no re-encoding). + assertTrue(signedUrl.startsWith(baseUrl + "&"), + "base URL must be preserved byte-for-byte: " + signedUrl); + assertTrue(UrlSignerUtil.isValidUrl(signedUrl, user, method, key), + "signed URL should validate when used verbatim: " + signedUrl); + } + } + + @Test + public void testSignedUrlIsByteExact() { + // Documents and locks in the byte-exact contract: the signature is computed over the URL + // string exactly as provided. A client that re-encodes the URL before presenting it back + // (e.g. percent-encoding the DOI) must fail validation. This is the regression that broke + // signing for special characters when the URL was normalized via URIBuilder. + final int longTimeout = 1000; + final String user = "Alice"; + final String method = "GET"; + final String key = "abracadabara open sesame"; + + String baseUrl = "http://localhost:8080/api/v1/datasets/:persistentId?persistentId=doi:10.5072/FK2/ABC123"; + String signedUrl = UrlSignerUtil.signUrl(baseUrl, longTimeout, user, method, key); + assertTrue(UrlSignerUtil.isValidUrl(signedUrl, user, method, key)); + + // Re-encoding ':' and '/' in the DOI changes the signed bytes, so it must be rejected. + String reEncoded = signedUrl.replace("doi:10.5072/FK2/ABC123", "doi%3A10.5072%2FFK2%2FABC123"); + assertFalse(UrlSignerUtil.isValidUrl(reEncoded, user, method, key), + "a re-encoded variant must not validate (byte-exact contract)"); + } } From 96be125110420321370f38365bc768798ca3579d Mon Sep 17 00:00:00 2001 From: ErykKul Date: Thu, 4 Jun 2026 15:47:32 +0200 Subject: [PATCH 2/7] Extra URL signing tests to prevent regression --- .../iq/dataverse/util/UrlSignerUtilTest.java | 106 ++++++++++++++++++ 1 file changed, 106 insertions(+) diff --git a/src/test/java/edu/harvard/iq/dataverse/util/UrlSignerUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/util/UrlSignerUtilTest.java index 36e05a80b70..4ae6a8d3f79 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/UrlSignerUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/UrlSignerUtilTest.java @@ -152,4 +152,110 @@ public void testSignedUrlIsByteExact() { assertFalse(UrlSignerUtil.isValidUrl(reEncoded, user, method, key), "a re-encoded variant must not validate (byte-exact contract)"); } + + /** + * Signs {@code baseUrl} and asserts that the signed URL is exactly the provided URL followed by + * a well-formed signature (until/user/method/token), and that it validates when presented back + * verbatim. Returns the signed URL so callers can make further (encoding) assertions on it. + */ + private static String assertSignedExactlyAndValid(String baseUrl, String user, String key) { + String signed = UrlSignerUtil.signUrl(baseUrl, 1000, user, "GET", key); + // What is signed is exactly what was provided (no re-encoding/normalization of the base URL)... + assertTrue(signed.startsWith(baseUrl), + "what is signed must be exactly what was provided: " + baseUrl + "\n got: " + signed); + // ...plus only the signature parameters, appended after it. + String signature = signed.substring(baseUrl.length()); + String separator = baseUrl.contains("?") ? "&" : "\\?"; + assertTrue(signature.matches(separator + "until=[^&]+&user=" + user + "&method=GET&token=[0-9a-f]{128}"), + "only the signature parameters may be appended after the provided URL: " + signature); + // ...and the signature validates when the signed URL is used verbatim (as the client does). + assertTrue(UrlSignerUtil.isValidUrl(signed, user, "GET", key), + "signed URL must validate when presented back verbatim: " + signed); + return signed; + } + + @Test + public void testSignAndValidateRdmIntegrationUrls() { + // Backend regression guard: sign every URL shape that rdm-integration sends through the + // signing client and confirm (a) what is signed is exactly the URL provided plus a valid + // signature, (b) the signature validates verbatim, and (c) the encoding is preserved exactly + // (escaped values stay escaped, raw values stay raw). URL templates mirror rdm-integration: + // plugin/impl/globus/streams.go, dataverse/dataverse_read.go, dataverse/dataverse_write.go, + // plugin/impl/dataverse/query.go. If the backend ever re-encodes the URL again (the original + // regression), these assertions fail. + final String user = "rdmUser"; + final String key = "abracadabara open sesame"; + final String s = "https://demo.dataverse.org"; + final String pid = "doi:10.5072/FK2/ABC123"; // raw, as most rdm paths send it + final String escPid = "doi%3A10.5072%2FFK2%2FABC123"; // url.QueryEscape form + + // Paths that send the persistentId RAW (GetNodeMap, CheckPermission, all globus flows, + // all write flows, the dataverse plugin). The ':' and '/' in the DOI must survive unchanged. + String[] rawPidUrls = new String[] { + s + "/api/v1/datasets/:persistentId/versions/:latest/files?persistentId=" + pid, + s + "/api/v1/datasets/:persistentId?persistentId=" + pid, + s + "/api/v1/admin/permissions/:persistentId?persistentId=" + pid + "&unblock-key=UNBLOCK", + s + "/api/v1/datasets/:persistentId/requestGlobusUploadPaths?persistentId=" + pid, + s + "/api/v1/datasets/:persistentId/addGlobusFiles?persistentId=" + pid, + s + "/api/v1/datasets/:persistentId/requestGlobusDownload?persistentId=" + pid, + s + "/api/v1/datasets/:persistentId/monitorGlobusDownload?persistentId=" + pid, + s + "/api/v1/datasets/:persistentId/globusDownloadParameters?persistentId=" + pid + "&downloadId=globus-task-123", + s + "/api/v1/datasets/:persistentId/add?persistentId=" + pid, + s + "/api/v1/datasets/:persistentId/addFiles?persistentId=" + pid, + s + "/api/v1/datasets/:persistentId/replaceFiles?persistentId=" + pid, + s + "/api/v1/datasets/:persistentId/deleteFiles?persistentId=" + pid, + s + "/api/v1/datasets/:persistentId/cleanStorage?persistentId=" + pid, + }; + for (String baseUrl : rawPidUrls) { + String signed = assertSignedExactlyAndValid(baseUrl, user, key); + assertTrue(signed.contains("persistentId=" + pid), + "raw persistentId must stay raw (not percent-encoded): " + signed); + assertFalse(signed.contains(escPid), + "raw persistentId must not be escaped by the backend: " + signed); + } + + // Paths that send the persistentId URL-ESCAPED (GetDatasetMetadata, GetDatasetUserPermissions). + // The escaped value must remain escaped (the backend must not decode then re-encode it). + String[] escapedPidUrls = new String[] { + s + "/api/v1/datasets/:persistentId?persistentId=" + escPid + "&excludeFiles=true", + s + "/api/v1/datasets/:persistentId/userPermissions?persistentId=" + escPid, + }; + for (String baseUrl : escapedPidUrls) { + String signed = assertSignedExactlyAndValid(baseUrl, user, key); + assertTrue(signed.contains("persistentId=" + escPid), + "escaped persistentId must stay escaped: " + signed); + assertFalse(signed.contains("persistentId=" + pid), + "escaped persistentId must not be decoded to raw: " + signed); + } + + // mydata/retrieve: URL-escaped search term, '+' for spaces, and repeated query parameters. + String mydata = s + "/api/v1/mydata/retrieve?selected_page=1&dvobject_types=Dataset" + + "&published_states=Published&published_states=Unpublished&published_states=Draft" + + "&role_ids=1&role_ids=6&mydata_search_term=text%3A%22hello+world%22"; + String signedMydata = assertSignedExactlyAndValid(mydata, user, key); + assertTrue(signedMydata.contains("mydata_search_term=text%3A%22hello+world%22"), + "escaped search term (including '%3A', '%22' and '+') must be preserved exactly: " + signedMydata); + assertTrue(signedMydata.contains("published_states=Published&published_states=Unpublished&published_states=Draft"), + "repeated query parameters must be preserved: " + signedMydata); + + // Numeric-id / no-persistentId paths (datafile, files, numeric dataset id, users/:me). + String[] otherUrls = new String[] { + s + "/api/v1/access/datafile/123/metadata/ddi", + s + "/api/v1/access/datafile/123", + s + "/api/v1/files/123", + s + "/api/v1/users/:me", + s + "/api/v1/datasets/42/versions/:latest?excludeFiles=true", + }; + for (String baseUrl : otherUrls) { + assertSignedExactlyAndValid(baseUrl, user, key); + } + + // noSlashPermissionUrl sends an empty leading query segment ("?&unblock-key=..."); the backend + // drops the empty segment, so it does not round-trip byte-for-byte, but it still signs and + // validates (the client uses the returned signed URL verbatim). + String emptySegment = s + "/api/v1/admin/permissions/42?&unblock-key=UNBLOCK"; + String signedEmptySegment = UrlSignerUtil.signUrl(emptySegment, 1000, user, "GET", key); + assertTrue(UrlSignerUtil.isValidUrl(signedEmptySegment, user, "GET", key), + "permissions URL with an empty leading query segment must validate: " + signedEmptySegment); + } } From 88125820759a05f022de1a55b847393d7dfe8c29 Mon Sep 17 00:00:00 2001 From: ErykKul Date: Thu, 4 Jun 2026 16:29:14 +0200 Subject: [PATCH 3/7] More URL signing tests --- .../api/auth/SignedUrlAuthMechanismTest.java | 59 +++++++++++++++++++ .../SignedUrlContainerRequestTestFake.java | 6 +- .../doubles/SignedUrlUriInfoTestFake.java | 12 ++++ 3 files changed, 76 insertions(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/auth/SignedUrlAuthMechanismTest.java b/src/test/java/edu/harvard/iq/dataverse/api/auth/SignedUrlAuthMechanismTest.java index 6fd7d2e1d8e..4ccc0b8c90c 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/auth/SignedUrlAuthMechanismTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/auth/SignedUrlAuthMechanismTest.java @@ -5,6 +5,7 @@ import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.authorization.users.User; +import edu.harvard.iq.dataverse.util.UrlSignerUtil; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.mockito.Mockito; @@ -96,4 +97,62 @@ public void testFindUserFromRequest_SignedUrlTokenProvided_UserDoesNotExistForTh assertEquals(RESPONSE_MESSAGE_BAD_SIGNED_URL, wrappedUnauthorizedAuthErrorResponse.getMessage()); } + + // ---- End-to-end signature validation through the REAL SignedUrlAuthMechanism ---- + // These drive the actual validation path: sign a URL, then validate it the way the server does + // (URLDecoder.decode(requestUri) -> UrlSignerUtil.isValidUrl), proving a signed URL produced for + // an rdm-integration-style URL actually authenticates the user (and that tampering does not). + // No signing-secret is configured in this unit context, so the signing key is just the API token. + + private void givenUserWithSigningKey(String key) { + AuthenticationServiceBean authStub = Mockito.mock(AuthenticationServiceBean.class); + Mockito.when(authStub.getAuthenticatedUser(TEST_SIGNED_URL_USER_ID)).thenReturn(testAuthenticatedUser); + ApiToken apiToken = Mockito.mock(ApiToken.class); + Mockito.when(apiToken.getTokenString()).thenReturn(key); + Mockito.when(authStub.findApiTokenByUser(testAuthenticatedUser)).thenReturn(apiToken); + sut.authSvc = authStub; + } + + @Test + public void testEndToEnd_rawPersistentIdSignedUrl_authenticatesUser() throws WrappedAuthErrorResponse { + givenUserWithSigningKey(TEST_SIGNED_URL_TOKEN); + String base = "http://localhost:8080/api/v1/datasets/:persistentId?persistentId=doi:10.5072/FK2/ABC"; + // The client signs this URL and requests it verbatim (a raw DOI is unchanged by URLDecoder.decode). + String signedUrl = UrlSignerUtil.signUrl(base, 1000, TEST_SIGNED_URL_USER_ID, "GET", TEST_SIGNED_URL_TOKEN); + + ContainerRequestContext request = new SignedUrlContainerRequestTestFake(TEST_SIGNED_URL_TOKEN, TEST_SIGNED_URL_USER_ID, signedUrl); + + assertEquals(testAuthenticatedUser, sut.findUserFromRequest(request), + "a signed raw-DOI URL must authenticate end to end (decode + validate)"); + } + + @Test + public void testEndToEnd_escapedPersistentIdReconstructedRequest_authenticatesUser() throws WrappedAuthErrorResponse { + givenUserWithSigningKey(TEST_SIGNED_URL_TOKEN); + // The signing client un-escapes before signing, so the server signs the DECODED form... + String decodedBase = "http://localhost:8080/api/v1/datasets/:persistentId?persistentId=doi:10.5072/FK2/ABC"; + String signedDecoded = UrlSignerUtil.signUrl(decodedBase, 1000, TEST_SIGNED_URL_USER_ID, "GET", TEST_SIGNED_URL_TOKEN); + // ...but the actual request carries the ESCAPED persistentId (the URL the caller built). The + // server's URLDecoder.decode turns it back into the signed (decoded) form, so it validates. + String escapedBase = "http://localhost:8080/api/v1/datasets/:persistentId?persistentId=doi%3A10.5072%2FFK2%2FABC"; + String requestUri = escapedBase + signedDecoded.substring(decodedBase.length()); + + ContainerRequestContext request = new SignedUrlContainerRequestTestFake(TEST_SIGNED_URL_TOKEN, TEST_SIGNED_URL_USER_ID, requestUri); + + assertEquals(testAuthenticatedUser, sut.findUserFromRequest(request), + "an escaped persistentId must authenticate end to end (server signs decoded; validation decodes the request)"); + } + + @Test + public void testEndToEnd_tamperedSignedUrl_userNotAuthenticated() { + givenUserWithSigningKey(TEST_SIGNED_URL_TOKEN); + String base = "http://localhost:8080/api/v1/datasets/:persistentId?persistentId=doi:10.5072/FK2/ABC"; + String signedUrl = UrlSignerUtil.signUrl(base, 1000, TEST_SIGNED_URL_USER_ID, "GET", TEST_SIGNED_URL_TOKEN); + // Alter the signed portion of the URL after signing -> the signature must no longer validate. + String tampered = signedUrl.replace("FK2/ABC", "FK2/HACKED"); + + ContainerRequestContext request = new SignedUrlContainerRequestTestFake(TEST_SIGNED_URL_TOKEN, TEST_SIGNED_URL_USER_ID, tampered); + + assertThrows(WrappedUnauthorizedAuthErrorResponse.class, () -> sut.findUserFromRequest(request)); + } } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/auth/doubles/SignedUrlContainerRequestTestFake.java b/src/test/java/edu/harvard/iq/dataverse/api/auth/doubles/SignedUrlContainerRequestTestFake.java index df37f6723d3..6a1a440f713 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/auth/doubles/SignedUrlContainerRequestTestFake.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/auth/doubles/SignedUrlContainerRequestTestFake.java @@ -6,7 +6,11 @@ public class SignedUrlContainerRequestTestFake extends ContainerRequestTestFake private final UriInfo uriInfo; public SignedUrlContainerRequestTestFake(String signedUrlToken, String signedUrlUserId) { - this.uriInfo = new SignedUrlUriInfoTestFake(signedUrlToken, signedUrlUserId); + this(signedUrlToken, signedUrlUserId, null); + } + + public SignedUrlContainerRequestTestFake(String signedUrlToken, String signedUrlUserId, String requestUriOverride) { + this.uriInfo = new SignedUrlUriInfoTestFake(signedUrlToken, signedUrlUserId, requestUriOverride); } @Override diff --git a/src/test/java/edu/harvard/iq/dataverse/api/auth/doubles/SignedUrlUriInfoTestFake.java b/src/test/java/edu/harvard/iq/dataverse/api/auth/doubles/SignedUrlUriInfoTestFake.java index fa9da7fc8de..1ac3f44ac17 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/auth/doubles/SignedUrlUriInfoTestFake.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/auth/doubles/SignedUrlUriInfoTestFake.java @@ -15,18 +15,30 @@ public class SignedUrlUriInfoTestFake extends UriInfoTestFake { private final String signedUrlToken; private final String signedUrlUserId; + private final String requestUriOverride; private static final String SIGNED_URL_BASE_URL = "http://localhost:8080/api/test1"; private static final Integer SIGNED_URL_TIMEOUT = 1000; public SignedUrlUriInfoTestFake(String signedUrlToken, String signedUrlUserId) { + this(signedUrlToken, signedUrlUserId, null); + } + + // Lets a test supply the exact request URI the server would see (e.g. a signed URL for a + // specific base URL, or a reconstructed/escaped presentation), so the real validation path + // (URLDecoder.decode + UrlSignerUtil.isValidUrl) can be exercised end to end. + public SignedUrlUriInfoTestFake(String signedUrlToken, String signedUrlUserId, String requestUriOverride) { this.signedUrlToken = signedUrlToken; this.signedUrlUserId = signedUrlUserId; + this.requestUriOverride = requestUriOverride; } @Override public URI getRequestUri() { + if (requestUriOverride != null) { + return URI.create(requestUriOverride); + } return URI.create(UrlSignerUtil.signUrl(SIGNED_URL_BASE_URL, SIGNED_URL_TIMEOUT, signedUrlUserId, GET, signedUrlToken)); } From 9cb852a4aac5ade984b13c3ec3f10ec06c403144 Mon Sep 17 00:00:00 2001 From: ErykKul Date: Thu, 4 Jun 2026 20:21:11 +0200 Subject: [PATCH 4/7] Clean up URL signing tests and comments --- .../edu/harvard/iq/dataverse/api/Admin.java | 3 +- .../iq/dataverse/util/UrlSignerUtil.java | 19 +-- .../api/auth/SignedUrlAuthMechanismTest.java | 94 +++++++++----- .../doubles/SignedUrlUriInfoTestFake.java | 5 +- .../iq/dataverse/util/UrlSignerUtilTest.java | 115 +----------------- 5 files changed, 73 insertions(+), 163 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index 031a8a6845e..91d932d12d6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -2454,8 +2454,7 @@ public Response getSignedUrl(@Context ContainerRequestContext crc, JsonObject ur return error(Response.Status.FORBIDDEN, "Requesting signed URLs is restricted to superusers."); } - // A signing secret must be configured. Without it the signing key would fall back to just - // the user's API token, which is too weak. Fail fast rather than hand out weakly-signed URLs. + // Require a signing secret: without it the key is only the user's API token, which is too weak. if (JvmSettings.API_SIGNING_SECRET.lookupOptional().orElse("").isEmpty()) { return error(Response.Status.BAD_REQUEST, "Requesting signed URLs requires a signing secret to be configured. Please set the dataverse.api.signing-secret JVM option."); diff --git a/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java index 49ae548c9d0..9bc39598dbb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java @@ -43,12 +43,9 @@ public class UrlSignerUtil { */ public static String signUrl(String baseUrl, Integer timeout, String user, String method, String key) { - // Remove any reserved signing parameters ("until", "user", "method", "token", "key", - // "signed") that may already be present in the base URL, so they cannot be spoofed and so - // re-signing an already-signed URL does not accumulate them. This is done with exact string - // surgery (not URIBuilder) because the signature is a byte-exact MAC computed over the URL - // string: normalizing/re-encoding the URL here (e.g. percent-encoding ':' and '/' in DOIs, - // or handling spaces/unicode) would change the bytes that get hashed and break validation. + // Strip reserved signing params that may already be in the base URL. Done with exact-string + // surgery (not URIBuilder): the signature is a byte-exact MAC, so re-encoding the URL here + // (e.g. percent-encoding ':' and '/' in DOIs) would change the hashed bytes and break it. baseUrl = stripReservedParameters(baseUrl); boolean firstParam = !baseUrl.contains("?"); StringBuilder signedUrlBuilder = new StringBuilder(baseUrl); @@ -80,14 +77,8 @@ public static String signUrl(String baseUrl, Integer timeout, String user, Strin } /** - * Removes any reserved signing parameters ("until", "user", "method", "token", "key", - * "signed") from the query string of the given URL while preserving the exact byte - * representation of the path and of every remaining query parameter. Unlike URIBuilder, this - * does not decode/re-encode the URL, which is required because {@link #signUrl} and - * {@link #isValidUrl} compute a byte-exact MAC over the URL string. - * - * @param baseUrl the URL to clean (may or may not contain a query string) - * @return the URL with reserved parameters removed, otherwise unchanged byte-for-byte + * Removes the reserved signing parameters from the query, preserving the exact bytes of the path + * and of every other parameter (unlike URIBuilder, which would re-encode and break the MAC). */ static String stripReservedParameters(String baseUrl) { int queryStart = baseUrl.indexOf('?'); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/auth/SignedUrlAuthMechanismTest.java b/src/test/java/edu/harvard/iq/dataverse/api/auth/SignedUrlAuthMechanismTest.java index 4ccc0b8c90c..076894f4b10 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/auth/SignedUrlAuthMechanismTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/auth/SignedUrlAuthMechanismTest.java @@ -12,6 +12,10 @@ import jakarta.ws.rs.container.ContainerRequestContext; +import java.net.URLDecoder; +import java.nio.charset.StandardCharsets; +import java.util.List; + import static edu.harvard.iq.dataverse.api.auth.SignedUrlAuthMechanism.RESPONSE_MESSAGE_BAD_SIGNED_URL; import static org.junit.jupiter.api.Assertions.*; @@ -98,11 +102,9 @@ public void testFindUserFromRequest_SignedUrlTokenProvided_UserDoesNotExistForTh assertEquals(RESPONSE_MESSAGE_BAD_SIGNED_URL, wrappedUnauthorizedAuthErrorResponse.getMessage()); } - // ---- End-to-end signature validation through the REAL SignedUrlAuthMechanism ---- - // These drive the actual validation path: sign a URL, then validate it the way the server does - // (URLDecoder.decode(requestUri) -> UrlSignerUtil.isValidUrl), proving a signed URL produced for - // an rdm-integration-style URL actually authenticates the user (and that tampering does not). - // No signing-secret is configured in this unit context, so the signing key is just the API token. + // End-to-end validation through the REAL SignedUrlAuthMechanism (URLDecoder.decode + isValidUrl), + // which the isValidUrl-only tests in UrlSignerUtilTest do not exercise. No signing secret is + // configured here, so the signing key is just the API token. private void givenUserWithSigningKey(String key) { AuthenticationServiceBean authStub = Mockito.mock(AuthenticationServiceBean.class); @@ -114,45 +116,71 @@ private void givenUserWithSigningKey(String key) { } @Test - public void testEndToEnd_rawPersistentIdSignedUrl_authenticatesUser() throws WrappedAuthErrorResponse { + public void testEndToEnd_tamperedSignedUrl_userNotAuthenticated() { givenUserWithSigningKey(TEST_SIGNED_URL_TOKEN); String base = "http://localhost:8080/api/v1/datasets/:persistentId?persistentId=doi:10.5072/FK2/ABC"; - // The client signs this URL and requests it verbatim (a raw DOI is unchanged by URLDecoder.decode). String signedUrl = UrlSignerUtil.signUrl(base, 1000, TEST_SIGNED_URL_USER_ID, "GET", TEST_SIGNED_URL_TOKEN); + // Alter the signed portion of the URL after signing -> the signature must no longer validate. + String tampered = signedUrl.replace("FK2/ABC", "FK2/HACKED"); - ContainerRequestContext request = new SignedUrlContainerRequestTestFake(TEST_SIGNED_URL_TOKEN, TEST_SIGNED_URL_USER_ID, signedUrl); + ContainerRequestContext request = new SignedUrlContainerRequestTestFake(TEST_SIGNED_URL_TOKEN, TEST_SIGNED_URL_USER_ID, tampered); - assertEquals(testAuthenticatedUser, sut.findUserFromRequest(request), - "a signed raw-DOI URL must authenticate end to end (decode + validate)"); + assertThrows(WrappedUnauthorizedAuthErrorResponse.class, () -> sut.findUserFromRequest(request)); } - @Test - public void testEndToEnd_escapedPersistentIdReconstructedRequest_authenticatesUser() throws WrappedAuthErrorResponse { + // Runs the real rdm flow: un-escape, sign, request the original (encoded) URL + signature, then the + // server URL-decodes the request and checks it. Returns true iff the user authenticates. + private boolean validatesEndToEndAsRdmClient(String urlAsClientBuilds) { givenUserWithSigningKey(TEST_SIGNED_URL_TOKEN); - // The signing client un-escapes before signing, so the server signs the DECODED form... - String decodedBase = "http://localhost:8080/api/v1/datasets/:persistentId?persistentId=doi:10.5072/FK2/ABC"; - String signedDecoded = UrlSignerUtil.signUrl(decodedBase, 1000, TEST_SIGNED_URL_USER_ID, "GET", TEST_SIGNED_URL_TOKEN); - // ...but the actual request carries the ESCAPED persistentId (the URL the caller built). The - // server's URLDecoder.decode turns it back into the signed (decoded) form, so it validates. - String escapedBase = "http://localhost:8080/api/v1/datasets/:persistentId?persistentId=doi%3A10.5072%2FFK2%2FABC"; - String requestUri = escapedBase + signedDecoded.substring(decodedBase.length()); - + String canonical = URLDecoder.decode(urlAsClientBuilds, StandardCharsets.UTF_8); + String signed = UrlSignerUtil.signUrl(canonical, 1000, TEST_SIGNED_URL_USER_ID, "GET", TEST_SIGNED_URL_TOKEN); + String requestUri = urlAsClientBuilds + signed.substring(canonical.length()); ContainerRequestContext request = new SignedUrlContainerRequestTestFake(TEST_SIGNED_URL_TOKEN, TEST_SIGNED_URL_USER_ID, requestUri); - - assertEquals(testAuthenticatedUser, sut.findUserFromRequest(request), - "an escaped persistentId must authenticate end to end (server signs decoded; validation decodes the request)"); + try { + return testAuthenticatedUser.equals(sut.findUserFromRequest(request)); + } catch (WrappedAuthErrorResponse e) { + return false; + } } @Test - public void testEndToEnd_tamperedSignedUrl_userNotAuthenticated() { - givenUserWithSigningKey(TEST_SIGNED_URL_TOKEN); - String base = "http://localhost:8080/api/v1/datasets/:persistentId?persistentId=doi:10.5072/FK2/ABC"; - String signedUrl = UrlSignerUtil.signUrl(base, 1000, TEST_SIGNED_URL_USER_ID, "GET", TEST_SIGNED_URL_TOKEN); - // Alter the signed portion of the URL after signing -> the signature must no longer validate. - String tampered = signedUrl.replace("FK2/ABC", "FK2/HACKED"); - - ContainerRequestContext request = new SignedUrlContainerRequestTestFake(TEST_SIGNED_URL_TOKEN, TEST_SIGNED_URL_USER_ID, tampered); - - assertThrows(WrappedUnauthorizedAuthErrorResponse.class, () -> sut.findUserFromRequest(request)); + public void testEndToEnd_allRdmIntegrationUrls_authenticate() { + final String s = "https://demo.dataverse.org"; + final String pid = "doi:10.5072/FK2/ABC"; // raw, as most rdm paths send it + final String escPid = "doi%3A10.5072%2FFK2%2FABC"; // url.QueryEscape form (GetDatasetMetadata, GetDatasetUserPermissions) + + // Every URL shape rdm-integration signs - each must authenticate end to end. + List urls = List.of( + // raw persistentId in the query (GetNodeMap, CheckPermission, globus, writes, dataverse plugin) + s + "/api/v1/datasets/:persistentId/versions/:latest/files?persistentId=" + pid, + s + "/api/v1/datasets/:persistentId?persistentId=" + pid, + s + "/api/v1/admin/permissions/:persistentId?persistentId=" + pid + "&unblock-key=UNBLOCK", + s + "/api/v1/datasets/:persistentId/requestGlobusUploadPaths?persistentId=" + pid, + s + "/api/v1/datasets/:persistentId/addGlobusFiles?persistentId=" + pid, + s + "/api/v1/datasets/:persistentId/requestGlobusDownload?persistentId=" + pid, + s + "/api/v1/datasets/:persistentId/monitorGlobusDownload?persistentId=" + pid, + s + "/api/v1/datasets/:persistentId/globusDownloadParameters?persistentId=" + pid + "&downloadId=globus-task-123", + s + "/api/v1/datasets/:persistentId/add?persistentId=" + pid, + s + "/api/v1/datasets/:persistentId/addFiles?persistentId=" + pid, + s + "/api/v1/datasets/:persistentId/replaceFiles?persistentId=" + pid, + s + "/api/v1/datasets/:persistentId/deleteFiles?persistentId=" + pid, + s + "/api/v1/datasets/:persistentId/cleanStorage?persistentId=" + pid, + // url-escaped persistentId (GetDatasetMetadata, GetDatasetUserPermissions) + s + "/api/v1/datasets/:persistentId?persistentId=" + escPid + "&excludeFiles=true", + s + "/api/v1/datasets/:persistentId/userPermissions?persistentId=" + escPid, + // mydata/retrieve: url-escaped search term, '+' for spaces, repeated query params + s + "/api/v1/mydata/retrieve?selected_page=1&dvobject_types=Dataset" + + "&published_states=Published&published_states=Unpublished&published_states=Draft" + + "&role_ids=1&role_ids=6&mydata_search_term=text%3A%22hello+world%22", + // numeric-id / no-persistentId paths + s + "/api/v1/access/datafile/123/metadata/ddi", + s + "/api/v1/access/datafile/123", + s + "/api/v1/files/123", + s + "/api/v1/users/:me", + s + "/api/v1/datasets/42/versions/:latest?excludeFiles=true" + ); + for (String url : urls) { + assertTrue(validatesEndToEndAsRdmClient(url), "signed URL must authenticate end to end: " + url); + } } } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/auth/doubles/SignedUrlUriInfoTestFake.java b/src/test/java/edu/harvard/iq/dataverse/api/auth/doubles/SignedUrlUriInfoTestFake.java index 1ac3f44ac17..5edd31509f8 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/auth/doubles/SignedUrlUriInfoTestFake.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/auth/doubles/SignedUrlUriInfoTestFake.java @@ -25,9 +25,8 @@ public SignedUrlUriInfoTestFake(String signedUrlToken, String signedUrlUserId) { this(signedUrlToken, signedUrlUserId, null); } - // Lets a test supply the exact request URI the server would see (e.g. a signed URL for a - // specific base URL, or a reconstructed/escaped presentation), so the real validation path - // (URLDecoder.decode + UrlSignerUtil.isValidUrl) can be exercised end to end. + // Lets a test supply the exact request URI the server would see, to exercise the real + // URLDecoder.decode + isValidUrl validation path. public SignedUrlUriInfoTestFake(String signedUrlToken, String signedUrlUserId, String requestUriOverride) { this.signedUrlToken = signedUrlToken; this.signedUrlUserId = signedUrlUserId; diff --git a/src/test/java/edu/harvard/iq/dataverse/util/UrlSignerUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/util/UrlSignerUtilTest.java index 4ae6a8d3f79..94b76a9678b 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/UrlSignerUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/UrlSignerUtilTest.java @@ -113,7 +113,8 @@ public void testSignAndValidateSpecialCharacters() { final String key = "abracadabara open sesame"; // DOIs (':' and '/'), pre-encoded values, spaces, unicode and embedded URLs must all sign - // and then validate when the signed URL is presented back verbatim (as the server returns it). + // byte-exact and be accepted by isValidUrl over those exact bytes (the signing primitive; + // end-to-end validation with the server's URLDecoder.decode is in SignedUrlAuthMechanismTest). String[] baseUrls = new String[] { "http://localhost:8080/api/v1/datasets/:persistentId?persistentId=doi:10.5072/FK2/ABC123&foo=bar", "http://localhost:8080/api/v1/datasets/:persistentId?persistentId=doi%3A10.5072%2FFK2%2FABC123", @@ -134,10 +135,8 @@ public void testSignAndValidateSpecialCharacters() { @Test public void testSignedUrlIsByteExact() { - // Documents and locks in the byte-exact contract: the signature is computed over the URL - // string exactly as provided. A client that re-encodes the URL before presenting it back - // (e.g. percent-encoding the DOI) must fail validation. This is the regression that broke - // signing for special characters when the URL was normalized via URIBuilder. + // Byte-exact contract: the signature is over the URL as provided, so a re-encoded variant + // must fail validation. This is the regression that URIBuilder normalization caused. final int longTimeout = 1000; final String user = "Alice"; final String method = "GET"; @@ -152,110 +151,4 @@ public void testSignedUrlIsByteExact() { assertFalse(UrlSignerUtil.isValidUrl(reEncoded, user, method, key), "a re-encoded variant must not validate (byte-exact contract)"); } - - /** - * Signs {@code baseUrl} and asserts that the signed URL is exactly the provided URL followed by - * a well-formed signature (until/user/method/token), and that it validates when presented back - * verbatim. Returns the signed URL so callers can make further (encoding) assertions on it. - */ - private static String assertSignedExactlyAndValid(String baseUrl, String user, String key) { - String signed = UrlSignerUtil.signUrl(baseUrl, 1000, user, "GET", key); - // What is signed is exactly what was provided (no re-encoding/normalization of the base URL)... - assertTrue(signed.startsWith(baseUrl), - "what is signed must be exactly what was provided: " + baseUrl + "\n got: " + signed); - // ...plus only the signature parameters, appended after it. - String signature = signed.substring(baseUrl.length()); - String separator = baseUrl.contains("?") ? "&" : "\\?"; - assertTrue(signature.matches(separator + "until=[^&]+&user=" + user + "&method=GET&token=[0-9a-f]{128}"), - "only the signature parameters may be appended after the provided URL: " + signature); - // ...and the signature validates when the signed URL is used verbatim (as the client does). - assertTrue(UrlSignerUtil.isValidUrl(signed, user, "GET", key), - "signed URL must validate when presented back verbatim: " + signed); - return signed; - } - - @Test - public void testSignAndValidateRdmIntegrationUrls() { - // Backend regression guard: sign every URL shape that rdm-integration sends through the - // signing client and confirm (a) what is signed is exactly the URL provided plus a valid - // signature, (b) the signature validates verbatim, and (c) the encoding is preserved exactly - // (escaped values stay escaped, raw values stay raw). URL templates mirror rdm-integration: - // plugin/impl/globus/streams.go, dataverse/dataverse_read.go, dataverse/dataverse_write.go, - // plugin/impl/dataverse/query.go. If the backend ever re-encodes the URL again (the original - // regression), these assertions fail. - final String user = "rdmUser"; - final String key = "abracadabara open sesame"; - final String s = "https://demo.dataverse.org"; - final String pid = "doi:10.5072/FK2/ABC123"; // raw, as most rdm paths send it - final String escPid = "doi%3A10.5072%2FFK2%2FABC123"; // url.QueryEscape form - - // Paths that send the persistentId RAW (GetNodeMap, CheckPermission, all globus flows, - // all write flows, the dataverse plugin). The ':' and '/' in the DOI must survive unchanged. - String[] rawPidUrls = new String[] { - s + "/api/v1/datasets/:persistentId/versions/:latest/files?persistentId=" + pid, - s + "/api/v1/datasets/:persistentId?persistentId=" + pid, - s + "/api/v1/admin/permissions/:persistentId?persistentId=" + pid + "&unblock-key=UNBLOCK", - s + "/api/v1/datasets/:persistentId/requestGlobusUploadPaths?persistentId=" + pid, - s + "/api/v1/datasets/:persistentId/addGlobusFiles?persistentId=" + pid, - s + "/api/v1/datasets/:persistentId/requestGlobusDownload?persistentId=" + pid, - s + "/api/v1/datasets/:persistentId/monitorGlobusDownload?persistentId=" + pid, - s + "/api/v1/datasets/:persistentId/globusDownloadParameters?persistentId=" + pid + "&downloadId=globus-task-123", - s + "/api/v1/datasets/:persistentId/add?persistentId=" + pid, - s + "/api/v1/datasets/:persistentId/addFiles?persistentId=" + pid, - s + "/api/v1/datasets/:persistentId/replaceFiles?persistentId=" + pid, - s + "/api/v1/datasets/:persistentId/deleteFiles?persistentId=" + pid, - s + "/api/v1/datasets/:persistentId/cleanStorage?persistentId=" + pid, - }; - for (String baseUrl : rawPidUrls) { - String signed = assertSignedExactlyAndValid(baseUrl, user, key); - assertTrue(signed.contains("persistentId=" + pid), - "raw persistentId must stay raw (not percent-encoded): " + signed); - assertFalse(signed.contains(escPid), - "raw persistentId must not be escaped by the backend: " + signed); - } - - // Paths that send the persistentId URL-ESCAPED (GetDatasetMetadata, GetDatasetUserPermissions). - // The escaped value must remain escaped (the backend must not decode then re-encode it). - String[] escapedPidUrls = new String[] { - s + "/api/v1/datasets/:persistentId?persistentId=" + escPid + "&excludeFiles=true", - s + "/api/v1/datasets/:persistentId/userPermissions?persistentId=" + escPid, - }; - for (String baseUrl : escapedPidUrls) { - String signed = assertSignedExactlyAndValid(baseUrl, user, key); - assertTrue(signed.contains("persistentId=" + escPid), - "escaped persistentId must stay escaped: " + signed); - assertFalse(signed.contains("persistentId=" + pid), - "escaped persistentId must not be decoded to raw: " + signed); - } - - // mydata/retrieve: URL-escaped search term, '+' for spaces, and repeated query parameters. - String mydata = s + "/api/v1/mydata/retrieve?selected_page=1&dvobject_types=Dataset" - + "&published_states=Published&published_states=Unpublished&published_states=Draft" - + "&role_ids=1&role_ids=6&mydata_search_term=text%3A%22hello+world%22"; - String signedMydata = assertSignedExactlyAndValid(mydata, user, key); - assertTrue(signedMydata.contains("mydata_search_term=text%3A%22hello+world%22"), - "escaped search term (including '%3A', '%22' and '+') must be preserved exactly: " + signedMydata); - assertTrue(signedMydata.contains("published_states=Published&published_states=Unpublished&published_states=Draft"), - "repeated query parameters must be preserved: " + signedMydata); - - // Numeric-id / no-persistentId paths (datafile, files, numeric dataset id, users/:me). - String[] otherUrls = new String[] { - s + "/api/v1/access/datafile/123/metadata/ddi", - s + "/api/v1/access/datafile/123", - s + "/api/v1/files/123", - s + "/api/v1/users/:me", - s + "/api/v1/datasets/42/versions/:latest?excludeFiles=true", - }; - for (String baseUrl : otherUrls) { - assertSignedExactlyAndValid(baseUrl, user, key); - } - - // noSlashPermissionUrl sends an empty leading query segment ("?&unblock-key=..."); the backend - // drops the empty segment, so it does not round-trip byte-for-byte, but it still signs and - // validates (the client uses the returned signed URL verbatim). - String emptySegment = s + "/api/v1/admin/permissions/42?&unblock-key=UNBLOCK"; - String signedEmptySegment = UrlSignerUtil.signUrl(emptySegment, 1000, user, "GET", key); - assertTrue(UrlSignerUtil.isValidUrl(signedEmptySegment, user, "GET", key), - "permissions URL with an empty leading query segment must validate: " + signedEmptySegment); - } } From 942f3cf3ffc63ab7c93b23adaeb0423fe58a508a Mon Sep 17 00:00:00 2001 From: ErykKul Date: Mon, 8 Jun 2026 15:57:40 +0200 Subject: [PATCH 5/7] updated the release note --- doc/release-notes/fix-url-signing-special-characters.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/release-notes/fix-url-signing-special-characters.md b/doc/release-notes/fix-url-signing-special-characters.md index 48fff1ae79d..f60763f2d05 100644 --- a/doc/release-notes/fix-url-signing-special-characters.md +++ b/doc/release-notes/fix-url-signing-special-characters.md @@ -22,4 +22,6 @@ configured, the endpoint now returns an error instead of issuing a weakly-signed **Upgrade note:** installations that use signed URLs through this endpoint (including the `rdm-integration` connector) must set `dataverse.api.signing-secret`. See the [Configuration Guide](https://guides.dataverse.org/en/latest/installation/config.html#dataverse-api-signing-secret). -Treat the value like a password. +Treat the value like a password. Because the signing secret is part of the signing key, setting (or +later changing) it invalidates previously issued signed URLs: any existing signed URLs that have not +yet expired will stop working, and clients/integrations will need to request new ones. From 4387ee97d05bd687280f6a4f9b76ebe9cd17447e Mon Sep 17 00:00:00 2001 From: ErykKul Date: Mon, 8 Jun 2026 16:33:52 +0200 Subject: [PATCH 6/7] Clarify URL-signing release note and comment --- .../fix-url-signing-special-characters.md | 33 +++++++++++-------- .../iq/dataverse/util/UrlSignerUtil.java | 8 +++-- 2 files changed, 25 insertions(+), 16 deletions(-) diff --git a/doc/release-notes/fix-url-signing-special-characters.md b/doc/release-notes/fix-url-signing-special-characters.md index f60763f2d05..b547d64c540 100644 --- a/doc/release-notes/fix-url-signing-special-characters.md +++ b/doc/release-notes/fix-url-signing-special-characters.md @@ -1,23 +1,30 @@ ### Signed URLs work again for URLs with special characters Requesting a signed URL (e.g. via `/api/admin/requestSignedUrl`, used by external tools, the Globus -integration and third-party integrations) was broken for URLs whose query contained special -characters; most notably persistent IDs such as `doi:10.5072/FK2/ABC` (which contain `:` and -`/`), as well as spaces, percent-encoded values and non-ASCII characters. The signing logic had -started normalizing/re-encoding the URL before signing it, while the signature is a byte-exact MAC -over the URL string; the re-encoded bytes no longer matched what callers presented back, so -validation failed with "signature does not match"/authentication errors. +integration and third-party integrations such as the `rdm-integration` connector) was broken in 6.10 +for URLs whose query contained special characters — most notably persistent IDs such as +`doi:10.5072/FK2/ABC` (which contain `:` and `/`), as well as spaces, percent-encoded values and +non-ASCII characters. In 6.10 the signing step began re-encoding/normalizing the URL (for example +percent-encoding `:` and `/`) before computing the signature, while the request is validated against +the URL the caller actually presents back. The re-encoded signature no longer matched, so validation +failed with authentication / "signature does not match" errors. -Signing is now byte-exact again: the base URL is preserved exactly as provided (reserved signing -parameters are still stripped, but without re-encoding the rest of the URL). Clients must continue to -present the signed URL exactly as Dataverse returned it. +Signing no longer re-encodes the URL: it is signed exactly as provided, with only the reserved +signing parameters (`until`, `user`, `method`, `token`, `key`, `signed`) stripped out; the rest of +the URL is left untouched, character for character. + +**This restores the URL-signing behavior used before 6.10, so it is compatible with older versions +and with existing integrations.** Clients and connectors that build or consume signed URLs the way +they did before 6.10 keep working unchanged, signatures are computed the same way as before the +regression, and URLs containing special characters validate again. No client-side changes are +required. ### A signing secret is now required to request signed URLs -The `/api/admin/requestSignedUrl` endpoint now requires a non-empty signing secret -(`dataverse.api.signing-secret`) to be configured. Previously an unset secret silently fell back to -using only the user's API token as the signing key, which is too weak. If the secret is not -configured, the endpoint now returns an error instead of issuing a weakly-signed URL. +Separately from the fix above, the `/api/admin/requestSignedUrl` endpoint now requires a non-empty +signing secret (`dataverse.api.signing-secret`) to be configured. Previously an unset secret silently +fell back to using only the user's API token as the signing key, which is too weak. If the secret is +not configured, the endpoint now returns an error instead of issuing a weakly-signed URL. **Upgrade note:** installations that use signed URLs through this endpoint (including the `rdm-integration` connector) must set `dataverse.api.signing-secret`. See the diff --git a/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java index 9bc39598dbb..2a918f5fb16 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java @@ -43,9 +43,11 @@ public class UrlSignerUtil { */ public static String signUrl(String baseUrl, Integer timeout, String user, String method, String key) { - // Strip reserved signing params that may already be in the base URL. Done with exact-string - // surgery (not URIBuilder): the signature is a byte-exact MAC, so re-encoding the URL here - // (e.g. percent-encoding ':' and '/' in DOIs) would change the hashed bytes and break it. + // Strip reserved signing params that may already be in the base URL, using exact-string + // surgery rather than URIBuilder. The URL must be signed exactly as provided (the pre-6.10 + // behavior): validation reconstructs the signing string from the URL-decoded request, so + // re-encoding here (e.g. percent-encoding ':' and '/' in DOIs) would change the signed bytes + // and the signature would no longer match. baseUrl = stripReservedParameters(baseUrl); boolean firstParam = !baseUrl.contains("?"); StringBuilder signedUrlBuilder = new StringBuilder(baseUrl); From 8b1ab7b06b998f89be624070e39ad4cdef8f1b54 Mon Sep 17 00:00:00 2001 From: ErykKul Date: Mon, 8 Jun 2026 16:43:02 +0200 Subject: [PATCH 7/7] Return 500 instead of 400 when signing secret is not configured --- src/main/java/edu/harvard/iq/dataverse/api/Admin.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index 9985e60de70..e1ff9bb54fd 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -2456,7 +2456,7 @@ public Response getSignedUrl(@Context ContainerRequestContext crc, JsonObject ur // Require a signing secret: without it the key is only the user's API token, which is too weak. if (JvmSettings.API_SIGNING_SECRET.lookupOptional().orElse("").isEmpty()) { - return error(Response.Status.BAD_REQUEST, + return error(Response.Status.INTERNAL_SERVER_ERROR, "Requesting signed URLs requires a signing secret to be configured. Please set the dataverse.api.signing-secret JVM option."); }