diff --git a/doc/release-notes/fix-url-signing-special-characters.md b/doc/release-notes/fix-url-signing-special-characters.md new file mode 100644 index 00000000000..b547d64c540 --- /dev/null +++ b/doc/release-notes/fix-url-signing-special-characters.md @@ -0,0 +1,34 @@ +### Signed URLs work again for URLs with special characters + +Requesting a signed URL (e.g. via `/api/admin/requestSignedUrl`, used by external tools, the Globus +integration and third-party integrations such as the `rdm-integration` connector) was broken in 6.10 +for URLs whose query contained special characters — most notably persistent IDs such as +`doi:10.5072/FK2/ABC` (which contain `:` and `/`), as well as spaces, percent-encoded values and +non-ASCII characters. In 6.10 the signing step began re-encoding/normalizing the URL (for example +percent-encoding `:` and `/`) before computing the signature, while the request is validated against +the URL the caller actually presents back. The re-encoded signature no longer matched, so validation +failed with authentication / "signature does not match" errors. + +Signing no longer re-encodes the URL: it is signed exactly as provided, with only the reserved +signing parameters (`until`, `user`, `method`, `token`, `key`, `signed`) stripped out; the rest of +the URL is left untouched, character for character. + +**This restores the URL-signing behavior used before 6.10, so it is compatible with older versions +and with existing integrations.** Clients and connectors that build or consume signed URLs the way +they did before 6.10 keep working unchanged, signatures are computed the same way as before the +regression, and URLs containing special characters validate again. No client-side changes are +required. + +### A signing secret is now required to request signed URLs + +Separately from the fix above, the `/api/admin/requestSignedUrl` endpoint now requires a non-empty +signing secret (`dataverse.api.signing-secret`) to be configured. Previously an unset secret silently +fell back to using only the user's API token as the signing key, which is too weak. If the secret is +not configured, the endpoint now returns an error instead of issuing a weakly-signed URL. + +**Upgrade note:** installations that use signed URLs through this endpoint (including the +`rdm-integration` connector) must set `dataverse.api.signing-secret`. See the +[Configuration Guide](https://guides.dataverse.org/en/latest/installation/config.html#dataverse-api-signing-secret). +Treat the value like a password. Because the signing secret is part of the signing key, setting (or +later changing) it invalidates previously issued signed URLs: any existing signed URLs that have not +yet expired will stop working, and clients/integrations will need to request new ones. diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index f2a6fdfa324..ef28b54c651 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -3349,9 +3349,10 @@ are time limited and only allow the action of the API call in the URL. See :ref: :ref:`api-native-signed-url` for more details. The key used to sign a URL is created from the API token of the creating user plus a signing-secret provided by an administrator. -**Using a signing-secret is highly recommended.** This setting defaults to an empty string. Using a non-empty -signing-secret makes it impossible for someone who knows an API token from forging signed URLs and provides extra security by -making the overall signing key longer. +**A non-empty signing-secret is required to request signed URLs through the API.** If it is not configured, the +``/api/admin/requestSignedUrl`` endpoint (see :ref:`api-native-signed-url`) returns an error instead of issuing a +weakly-signed URL. (The setting otherwise defaults to an empty string.) A non-empty signing-secret makes it impossible for +someone who only knows an API token to forge signed URLs, and provides extra security by making the overall signing key longer. **WARNING**: *Since the signing-secret is sensitive, you should treat it like a password.* diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index b24bf0ed6f6..86aff92e523 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -60,6 +60,7 @@ services: -Ddataverse.pid.fake.label=FakeDOIProvider -Ddataverse.pid.fake.authority=10.5072 -Ddataverse.pid.fake.shoulder=FK2/ + -Ddataverse.api.signing-secret=dev-only-signing-secret-change-me -Ddataverse.cors.origin=* \ -Ddataverse.cors.methods=GET,POST,PUT,DELETE,OPTIONS \ -Ddataverse.cors.headers.allow=range,content-type,x-dataverse-key,accept \ diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index 919fa7f67f9..e1ff9bb54fd 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -2453,7 +2453,13 @@ public Response getSignedUrl(@Context ContainerRequestContext crc, JsonObject ur if (superuser == null || !superuser.isSuperuser()) { return error(Response.Status.FORBIDDEN, "Requesting signed URLs is restricted to superusers."); } - + + // Require a signing secret: without it the key is only the user's API token, which is too weak. + if (JvmSettings.API_SIGNING_SECRET.lookupOptional().orElse("").isEmpty()) { + return error(Response.Status.INTERNAL_SERVER_ERROR, + "Requesting signed URLs requires a signing secret to be configured. Please set the dataverse.api.signing-secret JVM option."); + } + String userId = urlInfo.getString("user"); String key=null; if (userId != null) { diff --git a/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java index 222d265a0c3..2a918f5fb16 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java @@ -2,12 +2,10 @@ import org.apache.commons.codec.digest.DigestUtils; import org.apache.http.NameValuePair; -import org.apache.http.client.utils.URIBuilder; import org.apache.http.client.utils.URLEncodedUtils; import org.joda.time.LocalDateTime; import java.net.MalformedURLException; -import java.net.URISyntaxException; import java.net.URL; import java.nio.charset.StandardCharsets; import java.util.List; @@ -45,19 +43,12 @@ public class UrlSignerUtil { */ public static String signUrl(String baseUrl, Integer timeout, String user, String method, String key) { - // check for reserved parameter names ("until","user", "method", or "token") - String[] urlQP = baseUrl.split("\\?"); - if (urlQP.length > 1) { - try { - URIBuilder uriBuilder = new URIBuilder(baseUrl); - List params = uriBuilder.getQueryParams(); - params.removeIf(pair -> reservedParameters.contains(pair.getName())); - uriBuilder.setParameters(params); - baseUrl = uriBuilder.build().toString(); - } catch (URISyntaxException e) { - logger.severe("Invalid URL for signing: " + baseUrl + " " + e.getMessage()); - } - } + // Strip reserved signing params that may already be in the base URL, using exact-string + // surgery rather than URIBuilder. The URL must be signed exactly as provided (the pre-6.10 + // behavior): validation reconstructs the signing string from the URL-decoded request, so + // re-encoding here (e.g. percent-encoding ':' and '/' in DOIs) would change the signed bytes + // and the signature would no longer match. + baseUrl = stripReservedParameters(baseUrl); boolean firstParam = !baseUrl.contains("?"); StringBuilder signedUrlBuilder = new StringBuilder(baseUrl); @@ -87,6 +78,32 @@ public static String signUrl(String baseUrl, Integer timeout, String user, Strin return signedUrl; } + /** + * Removes the reserved signing parameters from the query, preserving the exact bytes of the path + * and of every other parameter (unlike URIBuilder, which would re-encode and break the MAC). + */ + static String stripReservedParameters(String baseUrl) { + int queryStart = baseUrl.indexOf('?'); + if (queryStart < 0) { + return baseUrl; + } + String path = baseUrl.substring(0, queryStart); + String query = baseUrl.substring(queryStart + 1); + StringBuilder kept = new StringBuilder(); + for (String pair : query.split("&")) { + int equals = pair.indexOf('='); + String name = (equals < 0) ? pair : pair.substring(0, equals); + if (reservedParameters.contains(name)) { + continue; + } + if (kept.length() > 0) { + kept.append('&'); + } + kept.append(pair); + } + return kept.length() > 0 ? path + "?" + kept : path; + } + /** * This method will only return true if the URL and parameters except the * "token" are unchanged from the original/match the values sent to this method, diff --git a/src/test/java/edu/harvard/iq/dataverse/api/auth/SignedUrlAuthMechanismTest.java b/src/test/java/edu/harvard/iq/dataverse/api/auth/SignedUrlAuthMechanismTest.java index 6fd7d2e1d8e..076894f4b10 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/auth/SignedUrlAuthMechanismTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/auth/SignedUrlAuthMechanismTest.java @@ -5,12 +5,17 @@ import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.authorization.users.User; +import edu.harvard.iq.dataverse.util.UrlSignerUtil; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.mockito.Mockito; import jakarta.ws.rs.container.ContainerRequestContext; +import java.net.URLDecoder; +import java.nio.charset.StandardCharsets; +import java.util.List; + import static edu.harvard.iq.dataverse.api.auth.SignedUrlAuthMechanism.RESPONSE_MESSAGE_BAD_SIGNED_URL; import static org.junit.jupiter.api.Assertions.*; @@ -96,4 +101,86 @@ public void testFindUserFromRequest_SignedUrlTokenProvided_UserDoesNotExistForTh assertEquals(RESPONSE_MESSAGE_BAD_SIGNED_URL, wrappedUnauthorizedAuthErrorResponse.getMessage()); } + + // End-to-end validation through the REAL SignedUrlAuthMechanism (URLDecoder.decode + isValidUrl), + // which the isValidUrl-only tests in UrlSignerUtilTest do not exercise. No signing secret is + // configured here, so the signing key is just the API token. + + private void givenUserWithSigningKey(String key) { + AuthenticationServiceBean authStub = Mockito.mock(AuthenticationServiceBean.class); + Mockito.when(authStub.getAuthenticatedUser(TEST_SIGNED_URL_USER_ID)).thenReturn(testAuthenticatedUser); + ApiToken apiToken = Mockito.mock(ApiToken.class); + Mockito.when(apiToken.getTokenString()).thenReturn(key); + Mockito.when(authStub.findApiTokenByUser(testAuthenticatedUser)).thenReturn(apiToken); + sut.authSvc = authStub; + } + + @Test + public void testEndToEnd_tamperedSignedUrl_userNotAuthenticated() { + givenUserWithSigningKey(TEST_SIGNED_URL_TOKEN); + String base = "http://localhost:8080/api/v1/datasets/:persistentId?persistentId=doi:10.5072/FK2/ABC"; + String signedUrl = UrlSignerUtil.signUrl(base, 1000, TEST_SIGNED_URL_USER_ID, "GET", TEST_SIGNED_URL_TOKEN); + // Alter the signed portion of the URL after signing -> the signature must no longer validate. + String tampered = signedUrl.replace("FK2/ABC", "FK2/HACKED"); + + ContainerRequestContext request = new SignedUrlContainerRequestTestFake(TEST_SIGNED_URL_TOKEN, TEST_SIGNED_URL_USER_ID, tampered); + + assertThrows(WrappedUnauthorizedAuthErrorResponse.class, () -> sut.findUserFromRequest(request)); + } + + // Runs the real rdm flow: un-escape, sign, request the original (encoded) URL + signature, then the + // server URL-decodes the request and checks it. Returns true iff the user authenticates. + private boolean validatesEndToEndAsRdmClient(String urlAsClientBuilds) { + givenUserWithSigningKey(TEST_SIGNED_URL_TOKEN); + String canonical = URLDecoder.decode(urlAsClientBuilds, StandardCharsets.UTF_8); + String signed = UrlSignerUtil.signUrl(canonical, 1000, TEST_SIGNED_URL_USER_ID, "GET", TEST_SIGNED_URL_TOKEN); + String requestUri = urlAsClientBuilds + signed.substring(canonical.length()); + ContainerRequestContext request = new SignedUrlContainerRequestTestFake(TEST_SIGNED_URL_TOKEN, TEST_SIGNED_URL_USER_ID, requestUri); + try { + return testAuthenticatedUser.equals(sut.findUserFromRequest(request)); + } catch (WrappedAuthErrorResponse e) { + return false; + } + } + + @Test + public void testEndToEnd_allRdmIntegrationUrls_authenticate() { + final String s = "https://demo.dataverse.org"; + final String pid = "doi:10.5072/FK2/ABC"; // raw, as most rdm paths send it + final String escPid = "doi%3A10.5072%2FFK2%2FABC"; // url.QueryEscape form (GetDatasetMetadata, GetDatasetUserPermissions) + + // Every URL shape rdm-integration signs - each must authenticate end to end. + List urls = List.of( + // raw persistentId in the query (GetNodeMap, CheckPermission, globus, writes, dataverse plugin) + s + "/api/v1/datasets/:persistentId/versions/:latest/files?persistentId=" + pid, + s + "/api/v1/datasets/:persistentId?persistentId=" + pid, + s + "/api/v1/admin/permissions/:persistentId?persistentId=" + pid + "&unblock-key=UNBLOCK", + s + "/api/v1/datasets/:persistentId/requestGlobusUploadPaths?persistentId=" + pid, + s + "/api/v1/datasets/:persistentId/addGlobusFiles?persistentId=" + pid, + s + "/api/v1/datasets/:persistentId/requestGlobusDownload?persistentId=" + pid, + s + "/api/v1/datasets/:persistentId/monitorGlobusDownload?persistentId=" + pid, + s + "/api/v1/datasets/:persistentId/globusDownloadParameters?persistentId=" + pid + "&downloadId=globus-task-123", + s + "/api/v1/datasets/:persistentId/add?persistentId=" + pid, + s + "/api/v1/datasets/:persistentId/addFiles?persistentId=" + pid, + s + "/api/v1/datasets/:persistentId/replaceFiles?persistentId=" + pid, + s + "/api/v1/datasets/:persistentId/deleteFiles?persistentId=" + pid, + s + "/api/v1/datasets/:persistentId/cleanStorage?persistentId=" + pid, + // url-escaped persistentId (GetDatasetMetadata, GetDatasetUserPermissions) + s + "/api/v1/datasets/:persistentId?persistentId=" + escPid + "&excludeFiles=true", + s + "/api/v1/datasets/:persistentId/userPermissions?persistentId=" + escPid, + // mydata/retrieve: url-escaped search term, '+' for spaces, repeated query params + s + "/api/v1/mydata/retrieve?selected_page=1&dvobject_types=Dataset" + + "&published_states=Published&published_states=Unpublished&published_states=Draft" + + "&role_ids=1&role_ids=6&mydata_search_term=text%3A%22hello+world%22", + // numeric-id / no-persistentId paths + s + "/api/v1/access/datafile/123/metadata/ddi", + s + "/api/v1/access/datafile/123", + s + "/api/v1/files/123", + s + "/api/v1/users/:me", + s + "/api/v1/datasets/42/versions/:latest?excludeFiles=true" + ); + for (String url : urls) { + assertTrue(validatesEndToEndAsRdmClient(url), "signed URL must authenticate end to end: " + url); + } + } } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/auth/doubles/SignedUrlContainerRequestTestFake.java b/src/test/java/edu/harvard/iq/dataverse/api/auth/doubles/SignedUrlContainerRequestTestFake.java index df37f6723d3..6a1a440f713 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/auth/doubles/SignedUrlContainerRequestTestFake.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/auth/doubles/SignedUrlContainerRequestTestFake.java @@ -6,7 +6,11 @@ public class SignedUrlContainerRequestTestFake extends ContainerRequestTestFake private final UriInfo uriInfo; public SignedUrlContainerRequestTestFake(String signedUrlToken, String signedUrlUserId) { - this.uriInfo = new SignedUrlUriInfoTestFake(signedUrlToken, signedUrlUserId); + this(signedUrlToken, signedUrlUserId, null); + } + + public SignedUrlContainerRequestTestFake(String signedUrlToken, String signedUrlUserId, String requestUriOverride) { + this.uriInfo = new SignedUrlUriInfoTestFake(signedUrlToken, signedUrlUserId, requestUriOverride); } @Override diff --git a/src/test/java/edu/harvard/iq/dataverse/api/auth/doubles/SignedUrlUriInfoTestFake.java b/src/test/java/edu/harvard/iq/dataverse/api/auth/doubles/SignedUrlUriInfoTestFake.java index fa9da7fc8de..5edd31509f8 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/auth/doubles/SignedUrlUriInfoTestFake.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/auth/doubles/SignedUrlUriInfoTestFake.java @@ -15,18 +15,29 @@ public class SignedUrlUriInfoTestFake extends UriInfoTestFake { private final String signedUrlToken; private final String signedUrlUserId; + private final String requestUriOverride; private static final String SIGNED_URL_BASE_URL = "http://localhost:8080/api/test1"; private static final Integer SIGNED_URL_TIMEOUT = 1000; public SignedUrlUriInfoTestFake(String signedUrlToken, String signedUrlUserId) { + this(signedUrlToken, signedUrlUserId, null); + } + + // Lets a test supply the exact request URI the server would see, to exercise the real + // URLDecoder.decode + isValidUrl validation path. + public SignedUrlUriInfoTestFake(String signedUrlToken, String signedUrlUserId, String requestUriOverride) { this.signedUrlToken = signedUrlToken; this.signedUrlUserId = signedUrlUserId; + this.requestUriOverride = requestUriOverride; } @Override public URI getRequestUri() { + if (requestUriOverride != null) { + return URI.create(requestUriOverride); + } return URI.create(UrlSignerUtil.signUrl(SIGNED_URL_BASE_URL, SIGNED_URL_TIMEOUT, signedUrlUserId, GET, signedUrlToken)); } diff --git a/src/test/java/edu/harvard/iq/dataverse/util/UrlSignerUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/util/UrlSignerUtilTest.java index d92f8822e59..94b76a9678b 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/UrlSignerUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/UrlSignerUtilTest.java @@ -8,6 +8,7 @@ import java.util.logging.Level; import java.util.logging.Logger; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -86,4 +87,68 @@ public void testSignAndValidateWithParams() { System.out.println(signedUrl3); assertTrue(signedUrl3.contains("&p2&")); // Show that this works with params that have no value } + + @Test + public void testStripReservedParametersPreservesSpecialCharacters() { + // The signature is a byte-exact MAC over the URL string, so removing the reserved signing + // params must not re-encode anything else: a DOI's ':' and '/' must survive unchanged. + String doiUrl = "http://localhost:8080/api/v1/datasets/:persistentId?persistentId=doi:10.5072/FK2/ABC123&foo=bar"; + assertEquals(doiUrl, UrlSignerUtil.stripReservedParameters(doiUrl)); + + // Reserved params (here token/user/signed) are removed; everything else is left byte-for-byte. + String withReserved = "http://localhost:8080/api/v1/datasets/:persistentId?persistentId=doi:10.5072/FK2/ABC123&token=spoofed&user=Mallory&signed=true&foo=bar"; + assertEquals("http://localhost:8080/api/v1/datasets/:persistentId?persistentId=doi:10.5072/FK2/ABC123&foo=bar", + UrlSignerUtil.stripReservedParameters(withReserved)); + + // A URL with no query string is returned unchanged. + String noQuery = "http://localhost:8080/api/v1/datasets/:persistentId"; + assertEquals(noQuery, UrlSignerUtil.stripReservedParameters(noQuery)); + } + + @Test + public void testSignAndValidateSpecialCharacters() { + final int longTimeout = 1000; + final String user = "Alice"; + final String method = "GET"; + final String key = "abracadabara open sesame"; + + // DOIs (':' and '/'), pre-encoded values, spaces, unicode and embedded URLs must all sign + // byte-exact and be accepted by isValidUrl over those exact bytes (the signing primitive; + // end-to-end validation with the server's URLDecoder.decode is in SignedUrlAuthMechanismTest). + String[] baseUrls = new String[] { + "http://localhost:8080/api/v1/datasets/:persistentId?persistentId=doi:10.5072/FK2/ABC123&foo=bar", + "http://localhost:8080/api/v1/datasets/:persistentId?persistentId=doi%3A10.5072%2FFK2%2FABC123", + "http://localhost:8080/api/v1/search?q=hello%20world&persistentId=doi:10.1/2", + "http://localhost:8080/api/v1/search?q=hello world&pid=doi:10.1/2", + "http://localhost:8080/api/v1/search?q=café&name=測試", + "http://localhost:8080/api/v1/redirect?url=http%3A%2F%2Fexample.com%2Ff%3Fa%3D1%26b%3D2" + }; + for (String baseUrl : baseUrls) { + String signedUrl = UrlSignerUtil.signUrl(baseUrl, longTimeout, user, method, key); + // The base URL is preserved byte-for-byte in the signed URL (no re-encoding). + assertTrue(signedUrl.startsWith(baseUrl + "&"), + "base URL must be preserved byte-for-byte: " + signedUrl); + assertTrue(UrlSignerUtil.isValidUrl(signedUrl, user, method, key), + "signed URL should validate when used verbatim: " + signedUrl); + } + } + + @Test + public void testSignedUrlIsByteExact() { + // Byte-exact contract: the signature is over the URL as provided, so a re-encoded variant + // must fail validation. This is the regression that URIBuilder normalization caused. + final int longTimeout = 1000; + final String user = "Alice"; + final String method = "GET"; + final String key = "abracadabara open sesame"; + + String baseUrl = "http://localhost:8080/api/v1/datasets/:persistentId?persistentId=doi:10.5072/FK2/ABC123"; + String signedUrl = UrlSignerUtil.signUrl(baseUrl, longTimeout, user, method, key); + assertTrue(UrlSignerUtil.isValidUrl(signedUrl, user, method, key)); + + // Re-encoding ':' and '/' in the DOI changes the signed bytes, so it must be rejected. + String reEncoded = signedUrl.replace("doi:10.5072/FK2/ABC123", "doi%3A10.5072%2FFK2%2FABC123"); + assertFalse(UrlSignerUtil.isValidUrl(reEncoded, user, method, key), + "a re-encoded variant must not validate (byte-exact contract)"); + } }