From 8279e65a2df7f510d0012276acaeef8b38bb2273 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 15 Jul 2025 10:56:57 +0200 Subject: [PATCH 01/69] feat: add `parse` method and unit tests for `SettingsServiceBean.Key` #11639 Implemented a method to parse strings into `SettingsServiceBean.Key` values, handling null and invalid inputs gracefully. Added corresponding unit tests to verify behavior and maintain consistency. --- .../settings/SettingsServiceBean.java | 32 +++++++++++++ .../settings/SettingsServiceBeanTest.java | 48 +++++++++++++++++++ 2 files changed, 80 insertions(+) create mode 100644 src/test/java/edu/harvard/iq/dataverse/settings/SettingsServiceBeanTest.java diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index b323a9b7861..60d72785f9b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -694,6 +694,38 @@ Whether Harvesting (OAI) service is enabled public String toString() { return ":" + name(); } + + /** + * Parses the input string to match a corresponding {@code SettingsServiceBean.Key}. + * The method expects the input string to start with a colon (:) followed by the key name. + * If the key name matches one of the existing {@code SettingsServiceBean.Key} enumerations, + * the corresponding key is returned. The check is case-sensitive. + * + * @param key the input string in the format ":KeyName", where "KeyName" corresponds + * to the name of an enumeration in {@code SettingsServiceBean.Key}. + * If {@code key} is null, blank, does not start with a colon (:), or does not + * match any known key, the method returns {@code null}. + * @return the corresponding {@code SettingsServiceBean.Key} if the key matches one + * of the predefined keys, or {@code null} if no match is found. 
+ */ + public static SettingsServiceBean.Key parse(String key) { + // Null safety and format check + if (key == null || key.isBlank() || key.charAt(0) != ':') return null; + + // Cut off the ":" we verified is present before + String normalizedKey = key.substring(1); + + // Iterate through all the known keys and return on match (case sensitive!) + // We are case sensitive here because Dataverse implicitly uses case sensitive keys everywhere! + for (SettingsServiceBean.Key k : SettingsServiceBean.Key.values()) { + if (k.name().equals(normalizedKey)) { + return k; + } + } + + // Fall through on no match + return null; + } } @PersistenceContext diff --git a/src/test/java/edu/harvard/iq/dataverse/settings/SettingsServiceBeanTest.java b/src/test/java/edu/harvard/iq/dataverse/settings/SettingsServiceBeanTest.java new file mode 100644 index 00000000000..8d68f48a5ae --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/settings/SettingsServiceBeanTest.java @@ -0,0 +1,48 @@ +package edu.harvard.iq.dataverse.settings; + +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +import java.util.List; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +class SettingsServiceBeanTest { + + @Nested + class KeyEnumTest { + static List parseTestParameters() { + return List.of( + Arguments.of(null, null), + Arguments.of("", null), + Arguments.of(" ", null), + Arguments.of("foobar", null), + Arguments.of("ShowMuteOptions", null), + Arguments.of(":FooBar", null), + Arguments.of(":ShowMuteOptions", SettingsServiceBean.Key.ShowMuteOptions) + ); + } + + @MethodSource("parseTestParameters") + @ParameterizedTest + void testParse(String sut, SettingsServiceBean.Key expected) { + assertEquals(expected, SettingsServiceBean.Key.parse(sut)); + } + + @Test + void testToString() { + // Make sure we 
test the intended behavior so it doesn't change by accident. + assertEquals(":ShowMuteOptions", SettingsServiceBean.Key.ShowMuteOptions.toString()); + } + + @Test + void testRoundtrip() { + for (SettingsServiceBean.Key key : SettingsServiceBean.Key.values()) { + assertEquals(key, SettingsServiceBean.Key.parse(key.toString())); + } + } + } +} \ No newline at end of file From 0a9575329a749bea856c1a34d2b5ff173680a9bd Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 15 Jul 2025 11:09:06 +0200 Subject: [PATCH 02/69] feat: add `listAllWithoutLocalizations` method to filter settings without language data #11639 Replaced usage of `listAll` with the new `listAllWithoutLocalizations` method in various parts of the codebase for improved clarity and targeted queries. Added the corresponding named query `Setting.findAllWithoutLang`. Before, listAll was used in context without localization present or dropping the l10n details from outputs. For example, the Admin API to get all settings neglected to share the localized information, making one setting appear multiple times without the l10n information present. This is still subject to change, enabling the API endpoints to keep this information around. 
--- .../edu/harvard/iq/dataverse/EditDatafilesPage.java | 2 +- .../java/edu/harvard/iq/dataverse/SettingsWrapper.java | 2 +- src/main/java/edu/harvard/iq/dataverse/api/Admin.java | 2 +- .../edu/harvard/iq/dataverse/settings/Setting.java | 2 ++ .../iq/dataverse/settings/SettingsServiceBean.java | 10 ++++++++-- 5 files changed, 13 insertions(+), 5 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java index 3fa1c8b2c10..b7d28ceabfa 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java @@ -383,7 +383,7 @@ public String getHumanPerFormatTabularLimits() { public String populateHumanPerFormatTabularLimits() { String keyPrefix = ":TabularIngestSizeLimit:"; List formatLimits = new ArrayList<>(); - for (Setting setting : settingsService.listAll()) { + for (Setting setting : settingsService.listAllWithoutLocalizations()) { String name = setting.getName(); if (!name.startsWith(keyPrefix)) { continue; diff --git a/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java b/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java index 3ff27699379..41fc605bfb3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java @@ -218,7 +218,7 @@ public Integer getInteger(String settingKey, Integer defaultValue) { private void initSettingsMap() { // initialize settings map settingsMap = new HashMap<>(); - for (Setting setting : settingsService.listAll()) { + for (Setting setting : settingsService.listAllWithoutLocalizations()) { settingsMap.put(setting.getName(), setting.getContent()); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index d55f582ecae..f6e622304d8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -198,7 +198,7 @@ public class Admin extends AbstractApiBean { @GET public Response listAllSettings() { JsonObjectBuilder bld = jsonObjectBuilder(); - settingsSvc.listAll().forEach(s -> bld.add(s.getName(), s.getContent())); + settingsSvc.listAllWithoutLocalizations().forEach(s -> bld.add(s.getName(), s.getContent())); return ok(bld); } diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/Setting.java b/src/main/java/edu/harvard/iq/dataverse/settings/Setting.java index b1910a2fbb5..e429d685c3e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/Setting.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/Setting.java @@ -19,6 +19,8 @@ query="DELETE FROM Setting s WHERE s.name=:name AND s.lang IS NULL"), @NamedQuery( name="Setting.findAll", query="SELECT s FROM Setting s"), + @NamedQuery( name="Setting.findAllWithoutLang", + query="SELECT s FROM Setting s WHERE s.lang IS NULL"), @NamedQuery( name="Setting.findByName", query = "SELECT s FROM Setting s WHERE s.name=:name AND s.lang IS NULL" ), @NamedQuery( name="Setting.deleteByNameAndLang", diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index 60d72785f9b..6239c38ad3f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -1009,8 +1009,14 @@ public void delete( String name, String lang ) { .executeUpdate(); } - public Set listAll() { - return new HashSet<>(em.createNamedQuery("Setting.findAll", Setting.class).getResultList()); + /** + * Retrieves all settings that do not have any language localizations. + * This method uses a named query to fetch settings where the language field is null. + * + * @return a set of {@code Setting} objects that do not have language localizations. 
+ */ + public Set listAllWithoutLocalizations() { + return new HashSet<>(em.createNamedQuery("Setting.findAllWithoutLang", Setting.class).getResultList()); } public Map getBaseMetadataLanguageMap(Map languageMap, boolean refresh) { From 54c3f6e9b9858ccfa5be09abc92c53b00e314092 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 15 Jul 2025 14:54:58 +0200 Subject: [PATCH 03/69] feat: implement `listAllAsJson` for settings retrieval with localization support #11639 Replaced `listAllWithoutLocalizations` in the Admin API with the new `listAllAsJson` method, enabling inclusion of localized setting variants in API responses. Added OpenAPI annotations for improved documentation. Introduced comprehensive unit tests to validate behavior of localized and non-localized settings handling. --- .../edu/harvard/iq/dataverse/api/Admin.java | 17 +++- .../settings/SettingsServiceBean.java | 46 +++++++++++ .../settings/SettingsServiceBeanTest.java | 80 +++++++++++++++++++ 3 files changed, 139 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index f6e622304d8..69ed33f60dc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -132,6 +132,11 @@ import jakarta.ws.rs.QueryParam; import jakarta.ws.rs.WebApplicationException; import jakarta.ws.rs.core.StreamingOutput; +import org.eclipse.microprofile.openapi.annotations.media.Content; +import org.eclipse.microprofile.openapi.annotations.media.Schema; +import org.eclipse.microprofile.openapi.annotations.responses.APIResponse; +import org.eclipse.microprofile.openapi.annotations.responses.APIResponses; + import java.nio.file.Paths; import java.util.TreeMap; @@ -193,13 +198,17 @@ public class Admin extends AbstractApiBean { public static final String listUsersPartialAPIPath = "list-users"; public static final String listUsersFullAPIPath = "/api/admin/" + 
listUsersPartialAPIPath; - + @Path("settings") @GET + @APIResponses({ + @APIResponse(responseCode = "200", + description = "All database options successfully queried", + // The schema may be extended later to better describe what the JSON object looks like. + content = @Content(schema = @Schema(implementation = JsonObject.class))), + }) public Response listAllSettings() { - JsonObjectBuilder bld = jsonObjectBuilder(); - settingsSvc.listAllWithoutLocalizations().forEach(s -> bld.add(s.getName(), s.getContent())); - return ok(bld); + return ok(settingsSvc.listAllAsJson()); } @Path("settings/{name}") diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index 6239c38ad3f..48c37480e0e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -8,8 +8,10 @@ import jakarta.ejb.EJB; import jakarta.ejb.Stateless; import jakarta.inject.Named; +import jakarta.json.Json; import jakarta.json.JsonArray; import jakarta.json.JsonObject; +import jakarta.json.JsonObjectBuilder; import jakarta.json.JsonValue; import jakarta.persistence.EntityManager; import jakarta.persistence.PersistenceContext; @@ -28,6 +30,7 @@ import java.util.StringTokenizer; import java.util.logging.Level; import java.util.logging.Logger; +import java.util.stream.Collectors; /** * Service bean accessing a persistent hash map, used as settings in the application. @@ -1019,6 +1022,49 @@ public Set listAllWithoutLocalizations() { return new HashSet<>(em.createNamedQuery("Setting.findAllWithoutLang", Setting.class).getResultList()); } + /** + * Retrieves all available application settings as a JSON object. + * The method fetches settings from the database, organizes them into localized + * and non-localized entries, and builds a JSON representation of the dataset. 
+ * Non-localized settings are added directly as key-value pairs, while localized + * settings are grouped under their associated keys with language-specific mappings. + * Note: settings may exist with both non-localized and localized variants. + * The non-localized variant will be added as the "base" locale. + * + * @return a {@code JsonObject} containing all application settings, organized + * as key-value pairs for non-localized settings, or as sub-objects + * for settings with language localizations. + */ + public JsonObject listAllAsJson() { + Set settings = new HashSet<>(em.createNamedQuery("Setting.findAll", Setting.class).getResultList()); + + Set settingsWithL10n = settings.stream() + .filter(s -> s.getLang() != null) + .map(Setting::getName) + .collect(Collectors.toUnmodifiableSet()); + Map localizedSettings = new HashMap<>(); + + JsonObjectBuilder response = Json.createObjectBuilder(); + + // Iterate over all the settings and add them to the response. + settings.forEach(setting -> { + // Simple case: This setting is not localized, go ahead and add it. + if (!settingsWithL10n.contains(setting.getName())) { + response.add(setting.getName(), setting.getContent()); + // Localized case: We can't just add it, we need to have a sub-object. + // Also, we don't know the order of the settings or when all localized variants are done. + } else { + localizedSettings.computeIfAbsent(setting.getName(), name -> Json.createObjectBuilder()); + localizedSettings.get(setting.getName()) + .add(setting.getLang() == null ? "base" : setting.getLang(), setting.getContent()); + } + }); + + // We now know that we processed all settings, so add all the l10n builders at once. 
+ localizedSettings.forEach(response::add); + return response.build(); + } + public Map getBaseMetadataLanguageMap(Map languageMap, boolean refresh) { if (languageMap == null || refresh) { languageMap = new HashMap(); diff --git a/src/test/java/edu/harvard/iq/dataverse/settings/SettingsServiceBeanTest.java b/src/test/java/edu/harvard/iq/dataverse/settings/SettingsServiceBeanTest.java index 8d68f48a5ae..a80b4786982 100644 --- a/src/test/java/edu/harvard/iq/dataverse/settings/SettingsServiceBeanTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/settings/SettingsServiceBeanTest.java @@ -1,14 +1,22 @@ package edu.harvard.iq.dataverse.settings; +import jakarta.json.JsonObject; +import jakarta.persistence.EntityManager; +import jakarta.persistence.TypedQuery; +import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; import org.junit.jupiter.params.provider.MethodSource; +import org.mockito.ArgumentMatchers; +import java.util.Collections; import java.util.List; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; class SettingsServiceBeanTest { @@ -45,4 +53,76 @@ void testRoundtrip() { } } } + + @Nested + class ListAllAsJsonTest { + + static TypedQuery typedQuery = mock(TypedQuery.class); + static EntityManager em = mock(EntityManager.class); + static SettingsServiceBean settingsServiceBean = new SettingsServiceBean(); + + @BeforeAll + static void setup() { + settingsServiceBean.em = em; + + when(em.createNamedQuery( + ArgumentMatchers.eq("Setting.findAll"), + ArgumentMatchers.eq(Setting.class))) + .thenReturn(typedQuery); + } + + @Test + void testListAllAsJson_noSettings() { + // Given + List emptyList = Collections.emptyList(); + when(typedQuery.getResultList()).thenReturn(emptyList); + + // When + JsonObject result = 
settingsServiceBean.listAllAsJson(); + + // Then + assertEquals(0, result.size()); + } + + @Test + void testListAllAsJson_nonLocalizedSettings() { + // Given + List resultList = List.of( + new Setting("testKey1", "testValue1"), + new Setting("testKey2", "testValue2") + ); + when(typedQuery.getResultList()).thenReturn(resultList); + + // When + JsonObject result = settingsServiceBean.listAllAsJson(); + + // Then + assertEquals(2, result.size()); + assertEquals("testValue1", result.getString("testKey1")); + assertEquals("testValue2", result.getString("testKey2")); + } + + @Test + void testListAllAsJson_localizedSettings() { + // Given + List resultList = List.of( + new Setting("localizedKey", "value_base"), + new Setting("localizedKey", "en", "value_en"), + new Setting("localizedKey", "fr", "value_fr") + ); + when(typedQuery.getResultList()).thenReturn(resultList); + + // When + JsonObject result = settingsServiceBean.listAllAsJson(); + + // Then + assertEquals(1, result.size()); + JsonObject localizedSetting = result.getJsonObject("localizedKey"); + + assertEquals(3, localizedSetting.size()); + assertEquals("value_base", localizedSetting.getString("base")); + assertEquals("value_en", localizedSetting.getString("en")); + assertEquals("value_fr", localizedSetting.getString("fr")); + } + } } \ No newline at end of file From 0ebe683f97435a1350b0c6c1f93db486f8cec6ce Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 15 Jul 2025 17:14:26 +0200 Subject: [PATCH 04/69] chore: add JaCoCo args placeholders to pom.xml This is because IntelliJ IDEA's MavenJUnitPatcher only correctly uses the maven-surefire-plugin if all interpolated properties are present. If a single property is missing, the argLine will not be used by IntelliJ's test runner, which is horrible to debug. 
See also https://github.com/kreiger/idea-maven-test-profiles-argline --- pom.xml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pom.xml b/pom.xml index ceb5ea28d84..822eaaeb79d 100644 --- a/pom.xml +++ b/pom.xml @@ -21,6 +21,8 @@ false false integration + + From 684c7d1110b7fb7c760b3f54bca3bcb62eafc532 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 15 Jul 2025 17:18:07 +0200 Subject: [PATCH 05/69] feat: replace tabular ingest size limits configuration using JSON #11639 Introduced new methods in `SystemConfig` to support tabular ingest size limits as JSON objects or single values. This replaces the old way of using colon-separated format suffixes with the setting name. Enhanced flexibility with format-specific or universal defaults. Updated `EditDatafilesPage` to populate human-readable format-specific limits. Added comprehensive unit tests to validate behavior, including edge cases for invalid configurations. --- .../iq/dataverse/EditDatafilesPage.java | 20 +-- .../iq/dataverse/util/SystemConfig.java | 131 +++++++++++++----- .../iq/dataverse/EditDatafilesPageTest.java | 64 +++++++++ .../iq/dataverse/util/SystemConfigTest.java | 88 +++++++++++- 4 files changed, 253 insertions(+), 50 deletions(-) create mode 100644 src/test/java/edu/harvard/iq/dataverse/EditDatafilesPageTest.java diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java index b7d28ceabfa..fd0f3be9871 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java @@ -81,6 +81,8 @@ import java.util.Collection; import java.util.Set; import java.util.logging.Level; +import java.util.stream.Collectors; + import jakarta.faces.event.AjaxBehaviorEvent; import jakarta.faces.event.FacesEvent; import jakarta.servlet.ServletOutputStream; @@ -381,19 +383,11 @@ public String getHumanPerFormatTabularLimits() { } public String 
populateHumanPerFormatTabularLimits() { - String keyPrefix = ":TabularIngestSizeLimit:"; - List formatLimits = new ArrayList<>(); - for (Setting setting : settingsService.listAllWithoutLocalizations()) { - String name = setting.getName(); - if (!name.startsWith(keyPrefix)) { - continue; - } - String tabularName = setting.getName().substring(keyPrefix.length()); - String bytes = setting.getContent(); - String humanReadableSize = FileSizeChecker.bytesToHumanReadable(Long.valueOf(bytes)); - formatLimits.add(tabularName + ": " + humanReadableSize); - } - return String.join(", ", formatLimits); + return systemConfig.getTabularIngestSizeLimits().entrySet().stream() + // The human-readable list shall not contain the setting for non-matching formats + .filter(entry -> ! entry.getKey().equals(SystemConfig.TABULAR_INGEST_SIZE_LIMITS_DEFAULT_KEY)) + .map(entry -> entry.getKey() + ": " + FileSizeChecker.bytesToHumanReadable(entry.getValue())) + .collect(Collectors.joining(", ")); } public Integer getFileUploadsAvailable() { diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index 69f9262ab5b..5061a0a70e7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -11,6 +11,7 @@ import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.validation.PasswordValidatorUtil; +import jakarta.json.stream.JsonParsingException; import org.passay.CharacterRule; import jakarta.ejb.EJB; @@ -27,6 +28,7 @@ import java.net.UnknownHostException; import java.time.Year; import java.util.Arrays; +import java.util.Collections; import java.util.HashMap; import java.util.Iterator; import java.util.List; @@ -488,49 +490,106 @@ public Integer getSearchHighlightFragmentSize() { } return null; } - - public long getTabularIngestSizeLimit() { - // 
This method will return the blanket ingestable size limit, if - // set on the system. I.e., the universal limit that applies to all - // tabular ingests, regardless of fromat: - - String limitEntry = settingsService.getValueForKey(SettingsServiceBean.Key.TabularIngestSizeLimit); - + + /** + * The default key used to identify tabular ingest size limits. + * This value represents the standard or fallback configuration. + * For any other valid format strings, see implementations of {@code TabularDataFileReader.getFormatName()}. + */ + public static final String TABULAR_INGEST_SIZE_LIMITS_DEFAULT_KEY = "default"; + + /** + * Retrieves the tabular ingest size limits based on the system configuration. + * The size limits can be defined as a JSON object with format-specific limits, a single numeric value + * applied to all formats, or might not exist, in which case the default limit is applied. + * + * Note that the format names in the configuration will be transformed to lowercase, so that users + * may write format names in whatever casing they prefer. + * + * If the configuration contains invalid data (e.g., unparsable JSON or non-numeric values), + * all tabular ingest operations are disabled by setting size limits to 0. + * + * TODO: At some later point, if and when the DB lookups or JSON parsing take a toll too heavy to bear, + * we may introduce a caching singleton for these. (With TTL or using events to invalidate on update.) + * + * @return a map where the keys represent format names or a default key, and the values represent the maximum allowed size for each format. + */ + public Map getTabularIngestSizeLimits() { + String limitEntry = settingsService.getValueForKey(SettingsServiceBean.Key.TabularIngestSizeLimit); if (limitEntry != null) { - try { - Long sizeOption = Long.valueOf(limitEntry); - return sizeOption; - } catch (NumberFormatException nfe) { - logger.warning("Invalid value for TabularIngestSizeLimit option? 
- " + limitEntry); + // Case A: the setting is using JSON to support multiple formats + if (limitEntry.trim().startsWith("{")) { + try { + JsonObject limits = Json.createReader(new StringReader(limitEntry)).readObject(); + + Map limitsMap = new HashMap<>(); + // We add the default in case the JSON does not contain the default (which is optional). + limitsMap.put(TABULAR_INGEST_SIZE_LIMITS_DEFAULT_KEY, -1L); + + for (String formatName : limits.keySet()) { + // We deliberately do not validate the formatNames here for backward compatibility. + // But we transform to lowercase here, so the casing doesn't matter for lookups. + String lowercaseFormatName = formatName.toLowerCase(); + + try { + Long sizeOption = Long.valueOf(limits.getString(formatName)); + limitsMap.put(lowercaseFormatName, sizeOption); + } catch (NumberFormatException nfe) { + logger.warning("Could not convert " + SettingsServiceBean.Key.TabularIngestSizeLimit + " to long: " + nfe); + logger.warning("Disabling all tabular ingest completely until fixed!"); + return Map.of(TABULAR_INGEST_SIZE_LIMITS_DEFAULT_KEY, 0L); + } + } + + return Collections.unmodifiableMap(limitsMap); + } catch (JsonParsingException e) { + logger.warning("Invalid TabularIngestSizeLimit option found, cannot parse JSON: " + e.getMessage()); + logger.warning("Disabling all tabular ingest completely until fixed!"); + return Map.of(TABULAR_INGEST_SIZE_LIMITS_DEFAULT_KEY, 0L); + } + // Case B: It might be just a simple Long, providing a default for all formats. 
+ } else { + try { + Long limit = Long.valueOf(limitEntry); + return Map.of(TABULAR_INGEST_SIZE_LIMITS_DEFAULT_KEY, limit); + } catch (NumberFormatException nfe) { + logger.warning("Could not convert " + SettingsServiceBean.Key.TabularIngestSizeLimit + " to long: " + nfe); + logger.warning("Disabling all tabular ingest completely until fixed!"); + return Map.of(TABULAR_INGEST_SIZE_LIMITS_DEFAULT_KEY, 0L); + } } } - // -1 means no limit is set; - // 0 on the other hand would mean that ingest is fully disabled for - // tabular data. - return -1; + + // Default is not to limit at all + return Map.of(TABULAR_INGEST_SIZE_LIMITS_DEFAULT_KEY, -1L); } + /** + * This method will return the blanket ingestable size limit, if set on the system. + * I.e., the universal limit that applies to all tabular ingests, regardless of format. + * @return -1 = unlimited if not set, 0 if disabled or invalid, some long number of bytes otherwise + */ + public long getTabularIngestSizeLimit() { + return getTabularIngestSizeLimits().get(TABULAR_INGEST_SIZE_LIMITS_DEFAULT_KEY); + } + + /** + * Retrieves the size limit for tabular data ingestion based on the provided format name. + * The format name will be converted to lowercase, making sure the casing doesn't matter. + * + * @param formatName The name of the format for which the size limit is requested + * See also implementations of {@code TabularDataFileReader.getFormatName()} for examples. + * @return The size limit in bytes for tabular data ingestion associated with the specified format name, + * or the default size limit if no format-specific limit is found or its name is invalid (null, blank, ...). 
+ * -1 = unlimited if not set, 0 if disabled or invalid, some long number of bytes otherwise + */ public long getTabularIngestSizeLimit(String formatName) { - // This method returns the size limit set specifically for this format name, - // if available, otherwise - the blanket limit that applies to all tabular - // ingests regardless of a format. - - if (formatName == null || formatName.equals("")) { - return getTabularIngestSizeLimit(); + if (formatName != null && !formatName.isBlank()) { + // We convert to lowercase so it doesn't matter which variant someone uses in the JSON config + String convertedFormatName = formatName.toLowerCase(); + return getTabularIngestSizeLimits().getOrDefault(convertedFormatName, getTabularIngestSizeLimit()); } - - String limitEntry = settingsService.get(SettingsServiceBean.Key.TabularIngestSizeLimit.toString() + ":" + formatName); - - if (limitEntry != null) { - try { - Long sizeOption = Long.valueOf(limitEntry); - return sizeOption; - } catch (NumberFormatException nfe) { - logger.warning("Invalid value for TabularIngestSizeLimit:" + formatName + "? 
- " + limitEntry ); - } - } - - return getTabularIngestSizeLimit(); + return getTabularIngestSizeLimit(); } public boolean isOAIServerEnabled() { diff --git a/src/test/java/edu/harvard/iq/dataverse/EditDatafilesPageTest.java b/src/test/java/edu/harvard/iq/dataverse/EditDatafilesPageTest.java new file mode 100644 index 00000000000..11578b71f0e --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/EditDatafilesPageTest.java @@ -0,0 +1,64 @@ +package edu.harvard.iq.dataverse; + +import edu.harvard.iq.dataverse.util.SystemConfig; +import org.junit.jupiter.api.Test; +import org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; + +import java.util.HashMap; +import java.util.Map; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.when; + +class EditDatafilesPageTest { + + @InjectMocks + private EditDatafilesPage editDatafilesPage; + + @Mock + private SystemConfig systemConfig; + + public EditDatafilesPageTest() { + MockitoAnnotations.openMocks(this); + } + + @Test + void testPopulateHumanPerFormatTabularLimits_WithEmptyLimits() { + Map tabularLimits = new HashMap<>(); + when(systemConfig.getTabularIngestSizeLimits()).thenReturn(tabularLimits); + + String result = editDatafilesPage.populateHumanPerFormatTabularLimits(); + + assertEquals("", result, "Expected no formatted limits when the map is empty"); + } + + @Test + void testPopulateHumanPerFormatTabularLimits_WithNonDefaultLimits() { + Map tabularLimits = new HashMap<>(); + tabularLimits.put("csv", 10485760L); // 10MB + tabularLimits.put("tsv", 5242880L); // 5MB + when(systemConfig.getTabularIngestSizeLimits()).thenReturn(tabularLimits); + + String result = editDatafilesPage.populateHumanPerFormatTabularLimits(); + + assertTrue(result.contains("csv: 10.0 MB"), "Expected CSV limit in human-readable format, but 
got: " + result); + assertTrue(result.contains("tsv: 5.0 MB"), "Expected TSV limit in human-readable format, but got: " + result); + } + + @Test + void testPopulateHumanPerFormatTabularLimits_WithDefaultKey() { + Map tabularLimits = new HashMap<>(); + tabularLimits.put(SystemConfig.TABULAR_INGEST_SIZE_LIMITS_DEFAULT_KEY, 2097152L); // 2MB + tabularLimits.put("csv", 10485760L); // 10MB + when(systemConfig.getTabularIngestSizeLimits()).thenReturn(tabularLimits); + + String result = editDatafilesPage.populateHumanPerFormatTabularLimits(); + + assertTrue(result.contains("csv: 10.0 MB"), "Expected CSV limit in human-readable format, but got: " + result); + assertFalse(result.contains("default"), "Default key should be excluded from the output"); + } +} \ No newline at end of file diff --git a/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java b/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java index 82b89bca678..f50b6023480 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java @@ -12,6 +12,8 @@ import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; +import java.util.Map; + import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.Mockito.doReturn; @@ -142,5 +144,89 @@ void testGetThumbnailSizeLimit() { assertEquals(1000000l, SystemConfig.getThumbnailSizeLimit("PDF")); assertEquals(0l, SystemConfig.getThumbnailSizeLimit("NoSuchType")); } - + + @Test + void testGetTabularIngestSizeLimitsWithoutSetting() { + // given + doReturn(null).when(settingsService).getValueForKey(SettingsServiceBean.Key.TabularIngestSizeLimit); + + // when + Map result = systemConfig.getTabularIngestSizeLimits(); + + // then + assertEquals(1, result.size()); + assertEquals(-1L, (long) result.get(SystemConfig.TABULAR_INGEST_SIZE_LIMITS_DEFAULT_KEY)); + } + + @Test + 
void testGetTabularIngestSizeLimitsWithValidJson() { + // given + String validJson = "{\"csV\": \"5000\", \"tSv\": \"10000\"}"; + doReturn(validJson).when(settingsService).getValueForKey(SettingsServiceBean.Key.TabularIngestSizeLimit); + + // when + Map result = systemConfig.getTabularIngestSizeLimits(); + + // then + assertEquals(3, result.size()); + assertEquals(-1L, (long) result.get(SystemConfig.TABULAR_INGEST_SIZE_LIMITS_DEFAULT_KEY)); + assertEquals(5000L, result.get("csv")); + assertEquals(10000L, result.get("tsv")); + } + + @Test + void testGetTabularIngestSizeLimitsWithSingleValue() { + // given + String singleValue = "8000"; + doReturn(singleValue).when(settingsService).getValueForKey(SettingsServiceBean.Key.TabularIngestSizeLimit); + + // when + Map result = systemConfig.getTabularIngestSizeLimits(); + + // then + assertEquals(1, result.size()); + assertEquals(8000L, (long) result.get(SystemConfig.TABULAR_INGEST_SIZE_LIMITS_DEFAULT_KEY)); + } + + @Test + void testGetTabularIngestSizeLimitsWithSingleInvalidValue() { + // given + String singleValue = "this-aint-no-number"; + doReturn(singleValue).when(settingsService).getValueForKey(SettingsServiceBean.Key.TabularIngestSizeLimit); + + // when + Map result = systemConfig.getTabularIngestSizeLimits(); + + // then + assertEquals(1, result.size()); + assertEquals(0L, (long) result.get(SystemConfig.TABULAR_INGEST_SIZE_LIMITS_DEFAULT_KEY)); + } + + @Test + void testGetTabularIngestSizeLimitsWithInvalidJson() { + // given + String invalidJson = "{invalid:}"; + doReturn(invalidJson).when(settingsService).getValueForKey(SettingsServiceBean.Key.TabularIngestSizeLimit); + + // when + Map result = systemConfig.getTabularIngestSizeLimits(); + + // then + assertEquals(1, result.size()); + assertEquals(0L, (long) result.get(SystemConfig.TABULAR_INGEST_SIZE_LIMITS_DEFAULT_KEY)); + } + + @Test + void testGetTabularIngestSizeLimitsWithInvalidNumberInValidJson() { + // given + String invalidJson = "{\"csv\": 
\"this-is-not-a-number\", \"tSv\": \"10000\"}"; + doReturn(invalidJson).when(settingsService).getValueForKey(SettingsServiceBean.Key.TabularIngestSizeLimit); + + // when + Map result = systemConfig.getTabularIngestSizeLimits(); + + // then + assertEquals(1, result.size()); + assertEquals(0L, (long) result.get(SystemConfig.TABULAR_INGEST_SIZE_LIMITS_DEFAULT_KEY)); + } } From b11b722ed75f0feb19858a3d2bb264ecf269d62d Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 15 Jul 2025 17:52:55 +0200 Subject: [PATCH 06/69] feat(db): migrate TabularIngestSizeLimit settings to JSON format with Flyway #11639 Implemented migration script to replace old format-specific TabularIngestSizeLimit settings with a unified JSON-based structure. Validates and handles non-numeric values, ensures backward compatibility, and cleans up obsolete settings. --- src/main/resources/db/migration/V6.7.0.1.sql | 78 ++++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 src/main/resources/db/migration/V6.7.0.1.sql diff --git a/src/main/resources/db/migration/V6.7.0.1.sql b/src/main/resources/db/migration/V6.7.0.1.sql new file mode 100644 index 00000000000..733a10d951f --- /dev/null +++ b/src/main/resources/db/migration/V6.7.0.1.sql @@ -0,0 +1,78 @@ +-- This script migrates the old TabularIngestSizeLimit database setting using format suffixes to a JSON based approach. 
+ +DO $$ + DECLARE + base_setting_content TEXT; + format_settings_cursor CURSOR FOR + SELECT name, content + FROM Setting + WHERE name LIKE ':TabularIngestSizeLimit:%' + AND lang IS NULL; + format_record RECORD; + format_name TEXT; + format_value BIGINT; + json_object JSONB := '{}'; + has_format_settings BOOLEAN := FALSE; + warning_message TEXT; + BEGIN + -- Check if there are any format-specific settings + SELECT EXISTS( + SELECT 1 FROM Setting + WHERE name LIKE ':TabularIngestSizeLimit:%' + AND lang IS NULL + ) INTO has_format_settings; + + -- Only proceed if we have format-specific settings + IF NOT has_format_settings THEN + RAISE NOTICE 'No format-specific TabularIngestSizeLimit settings found. Skipping migration.'; + RETURN; + END IF; + + -- Get the base setting (without format suffix) if it exists + SELECT content INTO base_setting_content + FROM Setting + WHERE name = ':TabularIngestSizeLimit' + AND lang IS NULL; + + -- Add base setting to JSON object if it exists + IF base_setting_content IS NOT NULL THEN + -- Validate that base setting is numeric + BEGIN + format_value := base_setting_content::BIGINT; + json_object := json_object || jsonb_build_object('default', format_value); + EXCEPTION WHEN invalid_text_representation THEN + RAISE WARNING 'Base TabularIngestSizeLimit setting contains non-numeric value: %. 
Setting to 0 (disabling ingest!).', base_setting_content; + json_object := json_object || jsonb_build_object('default', 0); + END; + END IF; + + -- Process format-specific settings + FOR format_record IN format_settings_cursor LOOP + -- Extract format name (everything after ":TabularIngestSizeLimit:") + format_name := substring(format_record.name from ':TabularIngestSizeLimit:(.*)'); + + -- Validate and convert the content to numeric + BEGIN + format_value := format_record.content::BIGINT; + json_object := json_object || jsonb_build_object(format_name, format_value); + EXCEPTION WHEN invalid_text_representation THEN + warning_message := format('Format-specific TabularIngestSizeLimit setting %s contains non-numeric value: %s. Setting to 0 (disabling ingest!).', + format_record.name, format_record.content); + RAISE WARNING '%', warning_message; + json_object := json_object || jsonb_build_object(format_name, 0); + END; + END LOOP; + + -- Insert or update the new JSON-based setting + INSERT INTO Setting (name, content, lang) + VALUES (':TabularIngestSizeLimit', json_object::TEXT, NULL) + ON CONFLICT (name) WHERE lang IS NULL + DO UPDATE SET content = EXCLUDED.content; + + -- Delete all format-specific settings + DELETE FROM Setting + WHERE name LIKE ':TabularIngestSizeLimit:%' + AND lang IS NULL; + + RAISE NOTICE 'Successfully migrated TabularIngestSizeLimit settings to JSON format: %', json_object::TEXT; + END $$; \ No newline at end of file From 92adb8ada567a73f67605e4618a8642c18db6329 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 16 Jul 2025 11:00:58 +0200 Subject: [PATCH 07/69] docs: improve TabularIngestSizeLimit documentation with JSON examples and defaults #11639 Refined the explanation of tabular ingest size limits, introduced examples for JSON configuration, clarified default behavior, and updated guidance on per-format overrides. 
--- .../source/installation/config.rst | 33 ++++++++++++++----- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 14bf33c9482..63eb4612f7f 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -4373,30 +4373,45 @@ In the UI, users trying to download a zip file larger than the Dataverse install :TabularIngestSizeLimit +++++++++++++++++++++++ -Threshold in bytes for limiting whether or not "ingest" it attempted for tabular files (which can be resource intensive). For example, with the below in place, files greater than 2 GB in size will not go through the ingest process: +Threshold in bytes for limiting whether or not "ingest" is attempted for an uploaded tabular file (which can be resource intensive). +For example, with the below in place, files greater than 2 GB in size will not go through the ingest process: ``curl -X PUT -d 2000000000 http://localhost:8080/api/admin/settings/:TabularIngestSizeLimit`` -(You can set this value to 0 to prevent files from being ingested at all.) +You can set this value to ``0`` to prevent files from being ingested at all. +The default is ``-1``, meaning no file size limit is applied. -You can override this global setting on a per-format basis for the following formats: +Using a JSON-based setting, you can override this global setting on a per-format basis for the following formats: - DTA - POR - SAV - Rdata - CSV -- XLSX (in lower-case) +- XLSX -For example : +The JSON follows this form, all fields optional: -* if you want your Dataverse installation to not attempt to ingest Rdata files larger than 1 MB, use this setting: +.. 
code:: json -``curl -X PUT -d 1000000 http://localhost:8080/api/admin/settings/:TabularIngestSizeLimit:Rdata`` + { + "default": -1, + "formatX": 0, + "formatY": 10, + "formatZ": 100 + } + +The ``default`` key represents the global default, with it being absent meaning the global default of ``-1`` applies. +Add a format name (as listed above) to change the limit for this particular format. + +Examples: + +1. If you want your Dataverse installation to not attempt to ingest Rdata files larger than 1 MB but otherwise unlimited: -* if you want your Dataverse installation to not attempt to ingest XLSX files at all, use this setting: + ``curl -X PUT -d '{"Rdata":1000000}' http://localhost:8080/api/admin/settings/:TabularIngestSizeLimit`` +2. If you want your Dataverse installation to not attempt to ingest XLSX files at all and apply a global limit of 512 MiB, use this setting: -``curl -X PUT -d 0 http://localhost:8080/api/admin/settings/:TabularIngestSizeLimit:xlsx`` + ``curl -X PUT -d '{"default":536870912, "XSLX":0}' http://localhost:8080/api/admin/settings/:TabularIngestSizeLimit`` :ZipUploadFilesLimit ++++++++++++++++++++ From b17e52a444f09186cd20ba57c4c22204fb0343c1 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 16 Jul 2025 12:06:39 +0200 Subject: [PATCH 08/69] fix: handle unsupported JSON integers in tabular ingest size limits #11639 Updated warnings and validation logic to enforce string literals for size limits. Added a new test to ensure proper handling of JSON configurations with unsupported integer types for tabular ingest size limits. Improved related documentation for clarity. 
--- doc/sphinx-guides/source/installation/config.rst | 15 ++++++++------- .../harvard/iq/dataverse/util/SystemConfig.java | 6 +++++- .../iq/dataverse/util/SystemConfigTest.java | 14 ++++++++++++++ 3 files changed, 27 insertions(+), 8 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 63eb4612f7f..0541f9f301d 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -4395,23 +4395,24 @@ The JSON follows this form, all fields optional: .. code:: json { - "default": -1, - "formatX": 0, - "formatY": 10, - "formatZ": 100 + "default": "-1", + "formatX": "0", + "formatY": "10", + "formatZ": "100" } -The ``default`` key represents the global default, with it being absent meaning the global default of ``-1`` applies. +The ``default`` key represents the global default (with it being absent meaning the implicit global default of ``-1`` applies). Add a format name (as listed above) to change the limit for this particular format. +Any size limits must be provided as string literals (in quotes), not number literals! Examples: 1. If you want your Dataverse installation to not attempt to ingest Rdata files larger than 1 MB but otherwise unlimited: - ``curl -X PUT -d '{"Rdata":1000000}' http://localhost:8080/api/admin/settings/:TabularIngestSizeLimit`` + ``curl -X PUT -d '{"Rdata":"1000000"}' http://localhost:8080/api/admin/settings/:TabularIngestSizeLimit`` 2. 
If you want your Dataverse installation to not attempt to ingest XLSX files at all and apply a global limit of 512 MiB, use this setting: - ``curl -X PUT -d '{"default":536870912, "XSLX":0}' http://localhost:8080/api/admin/settings/:TabularIngestSizeLimit`` + ``curl -X PUT -d '{"default":"536870912", "XLSX":"0"}' http://localhost:8080/api/admin/settings/:TabularIngestSizeLimit`` :ZipUploadFilesLimit ++++++++++++++++++++ diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index 5061a0a70e7..71f24b1fe3a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -534,8 +534,12 @@ public Map getTabularIngestSizeLimits() { try { Long sizeOption = Long.valueOf(limits.getString(formatName)); limitsMap.put(lowercaseFormatName, sizeOption); + } catch (ClassCastException cce) { + logger.warning("Could not convert " + SettingsServiceBean.Key.TabularIngestSizeLimit + " to long from JSON integer.
You must provide the long number as string (use quotes) for format " + formatName); + logger.warning("Disabling all tabular ingest completely until fixed!"); + return Map.of(TABULAR_INGEST_SIZE_LIMITS_DEFAULT_KEY, 0L); } catch (NumberFormatException nfe) { - logger.warning("Could not convert " + SettingsServiceBean.Key.TabularIngestSizeLimit + " to long: " + nfe); + logger.warning("Could not convert " + SettingsServiceBean.Key.TabularIngestSizeLimit + " to long for format " + formatName + " (not a number)"); logger.warning("Disabling all tabular ingest completely until fixed!"); return Map.of(TABULAR_INGEST_SIZE_LIMITS_DEFAULT_KEY, 0L); } diff --git a/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java b/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java index f50b6023480..06026962d2c 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java @@ -202,6 +202,20 @@ void testGetTabularIngestSizeLimitsWithSingleInvalidValue() { assertEquals(0L, (long) result.get(SystemConfig.TABULAR_INGEST_SIZE_LIMITS_DEFAULT_KEY)); } + @Test + void testGetTabularIngestSizeLimitsWithJsonButUnsupportedJsonInt() { + // given + String invalidJson = "{\"default\": 0}"; + doReturn(invalidJson).when(settingsService).getValueForKey(SettingsServiceBean.Key.TabularIngestSizeLimit); + + // when + Map result = systemConfig.getTabularIngestSizeLimits(); + + // then + assertEquals(1, result.size()); + assertEquals(0L, (long) result.get(SystemConfig.TABULAR_INGEST_SIZE_LIMITS_DEFAULT_KEY)); + } + @Test void testGetTabularIngestSizeLimitsWithInvalidJson() { // given From 493239652eae37a84ed2237e89234be792601d99 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 16 Jul 2025 17:14:41 +0200 Subject: [PATCH 09/69] refactor: simplify `listAllAsJson` method and update corresponding test #11639 Streamlined the logic of `listAllAsJson` to use a flattened structure for localized 
settings, improving maintainability and backward compatibility. Updated test assertions in `SettingsServiceBeanTest` to reflect the new structure. Added enhanced method documentation. --- .../settings/SettingsServiceBean.java | 64 ++++++++++--------- .../settings/SettingsServiceBeanTest.java | 11 ++-- 2 files changed, 38 insertions(+), 37 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index 48c37480e0e..0f3d9557e6b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -1016,52 +1016,56 @@ public void delete( String name, String lang ) { * Retrieves all settings that do not have any language localizations. * This method uses a named query to fetch settings where the language field is null. * - * @return a set of {@code Setting} objects that do not have language localizations. + * @return a set of {@link Setting} objects that do not have language localizations. */ public Set listAllWithoutLocalizations() { return new HashSet<>(em.createNamedQuery("Setting.findAllWithoutLang", Setting.class).getResultList()); } /** - * Retrieves all available application settings as a JSON object. - * The method fetches settings from the database, organizes them into localized - * and non-localized entries, and builds a JSON representation of the dataset. - * Non-localized settings are added directly as key-value pairs, while localized - * settings are grouped under their associated keys with language-specific mappings. - * Note: settings may exist with both non-localized and localized variant. - * The non-localized variant will be added as "base" locale. + * Retrieves all settings from the database and converts them into a JSON object. + * Each setting is represented as a key-value pair in the JSON object. 
The key + * is the setting name, optionally appended with the language if the setting is + * language-specific, while the value corresponds to the setting's content. * - * @return a {@code JsonObject} containing all application settings, organized - * as key-value pairs for non-localized settings, or as sub-objects - * for settings with language localizations. + * @return A {@link JsonObject} containing all settings from the database, structured + * with their names (and languages, if applicable) as keys and their + * respective contents as values. + * Shortened Example: + * + * { + * ":FilePIDsEnabled": "false", + * ":ApplicationTermsOfUse": "Non-localized default / fallback terms.", + * ":ApplicationTermsOfUse/lang/fr": "Il s'agit de termes localisés en français.", + * ":MaxFileUploadSizeInBytes": { + * "default": "2147483648", + * "fileOne": "4000000000", + * "s3": "8000000000" + * } + * } + * + * + * @implNote The reason to use a flattened approach for the localized settings is to stay backward compatible. + * Per good practice, a bulk operation should be a composite of the single operation. + * As you need to provide the language parameter to query or put them single, the localization is not + * part of the content model, but of the {@link Setting} data model. Using a JSON sub-object or using + * a separated approach is possible, but adds additional complexity. In case of the sub-object it even + * violates that the value you retrieve from the bulk operation can be used for a single operation again. + * As long as we do not update our content model, but store the language as part of the data model, + * this flattening seems to be the most balanced compromise. 
*/ public JsonObject listAllAsJson() { Set settings = new HashSet<>(em.createNamedQuery("Setting.findAll", Setting.class).getResultList()); - - Set settingsWithL10n = settings.stream() - .filter(s -> s.getLang() != null) - .map(Setting::getName) - .collect(Collectors.toUnmodifiableSet()); - Map localizedSettings = new HashMap<>(); - JsonObjectBuilder response = Json.createObjectBuilder(); // Iterate over all the settings and add them to the response. settings.forEach(setting -> { - // Simple case: This settings is not localized, go ahead and add it. - if (!settingsWithL10n.contains(setting.getName())) { - response.add(setting.getName(), setting.getContent()); - // Localized case: We can't just add it, we need to have a sub-object. - // Also, we don't know the order of the settings or when all localized variants are done. - } else { - localizedSettings.computeIfAbsent(setting.getName(), name -> Json.createObjectBuilder()); - localizedSettings.get(setting.getName()) - .add(setting.getLang() == null ? "base" : setting.getLang(), setting.getContent()); - } + response.add( + setting.getName() + (setting.getLang() == null ? "" : "/lang/"+setting.getLang()), + setting.getContent() + ); }); - // We now know that we processed all settings, so add all the l10n builders at once. 
- localizedSettings.forEach(response::add); return response.build(); } diff --git a/src/test/java/edu/harvard/iq/dataverse/settings/SettingsServiceBeanTest.java b/src/test/java/edu/harvard/iq/dataverse/settings/SettingsServiceBeanTest.java index a80b4786982..47feea5a2fa 100644 --- a/src/test/java/edu/harvard/iq/dataverse/settings/SettingsServiceBeanTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/settings/SettingsServiceBeanTest.java @@ -116,13 +116,10 @@ void testListAllAsJson_localizedSettings() { JsonObject result = settingsServiceBean.listAllAsJson(); // Then - assertEquals(1, result.size()); - JsonObject localizedSetting = result.getJsonObject("localizedKey"); - - assertEquals(3, localizedSetting.size()); - assertEquals("value_base", localizedSetting.getString("base")); - assertEquals("value_en", localizedSetting.getString("en")); - assertEquals("value_fr", localizedSetting.getString("fr")); + assertEquals(3, result.size()); + assertEquals("value_base", result.getString("localizedKey")); + assertEquals("value_en", result.getString("localizedKey/lang/en")); + assertEquals("value_fr", result.getString("localizedKey/lang/fr")); } } } \ No newline at end of file From d153cacbd3dd311fecc55f90fabc206b002982d0 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 16 Jul 2025 17:26:31 +0200 Subject: [PATCH 10/69] feat: add validation for settings names and language codes in Admin Settings API #11639 Introduced validation logic for setting names and ISO 639-1 language codes across Admin API endpoints. Ensures meaningful error messages for invalid inputs, improving robustness and user feedback. Refactored related methods accordingly. Added missing GET endpoint for localized settings. 
--- .../edu/harvard/iq/dataverse/api/Admin.java | 86 ++++++++++++++++--- 1 file changed, 74 insertions(+), 12 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index 69ed33f60dc..6add116834d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -70,6 +70,7 @@ import java.io.StringReader; import java.nio.charset.StandardCharsets; import java.util.Collections; +import java.util.Locale; import java.util.Map; import java.util.Map.Entry; import java.util.function.Predicate; @@ -210,42 +211,103 @@ public class Admin extends AbstractApiBean { public Response listAllSettings() { return ok(settingsSvc.listAllAsJson()); } + + private void validateSettingName(String name) throws IllegalArgumentException { + if (SettingsServiceBean.Key.parse(name) == null) { + // If there is more than one colon, this may be someone trying to use the old suffix settings. + // Change the error message for that slightly. + if (name.replace(":","").length() < name.length() - 1) { + throw new IllegalArgumentException("The name of the setting may not have a colon separated suffix since Dataverse 6.8. 
Please update your scripts."); + } + throw new IllegalArgumentException("The name of the setting is required."); + } + } + + private void validateSettingLang(String lang) throws IllegalArgumentException { + if (lang == null || lang.length() != 2 || !Arrays.asList(Locale.getISOLanguages()).contains(lang)) { + throw new IllegalArgumentException("The language '" + lang + "' is not a valid ISO 639-1 language code."); + } + } @Path("settings/{name}") @PUT public Response putSetting(@PathParam("name") String name, String content) { - Setting s = settingsSvc.set(name, content); - return ok(jsonObjectBuilder().add(s.getName(), s.getContent())); + try { + validateSettingName(name); + + Setting s = settingsSvc.set(name, content); + return ok("Setting " + name + " added."); + } catch (IllegalArgumentException iae) { + return error(Response.Status.BAD_REQUEST, iae.getMessage()); + } } @Path("settings/{name}/lang/{lang}") @PUT public Response putSettingLang(@PathParam("name") String name, @PathParam("lang") String lang, String content) { - Setting s = settingsSvc.set(name, lang, content); - return ok("Setting " + name + " - " + lang + " - added."); + try { + validateSettingName(name); + validateSettingLang(lang); + + Setting s = settingsSvc.set(name, lang, content); + return ok("Setting " + name + " added for language " + lang + "."); + } catch (IllegalArgumentException iae) { + return error(Response.Status.BAD_REQUEST, iae.getMessage()); + } } @Path("settings/{name}") @GET public Response getSetting(@PathParam("name") String name) { - String s = settingsSvc.get(name); - - return (s != null) ? ok(s) : notFound("Setting " + name + " not found"); + try { + validateSettingName(name); + + String content = settingsSvc.get(name); + return (content != null) ? 
ok(content) : notFound("Setting " + name + " not found."); + } catch (IllegalArgumentException iae) { + return error(Response.Status.BAD_REQUEST, iae.getMessage()); + } + } + + @Path("settings/{name}/lang/{lang}") + @GET + public Response getSetting(@PathParam("name") String name, @PathParam("lang") String lang) { + try { + validateSettingName(name); + validateSettingLang(lang); + + String content = settingsSvc.get(name, lang); + return (content != null) ? ok(content) : notFound("Setting " + name + " for language " + lang + " not found."); + } catch (IllegalArgumentException iae) { + return error(Response.Status.BAD_REQUEST, iae.getMessage()); + } } @Path("settings/{name}") @DELETE public Response deleteSetting(@PathParam("name") String name) { - settingsSvc.delete(name); - - return ok("Setting " + name + " deleted."); + try { + validateSettingName(name); + + settingsSvc.delete(name); + return ok("Setting " + name + " deleted."); + } catch (IllegalArgumentException iae) { + return error(Response.Status.BAD_REQUEST, iae.getMessage()); + } } @Path("settings/{name}/lang/{lang}") @DELETE public Response deleteSettingLang(@PathParam("name") String name, @PathParam("lang") String lang) { - settingsSvc.delete(name, lang); - return ok("Setting " + name + " - " + lang + " deleted."); + try { + validateSettingName(name); + validateSettingLang(lang); + + settingsSvc.delete(name, lang); + return ok("Setting " + name + " for language " + lang + " deleted."); + } catch (IllegalArgumentException iae) { + return error(Response.Status.BAD_REQUEST, iae.getMessage()); + } } @Path("template/{id}") From 3975db076923d8ae9e82337575e61b6e16512ad8 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 17 Jul 2025 17:07:54 +0200 Subject: [PATCH 11/69] feat: support JSON objects in `listAllAsJson` settings output and update tests #11639 Enhanced `listAllAsJson` to handle settings with JSON content as proper JSON objects in the API response. 
Introduced a constant for localization key separator. Updated unit tests to cover new behavior with JSON settings. --- .../settings/SettingsServiceBean.java | 16 ++++++++---- .../settings/SettingsServiceBeanTest.java | 26 +++++++++++++++++-- 2 files changed, 35 insertions(+), 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index 0f3d9557e6b..365362d577c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -1022,6 +1022,8 @@ public Set listAllWithoutLocalizations() { return new HashSet<>(em.createNamedQuery("Setting.findAllWithoutLang", Setting.class).getResultList()); } + public static final String L10N_KEY_SEPARATOR = "/lang/"; + /** * Retrieves all settings from the database and converts them into a JSON object. * Each setting is represented as a key-value pair in the JSON object. The key @@ -1060,11 +1062,15 @@ public JsonObject listAllAsJson() { // Iterate over all the settings and add them to the response. settings.forEach(setting -> { - response.add( - setting.getName() + (setting.getLang() == null ? "" : "/lang/"+setting.getLang()), - setting.getContent() - ); - }); + String name = setting.getName() + (setting.getLang() == null ? 
"" : L10N_KEY_SEPARATOR + setting.getLang()); + + // In case the setting is a JSON object, treat it a such in the output (so the API can return valid JSON) + if (setting.getContent().trim().startsWith("{")) + response.add(name, Json.createObjectBuilder(JsonUtil.getJsonObject(setting.getContent()))); + else + response.add(name, setting.getContent()); + } + ); return response.build(); } diff --git a/src/test/java/edu/harvard/iq/dataverse/settings/SettingsServiceBeanTest.java b/src/test/java/edu/harvard/iq/dataverse/settings/SettingsServiceBeanTest.java index 47feea5a2fa..e52c7ea7887 100644 --- a/src/test/java/edu/harvard/iq/dataverse/settings/SettingsServiceBeanTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/settings/SettingsServiceBeanTest.java @@ -89,7 +89,7 @@ void testListAllAsJson_nonLocalizedSettings() { // Given List resultList = List.of( new Setting("testKey1", "testValue1"), - new Setting("testKey2", "testValue2") + new Setting("testKey2", "12345") ); when(typedQuery.getResultList()).thenReturn(resultList); @@ -99,7 +99,29 @@ void testListAllAsJson_nonLocalizedSettings() { // Then assertEquals(2, result.size()); assertEquals("testValue1", result.getString("testKey1")); - assertEquals("testValue2", result.getString("testKey2")); + assertEquals("12345", result.getString("testKey2")); + } + + @Test + void testListAllAsJson_jsonSetting() { + // Given + JsonObject expected = Json.createObjectBuilder() + .add("default", "2147483648") + .add("fileOne", "4000000000") + .add("s3", "8000000000") + .build(); + + List resultList = List.of( + new Setting(SettingsServiceBean.Key.MaxFileUploadSizeInBytes.toString(), "{\"default\":\"2147483648\",\"fileOne\":\"4000000000\",\"s3\":\"8000000000\"}") + ); + when(typedQuery.getResultList()).thenReturn(resultList); + + // When + JsonObject result = settingsServiceBean.listAllAsJson(); + + // Then + assertEquals(1, result.size()); + assertEquals(expected.toString(), 
result.getJsonObject(SettingsServiceBean.Key.MaxFileUploadSizeInBytes.toString()).toString()); } @Test From 865ed5cc4dab2a9c283cfdce8b521896f3a5c0be Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 17 Jul 2025 17:13:03 +0200 Subject: [PATCH 12/69] refactor: centralize validation logic for settings names and language codes in `SettingsServiceBean` #11639 Moved `validateSettingName` and `validateSettingLang` from `Admin` to `SettingsServiceBean` to improve reusability and maintain consistency. Updated tests and API endpoints to use the centralized methods. Expanded test coverage for validation scenarios. --- .../edu/harvard/iq/dataverse/api/Admin.java | 36 ++++--------- .../settings/SettingsServiceBean.java | 40 +++++++++++++++ .../settings/SettingsServiceBeanTest.java | 51 +++++++++++++++++++ 3 files changed, 100 insertions(+), 27 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index 6add116834d..fbab4e529b1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -70,7 +70,6 @@ import java.io.StringReader; import java.nio.charset.StandardCharsets; import java.util.Collections; -import java.util.Locale; import java.util.Map; import java.util.Map.Entry; import java.util.function.Predicate; @@ -212,28 +211,11 @@ public Response listAllSettings() { return ok(settingsSvc.listAllAsJson()); } - private void validateSettingName(String name) throws IllegalArgumentException { - if (SettingsServiceBean.Key.parse(name) == null) { - // If there is more than one colon, this may be someone trying to use the old suffix settings. - // Change the error message for that slightly. - if (name.replace(":","").length() < name.length() - 1) { - throw new IllegalArgumentException("The name of the setting may not have a colon separated suffix since Dataverse 6.8. 
Please update your scripts."); - } - throw new IllegalArgumentException("The name of the setting is required."); - } - } - - private void validateSettingLang(String lang) throws IllegalArgumentException { - if (lang == null || lang.length() != 2 || !Arrays.asList(Locale.getISOLanguages()).contains(lang)) { - throw new IllegalArgumentException("The language '" + lang + "' is not a valid ISO 639-1 language code."); - } - } - @Path("settings/{name}") @PUT public Response putSetting(@PathParam("name") String name, String content) { try { - validateSettingName(name); + SettingsServiceBean.validateSettingName(name); Setting s = settingsSvc.set(name, content); return ok("Setting " + name + " added."); @@ -246,8 +228,8 @@ public Response putSetting(@PathParam("name") String name, String content) { @PUT public Response putSettingLang(@PathParam("name") String name, @PathParam("lang") String lang, String content) { try { - validateSettingName(name); - validateSettingLang(lang); + SettingsServiceBean.validateSettingName(name); + SettingsServiceBean.validateSettingLang(lang); Setting s = settingsSvc.set(name, lang, content); return ok("Setting " + name + " added for language " + lang + "."); @@ -260,7 +242,7 @@ public Response putSettingLang(@PathParam("name") String name, @PathParam("lang" @GET public Response getSetting(@PathParam("name") String name) { try { - validateSettingName(name); + SettingsServiceBean.validateSettingName(name); String content = settingsSvc.get(name); return (content != null) ? ok(content) : notFound("Setting " + name + " not found."); @@ -273,8 +255,8 @@ public Response getSetting(@PathParam("name") String name) { @GET public Response getSetting(@PathParam("name") String name, @PathParam("lang") String lang) { try { - validateSettingName(name); - validateSettingLang(lang); + SettingsServiceBean.validateSettingName(name); + SettingsServiceBean.validateSettingLang(lang); String content = settingsSvc.get(name, lang); return (content != null) ? 
ok(content) : notFound("Setting " + name + " for language " + lang + " not found."); @@ -287,7 +269,7 @@ public Response getSetting(@PathParam("name") String name, @PathParam("lang") St @DELETE public Response deleteSetting(@PathParam("name") String name) { try { - validateSettingName(name); + SettingsServiceBean.validateSettingName(name); settingsSvc.delete(name); return ok("Setting " + name + " deleted."); @@ -300,8 +282,8 @@ public Response deleteSetting(@PathParam("name") String name) { @DELETE public Response deleteSettingLang(@PathParam("name") String name, @PathParam("lang") String lang) { try { - validateSettingName(name); - validateSettingLang(lang); + SettingsServiceBean.validateSettingName(name); + SettingsServiceBean.validateSettingLang(lang); settingsSvc.delete(name, lang); return ok("Setting " + name + " for language " + lang + " deleted."); diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index 365362d577c..732b2ffad49 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -20,11 +20,13 @@ import org.json.JSONException; import org.json.JSONObject; +import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.Set; import java.util.StringTokenizer; @@ -1123,4 +1125,42 @@ public Set getConfiguredLanguages() { return langs; } + + /** + * Validates the provided setting name to ensure it meets the required format. + * Throws an {@code IllegalArgumentException} if the name is invalid, including cases + * where it contains a colon-separated suffix that is no longer supported. + * + * @param name The name of the setting to be validated. 
+ * It must adhere to the allowable setting name format. + * Names with more than one colon, which may indicate deprecated suffix formats, are not allowed. + * @throws IllegalArgumentException if the setting name is invalid. + */ + public static void validateSettingName(String name) { + if (SettingsServiceBean.Key.parse(name) == null) { + // If there is more than one colon, this may be someone trying to use the old suffix settings. + // Change the error message for that slightly. + if (name.replace(":","").length() < name.length() - 1) { + throw new IllegalArgumentException("The name of the setting may not have a colon separated suffix since Dataverse 6.8. Please update your scripts."); + } + throw new IllegalArgumentException("The name of the setting is invalid."); + } + } + + /** + * Validates the provided language code to ensure it adheres to the ISO 639-1 format. + * This method checks that the language code is not null, has a length of 2 characters, + * and exists within the list of valid ISO 639-1 language codes. If the validation + * fails, an {@code IllegalArgumentException} is thrown. + * + * @param lang the language code to be validated. It must be a non-null, + * 2-character string representing a valid ISO 639-1 language code. + * @throws IllegalArgumentException if the language code is invalid. 
+ */ + public static void validateSettingLang(String lang) { + if (lang == null || lang.length() != 2 || !Arrays.asList(Locale.getISOLanguages()).contains(lang)) { + throw new IllegalArgumentException("The language '" + lang + "' is not a valid ISO 639-1 language code."); + } + } + } diff --git a/src/test/java/edu/harvard/iq/dataverse/settings/SettingsServiceBeanTest.java b/src/test/java/edu/harvard/iq/dataverse/settings/SettingsServiceBeanTest.java index e52c7ea7887..3f65e5317a4 100644 --- a/src/test/java/edu/harvard/iq/dataverse/settings/SettingsServiceBeanTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/settings/SettingsServiceBeanTest.java @@ -1,5 +1,6 @@ package edu.harvard.iq.dataverse.settings; +import jakarta.json.Json; import jakarta.json.JsonObject; import jakarta.persistence.EntityManager; import jakarta.persistence.TypedQuery; @@ -8,13 +9,17 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.CsvSource; import org.junit.jupiter.params.provider.MethodSource; +import org.junit.jupiter.params.provider.ValueSource; import org.mockito.ArgumentMatchers; import java.util.Collections; import java.util.List; +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -54,6 +59,52 @@ void testRoundtrip() { } } + @Nested + class ValidateSettingNameTest { + + @ValueSource(strings = {":ShowMuteOptions", ":AllowApiTokenLookupViaApi", ":OAuth2CallbackUrl"}) + @ParameterizedTest + void testValidateSettingName_validNames(String name) { + assertDoesNotThrow(() -> SettingsServiceBean.validateSettingName(name)); + } + + @CsvSource({ + "invalidName, 'The name of the setting is invalid.'", + ":invalid:suffix, 'The name of the 
setting may not have a colon separated suffix since Dataverse 6.8. Please update your scripts.'", + ":NonExistentKey, 'The name of the setting is invalid.'", + ":ShowMuteOptions/lang/en, 'The name of the setting is invalid.'" + }) + @ParameterizedTest + void testValidateSettingName_invalidNames(String name, String expectedMessage) { + IllegalArgumentException exception = assertThrows(IllegalArgumentException.class, + () -> SettingsServiceBean.validateSettingName(name)); + assertEquals(expectedMessage, exception.getMessage()); + } + } + + @Nested + class ValidateSettingLangTest { + + @ValueSource(strings = {"en", "fr", "de"}) + @ParameterizedTest + void testValidateSettingLang_validLanguage(String language) { + assertDoesNotThrow(() -> SettingsServiceBean.validateSettingLang(language)); + } + + @CsvSource({ + ", 'The language ''null'' is not a valid ISO 639-1 language code.'", + "e, 'The language ''e'' is not a valid ISO 639-1 language code.'", + "xyz, 'The language ''xyz'' is not a valid ISO 639-1 language code.'", + "zz, 'The language ''zz'' is not a valid ISO 639-1 language code.'" + }) + @ParameterizedTest + void testValidateSettingLang_invalidLanguage(String language, String expectedMessage) { + IllegalArgumentException exception = assertThrows(IllegalArgumentException.class, + () -> SettingsServiceBean.validateSettingLang(language)); + assertEquals(expectedMessage, exception.getMessage()); + } + } + @Nested class ListAllAsJsonTest { From 8fe97546e2830fd688ca58ca65d6e21bfb53e6c9 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 17 Jul 2025 17:14:25 +0200 Subject: [PATCH 13/69] feat: add `validateKeys` method to validate settings keys and update tests #11639 Introduced `validateKeys` method in `SettingsServiceBean` to ensure proper settings key validation, including localized and non-localized cases. Added parameterized test coverage for validation scenarios in `SettingsServiceBeanTest`. 
--- .../settings/SettingsServiceBean.java | 30 +++++++++++++++- .../settings/SettingsServiceBeanTest.java | 36 +++++++++++++++++++ 2 files changed, 65 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index 732b2ffad49..4050b6f9e20 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -1124,7 +1124,35 @@ public Set getConfiguredLanguages() { langs.addAll(configuredLocales.keySet()); return langs; } - + + /** + * Validates the keys in the provided settings JSON object. + * This method checks if each key follows the required format and rules. + * If a key is invalid, it is added to the list of invalid keys. + * + * @param settings the JsonObject containing the keys to be validated + * @return a list of invalid keys as an unmodifiable list + */ + public static List validateKeys(JsonObject settings) { + List invalidKeys = new ArrayList<>(); + for (String key : settings.keySet()) { + try { + // Case A: localized setting, validate setting and language + if (key.contains(L10N_KEY_SEPARATOR)) { + String name = key.substring(0, key.indexOf(L10N_KEY_SEPARATOR)); + String lang = key.substring(key.indexOf(L10N_KEY_SEPARATOR) + L10N_KEY_SEPARATOR.length()); + validateSettingName(name); + validateSettingLang(lang); + // Case B: Simple, non-localized setting name + } else { + validateSettingName(key); + } + } catch (IllegalArgumentException iae) { + invalidKeys.add(key); + } + } + return Collections.unmodifiableList(invalidKeys); + } /** * Validates the provided setting name to ensure it meets the required format. 
diff --git a/src/test/java/edu/harvard/iq/dataverse/settings/SettingsServiceBeanTest.java b/src/test/java/edu/harvard/iq/dataverse/settings/SettingsServiceBeanTest.java index 3f65e5317a4..f504c668dd8 100644 --- a/src/test/java/edu/harvard/iq/dataverse/settings/SettingsServiceBeanTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/settings/SettingsServiceBeanTest.java @@ -105,6 +105,42 @@ void testValidateSettingLang_invalidLanguage(String language, String expectedMes } } + @Nested + class ValidateKeysTest { + static List validateKeysTestParameters() { + return List.of( + Arguments.of( + Json.createObjectBuilder() + .add(":ApplicationTermsOfUse", "validValue1") + .add(":ApplicationTermsOfUse/lang/en", "validValue2") + .build(), + List.of() + ), + Arguments.of( + Json.createObjectBuilder() + .add(":Invalid:Key", "value1") + .add(":NonExistentKey/lang/fr", "value2") + .build(), + List.of(":Invalid:Key", ":NonExistentKey/lang/fr") + ), + Arguments.of( + Json.createObjectBuilder() + .add(":ApplicationTermsOfUse", "value3") + .add("NoColonKey", "value4") + .build(), + List.of("NoColonKey") + ) + ); + } + + @MethodSource("validateKeysTestParameters") + @ParameterizedTest + void testValidateKeys(JsonObject input, List expectedInvalidKeys) { + List result = SettingsServiceBean.validateKeys(input); + assertEquals(expectedInvalidKeys, result); + } + } + @Nested class ListAllAsJsonTest { From 515472d301c9d32621bcc4e77a8ba73df3c76c06 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 17 Jul 2025 17:16:41 +0200 Subject: [PATCH 14/69] feat: add `SettingsServiceBean.convertJsonToSettings()` method and unit tests #11639 Introduced `convertJsonToSettings` in `SettingsServiceBean` to transform JSON objects into `Setting` instances, supporting language-specific keys. Added comprehensive unit tests to verify functionality with various JSON structures. 
--- .../settings/SettingsServiceBean.java | 35 ++++++++ .../settings/SettingsServiceBeanTest.java | 79 +++++++++++++++++++ 2 files changed, 114 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index 4050b6f9e20..9526f1f5b4a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -1077,6 +1077,41 @@ public JsonObject listAllAsJson() { return response.build(); } + /** + * Converts a JSON object representing settings into a list of Setting objects. + * Each entry in the JSON object is processed to create a Setting instance. + * If the key includes a language (indicated by a separator), the language + * information is extracted and included in the Setting object. + * Note: This method expects a pre-validated JsonObject and will happily create + * nonsense settings for you otherwise. + * + * @param settings a (pre-validated) {@link JsonObject} containing key-value pairs where + * each key represents a setting name (and optionally a language code), + * and each value represents the associated content. + * @return a {@link List} of {@link Setting} objects parsed from the input JSON object. + */ + static List convertJsonToSettings(JsonObject settings) { + return settings.entrySet().stream() + .map(entry -> { + String key = entry.getKey(); + String value = entry.getValue().toString() + // This is necessary to avoid storing the quotes in the DB when a setting is a simple value. + // JsonValue will escape any JsonString with such quotes. 
+ .replaceFirst("^\"", "") + .replaceFirst("\"$", ""); + + if (key.contains(L10N_KEY_SEPARATOR)) { + // Handle localized settings + String name = key.substring(0, key.indexOf(L10N_KEY_SEPARATOR)); + String lang = key.substring(key.indexOf(L10N_KEY_SEPARATOR) + L10N_KEY_SEPARATOR.length()); + return new Setting(name, lang, value); + } else { + return new Setting(key, value); + } + }) + .collect(Collectors.toList()); + } + public Map getBaseMetadataLanguageMap(Map languageMap, boolean refresh) { if (languageMap == null || refresh) { languageMap = new HashMap(); diff --git a/src/test/java/edu/harvard/iq/dataverse/settings/SettingsServiceBeanTest.java b/src/test/java/edu/harvard/iq/dataverse/settings/SettingsServiceBeanTest.java index f504c668dd8..34d791d8fc3 100644 --- a/src/test/java/edu/harvard/iq/dataverse/settings/SettingsServiceBeanTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/settings/SettingsServiceBeanTest.java @@ -231,4 +231,83 @@ void testListAllAsJson_localizedSettings() { assertEquals("value_fr", result.getString("localizedKey/lang/fr")); } } + + @Nested + class ConvertJsonToSettingsTest { + + @Test + void testConvertJsonToSettings_simpleKeyValues() { + // Given + JsonObject input = Json.createObjectBuilder() + .add(":Key1", "Value1") + .add(":Key2", "123456") + // The REST API endpoint presents a JsonObject, which may have number literals in it. + // Check that we can cope with that. 
+ .add(":Key3", 123456) + .build(); + + // When + List result = SettingsServiceBean.convertJsonToSettings(input); + + // Then + assertEquals(3, result.size()); + assertEquals(new Setting(":Key1", "Value1"), result.get(0)); + assertEquals(new Setting(":Key2", "123456"), result.get(1)); + assertEquals(new Setting(":Key3", "123456"), result.get(2)); + } + + @Test + void testConvertJsonToSettings_localizedKeysWithSimpleValues() { + // Given + JsonObject input = Json.createObjectBuilder() + .add(":LocalizedKey/lang/en", "EnglishValue") + .add(":LocalizedKey/lang/fr", "FrenchValue") + .build(); + + // When + List result = SettingsServiceBean.convertJsonToSettings(input); + + // Then + assertEquals(2, result.size()); + assertEquals(new Setting(":LocalizedKey", "en", "EnglishValue"), result.get(0)); + assertEquals(new Setting(":LocalizedKey", "fr", "FrenchValue"), result.get(1)); + } + + @Test + void testConvertJsonToSettings_emptyJson() { + // Given + JsonObject input = Json.createObjectBuilder().build(); + + // When + List result = SettingsServiceBean.convertJsonToSettings(input); + + // Then + assertEquals(0, result.size()); + } + + @Test + void testConvertJsonToSettings_complexJsonValue() { + // Given + JsonObject input = Json.createObjectBuilder() + .add( + ":MaxFileUploadSizeInBytes", + Json.createObjectBuilder() + .add("default", "2147483648") + .add("fileOne", "4000000000") + .add("s3", "8000000000") + .build()) + .build(); + + // When + List result = SettingsServiceBean.convertJsonToSettings(input); + + // Then + assertEquals(1, result.size()); + assertEquals(new Setting(":MaxFileUploadSizeInBytes", + "{\"default\":\"2147483648\",\"fileOne\":\"4000000000\",\"s3\":\"8000000000\"}"), + result.get(0)); + } + + + } } \ No newline at end of file From a1061115726573a610339667c5096854946a5bc3 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 17 Jul 2025 17:21:12 +0200 Subject: [PATCH 15/69] feat: add `putAllSettings` endpoint to Admin API for bulk settings update 
#11639 Introduced a new `PUT /api/admin/settings` endpoint to update all settings in bulk with JSON input. Added `setAllFromJson` method and placeholder implementation for `replaceAllSettings` in `SettingsServiceBean`. Validates input structure and ensures atomic updates. --- .../edu/harvard/iq/dataverse/api/Admin.java | 23 ++++++++++ .../settings/SettingsServiceBean.java | 42 +++++++++++++++++++ 2 files changed, 65 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index fbab4e529b1..e00a93a826f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -63,6 +63,7 @@ import jakarta.ws.rs.PathParam; import jakarta.ws.rs.container.ContainerRequestContext; import jakarta.ws.rs.core.Context; +import jakarta.ws.rs.core.MediaType; import jakarta.ws.rs.core.Response; import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; @@ -211,6 +212,28 @@ public Response listAllSettings() { return ok(settingsSvc.listAllAsJson()); } + @Path("settings") + @PUT + @Consumes(MediaType.APPLICATION_JSON) + @APIResponses({ + @APIResponse(responseCode = "200", description = "All database options successfully updated") + }) + public Response putAllSettings(JsonObject settings) { + try { + // Basic JSON structure validation only + if (settings == null || settings.isEmpty()) { + return error(Response.Status.BAD_REQUEST, "Empty or invalid JSON object"); + } + + // Transfer to domain objects and deeper validation to be handled by the service layer. 
+ settingsSvc.setAllFromJson(settings); + return ok("All database options successfully updated."); + + } catch (IllegalArgumentException iae) { + return error(Response.Status.BAD_REQUEST, iae.getMessage()); + } + } + @Path("settings/{name}") @PUT public Response putSetting(@PathParam("name") String name, String content) { diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index 9526f1f5b4a..dbb437810e4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -16,10 +16,12 @@ import jakarta.persistence.EntityManager; import jakarta.persistence.PersistenceContext; +import jakarta.transaction.Transactional; import org.json.JSONArray; import org.json.JSONException; import org.json.JSONObject; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; @@ -1077,6 +1079,31 @@ public JsonObject listAllAsJson() { return response.build(); } + /** + * Updates all settings by replacing them with the settings provided in the given JSON object. + * This method validates the keys and values from the JSON object, converts them into + * a list of Setting objects, and performs an atomic update of the internal settings. + * + * @param settings a JsonObject containing the new settings to apply. + * Each key corresponds to a setting name, and each value corresponds + * to its respective value. The keys and values will be validated before + * applying the updates. + * @throws IllegalArgumentException if the JSON object contains invalid keys or invalid settings. 
+ */ + public void setAllFromJson(JsonObject settings) { + // Validate the input + List invalidKeys = validateKeys(settings); + if (!invalidKeys.isEmpty()) { + throw new IllegalArgumentException("Invalid key(s): " + String.join(", ", invalidKeys)); + } + + // Convert JSON to Setting objects + List newSettings = convertJsonToSettings(settings); + + // Perform atomic update (replace all settings) + replaceAllSettings(newSettings); + } + /** * Converts a JSON object representing settings into a list of Setting objects. * Each entry in the JSON object is processed to create a Setting instance. @@ -1112,6 +1139,21 @@ static List convertJsonToSettings(JsonObject settings) { .collect(Collectors.toList()); } + /** + * Replaces all existing settings with a new list of settings within a single transaction. + * The operation is atomic, ensuring that either all changes are applied or none in case of a failure. + * + * @param newSettings the list of new {@link Setting} objects to replace the existing settings. + * Must not be null; an empty list clears all settings. 
+ */ + @Transactional + public void replaceAllSettings(List newSettings) { + // Implementation for atomic replacement + // This would involve clearing existing settings and inserting new ones + // within the same transaction + throw new IllegalStateException("Not yet implemented"); + } + public Map getBaseMetadataLanguageMap(Map languageMap, boolean refresh) { if (languageMap == null || refresh) { languageMap = new HashMap(); From 11c8f8211ea27755635731ecd97082f89ccf7d60 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 18 Jul 2025 13:39:48 +0200 Subject: [PATCH 16/69] style: normalize spacing in `Setting` entity named queries #11639 --- src/main/java/edu/harvard/iq/dataverse/settings/Setting.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/Setting.java b/src/main/java/edu/harvard/iq/dataverse/settings/Setting.java index e429d685c3e..da548195de7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/Setting.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/Setting.java @@ -24,10 +24,9 @@ @NamedQuery( name="Setting.findByName", query = "SELECT s FROM Setting s WHERE s.name=:name AND s.lang IS NULL" ), @NamedQuery( name="Setting.deleteByNameAndLang", - query="DELETE FROM Setting s WHERE s.name=:name AND s.lang=:lang"), + query="DELETE FROM Setting s WHERE s.name=:name AND s.lang=:lang"), @NamedQuery( name="Setting.findByNameAndLang", - query = "SELECT s FROM Setting s WHERE s.name=:name AND s.lang=:lang" ) - + query="SELECT s FROM Setting s WHERE s.name=:name AND s.lang=:lang") }) @Entity public class Setting implements Serializable { From 71a403c749079376d2f7046b33524de0a8414f94 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 18 Jul 2025 13:40:46 +0200 Subject: [PATCH 17/69] fix: add unique constraint to `Setting` entity for `name` and `lang` fields #11639 --- src/main/java/edu/harvard/iq/dataverse/settings/Setting.java | 5 +++++ 1 file changed, 5 insertions(+) 
diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/Setting.java b/src/main/java/edu/harvard/iq/dataverse/settings/Setting.java index da548195de7..48477b13a3e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/Setting.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/Setting.java @@ -9,6 +9,8 @@ import jakarta.persistence.NamedQuery; import jakarta.persistence.GeneratedValue; import jakarta.persistence.GenerationType; +import jakarta.persistence.Table; +import jakarta.persistence.UniqueConstraint; /** * A single value in the config of dataverse. @@ -29,6 +31,9 @@ query="SELECT s FROM Setting s WHERE s.name=:name AND s.lang=:lang") }) @Entity +@Table(uniqueConstraints = { + @UniqueConstraint(name = "UC_setting_name_lang", columnNames = {"name", "lang"}), +}) public class Setting implements Serializable { @Id From a9b19740c1ea86724afa6b60e42b2b51372e6a9c Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 18 Jul 2025 13:59:24 +0200 Subject: [PATCH 18/69] refactor: update `Setting` entity and queries to treat empty string as default for `lang` #11639 Replaced `NULL` with empty string for `lang` field in `Setting` entity to align with SQL standards and ensure unique constraint behavior. Updated queries, methods, and added safeguards to handle empty `lang` consistently. Enhanced documentation for clarity. 
--- .../iq/dataverse/settings/Setting.java | 52 +++++++++++++++---- .../settings/SettingsServiceBean.java | 14 ++++- 2 files changed, 55 insertions(+), 11 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/Setting.java b/src/main/java/edu/harvard/iq/dataverse/settings/Setting.java index 48477b13a3e..9c6b1a20baf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/Setting.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/Setting.java @@ -18,13 +18,13 @@ */ @NamedQueries({ @NamedQuery( name="Setting.deleteByName", - query="DELETE FROM Setting s WHERE s.name=:name AND s.lang IS NULL"), + query="DELETE FROM Setting s WHERE s.name=:name AND s.lang=''"), @NamedQuery( name="Setting.findAll", query="SELECT s FROM Setting s"), @NamedQuery( name="Setting.findAllWithoutLang", - query="SELECT s FROM Setting s WHERE s.lang IS NULL"), + query="SELECT s FROM Setting s WHERE s.lang=''"), @NamedQuery( name="Setting.findByName", - query = "SELECT s FROM Setting s WHERE s.name=:name AND s.lang IS NULL" ), + query="SELECT s FROM Setting s WHERE s.name=:name AND s.lang=''"), @NamedQuery( name="Setting.deleteByNameAndLang", query="DELETE FROM Setting s WHERE s.name=:name AND s.lang=:lang"), @NamedQuery( name="Setting.findByNameAndLang", @@ -42,9 +42,15 @@ public class Setting implements Serializable { @Column(columnDefinition = "TEXT") private String name; - - @Column(columnDefinition = "TEXT") - private String lang; + + /** + * The default value is an empty string, which indicates no specific language is set. + * Using a NULL value here instead would cause the UNIQUE constraint to fail to block duplicate settings. + * Allowing multiple NULL values within a UNIQUE constraint is part of the SQL standard, which Postgres follows. 
+ * As it stores ISO codes, 10 chars is good enough (ISO codes are 2-8 chars by spec) + */ + @Column(length = 10, nullable = false) + private String lang = ""; @Column(columnDefinition = "TEXT") private String content; @@ -56,11 +62,21 @@ public Setting(String name, String content) { this.name = name; this.content = content; } - + + /** + * Constructs a new Setting object with the specified name, language, and content. + * + * @param name the name of the setting; must not be null + * @param lang the language of the setting, represented as an ISO code; must not be null; + * may be empty to represent a non-localized setting. + * @param content the content or value associated with this setting + * @throws NullPointerException if the name or lang parameters are null + */ public Setting(String name, String lang, String content) { + Objects.requireNonNull(lang, "Setting lang cannot be null"); this.name = name; - this.content = content; this.lang = lang; + this.content = content; } public String getName() { @@ -78,12 +94,28 @@ public String getContent() { public void setContent(String content) { this.content = content; } - + + /** + * Retrieves the language associated with this Setting instance. + * The language is represented as an ISO code string. + * An empty string indicates that no specific localization is set. + * + * @return the language code of this Setting; never null + */ public String getLang() { return lang; } - + + /** + * Sets the language for this Setting instance. + * The language is represented as a non-null ISO code string. + * An empty string indicates that no specific localization shall be set. 
+ * + * @param lang the language code to set; must not be null + * @throws NullPointerException if the provided lang parameter is null + */ public void setLang(String lang) { + Objects.requireNonNull(lang, "Setting lang cannot be null"); this.lang = lang; } diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index dbb437810e4..a90561f642c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -896,6 +896,9 @@ public String get( String name, String defaultValue ) { } public String get(String name, String lang, String defaultValue ) { + // Database safeguard, as the default is an empty string + if (lang == null) lang = ""; + List tokens = em.createNamedQuery("Setting.findByNameAndLang", Setting.class) .setParameter("name", name ) .setParameter("lang", lang ) @@ -912,6 +915,9 @@ public String getValueForKey( Key key, String defaultValue ) { } public String getValueForKey( Key key, String lang, String defaultValue ) { + // Database safeguard, as the default is an empty string + if (lang == null) lang = ""; + return get( key.toString(), lang, defaultValue ); } @@ -939,6 +945,9 @@ public Setting set( String name, String content ) { } public Setting set( String name, String lang, String content ) { + // Database safeguard, as the default is an empty string + if (lang == null) lang = ""; + Setting s = null; List tokens = em.createNamedQuery("Setting.findByNameAndLang", Setting.class) @@ -1008,6 +1017,9 @@ public void delete( String name ) { } public void delete( String name, String lang ) { + // Database safeguard, as the default is an empty string + if (lang == null) lang = ""; + actionLogSvc.log( new ActionLogRecord(ActionLogRecord.ActionType.Setting, "delete") .setInfo(name)); em.createNamedQuery("Setting.deleteByNameAndLang") @@ -1066,7 +1078,7 @@ 
public JsonObject listAllAsJson() { // Iterate over all the settings and add them to the response. settings.forEach(setting -> { - String name = setting.getName() + (setting.getLang() == null ? "" : L10N_KEY_SEPARATOR + setting.getLang()); + String name = setting.getName() + (setting.getLang().isEmpty() ? "" : L10N_KEY_SEPARATOR + setting.getLang()); // In case the setting is a JSON object, treat it a such in the output (so the API can return valid JSON) if (setting.getContent().trim().startsWith("{")) From 5ae758c601d10e806159c48e5fbe5d1b68302155 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 18 Jul 2025 14:01:38 +0200 Subject: [PATCH 19/69] refactor: enforce non-null constraint, VARCHAR type, and limit length for `name` in `Setting` entity #11639 Updated `name` field to enforce a maximum length of 200 characters on a VARCHAR field for better performance (replacing the TEXT column type) and mark it as non-nullable (null settings make no sense at all). Added `Objects.requireNonNull` validation for `name` in entity methods and constructors to ensure consistent behavior. 
--- .../java/edu/harvard/iq/dataverse/settings/Setting.java | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/Setting.java b/src/main/java/edu/harvard/iq/dataverse/settings/Setting.java index 9c6b1a20baf..aa9f6559e35 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/Setting.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/Setting.java @@ -40,7 +40,7 @@ public class Setting implements Serializable { @GeneratedValue(strategy = GenerationType.IDENTITY) private Long id; - @Column(columnDefinition = "TEXT") + @Column(length = 200, nullable = false) private String name; /** @@ -59,8 +59,9 @@ public Setting() { } public Setting(String name, String content) { - this.name = name; - this.content = content; + Objects.requireNonNull(name, "Setting name cannot be null"); + this.name = name; + this.content = content; } /** @@ -73,6 +74,7 @@ public Setting(String name, String content) { * @throws NullPointerException if the name or lang parameters are null */ public Setting(String name, String lang, String content) { + Objects.requireNonNull(name, "Setting name cannot be null"); Objects.requireNonNull(lang, "Setting lang cannot be null"); this.name = name; this.lang = lang; @@ -84,6 +86,7 @@ public String getName() { } public void setName(String name) { + Objects.requireNonNull(name, "Setting name cannot be null"); this.name = name; } From 974d6c4c1634dbd6fefb21838b40646ebc913ac0 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 18 Jul 2025 14:01:49 +0200 Subject: [PATCH 20/69] chore: update migration script comment for clarity #11639 --- src/main/resources/db/migration/V6.7.0.1.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/resources/db/migration/V6.7.0.1.sql b/src/main/resources/db/migration/V6.7.0.1.sql index 733a10d951f..aa3683873ca 100644 --- a/src/main/resources/db/migration/V6.7.0.1.sql +++ b/src/main/resources/db/migration/V6.7.0.1.sql @@ 
-1,4 +1,4 @@ --- This script migrates the old TabularIngestSizeLimit database setting using format suffixes to a JSON based approach. +-- Migrates the old TabularIngestSizeLimit database setting using format suffixes to a JSON based approach. See #11639 DO $$ DECLARE From c3f1ed7de4b99a79c67cd2840bc1cf1643f3e5bd Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 18 Jul 2025 14:02:34 +0200 Subject: [PATCH 21/69] chore: add migration script to update `Setting` table structure #11639 Introduced SQL migration to optimize `Setting` table by switching `TEXT` columns to `VARCHAR` for better performance, enforcing `NOT NULL` constraints, adding a unique constraint for `name` and `lang`, and setting default empty string for `lang`. Includes logic to handle existing data and conditional checks for schema changes. --- src/main/resources/db/migration/V6.7.0.2.sql | 35 ++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 src/main/resources/db/migration/V6.7.0.2.sql diff --git a/src/main/resources/db/migration/V6.7.0.2.sql b/src/main/resources/db/migration/V6.7.0.2.sql new file mode 100644 index 00000000000..177f88332b7 --- /dev/null +++ b/src/main/resources/db/migration/V6.7.0.2.sql @@ -0,0 +1,35 @@ +-- Update Setting table structure for changes from #11639 +-- 1. Change column types from TEXT to VARCHAR for better performance +-- 2. Update lang column to use empty string default instead of NULL (avoid non-unique pairs) +-- 3. 
Add NOT NULL constraints and unique constraint for name+lang pairs + +-- First, update any existing NULL lang values to empty string +UPDATE Setting SET lang = '' WHERE lang IS NULL; + +-- Postgres doesn't support IF NOT EXISTS for ALTER COLUMN or ADD CONSTRAINT, so we need conditional logic +DO $$ +BEGIN + -- Only alter columns if they need to be changed (catalog lookups use lower case: Postgres folds unquoted identifiers) + IF EXISTS (SELECT 1 FROM information_schema.columns + WHERE table_name = 'setting' AND column_name = 'name' + AND (data_type = 'text' OR is_nullable = 'YES')) THEN + ALTER TABLE setting ALTER COLUMN name TYPE VARCHAR(255); + ALTER TABLE Setting ALTER COLUMN name SET NOT NULL; + END IF; + + IF EXISTS (SELECT 1 FROM information_schema.columns + WHERE table_name = 'setting' AND column_name = 'lang' + AND (data_type = 'text' OR is_nullable = 'YES')) THEN + ALTER TABLE Setting ALTER COLUMN lang TYPE VARCHAR(10); + ALTER TABLE Setting ALTER COLUMN lang SET NOT NULL; + ALTER TABLE Setting ALTER COLUMN lang SET DEFAULT ''; + END IF; + + IF NOT EXISTS (SELECT 1 FROM information_schema.table_constraints + WHERE table_name = 'setting' + AND constraint_name = 'uc_setting_name_lang' + AND constraint_type = 'UNIQUE') THEN + ALTER TABLE Setting ADD CONSTRAINT UC_setting_name_lang UNIQUE (name, lang); + END IF; + +END $$; From 782542f7291e25950afeba62496a726eced8327f Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 18 Jul 2025 14:03:42 +0200 Subject: [PATCH 22/69] refactor: enhance `Setting` entity equality and constructor for clarity and robustness #11639 Refined `equals` and `hashCode` methods to base equality comparison on `name` and `lang` fields, improving consistency and alignment with usage patterns. Updated the no-argument constructor to protected, adding explanatory comments for enforced design intent and JPA compatibility. 
--- .../iq/dataverse/settings/Setting.java | 37 ++++++++++++------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/Setting.java b/src/main/java/edu/harvard/iq/dataverse/settings/Setting.java index aa9f6559e35..0f240302366 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/Setting.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/Setting.java @@ -55,7 +55,9 @@ public class Setting implements Serializable { @Column(columnDefinition = "TEXT") private String content; - public Setting() { + protected Setting() { + // Intentionally left blank - no empty settings allowed. + // Protected visibility to allow JPA to work. } public Setting(String name, String content) { @@ -124,26 +126,33 @@ public void setLang(String lang) { @Override public int hashCode() { - int hash = 7; - hash = 73 * hash + Objects.hashCode(this.name); - return hash; + return Objects.hash(name, lang); } - + + /** + * Compares this Setting instance to another object for equality. Two Setting + * objects are considered equal if their {@code name} and {@code lang} fields are + * both equal. + * @implNote We do not use the {@code id} and {@code content} fields for the comparison. + * This is due to how these objects usually are used: + * - Mutable content to use for comparison may break collections. + * - Configuration management requires stable identity based on setting's name and localization. + * The content of the settings is irrelevant for lookups. 
+ * + * @param obj the object to compare this Setting with + * @return {@code true} if the specified object is equal to this Setting, {@code false} otherwise + */ @Override public boolean equals(Object obj) { - if (obj == null) { - return false; - } - if ( !(obj instanceof Setting) ) { - return false; + if (this == obj) { + return true; } - final Setting other = (Setting) obj; - if (!Objects.equals(this.name, other.name)) { + if (!(obj instanceof Setting other)) { return false; } - return Objects.equals(this.content, other.content); + return Objects.equals(this.name, other.name) && Objects.equals(this.lang, other.lang); } - + @Override public String toString() { return "[Setting name:" + getName() + " value:" + getContent() + "]"; From aed9f360987cc4aef0198be61ca7c933f2ed49f4 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 18 Jul 2025 15:24:56 +0200 Subject: [PATCH 23/69] refactor: rename `BuiltinUsers.KEY` setting to `:BuiltinUsersKey` for alignment and consistency #11639 Updated references, documentation, and added migration script to reflect the renaming and ensure adherence to naming conventions. Marked the setting as deprecated for removal. 
--- doc/sphinx-guides/source/api/native-api.rst | 4 ++-- doc/sphinx-guides/source/developers/testing.rst | 2 +- doc/sphinx-guides/source/installation/config.rst | 12 +++++++----- .../scripts/bootstrap/demo/init.sh | 2 +- scripts/api/post-install-api-block.sh | 2 +- scripts/api/setup-all.sh | 4 ++-- scripts/api/setup-users.sh | 2 +- scripts/issues/2454/run-test.sh | 2 +- .../edu/harvard/iq/dataverse/api/BuiltinUsers.java | 4 +--- .../iq/dataverse/settings/SettingsServiceBean.java | 9 +++++++++ src/main/resources/db/migration/V6.7.0.1.sql | 14 ++++++++++++-- 11 files changed, 38 insertions(+), 19 deletions(-) diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index fa4b4611559..7dc50e7a532 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -5485,13 +5485,13 @@ Builtin users are known as "Username/Email and Password" users in the :doc:`/use Create a Builtin User ~~~~~~~~~~~~~~~~~~~~~ -For security reasons, builtin users cannot be created via API unless the team who runs the Dataverse installation has populated a database setting called ``BuiltinUsers.KEY``, which is described under :ref:`securing-your-installation` and :ref:`database-settings` sections of Configuration in the Installation Guide. You will need to know the value of ``BuiltinUsers.KEY`` before you can proceed. +For security reasons, builtin users cannot be created via API unless the team who runs the Dataverse installation has populated a database setting called ``:BuiltinUsersKey``, which is described under :ref:`securing-your-installation` and :ref:`database-settings` sections of Configuration in the Installation Guide. You will need to know the value of ``:BuiltinUsersKey`` before you can proceed. To create a builtin user via API, you must first construct a JSON document. 
You can download :download:`user-add.json <../_static/api/user-add.json>` or copy the text below as a starting point and edit as necessary. .. literalinclude:: ../_static/api/user-add.json -Place this ``user-add.json`` file in your current directory and run the following curl command, substituting variables as necessary. Note that both the password of the new user and the value of ``BuiltinUsers.KEY`` are passed as query parameters:: +Place this ``user-add.json`` file in your current directory and run the following curl command, substituting variables as necessary. Note that both the password of the new user and the value of ``:BuiltinUsersKey`` are passed as query parameters:: curl -d @user-add.json -H "Content-type:application/json" "$SERVER_URL/api/builtin-users?password=$NEWUSER_PASSWORD&key=$BUILTIN_USERS_KEY" diff --git a/doc/sphinx-guides/source/developers/testing.rst b/doc/sphinx-guides/source/developers/testing.rst index 1690864d453..fa279b3dbab 100755 --- a/doc/sphinx-guides/source/developers/testing.rst +++ b/doc/sphinx-guides/source/developers/testing.rst @@ -209,7 +209,7 @@ The Burrito Key For reasons that have been lost to the mists of time, the Dataverse software really wants you to to have a burrito. Specifically, if you're trying to run REST Assured tests and see the error "Dataverse config issue: No API key defined for built in user management", you must run the following curl command (or make an equivalent change to your database): -``curl -X PUT -d 'burrito' http://localhost:8080/api/admin/settings/BuiltinUsers.KEY`` +``curl -X PUT -d 'burrito' http://localhost:8080/api/admin/settings/:BuiltinUsersKey`` Without this "burrito" key in place, REST Assured will not be able to create users. We create users to create objects we want to test, such as collections, datasets, and files. 
diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 0541f9f301d..8722da19b78 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -25,7 +25,7 @@ The default password for the "dataverseAdmin" superuser account is "admin", as m Blocking API Endpoints ++++++++++++++++++++++ -The :doc:`/api/native-api` contains a useful but potentially dangerous set of API endpoints called "admin" that allows you to change system settings, make ordinary users into superusers, and more. The "builtin-users" endpoints let admins do tasks such as creating a local/builtin user account if they know the key defined in :ref:`BuiltinUsers.KEY`. +The :doc:`/api/native-api` contains a useful but potentially dangerous set of API endpoints called "admin" that allows you to change system settings, make ordinary users into superusers, and more. The "builtin-users" endpoints let admins do tasks such as creating a local/builtin user account if they know the key defined in :ref:`:BuiltinUsersKey`. By default in the code, most of these API endpoints can be operated on remotely and a number of endpoints do not require authentication. However, the endpoints "admin" and "builtin-users" are limited to localhost out of the box by the installer, using the JvmSettings :ref:`dataverse.api.blocked.endpoints` and :ref:`dataverse.api.blocked.policy`. @@ -781,7 +781,7 @@ Both Local and Remote Auth The ``authenticationproviderrow`` database table controls which "authentication providers" are available within a Dataverse installation. Out of the box, a single row with an id of "builtin" will be present. For each user in a Dataverse installation, the ``authenticateduserlookup`` table will have a value under ``authenticationproviderid`` that matches this id. For example, the default "dataverseAdmin" user will have the value "builtin" under ``authenticationproviderid``. 
Why is this important? Users are tied to a specific authentication provider but conversion mechanisms are available to switch a user from one authentication provider to the other. As explained in the :doc:`/user/account` section of the User Guide, a graphical workflow is provided for end users to convert from the "builtin" authentication provider to a remote provider. Conversion from a remote authentication provider to the builtin provider can be performed by a sysadmin with access to the "admin" API. See the :doc:`/api/native-api` section of the API Guide for how to list users and authentication providers as JSON. -Adding and enabling a second authentication provider (:ref:`native-api-add-auth-provider` and :ref:`api-toggle-auth-provider`) will result in the Log In page showing additional providers for your users to choose from. By default, the Log In page will show the "builtin" provider, but you can adjust this via the :ref:`conf-default-auth-provider` configuration option. Further customization can be achieved by setting :ref:`conf-allow-signup` to "false", thus preventing users from creating local accounts via the web interface. Please note that local accounts can also be created through the API by enabling the ``builtin-users`` endpoint (:ref:`:BlockedApiEndpoints`) and setting the ``BuiltinUsers.KEY`` database setting (:ref:`BuiltinUsers.KEY`). +Adding and enabling a second authentication provider (:ref:`native-api-add-auth-provider` and :ref:`api-toggle-auth-provider`) will result in the Log In page showing additional providers for your users to choose from. By default, the Log In page will show the "builtin" provider, but you can adjust this via the :ref:`conf-default-auth-provider` configuration option. Further customization can be achieved by setting :ref:`conf-allow-signup` to "false", thus preventing users from creating local accounts via the web interface. 
Please note that local accounts can also be created through the API by enabling the ``builtin-users`` endpoint (:ref:`:BlockedApiEndpoints`) and setting the ``:BuiltinUsersKey`` database setting (:ref:`:BuiltinUsersKey`). To configure Shibboleth see the :doc:`shibboleth` section and to configure OAuth see the :doc:`oauth2` section. @@ -3933,14 +3933,16 @@ Now that ``:BlockedApiKey`` has been enabled, blocked APIs can be accessed using ``curl https://demo.dataverse.org/api/admin/settings?unblock-key=theKeyYouChose`` -.. _BuiltinUsers.KEY: +.. _:BuiltinUsersKey: -BuiltinUsers.KEY +:BuiltinUsersKey ++++++++++++++++ The key required to create users via API as documented at :doc:`/api/native-api`. Unlike other database settings, this one doesn't start with a colon. -``curl -X PUT -d builtInS3kretKey http://localhost:8080/api/admin/settings/BuiltinUsers.KEY`` +``curl -X PUT -d builtInS3kretKey http://localhost:8080/api/admin/settings/:BuiltinUsersKey`` + +Note: this key used to be named ``BuiltinUsers.KEY`` until Dataverse 6.8. :SearchApiRequiresToken +++++++++++++++++++++++ diff --git a/modules/container-configbaker/scripts/bootstrap/demo/init.sh b/modules/container-configbaker/scripts/bootstrap/demo/init.sh index aa73cb5edff..b2735b50b28 100644 --- a/modules/container-configbaker/scripts/bootstrap/demo/init.sh +++ b/modules/container-configbaker/scripts/bootstrap/demo/init.sh @@ -31,7 +31,7 @@ fi echo "" echo "Revoke the key that allows for creation of builtin users..." -curl -sS -X DELETE "${DATAVERSE_URL}/api/admin/settings/BuiltinUsers.KEY" +curl -sS -X DELETE "${DATAVERSE_URL}/api/admin/settings/:BuiltinUsersKey" # TODO: stop using these deprecated database settings. 
See https://github.com/IQSS/dataverse/pull/11454 echo "" diff --git a/scripts/api/post-install-api-block.sh b/scripts/api/post-install-api-block.sh index 4cc0ac783f7..f7753665b5b 100755 --- a/scripts/api/post-install-api-block.sh +++ b/scripts/api/post-install-api-block.sh @@ -4,7 +4,7 @@ # the sensitive API endpoints, in order to block it for the general public. # First, revoke the authentication token from the built-in user: -curl -X DELETE $SERVER/admin/settings/BuiltinUsers.KEY +curl -X DELETE "$SERVER/admin/settings/:BuiltinUsersKey" # Block the sensitive endpoints: # Relevant settings: diff --git a/scripts/api/setup-all.sh b/scripts/api/setup-all.sh index b7f962209e4..bd0bd77c52b 100755 --- a/scripts/api/setup-all.sh +++ b/scripts/api/setup-all.sh @@ -57,7 +57,7 @@ echo "- Allow internal signup" curl -X PUT -d yes "${DATAVERSE_URL}/api/admin/settings/:AllowSignUp" curl -X PUT -d "/dataverseuser.xhtml?editMode=CREATE" "${DATAVERSE_URL}/api/admin/settings/:SignUpUrl" -curl -X PUT -d burrito "${DATAVERSE_URL}/api/admin/settings/BuiltinUsers.KEY" +curl -X PUT -d burrito "${DATAVERSE_URL}/api/admin/settings/:BuiltinUsersKey" curl -X PUT -d localhost-only "${DATAVERSE_URL}/api/admin/settings/:BlockedApiPolicy" curl -X PUT -d 'native/http' "${DATAVERSE_URL}/api/admin/settings/:UploadMethods" echo @@ -91,7 +91,7 @@ if [ $SECURESETUP = 1 ] then # Revoke the "burrito" super-key; # Block sensitive API endpoints; - curl -X DELETE "${DATAVERSE_URL}/api/admin/settings/BuiltinUsers.KEY" + curl -X DELETE "${DATAVERSE_URL}/api/admin/settings/:BuiltinUsersKey" curl -X PUT -d 'admin,builtin-users' "${DATAVERSE_URL}/api/admin/settings/:BlockedApiEndpoints" echo "Access to the /api/admin and /api/test is now disabled, except for connections from localhost." 
else diff --git a/scripts/api/setup-users.sh b/scripts/api/setup-users.sh index 141e1b3150f..7df771dc0fe 100755 --- a/scripts/api/setup-users.sh +++ b/scripts/api/setup-users.sh @@ -5,7 +5,7 @@ SERVER=http://localhost:8080/api echo Setting up users on $SERVER echo ============================================== -curl -X PUT -d burrito $SERVER/admin/settings/BuiltinUsers.KEY +curl -X PUT -d burrito "$SERVER/admin/settings/:BuiltinUsersKey" peteResp=$(curl -s -H "Content-type:application/json" -X POST -d @data/userPete.json "$SERVER/builtin-users?password=pete&key=burrito") diff --git a/scripts/issues/2454/run-test.sh b/scripts/issues/2454/run-test.sh index 49eb45a8a5e..5ae0ac33f4d 100755 --- a/scripts/issues/2454/run-test.sh +++ b/scripts/issues/2454/run-test.sh @@ -39,7 +39,7 @@ if [ $SETUP_NEEDED == "yes" ]; then echo $ROOT_USER api key is $ROOT_KEY # Create @anAuthUser - USER_CREATION_KEY=$($DB "SELECT content FROM setting WHERE name='BuiltinUsers.KEY'") + USER_CREATION_KEY=$($DB "SELECT content FROM setting WHERE name=':BuiltinUsersKey'") AN_AUTH_USER_KEY=$( curl -s -X POST -d@anAuthUser.json -H"Content-type:application/json" $ENDPOINT/builtin-users?password=XXX\&key=$USER_CREATION_KEY | jq .data.apiToken | tr -d \") ANOTHER_AUTH_USER_KEY=$( curl -s -X POST -d@anotherAuthUser.json -H"Content-type:application/json" $ENDPOINT/builtin-users?password=XXX\&key=$USER_CREATION_KEY | jq .data.apiToken | tr -d \") echo diff --git a/src/main/java/edu/harvard/iq/dataverse/api/BuiltinUsers.java b/src/main/java/edu/harvard/iq/dataverse/api/BuiltinUsers.java index 317f7d6c870..79d5682d4f3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/BuiltinUsers.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/BuiltinUsers.java @@ -40,8 +40,6 @@ public class BuiltinUsers extends AbstractApiBean { private static final Logger logger = Logger.getLogger(BuiltinUsers.class.getName()); - private static final String API_KEY_IN_SETTINGS = "BuiltinUsers.KEY"; - @EJB protected 
BuiltinUserServiceBean builtinUserSvc; @@ -129,7 +127,7 @@ private Response internalSave(BuiltinUser user, String password, String key) { } private Response internalSave(BuiltinUser user, String password, String key, Boolean sendEmailNotification) { - String expectedKey = settingsSvc.get(API_KEY_IN_SETTINGS); + String expectedKey = settingsSvc.getValueForKey(SettingsServiceBean.Key.BuiltinUsersKey); if (expectedKey == null) { return error(Status.SERVICE_UNAVAILABLE, "Dataverse config issue: No API key defined for built in user management"); diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index a90561f642c..e9275792129 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -166,6 +166,15 @@ public enum Key { @Deprecated(forRemoval = true, since = "2025-04-29") BlockedApiPolicy, + /** + * A special secret that, if set, needs to be given when trying to manage internal users. + * This key was formerly known as "BuiltinUsers.KEY", which never was a setting name aligning with the others. + * At some future point this setting should be moved to JvmSettings (so we consume proper secrets) + * or plainly removed with the transition to the SPA frontend requiring an external IdP. + */ + @Deprecated(forRemoval = true, since = "2025-08-01") + BuiltinUsersKey, + /** * For development only (see dev guide for details). Backed by an enum * of possible account types. diff --git a/src/main/resources/db/migration/V6.7.0.1.sql b/src/main/resources/db/migration/V6.7.0.1.sql index aa3683873ca..cfe1e75d0b5 100644 --- a/src/main/resources/db/migration/V6.7.0.1.sql +++ b/src/main/resources/db/migration/V6.7.0.1.sql @@ -1,5 +1,6 @@ --- Migrates the old TabularIngestSizeLimit database setting using format suffixes to a JSON based approach. 
See #11639 - +-- Migrates the old database setting to their valid and aligned successors. #11639 +-- 1. ":TabularIngestSizeLimit" database setting used format suffixes, move to a JSON-based approach +-- 2. (see below) "BuiltinUsers.KEY" was never aligned with any of the other settings names. DO $$ DECLARE base_setting_content TEXT; @@ -75,4 +76,13 @@ DO $$ AND lang IS NULL; RAISE NOTICE 'Successfully migrated TabularIngestSizeLimit settings to JSON format: %', json_object::TEXT; + END $$; + +-- 2. Migrate BuiltinUsers.KEY to the new setting name +DO $$ + BEGIN + IF EXISTS (SELECT 1 FROM Setting WHERE name = 'BuiltinUsers.KEY') THEN + INSERT INTO Setting (name, lang, content) VALUES (':BuiltinUsersKey', NULL, (SELECT content FROM Setting WHERE name = 'BuiltinUsers.KEY')); + DELETE FROM Setting WHERE name = 'BuiltinUsers.KEY'; + END IF; END $$; \ No newline at end of file From 48db4b39dde54e2161ac89a89c4828d4295bd9cd Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 18 Jul 2025 17:28:32 +0200 Subject: [PATCH 24/69] refactor: extract `convertToJsonKey` method in `SettingsServiceBean` for reusability #11639 Moved logic for constructing JSON keys into a dedicated method `convertToJsonKey`, improving code clarity and reducing duplication. --- .../harvard/iq/dataverse/settings/SettingsServiceBean.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index e9275792129..78ffb92c819 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -1087,7 +1087,7 @@ public JsonObject listAllAsJson() { // Iterate over all the settings and add them to the response. settings.forEach(setting -> { - String name = setting.getName() + (setting.getLang().isEmpty() ? 
"" : L10N_KEY_SEPARATOR + setting.getLang()); + String name = convertToJsonKey(setting); // In case the setting is a JSON object, treat it a such in the output (so the API can return valid JSON) if (setting.getContent().trim().startsWith("{")) @@ -1223,6 +1223,10 @@ public Set getConfiguredLanguages() { return langs; } + public static String convertToJsonKey(Setting setting) { + return setting.getName() + (setting.getLang().isEmpty() ? "" : L10N_KEY_SEPARATOR + setting.getLang()); + } + /** * Validates the keys in the provided settings JSON object. * This method checks if each key follows the required format and rules. From a736052069973f83d7a126bbbafe61940231363f Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 18 Jul 2025 17:30:21 +0200 Subject: [PATCH 25/69] feat: implement atomic bulk settings replacement with detailed operation tracking in `SettingsServiceBean` #11639 Added `replaceAllSettings` method to support atomic replacement of settings with clear distinctions for created, updated, and deleted items. Enhanced clarity by introducing an `Op` enum for operation types and a `convertToJson` helper method to generate JSON representations of changes. 
--- .../settings/SettingsServiceBean.java | 112 ++++++++++++++++-- 1 file changed, 103 insertions(+), 9 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index 78ffb92c819..e0bab4dadc9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -10,6 +10,7 @@ import jakarta.inject.Named; import jakarta.json.Json; import jakarta.json.JsonArray; +import jakarta.json.JsonArrayBuilder; import jakarta.json.JsonObject; import jakarta.json.JsonObjectBuilder; import jakarta.json.JsonValue; @@ -32,6 +33,7 @@ import java.util.Map; import java.util.Set; import java.util.StringTokenizer; +import java.util.function.Function; import java.util.logging.Level; import java.util.logging.Logger; import java.util.stream.Collectors; @@ -1161,18 +1163,110 @@ static List convertJsonToSettings(JsonObject settings) { } /** - * Replaces all existing settings with a new list of settings within a single transaction. - * The operation is atomic, ensuring that either all changes are applied or none in case of a failure. + * Enum representing the types of operations that are performed on a bulk operation with settings. 
+ */ + static enum Op { + UPDATED, + CREATED, + DELETED; + + static JsonObjectBuilder convertToJson(Map operationalDetails) { + // Create a nice represenation of what happened as Json + JsonObjectBuilder jbo = Json.createObjectBuilder(); + JsonArrayBuilder created = Json.createArrayBuilder(); + JsonArrayBuilder updated = Json.createArrayBuilder(); + JsonArrayBuilder deleted = Json.createArrayBuilder(); + + operationalDetails.forEach((setting, op) -> { + String name = convertToJsonKey(setting); + switch (op) { + case CREATED -> created.add(name); + case UPDATED -> updated.add(name); + case DELETED -> deleted.add(name); + } + }); + + return jbo + .add("created", created) + .add("updated", updated) + .add("deleted", deleted); + } + } + + /** + * Replaces all existing settings in the database with the provided set of new settings. + * This method performs the following actions: + * - Deletes any existing settings that are not present in the provided new settings. + * - Updates the content of existing settings that match the keys in the provided new settings. + * - Creates new settings that are not present in the database. * - * @param newSettings the list of new {@link Setting} objects to replace the existing settings. - * Must not be null; an empty list clears all settings. + * @param newSettings the set of new settings to replace the existing ones. + * Each setting is uniquely identified by its name and language. + * Must not be null (it may be empty). + * @return a map tracking the operations performed on each setting. The map's keys + * are the settings involved, and the values are the types of operations + * performed (CREATED, UPDATED, DELETED). 
*/ @Transactional - public void replaceAllSettings(List newSettings) { - // Implementation for atomic replacement - // This would involve clearing existing settings and inserting new ones - // within the same transaction - throw new IllegalStateException("Not yet implemented"); + public Map replaceAllSettings(Set newSettings) { + Objects.requireNonNull(newSettings, "The list of new settings cannot be null (it may be empty)."); + + // Get all existing settings as a map for O(1) lookup + List existingSettings = em.createNamedQuery("Setting.findAll", Setting.class).getResultList(); + Map existingByKey = existingSettings.stream() + .collect(Collectors.toMap( + setting -> setting.getName() + "|" + setting.getLang(), + Function.identity() + )); + + // Create map of new settings for O(1) lookup + Map newByKey = newSettings.stream() + .collect(Collectors.toMap( + setting -> setting.getName() + "|" + setting.getLang(), + Function.identity() + )); + + // Track operations for return value + Map opsTracking = new HashMap<>(); + + // Process existing settings + for (Map.Entry entry : existingByKey.entrySet()) { + String key = entry.getKey(); + Setting existingSetting = entry.getValue(); + + // Setting exists in DB but not in new set - delete it + if (!newByKey.containsKey(key)) { + em.remove(existingSetting); + opsTracking.put(existingSetting, Op.DELETED); + + // Setting exists in both - update with new values + } else { + Setting newSetting = newByKey.get(key); + // We use the already managed entity and update it with the content of the new setting. + // (This means we don't need to call em.merge(), the ORM will track and execute it for us.) 
+ existingSetting.setContent(newSetting.getContent()); + opsTracking.put(existingSetting, Op.UPDATED); + } + } + + // Process new settings - create those not in existing set + for (Map.Entry entry : newByKey.entrySet()) { + String key = entry.getKey(); + Setting newSetting = entry.getValue(); + + if (!existingByKey.containsKey(key)) { + // Setting is new - persist it + em.persist(newSetting); + opsTracking.put(newSetting, Op.CREATED); + } + // If it exists, it was already handled in the previous loop + } + + // Flush changes to ensure consistency before transaction is committed (will also ensure merge() is called). + em.flush(); + + return opsTracking; + } public Map getBaseMetadataLanguageMap(Map languageMap, boolean refresh) { From c68ac57f9cd0f7ce232e40f154dab54d5d920e0e Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 18 Jul 2025 17:32:32 +0200 Subject: [PATCH 26/69] refactor: change `List` to `Set` for `convertJsonToSettings` #11639 - enforce unique `Setting` objects - enable easier detection of differences between two sets of settings --- .../settings/SettingsServiceBean.java | 9 ++++--- .../settings/SettingsServiceBeanTest.java | 25 +++++++++++-------- 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index e0bab4dadc9..f30814a2a4b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -1121,7 +1121,7 @@ public void setAllFromJson(JsonObject settings) { } // Convert JSON to Setting objects - List newSettings = convertJsonToSettings(settings); + Set newSettings = convertJsonToSettings(settings); // Perform atomic update (replace all settings) replaceAllSettings(newSettings); @@ -1133,14 +1133,15 @@ public void setAllFromJson(JsonObject settings) { * If the key includes a language 
(indicated by a separator), the language * information is extracted and included in the Setting object. * Note: This method expects a pre-validated JsonObject and will happily create - * nonsense settings for you otherwise. + * nonsense settings for you otherwise. This is a reason for the package visibility. * * @param settings a (pre-validated) {@link JsonObject} containing key-value pairs where * each key represents a setting name (and optionally a language code), * and each value represents the associated content. * @return a {@link List} of {@link Setting} objects parsed from the input JSON object. */ - static List convertJsonToSettings(JsonObject settings) { + static Set convertJsonToSettings(JsonObject settings) { + Objects.requireNonNull(settings, "The settings object cannot be null."); return settings.entrySet().stream() .map(entry -> { String key = entry.getKey(); @@ -1159,7 +1160,7 @@ static List convertJsonToSettings(JsonObject settings) { return new Setting(key, value); } }) - .collect(Collectors.toList()); + .collect(Collectors.toSet()); } /** diff --git a/src/test/java/edu/harvard/iq/dataverse/settings/SettingsServiceBeanTest.java b/src/test/java/edu/harvard/iq/dataverse/settings/SettingsServiceBeanTest.java index 34d791d8fc3..bcc82933226 100644 --- a/src/test/java/edu/harvard/iq/dataverse/settings/SettingsServiceBeanTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/settings/SettingsServiceBeanTest.java @@ -16,6 +16,7 @@ import java.util.Collections; import java.util.List; +import java.util.Set; import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -247,13 +248,15 @@ void testConvertJsonToSettings_simpleKeyValues() { .build(); // When - List result = SettingsServiceBean.convertJsonToSettings(input); + Set result = SettingsServiceBean.convertJsonToSettings(input); // Then assertEquals(3, result.size()); - assertEquals(new Setting(":Key1", "Value1"), result.get(0)); - 
assertEquals(new Setting(":Key2", "123456"), result.get(1)); - assertEquals(new Setting(":Key3", "123456"), result.get(2)); + assertEquals( + Set.of(new Setting(":Key1", "Value1"), + new Setting(":Key2", "123456"), + new Setting(":Key3", "123456") + ), result); } @Test @@ -265,12 +268,14 @@ void testConvertJsonToSettings_localizedKeysWithSimpleValues() { .build(); // When - List result = SettingsServiceBean.convertJsonToSettings(input); + Set result = SettingsServiceBean.convertJsonToSettings(input); // Then assertEquals(2, result.size()); - assertEquals(new Setting(":LocalizedKey", "en", "EnglishValue"), result.get(0)); - assertEquals(new Setting(":LocalizedKey", "fr", "FrenchValue"), result.get(1)); + assertEquals( + Set.of(new Setting(":LocalizedKey", "en", "EnglishValue"), + new Setting(":LocalizedKey", "fr", "FrenchValue") + ), result); } @Test @@ -279,7 +284,7 @@ void testConvertJsonToSettings_emptyJson() { JsonObject input = Json.createObjectBuilder().build(); // When - List result = SettingsServiceBean.convertJsonToSettings(input); + Set result = SettingsServiceBean.convertJsonToSettings(input); // Then assertEquals(0, result.size()); @@ -299,13 +304,13 @@ void testConvertJsonToSettings_complexJsonValue() { .build(); // When - List result = SettingsServiceBean.convertJsonToSettings(input); + Set result = SettingsServiceBean.convertJsonToSettings(input); // Then assertEquals(1, result.size()); assertEquals(new Setting(":MaxFileUploadSizeInBytes", "{\"default\":\"2147483648\",\"fileOne\":\"4000000000\",\"s3\":\"8000000000\"}"), - result.get(0)); + result.stream().toList().get(0)); } From 6f90535c0bfb03aca7d2290344e701775dbf3304 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 18 Jul 2025 17:33:44 +0200 Subject: [PATCH 27/69] fix: add null checks for `settings` in `setAllFromJson` and `validateKeys` #11639 --- .../harvard/iq/dataverse/settings/SettingsServiceBean.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git 
a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index f30814a2a4b..e40165668d6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -31,6 +31,7 @@ import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.Objects; import java.util.Set; import java.util.StringTokenizer; import java.util.function.Function; @@ -1114,6 +1115,10 @@ public JsonObject listAllAsJson() { * @throws IllegalArgumentException if the JSON object contains invalid keys or invalid settings. */ public void setAllFromJson(JsonObject settings) { + if (settings == null) { + throw new IllegalArgumentException("Settings cannot be null"); + } + // Validate the input List invalidKeys = validateKeys(settings); if (!invalidKeys.isEmpty()) { @@ -1331,6 +1336,7 @@ public static String convertToJsonKey(Setting setting) { * @return a list of invalid keys as an unmodifiable list */ public static List validateKeys(JsonObject settings) { + Objects.requireNonNull(settings, "The settings object cannot be null."); List invalidKeys = new ArrayList<>(); for (String key : settings.keySet()) { try { From 1f8a280711bf37c05aa3273c5f6302d1dd8a4b67 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 18 Jul 2025 17:38:30 +0200 Subject: [PATCH 28/69] feat: return operation details from `setAllFromJson` in `SettingsServiceBean` after bulk operation #11639 --- .../edu/harvard/iq/dataverse/api/Admin.java | 4 ++-- .../settings/SettingsServiceBean.java | 23 ++++++++++--------- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index e00a93a826f..1dc6e70fa3f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -226,8 +226,8 @@ public Response putAllSettings(JsonObject settings) { } // Transfer to domain objects and deeper validation to be handled by the service layer. - settingsSvc.setAllFromJson(settings); - return ok("All database options successfully updated."); + JsonObjectBuilder successfullOperations = settingsSvc.setAllFromJson(settings); + return ok("All database options successfully updated.", successfullOperations); } catch (IllegalArgumentException iae) { return error(Response.Status.BAD_REQUEST, iae.getMessage()); diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index e40165668d6..cd497d91329 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -1104,17 +1104,16 @@ public JsonObject listAllAsJson() { } /** - * Updates all settings by replacing them with the settings provided in the given JSON object. - * This method validates the keys and values from the JSON object, converts them into - * a list of Setting objects, and performs an atomic update of the internal settings. + * Updates all current settings from the specified JSON object. Validates the input JSON, + * converts it to a set of settings, and replaces all existing settings with the new ones + * in an atomic operation. If the settings object is null, contains invalid keys, or if the new + * set of settings is empty, the method throws an appropriate exception. * - * @param settings a JsonObject containing the new settings to apply. - * Each key corresponds to a setting name, and each value corresponds - * to its respective value. The keys and values will be validated before - * applying the updates. - * @throws IllegalArgumentException if the JSON object contains invalid keys or invalid settings. 
+ * @param settings the JSON object containing the new configuration settings to be applied; must not be null + * @return a JsonObjectBuilder representing the operational details of the applied updates + * @throws IllegalArgumentException if the settings object is null, contains invalid keys, or results in empty settings */ - public void setAllFromJson(JsonObject settings) { + public JsonObjectBuilder setAllFromJson(JsonObject settings) { if (settings == null) { throw new IllegalArgumentException("Settings cannot be null"); } @@ -1128,8 +1127,10 @@ public void setAllFromJson(JsonObject settings) { // Convert JSON to Setting objects Set newSettings = convertJsonToSettings(settings); - // Perform atomic update (replace all settings) - replaceAllSettings(newSettings); + // Execute the update (in one atomic operation using a transaction) + Map operationalDetails = replaceAllSettings(newSettings); + + return Op.convertToJson(operationalDetails); } /** From 614fc7106f86eca1688bde93fd47750b63db26f5 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 18 Jul 2025 17:38:52 +0200 Subject: [PATCH 29/69] fix: prevent accidental removal of all settings in `setAllFromJson` by validating non-empty input #11639 --- .../iq/dataverse/settings/SettingsServiceBean.java | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index cd497d91329..5ff0effedff 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -1127,10 +1127,16 @@ public JsonObjectBuilder setAllFromJson(JsonObject settings) { // Convert JSON to Setting objects Set newSettings = convertJsonToSettings(settings); - // Execute the update (in one atomic operation using a transaction) - Map operationalDetails = replaceAllSettings(newSettings); 
+ // Perform atomic update (replace all settings) + // We don't allow to completely wipe all settings coming from JSON here, so no acciddents happen. + // (It's completely unrealistic someone would try to remove all settings and leave it at that.) + if (newSettings != null && !newSettings.isEmpty()) { + // Execute the update (in one atomic operation using a transaction) + Map operationalDetails = replaceAllSettings(newSettings); - return Op.convertToJson(operationalDetails); + return Op.convertToJson(operationalDetails); + } + throw new IllegalArgumentException("Settings cannot be empty - you'd wipe the entire configuration."); } /** From 580039b39ac083763a0f318eabbb0adb3c052199 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 21 Jul 2025 11:27:49 +0200 Subject: [PATCH 30/69] fix: ensure transactional integrity in `replaceAllSettings` using self-invocation via EJB reference #11639 Added an `EJB` self-reference in `SettingsServiceBean` to ensure proper handling of transactions, with updates to `replaceAllSettings` to use `@Transactional(REQUIRES_NEW)`. Updated method calls and comments to reflect the required use of self-invocation for EJB functionalities. --- .../dataverse/settings/SettingsServiceBean.java | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index 5ff0effedff..05fe56875b3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -750,6 +750,15 @@ public static SettingsServiceBean.Key parse(String key) { @PersistenceContext EntityManager em; + /** + * A reference to the current instance of the SettingsServiceBean. 
+ * Used when self-invocation is required for internal method calls + * within the same bean to ensure that all EJB functionalities + * such as transactions and security are properly applied. + */ + @EJB + private SettingsServiceBean self; + @EJB ActionLogServiceBean actionLogSvc; @@ -1132,7 +1141,8 @@ public JsonObjectBuilder setAllFromJson(JsonObject settings) { // (It's completely unrealistic someone would try to remove all settings and leave it at that.) if (newSettings != null && !newSettings.isEmpty()) { // Execute the update (in one atomic operation using a transaction) - Map operationalDetails = replaceAllSettings(newSettings); + // Note: We need to call via self-reference so the EJB container can create a transaction as intended. + Map operationalDetails = self.replaceAllSettings(newSettings); return Op.convertToJson(operationalDetails); } @@ -1213,6 +1223,9 @@ static JsonObjectBuilder convertToJson(Map operationalDetails) { * - Updates the content of existing settings that match the keys in the provided new settings. * - Creates new settings that are not present in the database. * + * If calling this method from within this class, make sure to use an EJB injected self-reference to it. + * Otherwise, the EJB container will not be able to provide a transaction as intended by {@code @Transactional}. + * * @param newSettings the set of new settings to replace the existing ones. * Each setting is uniquely identified by its name and language. * Must not be null (it may be empty). @@ -1220,7 +1233,7 @@ static JsonObjectBuilder convertToJson(Map operationalDetails) { * are the settings involved, and the values are the types of operations * performed (CREATED, UPDATED, DELETED). 
*/ - @Transactional + @Transactional(Transactional.TxType.REQUIRES_NEW) public Map replaceAllSettings(Set newSettings) { Objects.requireNonNull(newSettings, "The list of new settings cannot be null (it may be empty)."); From bd85f7201bfaf26ffb73aee7a69567c5f84750d0 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 21 Jul 2025 12:26:55 +0200 Subject: [PATCH 31/69] fix,build: ensure proper late variable binding for JaCoCo configuration in `pom.xml` #11639 Updated JaCoCo settings in `pom.xml` to use `@{}` syntax for late variable binding in `argLine` for Maven Surefire and Maven Failsafe, ensuring compatibility with `prepare-agent` steps. This seems to be compatible with IntelliJ's parser for argLine - tests execute successfully. --- pom.xml | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/pom.xml b/pom.xml index 822eaaeb79d..7a3b16a376b 100644 --- a/pom.xml +++ b/pom.xml @@ -21,8 +21,10 @@ false false integration - - + + + -Ddummy.jacoco.property=true + -Ddummy.jacoco.property=true @@ -1038,7 +1040,13 @@ ${testsToExclude} ${skipUnitTests} - ${surefire.jacoco.args} ${argLine} + + @{surefire.jacoco.args} ${argLine} **/builtin-users-spi/** @@ -1050,7 +1058,13 @@ maven-failsafe-plugin ${it.groups} - ${failsafe.jacoco.args} ${argLine} + + @{failsafe.jacoco.args} ${argLine} ${skipIntegrationTests} From c91b4fc40ee70130ebfd4d18bc7dc74a6e6328c4 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 21 Jul 2025 12:27:09 +0200 Subject: [PATCH 32/69] style: correct typo in `internalError` key in `JsonResponseBuilder` --- .../edu/harvard/iq/dataverse/api/util/JsonResponseBuilder.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/util/JsonResponseBuilder.java b/src/main/java/edu/harvard/iq/dataverse/api/util/JsonResponseBuilder.java index a80d54508fd..9095a40c608 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/util/JsonResponseBuilder.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/api/util/JsonResponseBuilder.java @@ -131,7 +131,7 @@ public JsonResponseBuilder requestContentType(HttpServletRequest request) { * @return The enhanced builder */ public JsonResponseBuilder internalError(Throwable ex) { - this.entityBuilder.add("interalError", ex.getClass().getSimpleName()); + this.entityBuilder.add("internalError", ex.getClass().getSimpleName()); if (ex.getCause() != null) { this.entityBuilder.add("internalCause", ex.getCause().getClass().getSimpleName()); } From 4b5b6896a124c549c3fef7fd8b3e4f2a56da6e08 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 22 Jul 2025 18:33:08 +0200 Subject: [PATCH 33/69] fix,style: update SQL and class definitions to standardize lowercase table names and constraint naming conventions #11639 Without converting to this PostgreSQL convention, the checks to avoid adding existing constraints failed, resulting in failing Flyway migrations during startup. --- .../iq/dataverse/settings/Setting.java | 2 +- src/main/resources/db/migration/V6.7.0.1.sql | 20 +++++++++--------- src/main/resources/db/migration/V6.7.0.2.sql | 21 +++++++++---------- 3 files changed, 21 insertions(+), 22 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/Setting.java b/src/main/java/edu/harvard/iq/dataverse/settings/Setting.java index 0f240302366..e187d3db1cc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/Setting.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/Setting.java @@ -32,7 +32,7 @@ }) @Entity @Table(uniqueConstraints = { - @UniqueConstraint(name = "UC_setting_name_lang", columnNames = {"name", "lang"}), + @UniqueConstraint(name = "uc_setting_name_lang", columnNames = {"name", "lang"}), }) public class Setting implements Serializable { diff --git a/src/main/resources/db/migration/V6.7.0.1.sql b/src/main/resources/db/migration/V6.7.0.1.sql index cfe1e75d0b5..656ef04d572 100644 --- a/src/main/resources/db/migration/V6.7.0.1.sql +++ 
b/src/main/resources/db/migration/V6.7.0.1.sql @@ -6,7 +6,7 @@ DO $$ base_setting_content TEXT; format_settings_cursor CURSOR FOR SELECT name, content - FROM Setting + FROM setting WHERE name LIKE ':TabularIngestSizeLimit:%' AND lang IS NULL; format_record RECORD; @@ -18,7 +18,7 @@ DO $$ BEGIN -- Check if there are any format-specific settings SELECT EXISTS( - SELECT 1 FROM Setting + SELECT 1 FROM setting WHERE name LIKE ':TabularIngestSizeLimit:%' AND lang IS NULL ) INTO has_format_settings; @@ -31,7 +31,7 @@ DO $$ -- Get the base setting (without format suffix) if it exists SELECT content INTO base_setting_content - FROM Setting + FROM setting WHERE name = ':TabularIngestSizeLimit' AND lang IS NULL; @@ -42,7 +42,7 @@ DO $$ format_value := base_setting_content::BIGINT; json_object := json_object || jsonb_build_object('default', format_value); EXCEPTION WHEN invalid_text_representation THEN - RAISE WARNING 'Base TabularIngestSizeLimit setting contains non-numeric value: %. Setting to 0 (disabling ingest!).', base_setting_content; + RAISE WARNING 'Base TabularIngestSizeLimit setting contains non-numeric value: %. Setting it to 0 (disabling ingest!).', base_setting_content; json_object := json_object || jsonb_build_object('default', 0); END; END IF; @@ -57,7 +57,7 @@ DO $$ format_value := format_record.content::BIGINT; json_object := json_object || jsonb_build_object(format_name, format_value); EXCEPTION WHEN invalid_text_representation THEN - warning_message := format('Format-specific TabularIngestSizeLimit setting %s contains non-numeric value: %s. Setting to 0 (disabling ingest!).', + warning_message := format('Format-specific TabularIngestSizeLimit setting %s contains non-numeric value: %s. 
Setting it to 0 (disabling ingest!).', format_record.name, format_record.content); RAISE WARNING '%', warning_message; json_object := json_object || jsonb_build_object(format_name, 0); @@ -65,13 +65,13 @@ DO $$ END LOOP; -- Insert or update the new JSON-based setting - INSERT INTO Setting (name, content, lang) + INSERT INTO setting (name, content, lang) VALUES (':TabularIngestSizeLimit', json_object::TEXT, NULL) ON CONFLICT (name) WHERE lang IS NULL DO UPDATE SET content = EXCLUDED.content; -- Delete all format-specific settings - DELETE FROM Setting + DELETE FROM setting WHERE name LIKE ':TabularIngestSizeLimit:%' AND lang IS NULL; @@ -81,8 +81,8 @@ DO $$ -- 2. Migrate BuiltinUsers.KEY to the new setting name DO $$ BEGIN - IF EXISTS (SELECT 1 FROM Setting WHERE name = 'BuiltinUsers.KEY') THEN - INSERT INTO Setting (name, lang, content) VALUES (':BuiltinUsersKey', NULL, (SELECT content FROM Setting WHERE name = 'BuiltinUsers.KEY')); - DELETE FROM Setting WHERE name = 'BuiltinUsers.KEY'; + IF EXISTS (SELECT 1 FROM setting WHERE name = 'BuiltinUsers.KEY') THEN + INSERT INTO setting (name, lang, content) VALUES (':BuiltinUsersKey', NULL, (SELECT content FROM setting WHERE name = 'BuiltinUsers.KEY')); + DELETE FROM setting WHERE name = 'BuiltinUsers.KEY'; END IF; END $$; \ No newline at end of file diff --git a/src/main/resources/db/migration/V6.7.0.2.sql b/src/main/resources/db/migration/V6.7.0.2.sql index 177f88332b7..cd4b5018b1d 100644 --- a/src/main/resources/db/migration/V6.7.0.2.sql +++ b/src/main/resources/db/migration/V6.7.0.2.sql @@ -4,32 +4,31 @@ -- 3. 
Add NOT NULL constraints and unique constraint for name+lang pairs -- First, update any existing NULL lang values to empty string -UPDATE Setting SET lang = '' WHERE lang IS NULL; +UPDATE setting SET lang = '' WHERE lang IS NULL; -- Postgres doesn't support IF NOT EXISTS for ALTER COLUMN or ADD CONSTRAINT, so we need conditional logic DO $$ BEGIN -- Only alter columns if they need to be changed IF EXISTS (SELECT 1 FROM information_schema.columns - WHERE table_name = 'Setting' AND column_name = 'name' + WHERE table_name = 'setting' AND column_name = 'name' AND (data_type = 'text' OR is_nullable = 'YES')) THEN ALTER TABLE setting ALTER COLUMN name TYPE VARCHAR(255); - ALTER TABLE Setting ALTER COLUMN name SET NOT NULL; + ALTER TABLE setting ALTER COLUMN name SET NOT NULL; END IF; IF EXISTS (SELECT 1 FROM information_schema.columns - WHERE table_name = 'Setting' AND column_name = 'lang' + WHERE table_name = 'setting' AND column_name = 'lang' AND (data_type = 'text' OR is_nullable = 'YES')) THEN - ALTER TABLE Setting ALTER COLUMN lang TYPE VARCHAR(10); - ALTER TABLE Setting ALTER COLUMN lang SET NOT NULL; - ALTER TABLE Setting ALTER COLUMN lang SET DEFAULT ''; + ALTER TABLE setting ALTER COLUMN lang TYPE VARCHAR(10); + ALTER TABLE setting ALTER COLUMN lang SET NOT NULL; + ALTER TABLE setting ALTER COLUMN lang SET DEFAULT ''; END IF; IF NOT EXISTS (SELECT 1 FROM information_schema.table_constraints - WHERE table_name = 'Setting' - AND constraint_name = 'UC_setting_name_lang' + WHERE table_name = 'setting' + AND constraint_name = 'uc_setting_name_lang' AND constraint_type = 'UNIQUE') THEN - ALTER TABLE Setting ADD CONSTRAINT UC_setting_name_lang UNIQUE (name, lang); + ALTER TABLE setting ADD CONSTRAINT uc_setting_name_lang UNIQUE (name, lang); END IF; - END $$; From c7434a5cae652e811cfa97cb8cdbf1cddd51fd3e Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 22 Jul 2025 18:33:33 +0200 Subject: [PATCH 34/69] fix: update Flyway migration to replace constraints and 
indexes in `setting` table for consistent validation #11639 Added conditional logic to drop outdated constraints and the unique index in the `setting` table. New validation imposes restrictions with an empty `lang` as the default. Revamped design aligns with API validation requirements. --- src/main/resources/db/migration/V6.7.0.2.sql | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/main/resources/db/migration/V6.7.0.2.sql b/src/main/resources/db/migration/V6.7.0.2.sql index cd4b5018b1d..07a568f0834 100644 --- a/src/main/resources/db/migration/V6.7.0.2.sql +++ b/src/main/resources/db/migration/V6.7.0.2.sql @@ -9,6 +9,13 @@ UPDATE setting SET lang = '' WHERE lang IS NULL; -- Postgres doesn't support IF NOT EXISTS for ALTER COLUMN or ADD CONSTRAINT, so we need conditional logic DO $$ BEGIN + -- These database constraints were added with Dataverse 4.15, but they had no representation in the code, + -- not even a comment about their existence. See also Flyway script V4.16.0.1__5303-addColumn-to-settingTable.sql. + -- We are going to replace them with the new design here, using an empty lang as default. + -- Before, lang could be more or less anything. Now we do imply restrictions on validation within the API. + ALTER TABLE setting DROP CONSTRAINT IF EXISTS non_empty_lang; + DROP INDEX IF EXISTS unique_settings; + -- Only alter columns if they need to be changed IF EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name = 'setting' AND column_name = 'name' From ff45c8096afc63a3fa5582f37caf3599bec4b5b8 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 22 Jul 2025 18:34:28 +0200 Subject: [PATCH 35/69] fix: update `get` method in `Admin` API to handle third parameter for settings retrieval #11639 This was wrongly requesting the setting, using the language parameter as the default value if not configured. 
--- src/main/java/edu/harvard/iq/dataverse/api/Admin.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index 1dc6e70fa3f..d81b5115ea1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -281,7 +281,7 @@ public Response getSetting(@PathParam("name") String name, @PathParam("lang") St SettingsServiceBean.validateSettingName(name); SettingsServiceBean.validateSettingLang(lang); - String content = settingsSvc.get(name, lang); + String content = settingsSvc.get(name, lang, null); return (content != null) ? ok(content) : notFound("Setting " + name + " for language " + lang + " not found."); } catch (IllegalArgumentException iae) { return error(Response.Status.BAD_REQUEST, iae.getMessage()); From 11890681720d13ca349f3dcab1b8489ea4dd4397 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 22 Jul 2025 18:42:57 +0200 Subject: [PATCH 36/69] test: add nested test cases for settings API in `AdminIT` #11639 - Introduced new nested test class `SettingsAPI` to `AdminIT` with focused test cases for settings retrieval, updates, and localization handling. - Added utility methods in `UtilIT` to support language-specific settings operations. - Ensured thorough cleanup in tests to prevent state leakage. 
--- .../edu/harvard/iq/dataverse/api/AdminIT.java | 187 ++++++++++++++++-- .../edu/harvard/iq/dataverse/api/UtilIT.java | 5 + 2 files changed, 175 insertions(+), 17 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java b/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java index b48c5507a54..8143796cbf1 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java @@ -1,38 +1,48 @@ package edu.harvard.iq.dataverse.api; -import io.restassured.RestAssured; -import io.restassured.path.json.JsonPath; -import io.restassured.response.Response; import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.authorization.providers.builtin.BuiltinAuthenticationProvider; import edu.harvard.iq.dataverse.authorization.providers.oauth2.impl.GitHubOAuth2AP; import edu.harvard.iq.dataverse.authorization.providers.oauth2.impl.OrcidOAuth2AP; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; - -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; - +import io.restassured.RestAssured; +import io.restassured.path.json.JsonPath; +import io.restassured.response.Response; import jakarta.json.Json; import jakarta.json.JsonArray; +import jakarta.json.JsonObject; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assumptions; +import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.ValueSource; - - +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.UUID; import 
java.util.logging.Logger; -import static jakarta.ws.rs.core.Response.Status.*; -import static org.hamcrest.CoreMatchers.*; +import static io.restassured.RestAssured.given; +import static jakarta.ws.rs.core.Response.Status.BAD_REQUEST; +import static jakarta.ws.rs.core.Response.Status.CREATED; +import static jakarta.ws.rs.core.Response.Status.FORBIDDEN; +import static jakarta.ws.rs.core.Response.Status.INTERNAL_SERVER_ERROR; +import static jakarta.ws.rs.core.Response.Status.NOT_FOUND; +import static jakarta.ws.rs.core.Response.Status.OK; +import static jakarta.ws.rs.core.Response.Status.UNAUTHORIZED; +import static org.hamcrest.CoreMatchers.containsString; +import static org.hamcrest.CoreMatchers.equalTo; +import static org.hamcrest.CoreMatchers.notNullValue; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; public class AdminIT { @@ -45,7 +55,150 @@ public class AdminIT { public static void setUp() { RestAssured.baseURI = UtilIT.getRestAssuredBaseUri(); } - + + @Nested + class SettingsAPI { + + static final SettingsServiceBean.Key harmlessSetting = SettingsServiceBean.Key.InstallationName; + static final String harmlessValue = "Test Instance Name"; + static final String language = "fr"; + static final String harmlessL10nValue = "Nom de l'instance de test"; + + @AfterAll + static void destroy() { + // No leftover settings after breaking tests! 
+ UtilIT.deleteSetting(harmlessSetting); + UtilIT.deleteSetting(harmlessSetting, language); + } + + @Test + void testSettingsRoundTrip() { + Assumptions.assumeTrue(UtilIT.getSetting(harmlessSetting).statusCode() == NOT_FOUND.getStatusCode(), "Harmless setting should not exist yet."); + Assumptions.assumeTrue(UtilIT.getSetting(harmlessSetting, language).statusCode() == NOT_FOUND.getStatusCode(), "Harmless localized setting should not exist yet."); + + // Step 0: Add a localized setting so we can make sure the put all can cope with that, too. + UtilIT.setSetting(harmlessSetting, harmlessL10nValue, language); + + // Step 1: Get current settings state + Response getResponse = given() + .when() + .get("/api/admin/settings"); + + getResponse.then() + .assertThat() + .statusCode(OK.getStatusCode()) + .contentType("application/json") + .body("status", equalTo("OK")) + .body("data.'"+harmlessSetting+"/lang/"+language+"'", equalTo(harmlessL10nValue)); + + // Store original settings as JsonObject for later restoration + JsonObject originalSettings = Json.createReader(getResponse.body().asInputStream()) + .readObject() + .getJsonObject("data"); + + // Step 2: Set our harmless test setting using UtilIT + Response setResponse = UtilIT.setSetting(harmlessSetting.toString(), harmlessValue); + setResponse.then() + .assertThat() + .statusCode(OK.getStatusCode()); + + // Step 3: Verify the harmless setting was set + Response verifySetResponse = UtilIT.getSetting(harmlessSetting); + + verifySetResponse.then() + .assertThat() + .statusCode(OK.getStatusCode()) + .body("data.message", equalTo(harmlessValue)); + + // Step 4: Put back the original settings (this is what we're testing) + Response putResponse = given() + //.header("X-Dataverse-key", "") + .header("Content-Type", "application/json") + .body(originalSettings.toString()) + .when() + .put("/api/admin/settings"); + + putResponse.then() + .assertThat() + .statusCode(OK.getStatusCode()) + .body("status", equalTo("OK")) + 
.body("message.message", containsString("successfully updated")); + + // Step 5: Verify the harmless setting is gone (restored to original state) + Response verifyRestoredResponse = given() + //.header("X-Dataverse-key", "") + .when() + .get("/api/admin/settings" + harmlessSetting.toString()); + + verifyRestoredResponse.then() + .assertThat() + .statusCode(NOT_FOUND.getStatusCode()); // Should not exist anymore + + // Step 6: Verify overall settings state matches original + Response finalGetResponse = given() + //.header("X-Dataverse-key", "") + .when() + .get("/api/admin/settings"); + + finalGetResponse.then() + .assertThat() + .statusCode(OK.getStatusCode()); + + // Store original settings as JsonObject for later restoration + JsonObject finalSettings = Json.createReader(getResponse.body().asInputStream()) + .readObject() + .getJsonObject("data"); + + // Verify the settings are back to original state (our test setting should be absent) + assertFalse(finalSettings.containsKey(harmlessSetting.toString()), "Harmless setting should not exist in restored settings"); + + // Cleanup: delete the localized setting + UtilIT.deleteSetting(harmlessSetting, language); + } + + @Test + void testGetAllSettingsWithLocalization() { + int statusCode = UtilIT.getSetting(harmlessSetting, language).statusCode(); + Assumptions.assumeTrue(statusCode == NOT_FOUND.getStatusCode(), "Harmless localized setting should not exist yet. 
Status Code: " + statusCode); + + // Given + UtilIT.setSetting(harmlessSetting, harmlessL10nValue, language); + + // When + Response getResponse = given() + .when() + .get("/api/admin/settings"); + + // Then + getResponse.then() + .assertThat() + .statusCode(OK.getStatusCode()) + .contentType("application/json") + .body("status", equalTo("OK")) + .body("data.'"+harmlessSetting+"/lang/"+language+"'", equalTo(harmlessL10nValue)); + + // Cleanup + UtilIT.deleteSetting(harmlessSetting, language); + } + + @Test + void testPutAllSettingsWithEmptyJson() { + // Test error handling for empty JSON + Response response = given() + //.header("X-Dataverse-key", UtilIT.getSuperuserApiToken()) + .header("Content-Type", "application/json") + .body("{}") + .when() + .put("/api/admin/settings"); + + response.then() + .assertThat() + .statusCode(BAD_REQUEST.getStatusCode()) + .body("message", containsString("Empty or invalid JSON object")); + } + } + + @Test public void testListAuthenticatedUsers() throws Exception { Response anon = UtilIT.listAuthenticatedUsers(testNonSuperuserApiToken); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index eba8181e566..aa11ceb0d18 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -2511,6 +2511,11 @@ static Response getSetting(SettingsServiceBean.Key settingKey) { Response response = given().when().get("/api/admin/settings/" + settingKey); return response; } + + static Response getSetting(SettingsServiceBean.Key settingKey, String language) { + Response response = given().when().get("/api/admin/settings/" + settingKey + "/lang/" + language); + return response; + } static Response setSetting(SettingsServiceBean.Key settingKey, String value) { Response response = given().body(value).when().put("/api/admin/settings/" + settingKey); From 982e0921524d8d15e9a6101c00e91d14a6f2b759 Mon Sep 17 00:00:00 
2001 From: Oliver Bertuch Date: Wed, 23 Jul 2025 17:18:11 +0200 Subject: [PATCH 37/69] docs: add release note and API docs for #11639 --- .../11639-db-opts-idempotency.md | 21 ++++++++++ doc/sphinx-guides/source/api/native-api.rst | 38 ++++++++++++++++--- .../source/installation/config.rst | 2 + 3 files changed, 56 insertions(+), 5 deletions(-) create mode 100644 doc/release-notes/11639-db-opts-idempotency.md diff --git a/doc/release-notes/11639-db-opts-idempotency.md b/doc/release-notes/11639-db-opts-idempotency.md new file mode 100644 index 00000000000..2af82f4ffda --- /dev/null +++ b/doc/release-notes/11639-db-opts-idempotency.md @@ -0,0 +1,21 @@ +## Database Settings Cleanup + +With this release, we remove some legacy specialties around Database Settings and provide better Admin API endpoints for them. + +Most important changes: + +1. Setting `BuiltinUsers.KEY` was renamed to `:BuiltinUsersKey`, aligned with our general naming pattern for options. +2. Setting `:TabularIngestSizeLimit` no longer uses suffixes for formats and becomes a JSON-based setting instead. +3. If set, both settings will be migrated to their new form automatically for you (Flyway migration). +4. You can no longer (accidentally) create or use arbitrary setting names or languages. + All Admin API endpoints for settings now validate setting names and languages for existence and compliance. + +As an administrator of a Dataverse instance, you can now make use of enhanced Bulk Operations on the Settings Admin API: + +1. Retrieving all settings as JSON via `GET /api/admin/settings` supports localized options now, too. +2. You can replace all existing settings in an idempotent way sending JSON to `PUT /api/admin/settings`. + This will create, update and remove settings as necessary in one atomic operation. + The new endpoint is especially useful to admins using GitOps or other automations. + It allows control over all Database Settings from a single source without risking an undefined state. 
+ +Note: Despite the validation of setting names and languages, the content of any database setting is still not being validated when using the Settings Admin API! diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 7dc50e7a532..c55fc424496 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -6775,7 +6775,11 @@ If the PID is not managed by Dataverse, this call will report if the PID is reco Admin ----- -This is the administrative part of the API. For security reasons, it is absolutely essential that you block it before allowing public access to a Dataverse installation. Blocking can be done using settings. See the ``post-install-api-block.sh`` script in the ``scripts/api`` folder for details. See :ref:`blocking-api-endpoints` in Securing Your Installation section of the Configuration page of the Installation Guide. +This is the administrative part of the API. +For security reasons, it is absolutely essential that you block it before allowing public access to a Dataverse installation. +Blocking can be done using settings. +See the ``post-install-api-block.sh`` script in the ``scripts/api`` folder for details. +See :ref:`blocking-api-endpoints` in Securing Your Installation section of the Configuration page of the Installation Guide. List All Database Settings ~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -6784,13 +6788,29 @@ List all settings:: GET http://$SERVER/api/admin/settings -Configure Database Setting -~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. _settings_put_bulk: + +Configure All Database Settings +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Replace all settings in a single idempotent and atomic operation:: + + PUT http://$SERVER/api/admin/settings + +See JSON ``data`` object in output of ``GET /api/admin/settings`` for the JSON input structure for this endpoint. 
+The :doc:`../installation/config` page of the Installation Guide has a :ref:`complete list of all the available settings `. + +Configure Single Database Setting +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Sets setting ``name`` to the body of the request:: PUT http://$SERVER/api/admin/settings/$name +Sets a localized setting ``name`` for locale/language ``lang`` to the body of the request:: + + PUT http://$SERVER/api/admin/settings/$name/lang/$lang + Get Single Database Setting ~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -6798,13 +6818,21 @@ Get the setting under ``name``:: GET http://$SERVER/api/admin/settings/$name -Delete Database Setting -~~~~~~~~~~~~~~~~~~~~~~~ +Gets a localized setting under ``name`` for locale/language ``lang``:: + + GET http://$SERVER/api/admin/settings/$name/lang/$lang + +Delete Single Database Setting +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Delete the setting under ``name``:: DELETE http://$SERVER/api/admin/settings/$name +Delete a localized setting under ``name`` for locale/language ``lang``:: + + DELETE http://$SERVER/api/admin/settings/$name/lang/$lang + .. _list-all-feature-flags: List All Feature Flags diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 8722da19b78..f064cc51a06 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -3879,6 +3879,8 @@ The most commonly used configuration options are listed first. The pattern you will observe in curl examples below is that an HTTP ``PUT`` is used to add or modify a setting. If you perform an HTTP ``GET`` (the default when using curl), the output will contain the value of the setting, if it has been set. You can also do a ``GET`` of all settings with ``curl http://localhost:8080/api/admin/settings`` which you may want to pretty-print by piping the output through a tool such as jq by appending ``| jq .``. 
If you want to remove a setting, use an HTTP ``DELETE`` such as ``curl -X DELETE http://localhost:8080/api/admin/settings/:GuidesBaseUrl`` . +For your convenience, there is also an Admin API endpoint to :ref:`bulk manage database settings in an atomic, idempotent fashion <settings_put_bulk>`. + .. _:BlockedApiPolicy: :BlockedApiPolicy (Deprecated) From 1b06dfb868ca84443d3a541b853d478a854cfc1c Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 23 Jul 2025 18:27:49 +0200 Subject: [PATCH 38/69] feat: add `UNCHANGED` state handling in `SettingsServiceBean` operations #11639 - Introduced `UNCHANGED` as a new operation type for settings. - Updated `convertToJson` method to include unchanged settings in JSON output. - Enhanced `replaceAllSettings` logic to track unchanged settings when content remains identical. - The reason for this is idempotency: if nothing changed, nothing should be done. Replacing the present content of an existing setting is unnecessary, wasting CPU cycles, and we also want the admin to be informed about what actually is getting changed. Before, all of these unchanged elements would be counted as "updated". 
--- .../settings/SettingsServiceBean.java | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index 05fe56875b3..5f82030eb75 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -1191,7 +1191,8 @@ static Set convertJsonToSettings(JsonObject settings) { static enum Op { UPDATED, CREATED, - DELETED; + DELETED, + UNCHANGED; static JsonObjectBuilder convertToJson(Map operationalDetails) { // Create a nice represenation of what happened as Json @@ -1199,6 +1200,7 @@ static JsonObjectBuilder convertToJson(Map operationalDetails) { JsonArrayBuilder created = Json.createArrayBuilder(); JsonArrayBuilder updated = Json.createArrayBuilder(); JsonArrayBuilder deleted = Json.createArrayBuilder(); + JsonArrayBuilder unchanged = Json.createArrayBuilder(); operationalDetails.forEach((setting, op) -> { String name = convertToJsonKey(setting); @@ -1206,13 +1208,15 @@ static JsonObjectBuilder convertToJson(Map operationalDetails) { case CREATED -> created.add(name); case UPDATED -> updated.add(name); case DELETED -> deleted.add(name); + case UNCHANGED -> unchanged.add(name); } }); return jbo .add("created", created) .add("updated", updated) - .add("deleted", deleted); + .add("deleted", deleted) + .add("unchanged", unchanged); } } @@ -1268,10 +1272,14 @@ public Map replaceAllSettings(Set newSettings) { // Setting exists in both - update with new values } else { Setting newSetting = newByKey.get(key); - // We use the already managed entity and update it with the content of the new setting. - // (This means we don't need to call em.merge(), the ORM will track and execute it for us.) 
- existingSetting.setContent(newSetting.getContent()); - opsTracking.put(existingSetting, Op.UPDATED); + if (existingSetting.getContent().equals(newSetting.getContent())) { + opsTracking.put(existingSetting, Op.UNCHANGED); + } else { + // We use the already managed entity and update it with the content of the new setting. + // (This means we don't need to call em.merge(), the ORM will track and execute it for us.) + existingSetting.setContent(newSetting.getContent()); + opsTracking.put(existingSetting, Op.UPDATED); + } } } From d500650ceddcd816c3b898dea8311358d9d3941a Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 23 Jul 2025 18:28:05 +0200 Subject: [PATCH 39/69] test: add unit tests for `replaceAllSettings` in `SettingsServiceBean` #11639 - Introduced a nested test class to validate `replaceAllSettings` behavior. - Added test cases to cover scenarios for null input, updates, deletions, creations, and unchanged settings. - Ensured proper mock verifications to validate interactions with `EntityManager` and prevent state leakage in tests. 
--- .../settings/SettingsServiceBeanTest.java | 87 +++++++++++++++++++ 1 file changed, 87 insertions(+) diff --git a/src/test/java/edu/harvard/iq/dataverse/settings/SettingsServiceBeanTest.java b/src/test/java/edu/harvard/iq/dataverse/settings/SettingsServiceBeanTest.java index bcc82933226..2b0ba2871f9 100644 --- a/src/test/java/edu/harvard/iq/dataverse/settings/SettingsServiceBeanTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/settings/SettingsServiceBeanTest.java @@ -4,6 +4,7 @@ import jakarta.json.JsonObject; import jakarta.persistence.EntityManager; import jakarta.persistence.TypedQuery; +import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Test; @@ -16,12 +17,17 @@ import java.util.Collections; import java.util.List; +import java.util.Map; import java.util.Set; import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.clearInvocations; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; class SettingsServiceBeanTest { @@ -315,4 +321,85 @@ void testConvertJsonToSettings_complexJsonValue() { } + + @Nested + class ReplaceAllSettingsTest { + + static TypedQuery typedQuery = mock(TypedQuery.class); + static EntityManager em = mock(EntityManager.class); + static SettingsServiceBean settingsServiceBean = new SettingsServiceBean(); + + @BeforeAll + static void setup() { + settingsServiceBean.em = em; + + when(em.createNamedQuery( + ArgumentMatchers.eq("Setting.findAll"), + ArgumentMatchers.eq(Setting.class))) + .thenReturn(typedQuery); + } + + @AfterEach + void reset() { + // After each test, we need to clear the invocations for test isolation. 
+ clearInvocations(em); + } + + @Test + void testReplaceAllSettings_null() { + // When/Then + NullPointerException exception = assertThrows(NullPointerException.class, + () -> settingsServiceBean.replaceAllSettings(null)); + assertEquals("The list of new settings cannot be null (it may be empty).", exception.getMessage()); + } + + @Test + void testReplaceAllSettings_updateDeleteCreate() { + // Given + Setting existingSetting1 = new Setting(":Key1", "Value1"); + Setting existingSetting2 = new Setting(":Key2", "Value2"); + Setting newSetting1 = new Setting(":Key1", "UpdatedValue1"); + Setting newSetting3 = new Setting(":Key3", "Value3"); + + when(typedQuery.getResultList()).thenReturn(List.of(existingSetting1, existingSetting2)); + + // When + Map result = settingsServiceBean.replaceAllSettings(Set.of(newSetting1, newSetting3)); + + // Then + assertEquals(3, result.size()); + assertEquals(SettingsServiceBean.Op.UPDATED, result.get(existingSetting1)); + assertEquals(SettingsServiceBean.Op.DELETED, result.get(existingSetting2)); + assertEquals(SettingsServiceBean.Op.CREATED, result.get(newSetting3)); + // We cannot track the em.merge() call in this unit-test, as this happens in ORM code, beyond our reach. + // Thus check the update to the ORM-tracked entity happened. 
+ assertEquals("UpdatedValue1", existingSetting1.getContent()); + + // Verify interactions + verify(em).remove(existingSetting2); + verify(em).persist(newSetting3); + verify(em).flush(); // verify persistence is enforced + } + + @Test + void testReplaceAllSettings_noChanges() { + // Given + Setting existingSetting = new Setting(":Key1", "Value1"); + Setting newSetting = new Setting(":Key1", "Value1"); + + when(typedQuery.getResultList()).thenReturn(List.of(existingSetting)); + + // When + Map result = settingsServiceBean.replaceAllSettings(Set.of(newSetting)); + + // Then + assertEquals(1, result.size()); + assertEquals(SettingsServiceBean.Op.UNCHANGED, result.get(existingSetting)); + + // Verify no interactions causing change + verify(em, never()).persist(any(Setting.class)); + verify(em, never()).remove(any(Setting.class)); + verify(em, never()).merge(any(Setting.class)); + } + } } \ No newline at end of file From 4758f86d957f5f3459ef7457157ebed6fa175ae0 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 24 Jul 2025 10:50:44 +0200 Subject: [PATCH 40/69] refactor: introduce `SettingsValidationException` for enhanced error handling in settings logic #11639 - Replaced `IllegalArgumentException` with a custom `SettingsValidationException` for improved specificity. - Updated all related methods in `SettingsServiceBean` and `Admin` API to use the new exception. - Added `SettingsValidationException` class as an application-level exception with rollback support. - This way we don't see wrapped exceptions (either as EJBException or EJBTransactionRollbackException), as we tell the EJB system this is a known exception. - It may also make Copilot Review happy, as we are limiting the chances of leaking internal information with this specific exception. 
--- .../edu/harvard/iq/dataverse/api/Admin.java | 26 +++++++++---------- .../settings/SettingsServiceBean.java | 26 +++++++++---------- .../settings/SettingsValidationException.java | 10 +++++++ .../settings/SettingsServiceBeanTest.java | 4 +-- 4 files changed, 38 insertions(+), 28 deletions(-) create mode 100644 src/main/java/edu/harvard/iq/dataverse/settings/SettingsValidationException.java diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index d81b5115ea1..de7890250e9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -18,6 +18,7 @@ import edu.harvard.iq.dataverse.FileMetadata; import edu.harvard.iq.dataverse.api.auth.AuthRequired; import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.settings.SettingsValidationException; import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.cache.CacheFactoryBean; import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; @@ -228,9 +229,8 @@ public Response putAllSettings(JsonObject settings) { // Transfer to domain objects and deeper validation to be handled by the service layer. 
JsonObjectBuilder successfullOperations = settingsSvc.setAllFromJson(settings); return ok("All database options successfully updated.", successfullOperations); - - } catch (IllegalArgumentException iae) { - return error(Response.Status.BAD_REQUEST, iae.getMessage()); + } catch (SettingsValidationException sve) { + return error(Response.Status.BAD_REQUEST, sve.getMessage()); } } @@ -242,8 +242,8 @@ public Response putSetting(@PathParam("name") String name, String content) { Setting s = settingsSvc.set(name, content); return ok("Setting " + name + " added."); - } catch (IllegalArgumentException iae) { - return error(Response.Status.BAD_REQUEST, iae.getMessage()); + } catch (SettingsValidationException sve) { + return error(Response.Status.BAD_REQUEST, sve.getMessage()); } } @@ -256,8 +256,8 @@ public Response putSettingLang(@PathParam("name") String name, @PathParam("lang" Setting s = settingsSvc.set(name, lang, content); return ok("Setting " + name + " added for language " + lang + "."); - } catch (IllegalArgumentException iae) { - return error(Response.Status.BAD_REQUEST, iae.getMessage()); + } catch (SettingsValidationException sve) { + return error(Response.Status.BAD_REQUEST, sve.getMessage()); } } @@ -283,8 +283,8 @@ public Response getSetting(@PathParam("name") String name, @PathParam("lang") St String content = settingsSvc.get(name, lang, null); return (content != null) ? 
ok(content) : notFound("Setting " + name + " for language " + lang + " not found."); - } catch (IllegalArgumentException iae) { - return error(Response.Status.BAD_REQUEST, iae.getMessage()); + } catch (SettingsValidationException sve) { + return error(Response.Status.BAD_REQUEST, sve.getMessage()); } } @@ -296,8 +296,8 @@ public Response deleteSetting(@PathParam("name") String name) { settingsSvc.delete(name); return ok("Setting " + name + " deleted."); - } catch (IllegalArgumentException iae) { - return error(Response.Status.BAD_REQUEST, iae.getMessage()); + } catch (SettingsValidationException sve) { + return error(Response.Status.BAD_REQUEST, sve.getMessage()); } } @@ -310,8 +310,8 @@ public Response deleteSettingLang(@PathParam("name") String name, @PathParam("la settingsSvc.delete(name, lang); return ok("Setting " + name + " for language " + lang + " deleted."); - } catch (IllegalArgumentException iae) { - return error(Response.Status.BAD_REQUEST, iae.getMessage()); + } catch (SettingsValidationException sve) { + return error(Response.Status.BAD_REQUEST, sve.getMessage()); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index 5f82030eb75..4006435bf09 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -1114,23 +1114,23 @@ public JsonObject listAllAsJson() { /** * Updates all current settings from the specified JSON object. Validates the input JSON, - * converts it to a set of settings, and replaces all existing settings with the new ones + * converts it to a set of settings and replaces all existing settings with the new ones * in an atomic operation. If the settings object is null, contains invalid keys, or if the new * set of settings is empty, the method throws an appropriate exception. 
* * @param settings the JSON object containing the new configuration settings to be applied; must not be null * @return a JsonObjectBuilder representing the operational details of the applied updates - * @throws IllegalArgumentException if the settings object is null, contains invalid keys, or results in empty settings + * @throws SettingsValidationException if the settings object is null, contains invalid keys or results in empty settings */ public JsonObjectBuilder setAllFromJson(JsonObject settings) { if (settings == null) { - throw new IllegalArgumentException("Settings cannot be null"); + throw new SettingsValidationException("Settings cannot be null"); } // Validate the input List invalidKeys = validateKeys(settings); if (!invalidKeys.isEmpty()) { - throw new IllegalArgumentException("Invalid key(s): " + String.join(", ", invalidKeys)); + throw new SettingsValidationException("Invalid key(s): " + String.join(", ", invalidKeys)); } // Convert JSON to Setting objects @@ -1146,7 +1146,7 @@ public JsonObjectBuilder setAllFromJson(JsonObject settings) { return Op.convertToJson(operationalDetails); } - throw new IllegalArgumentException("Settings cannot be empty - you'd wipe the entire configuration."); + throw new SettingsValidationException("Settings cannot be empty - you'd wipe the entire configuration."); } /** @@ -1378,7 +1378,7 @@ public static List validateKeys(JsonObject settings) { } else { validateSettingName(key); } - } catch (IllegalArgumentException iae) { + } catch (SettingsValidationException sev) { invalidKeys.add(key); } } @@ -1387,22 +1387,22 @@ public static List validateKeys(JsonObject settings) { /** * Validates the provided setting name to ensure it meets the required format. - * Throws an {@code IllegalArgumentException} if the name is invalid, including cases + * Throws an {@code SettingsValidationException} if the name is invalid, including cases * where it contains a colon-separated suffix that is no longer supported. 
* * @param name The name of the setting to be validated. * It must adhere to the allowable setting name format. * Names with more than one colon, which may indicate deprecated suffix formats, are not allowed. - * @throws IllegalArgumentException if the setting name is invalid. + * @throws SettingsValidationException if the setting name is invalid. */ public static void validateSettingName(String name) { if (SettingsServiceBean.Key.parse(name) == null) { // If there is more than one colon, this may be someone trying to use the old suffix settings. // Change the error message for that slightly. if (name.replace(":","").length() < name.length() - 1) { - throw new IllegalArgumentException("The name of the setting may not have a colon separated suffix since Dataverse 6.8. Please update your scripts."); + throw new SettingsValidationException("The name of the setting may not have a colon separated suffix since Dataverse 6.8. Please update your scripts."); } - throw new IllegalArgumentException("The name of the setting is invalid."); + throw new SettingsValidationException("The name of the setting is invalid."); } } @@ -1410,15 +1410,15 @@ public static void validateSettingName(String name) { * Validates the provided language code to ensure it adheres to the ISO 639-1 format. * This method checks that the language code is not null, has a length of 2 characters, * and exists within the list of valid ISO 639-1 language codes. If the validation - * fails, an {@code IllegalArgumentException} is thrown. + * fails, an {@code SettingsValidationException} is thrown. * * @param lang the language code to be validated. It must be a non-null, * 2-character string representing a valid ISO 639-1 language code. - * @throws IllegalArgumentException if the language code is invalid. + * @throws SettingsValidationException if the language code is invalid. 
*/ public static void validateSettingLang(String lang) { if (lang == null || lang.length() != 2 || !Arrays.asList(Locale.getISOLanguages()).contains(lang)) { - throw new IllegalArgumentException("The language '" + lang + "' is not a valid ISO 639-1 language code."); + throw new SettingsValidationException("The language '" + lang + "' is not a valid ISO 639-1 language code."); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsValidationException.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsValidationException.java new file mode 100644 index 00000000000..e02e3234675 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsValidationException.java @@ -0,0 +1,10 @@ +package edu.harvard.iq.dataverse.settings; + +import jakarta.ejb.ApplicationException; + +@ApplicationException(rollback = true) +public class SettingsValidationException extends RuntimeException { + public SettingsValidationException(String message) { + super(message); + } +} diff --git a/src/test/java/edu/harvard/iq/dataverse/settings/SettingsServiceBeanTest.java b/src/test/java/edu/harvard/iq/dataverse/settings/SettingsServiceBeanTest.java index 2b0ba2871f9..ed6d5417c06 100644 --- a/src/test/java/edu/harvard/iq/dataverse/settings/SettingsServiceBeanTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/settings/SettingsServiceBeanTest.java @@ -83,7 +83,7 @@ void testValidateSettingName_validNames(String name) { }) @ParameterizedTest void testValidateSettingName_invalidNames(String name, String expectedMessage) { - IllegalArgumentException exception = assertThrows(IllegalArgumentException.class, + SettingsValidationException exception = assertThrows(SettingsValidationException.class, () -> SettingsServiceBean.validateSettingName(name)); assertEquals(expectedMessage, exception.getMessage()); } @@ -106,7 +106,7 @@ void testValidateSettingLang_validLanguage(String language) { }) @ParameterizedTest void testValidateSettingLang_invalidLanguage(String 
language, String expectedMessage) { - IllegalArgumentException exception = assertThrows(IllegalArgumentException.class, + SettingsValidationException exception = assertThrows(SettingsValidationException.class, () -> SettingsServiceBean.validateSettingLang(language)); assertEquals(expectedMessage, exception.getMessage()); } From e33b3f7f6808edabc26413341886db98a494daef Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 24 Jul 2025 10:51:03 +0200 Subject: [PATCH 41/69] test: add validation test for invalid settings in `AdminIT` #11639 - Added `testPutAllSettingsWithInvalidSetting` to verify error handling for invalid keys. - Ensures proper response code and error message for invalid settings payload. --- .../edu/harvard/iq/dataverse/api/AdminIT.java | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java b/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java index 8143796cbf1..83a62d41b9f 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java @@ -185,7 +185,6 @@ void testGetAllSettingsWithLocalization() { void testPutAllSettingsWithEmptyJson() { // Test error handling for empty JSON Response response = given() - //.header("X-Dataverse-key", UtilIT.getSuperuserApiToken()) .header("Content-Type", "application/json") .body("{}") .when() @@ -196,6 +195,21 @@ void testPutAllSettingsWithEmptyJson() { .statusCode(BAD_REQUEST.getStatusCode()) .body("message", containsString("Empty or invalid JSON object")); } + + @Test + void testPutAllSettingsWithInvalidSetting() { + // Test error handling for invalid setting keys + Response response = given() + .header("Content-Type", "application/json") + .body("{\":Test1\": \"Foobar\", \":Test2\": \"Foobar\" }") + .when() + .put("/api/admin/settings"); + + response.then() + .assertThat() + .statusCode(BAD_REQUEST.getStatusCode()) + .body("message", containsString("Invalid key(s): 
:Test1, :Test2")); + } } From 08a28615671c8a0fb4c8b16c1a994ec3efff866a Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 24 Jul 2025 10:55:21 +0200 Subject: [PATCH 42/69] fix: replace `@Transactional` with `@TransactionAttribute` in `SettingsServiceBean` #11639 As this is an EJBean and not under CDI control, we must use EJB Transaction Annotations. Otherwise, the behavior is undefined, as the container may or may not look after transactions as we want it to. --- .../harvard/iq/dataverse/settings/SettingsServiceBean.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index 4006435bf09..c29f94e2672 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -7,6 +7,8 @@ import edu.harvard.iq.dataverse.util.json.JsonUtil; import jakarta.ejb.EJB; import jakarta.ejb.Stateless; +import jakarta.ejb.TransactionAttribute; +import jakarta.ejb.TransactionAttributeType; import jakarta.inject.Named; import jakarta.json.Json; import jakarta.json.JsonArray; @@ -17,7 +19,6 @@ import jakarta.persistence.EntityManager; import jakarta.persistence.PersistenceContext; -import jakarta.transaction.Transactional; import org.json.JSONArray; import org.json.JSONException; import org.json.JSONObject; @@ -1237,7 +1238,7 @@ static JsonObjectBuilder convertToJson(Map operationalDetails) { * are the settings involved, and the values are the types of operations * performed (CREATED, UPDATED, DELETED). 
*/ - @Transactional(Transactional.TxType.REQUIRES_NEW) + @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) public Map replaceAllSettings(Set newSettings) { Objects.requireNonNull(newSettings, "The list of new settings cannot be null (it may be empty)."); From 5a099efc1866561c977237db7d93e13f3ec1ce8f Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 24 Jul 2025 13:40:00 +0200 Subject: [PATCH 43/69] feat(ct): add database settings replacement script to configbaker image #11639 - Introduced `apply-db-settings.sh` for safely replacing database settings in a Dataverse instance. - Added logic to validate inputs, ensure dependencies (`yq`, `jq`, `wait4x`) are available, and handle API authentication via unblock keys. - Updated Dockerfile to include `yq` installation with specified version. --- modules/container-configbaker/Dockerfile | 10 +- .../scripts/apply-db-settings.sh | 129 ++++++++++++++++++ 2 files changed, 137 insertions(+), 2 deletions(-) create mode 100755 modules/container-configbaker/scripts/apply-db-settings.sh diff --git a/modules/container-configbaker/Dockerfile b/modules/container-configbaker/Dockerfile index 5532cda1a9e..9fc876a283b 100644 --- a/modules/container-configbaker/Dockerfile +++ b/modules/container-configbaker/Dockerfile @@ -23,6 +23,8 @@ ENV PATH="${PATH}:${SCRIPT_DIR}" \ ARG PKGS="bc curl dnsutils dumb-init ed jq netcat-openbsd postgresql-client" # renovate: datasource=github-releases depName=wait4x/wait4x ARG WAIT4X_VERSION="v3.2.0" +# renovate: datasource=github-releases depName=mikefarah/yq +ARG YQ_VERSION="v4.47.1" # renovate: datasource=pypi depName=awscli ARG AWSCLI_VERSION="1.40.15" ARG PYTHON_PKGS="awscli==${AWSCLI_VERSION}" @@ -65,7 +67,11 @@ RUN true && \ echo "$(cat /tmp/w4x-checksum | cut -f1 -d" ") /usr/bin/wait4x.tar.gz" | sha256sum -c - && \ tar -xzf /usr/bin/wait4x.tar.gz -C /usr/bin && chmod +x /usr/bin/wait4x && \ - # 2. Python packages + # 2. 
yq-go \ + curl -sSfL -o /usr/bin/yq "https://github.com/mikefarah/yq/releases/download/${YQ_VERSION}/yq_linux_${ARCH}" && \ + chmod +x /usr/bin/yq && \ + + # 3. Python packages pipx install --global ${PYTHON_PKGS} # Get in the scripts @@ -81,7 +87,7 @@ COPY --from=solr /opt/solr/server/solr/configsets/_default ${SOLR_TEMPLATE}/ COPY maven/solr/*.xml ${SOLR_TEMPLATE}/conf/ RUN rm ${SOLR_TEMPLATE}/conf/managed-schema.xml - +WORKDIR ${SCRIPT_DIR} # Set the entrypoint to tini (as a process supervisor) ENTRYPOINT ["/usr/bin/dumb-init", "--"] # By default run a script that will print a help message and terminate diff --git a/modules/container-configbaker/scripts/apply-db-settings.sh b/modules/container-configbaker/scripts/apply-db-settings.sh new file mode 100755 index 00000000000..0d0a2ecedbc --- /dev/null +++ b/modules/container-configbaker/scripts/apply-db-settings.sh @@ -0,0 +1,129 @@ +#!/usr/bin/env bash + +# [INFO]: Idempotent replacement of all database settings from a file source. + +set -euo pipefail + +function usage() { + echo "Usage: $(basename "$0") [-h] [-u instanceUrl] [-t timeout] [-c configFile] [-b unblockKey]" + echo "" + echo "Replace all Database Settings in a running Dataverse installation in an idempotent way." + echo "" + echo "Parameters:" + echo "instanceUrl - Location on container network where to reach your instance. Default: 'http://dataverse:8080'" + echo " Can be set as environment variable 'DATAVERSE_URL'." + echo " timeout - Provide how long to wait for the instance to become available (using wait4x). Default: '3m'" + echo " Can be set as environment variable 'TIMEOUT'." + echo " configFile - Path to a JSON, YAML, PROPERTIES or TOML file containing your settings. Default: '/dv/db-opts.yml'" + echo " Can be set as environment variable 'CONFIG_FILE'." + echo " unblockKey - Either string or path to a file with the Admin API Unblock Key. Optional for localhost. No default." + echo " Can be set as environment variable 'ADMIN_API_UNBLOCK_KEY'." 
+ echo "" + echo "Note: This script will wait for the Dataverse instance to be available before executing the replacement." + echo " Be careful - this script will not stop you from deleting any vital settings." + echo "" + exit 1 +} + +### Common functions +function error { + echo "ERROR:" "$@" >&2 + exit 2 +} + +function exists { + type "$1" >/dev/null 2>&1 && return 0 + ( IFS=:; for p in $PATH; do [ -x "${p%/}/$1" ] && return 0; done; return 1 ) +} + +# Check for (the right) yq, jq, and wait4x being available +if ! exists yq; then + error "No yq executable found on PATH." +elif ! grep -q "https://github.com/mikefarah/yq" <((yq --version)); then + error "You must install yq from https://github.com/mikefarah/yq, not https://github.com/kislyuk/yq" +fi +if ! exists jq; then + error "No jq executable found on PATH." +fi +if ! exists wait4x; then + error "No wait4x executable found on PATH." +fi + +# Set some defaults as documented +DATAVERSE_URL=${DATAVERSE_URL:-"http://dataverse:8080"} +ADMIN_API_UNBLOCK_KEY=${ADMIN_API_UNBLOCK_KEY:-""} +TIMEOUT=${TIMEOUT:-"3m"} +CONFIG_FILE=${CONFIG_FILE:-"/dv/db-opts.yml"} + +while getopts "u:t:c:b:h" OPTION +do + case "$OPTION" in + u) DATAVERSE_URL="$OPTARG" ;; + t) TIMEOUT="$OPTARG" ;; + c) CONFIG_FILE="$OPTARG" ;; + b) ADMIN_API_UNBLOCK_KEY="$OPTARG" ;; + h) usage;; + \?) usage;; + esac +done +shift $((OPTIND-1)) + +# Define an auth header argument (enabling usage of different ways) +AUTH_HEADER_ARG="" + +# Check for Dataverse Unblock API Key present (option with file/env var) +# This is only required if the host is not localhost (then there may be no key necessary) +if ! [[ "${DATAVERSE_URL}" == *"://localhost"* ]] || [ -n "${ADMIN_API_UNBLOCK_KEY}" ]; then + # The argument should not be empty + if [ -z "${ADMIN_API_UNBLOCK_KEY}" ]; then + error "You must provide the Dataverse API Unblock Key to this script." 
+ # In case it's not empty, check if it's a file path and read the key from there + elif [ -f "${ADMIN_API_UNBLOCK_KEY}" ] && [ -r "${ADMIN_API_UNBLOCK_KEY}" ]; then + echo "Reading Dataverse API Unblock Key from ${ADMIN_API_UNBLOCK_KEY}." + if ! API_KEY_FILE_CONTENT=$(cat "${ADMIN_API_UNBLOCK_KEY}" 2>/dev/null); then + error "Could not read unblock key from file ${ADMIN_API_UNBLOCK_KEY}." + fi + # Validate the key is not empty + if [ -z "${API_KEY_FILE_CONTENT}" ]; then + error "API key file ${ADMIN_API_UNBLOCK_KEY} appears empty." + fi + ADMIN_API_UNBLOCK_KEY="$API_KEY_FILE_CONTENT" + fi + # Very basic error check (as there is no clear format or formal spec for the key) + if [ ${#ADMIN_API_UNBLOCK_KEY} -lt 5 ]; then + error "API key appears to be too short (<5 chars)." + fi + + # Build the header argument for Admin API Authentication via unblock key + AUTH_HEADER_ARG="X-Dataverse-unblock-key: ${ADMIN_API_UNBLOCK_KEY}" +fi + +# Check for file with DB options given, file present and readable as well as parseable by yq +# If parseable, render as JSON to temp file +CONV_CONF_FILE=$(mktemp) +if [ -f "${CONFIG_FILE}" ] && [ -r "${CONFIG_FILE}" ]; then + yq -M -o json "${CONFIG_FILE}" > "${CONV_CONF_FILE}" || error "Could not parse config file with yq from ${CONFIG_FILE}." +else + error "Could not read a config file at ${CONFIG_FILE}." +fi + +# Check or wait for Dataverse API being responsive +echo "Waiting for ${DATAVERSE_URL} to become ready in max ${TIMEOUT}." +wait4x http "${DATAVERSE_URL}/api/info/version" -i 8s -t "$TIMEOUT" --expect-status-code 200 --expect-body-json data.version + +# Check for Dataverse Admin API endpoints being reachable by retrieving the current DB options, expect blockades! +CURRENT_SETTINGS=$(mktemp) +echo "Retrieving settings from running instance." +# TODO: Do we need to support pre v6.7 style unblock key query parameter?
+curl -sSL --fail-with-body -o "${CURRENT_SETTINGS}" -H "${AUTH_HEADER_ARG}" "${DATAVERSE_URL}/api/admin/settings" \ + || error "Failed. Response message: $( cat "${CURRENT_SETTINGS}")" \ + && echo "Success!" + # TODO: while it's nice to have the current settings written out, it may contain sensitive information (so don't). + # && ( echo "Success! Current settings: "; jq '.data' < "$CURRENT_SETTINGS" ) + +# We need to make the settings update atomic. +echo "Replacing settings." +RESPONSE=$(mktemp) +curl -sSL --fail-with-body -o "${RESPONSE}" -X PUT -H "${AUTH_HEADER_ARG}" --json @"${CONV_CONF_FILE}" "${DATAVERSE_URL}/api/admin/settings" \ + || error "Failed. Response message: $( jq ".message" < "${RESPONSE}" )" \ + && ( echo -e "Success!\nOperations executed: "; jq '.data' < "$RESPONSE" ) From 558e8f98992d7f8fddafeb8fc975e8305a7b6b23 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 25 Jul 2025 12:33:12 +0200 Subject: [PATCH 44/69] refactor(ct): extract common utility functions in `apply-db-settings.sh` to `common.sh` #11639 - Moved shared functions (`error`, `exists_on_path`, `require_on_path`) to a new reusable script `common.sh`. - Simplified `apply-db-settings.sh` by sourcing `common.sh` for dependency validation logic. 
--- .../scripts/apply-db-settings.sh | 24 ++++--------------- .../scripts/util/common.sh | 17 +++++++++++++ 2 files changed, 22 insertions(+), 19 deletions(-) create mode 100644 modules/container-configbaker/scripts/util/common.sh diff --git a/modules/container-configbaker/scripts/apply-db-settings.sh b/modules/container-configbaker/scripts/apply-db-settings.sh index 0d0a2ecedbc..65b9639c07e 100755 --- a/modules/container-configbaker/scripts/apply-db-settings.sh +++ b/modules/container-configbaker/scripts/apply-db-settings.sh @@ -25,29 +25,15 @@ function usage() { exit 1 } -### Common functions -function error { - echo "ERROR:" "$@" >&2 - exit 2 -} - -function exists { - type "$1" >/dev/null 2>&1 && return 0 - ( IFS=:; for p in $PATH; do [ -x "${p%/}/$1" ] && return 0; done; return 1 ) -} +source util/common.sh # Check for (the right) yq, jq, and wait4x being available -if ! exists yq; then - error "No yq executable found on PATH." -elif ! grep -q "https://github.com/mikefarah/yq" <((yq --version)); then +require_on_path yq +if ! grep -q "https://github.com/mikefarah/yq" <((yq --version)); then error "You must install yq from https://github.com/mikefarah/yq, not https://github.com/kislyuk/yq" fi -if ! exists jq; then - error "No jq executable found on PATH." -fi -if ! exists wait4x; then - error "No wait4x executable found on PATH." 
-fi +require_on_path jq +require_on_path wait4x # Set some defaults as documented DATAVERSE_URL=${DATAVERSE_URL:-"http://dataverse:8080"} diff --git a/modules/container-configbaker/scripts/util/common.sh b/modules/container-configbaker/scripts/util/common.sh new file mode 100644 index 00000000000..91de5257a5c --- /dev/null +++ b/modules/container-configbaker/scripts/util/common.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash + +function error { + echo "ERROR:" "$@" >&2 + exit 2 +} + +function exists_on_path { + type "$1" >/dev/null 2>&1 && return 0 + ( IFS=:; for p in $PATH; do [ -x "${p%/}/$1" ] && return 0 || echo "${p%/}/$1"; done; return 1 ) +} + +function require_on_path { + if ! exists_on_path "$1"; then + error "No $1 executable found on PATH." + fi +} From 8e220fb674a0e15852ee9066a72b742ecc597382 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 25 Jul 2025 12:34:44 +0200 Subject: [PATCH 45/69] feat(ct): support additional environment variable sources in `apply-db-settings.sh` #11639 - Added `-e`/`ENV_SOURCE` option to specify a file or directory as a source for additional environment variables. - Introduced `read-to-env.sh` utility script to parse environment variables from specified sources. - Updated usage information and parsing logic in `apply-db-settings.sh` to handle the new option. 
--- .../scripts/apply-db-settings.sh | 21 ++++++++++-- .../scripts/util/read-to-env.sh | 34 +++++++++++++++++++ 2 files changed, 52 insertions(+), 3 deletions(-) create mode 100644 modules/container-configbaker/scripts/util/read-to-env.sh diff --git a/modules/container-configbaker/scripts/apply-db-settings.sh b/modules/container-configbaker/scripts/apply-db-settings.sh index 65b9639c07e..c7e92ef42b0 100755 --- a/modules/container-configbaker/scripts/apply-db-settings.sh +++ b/modules/container-configbaker/scripts/apply-db-settings.sh @@ -5,7 +5,7 @@ set -euo pipefail function usage() { - echo "Usage: $(basename "$0") [-h] [-u instanceUrl] [-t timeout] [-c configFile] [-b unblockKey]" + echo "Usage: $(basename "$0") [-h] [-u instanceUrl] [-t timeout] [-c configFile] [-b unblockKey] [-e envSource]" echo "" echo "Replace all Database Settings in a running Dataverse installation in an idempotent way." echo "" @@ -15,9 +15,12 @@ function usage() { echo " timeout - Provide how long to wait for the instance to become available (using wait4x). Default: '3m'" echo " Can be set as environment variable 'TIMEOUT'." echo " configFile - Path to a JSON, YAML, PROPERTIES or TOML file containing your settings. Default: '/dv/db-opts.yml'" - echo " Can be set as environment variable 'CONFIG_FILE'." + echo " Can be set as environment variable 'CONFIG_FILE'. May contain \${var} references to env. vars." echo " unblockKey - Either string or path to a file with the Admin API Unblock Key. Optional for localhost. No default." echo " Can be set as environment variable 'ADMIN_API_UNBLOCK_KEY'." + echo " envSource - Path to a file or directory used as source for additional environment variables." + echo " Optional, no default. Can be set as environment variable 'ENV_SOURCE'." + echo " Environment variables from this file or directory structure will be script-local." echo "" echo "Note: This script will wait for the Dataverse instance to be available before executing the replacement." 
echo " Be careful - this script will not stop you from deleting any vital settings." @@ -26,6 +29,7 @@ function usage() { } source util/common.sh +source util/read-to-env.sh # Check for (the right) yq, jq, and wait4x being available require_on_path yq @@ -40,20 +44,31 @@ DATAVERSE_URL=${DATAVERSE_URL:-"http://dataverse:8080"} ADMIN_API_UNBLOCK_KEY=${ADMIN_API_UNBLOCK_KEY:-""} TIMEOUT=${TIMEOUT:-"3m"} CONFIG_FILE=${CONFIG_FILE:-"/dv/db-opts.yml"} +ENV_SOURCE=${ENV_SOURCE:-""} -while getopts "u:t:c:b:h" OPTION +while getopts "u:t:c:b:e:h" OPTION do case "$OPTION" in u) DATAVERSE_URL="$OPTARG" ;; t) TIMEOUT="$OPTARG" ;; c) CONFIG_FILE="$OPTARG" ;; b) ADMIN_API_UNBLOCK_KEY="$OPTARG" ;; + e) ENV_SOURCE="$OPTARG" ;; h) usage;; \?) usage;; esac done shift $((OPTIND-1)) +##### ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### +# PARSE CONFIGURATION + +# In case the env source was given as cmd arg, parse it +if [ -n "$ENV_SOURCE" ]; then + read_to_env "$ENV_SOURCE" +fi + + # Define an auth header argument (enabling usage of different ways) AUTH_HEADER_ARG="" diff --git a/modules/container-configbaker/scripts/util/read-to-env.sh b/modules/container-configbaker/scripts/util/read-to-env.sh new file mode 100644 index 00000000000..344cb6e38bf --- /dev/null +++ b/modules/container-configbaker/scripts/util/read-to-env.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash + +set -euo pipefail + +source "$(dirname "${BASH_SOURCE[0]}")/common.sh" + +# Read from a target into environment variables. +# Parameters: $target +# Case A) If $target is a file, simply source it. 
+# Case B) If $target is a directory, parse dirs and files in it as variable names and file content as value +function read_to_env() { + local target="$1" + + if [ -f "$target" ] && [ -r "$target" ]; then + set -o allexport + # shellcheck disable=SC1090 + source "$target" + set +o allexport + elif [ -d "$target" ] && [ -r "$target" ] && [ -x "$target" ]; then + # Find all files + FILES=$( find "$target" -type f -printf '%P\n' ) + for FILE in $FILES; do + # Same as MPCONFIG does! + VARNAME=$( echo "$FILE" | tr '[:lower:]' '[:upper:]' | tr '/' '_' ) + VARVAL=$( cat "$target/$FILE") + + # Use printf to create the variable in global scope + printf -v "$VARNAME" '%s' "$VARVAL" + export "${VARNAME?}" + done + else + error "'$target' not a (readable) environment file or directory" + fi +} From bde0384241c56fca1b153c5ebd0f352d0237024d Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 25 Jul 2025 12:37:09 +0200 Subject: [PATCH 46/69] feat,refactor(ct): move yq parsing and add envsubst #11639 - Enhanced `apply-db-settings.sh` to support environment variable substitution while parsing the configuration file using `yq` feature `envsubst()`. - Improved wording of error messages for unreadable or unparseable configuration files. - Moved the conversion logic above all interaction with the instance, as we should be sure all env var substitutions fail early (at least before waiting for the instance to come up). 
--- .../scripts/apply-db-settings.sh | 22 +++++++++++-------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/modules/container-configbaker/scripts/apply-db-settings.sh b/modules/container-configbaker/scripts/apply-db-settings.sh index c7e92ef42b0..0780dd47f1c 100755 --- a/modules/container-configbaker/scripts/apply-db-settings.sh +++ b/modules/container-configbaker/scripts/apply-db-settings.sh @@ -68,6 +68,19 @@ if [ -n "$ENV_SOURCE" ]; then read_to_env "$ENV_SOURCE" fi +# Check for file with DB options given, file present and readable as well as parseable by yq +# If parseable, render as JSON to temp file +CONV_CONF_FILE=$(mktemp) +if [ -f "${CONFIG_FILE}" ] && [ -r "${CONFIG_FILE}" ]; then + # See https://mikefarah.gitbook.io/yq/operators/env-variable-operators#tip + yq -M -o json '(.. | select(tag == "!!str")) |= envsubst(nu)' "${CONFIG_FILE}" > "${CONV_CONF_FILE}" || error "Could not parse config file with yq from '${CONFIG_FILE}'." + cat "$CONV_CONF_FILE" +else + error "Could not read a config file at '${CONFIG_FILE}'." +fi + +##### ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### +# API INTERACTION # Define an auth header argument (enabling usage of different ways) AUTH_HEADER_ARG="" @@ -99,15 +112,6 @@ if ! [[ "${DATAVERSE_URL}" == *"://localhost"* ]] || [ -n "${ADMIN_API_UNBLOCK_K AUTH_HEADER_ARG="X-Dataverse-unblock-key: ${ADMIN_API_UNBLOCK_KEY}" fi -# Check for file with DB options given, file present and readable as well as parseable by yq -# If parseable, render as JSON to temp file -CONV_CONF_FILE=$(mktemp) -if [ -f "${CONFIG_FILE}" ] && [ -r "${CONFIG_FILE}" ]; then - yq -M -o json "${CONFIG_FILE}" > "${CONV_CONF_FILE}" || error "Could not parse config file with yq from ${CONFIG_FILE}." -else - error "Could not read a config file at ${CONFIG_FILE}." -fi - # Check or wait for Dataverse API being responsive echo "Waiting for ${DATAVERSE_URL} to become ready in max ${TIMEOUT}." 
wait4x http "${DATAVERSE_URL}/api/info/version" -i 8s -t "$TIMEOUT" --expect-status-code 200 --expect-body-json data.version From 2368d7eb924c9ccfeafeb94ca97adb07837efe6b Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 1 Aug 2025 13:31:57 +0200 Subject: [PATCH 47/69] fix(migrate): move `UPDATE` statement post constraint drops for NULL handling Reordered `UPDATE` statement in migration script to occur after dropping constraints, ensuring NULL values in `lang` are handled correctly before setting NOT NULL constraints, but after we remove the initial constraints from v4.15. --- src/main/resources/db/migration/V6.7.0.2.sql | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/main/resources/db/migration/V6.7.0.2.sql b/src/main/resources/db/migration/V6.7.0.2.sql index 07a568f0834..a7493359ad5 100644 --- a/src/main/resources/db/migration/V6.7.0.2.sql +++ b/src/main/resources/db/migration/V6.7.0.2.sql @@ -3,10 +3,6 @@ -- 2. Update lang column to use empty string default instead of NULL (avoid non-unique pairs) -- 3. 
Add NOT NULL constraints and unique constraint for name+lang pairs --- First, update any existing NULL lang values to empty string -UPDATE setting SET lang = '' WHERE lang IS NULL; - --- Postgres doesn't support IF NOT EXISTS for ALTER COLUMN or ADD CONSTRAINT, so we need conditional logic DO $$ BEGIN -- These database constraints were added with Dataverse 4.15, but they had no representation in the code, @@ -16,7 +12,12 @@ BEGIN ALTER TABLE setting DROP CONSTRAINT IF EXISTS non_empty_lang; DROP INDEX IF EXISTS unique_settings; + -- Now, update any existing NULL lang values to empty string (we cannot do this before lifting the restrictions) + -- This also needs to be done before we try to alter the table to not allow NULL for setting.lang + UPDATE setting SET lang = '' WHERE lang IS NULL; + -- Only alter columns if they need to be changed + -- (Note: Postgres doesn't support IF NOT EXISTS for ALTER COLUMN or ADD CONSTRAINT, so we need conditional logic) IF EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name = 'setting' AND column_name = 'name' AND (data_type = 'text' OR is_nullable = 'YES')) THEN From 21ca0b836c4abe027fe74e9d4e6d7d8c03f2c120 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 1 Aug 2025 16:36:02 +0200 Subject: [PATCH 48/69] fix(scripts): exclude hidden mounted files in `read-to-env.sh` Updated `read-to-env.sh` to handle Kubernetes secrets by excluding hidden mounted files and refining file detection logic with `find`. 
--- modules/container-configbaker/scripts/util/read-to-env.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/container-configbaker/scripts/util/read-to-env.sh b/modules/container-configbaker/scripts/util/read-to-env.sh index 344cb6e38bf..485586521ab 100644 --- a/modules/container-configbaker/scripts/util/read-to-env.sh +++ b/modules/container-configbaker/scripts/util/read-to-env.sh @@ -17,8 +17,8 @@ function read_to_env() { source "$target" set +o allexport elif [ -d "$target" ] && [ -r "$target" ] && [ -x "$target" ]; then - # Find all files - FILES=$( find "$target" -type f -printf '%P\n' ) + # Find all files (K8s secrets are symlinks, so look for not directory & remove the hidden mounted files.) + FILES=$( find "$target" -not -type d -printf '%P\n' | grep -v '^\.\.' ) for FILE in $FILES; do # Same as MPCONFIG does! VARNAME=$( echo "$FILE" | tr '[:lower:]' '[:upper:]' | tr '/' '_' ) From 7f8329226cd21c5aa3480f8351fa069fbcec057e Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 1 Aug 2025 17:35:01 +0200 Subject: [PATCH 49/69] chore(scripts): comment out config file print in `apply-db-settings.sh` #11639 Temporarily removed `cat` command for converted config file output. Added a TODO to consider a debug switch for conditional logging in the future. --- modules/container-configbaker/scripts/apply-db-settings.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/container-configbaker/scripts/apply-db-settings.sh b/modules/container-configbaker/scripts/apply-db-settings.sh index 0780dd47f1c..deb897d138c 100755 --- a/modules/container-configbaker/scripts/apply-db-settings.sh +++ b/modules/container-configbaker/scripts/apply-db-settings.sh @@ -74,7 +74,8 @@ CONV_CONF_FILE=$(mktemp) if [ -f "${CONFIG_FILE}" ] && [ -r "${CONFIG_FILE}" ]; then # See https://mikefarah.gitbook.io/yq/operators/env-variable-operators#tip yq -M -o json '(.. 
| select(tag == "!!str")) |= envsubst(nu)' "${CONFIG_FILE}" > "${CONV_CONF_FILE}" || error "Could not parse config file with yq from '${CONFIG_FILE}'." - cat "$CONV_CONF_FILE" + # TODO: think about adding a debug switch here, not just print + # cat "$CONV_CONF_FILE" else error "Could not read a config file at '${CONFIG_FILE}'." fi From 37015d6fcd55fca787930ca3da6887f87eab6edc Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 1 Aug 2025 18:17:36 +0200 Subject: [PATCH 50/69] fix(settings): handle JSON string unescaping in `SettingsServiceBean` #11639 Updated `SettingsServiceBean` to properly handle unescaping of JSON strings for string-type values. Enhanced tests in `SettingsServiceBeanTest` to validate behavior with new scenarios. --- .../settings/SettingsServiceBean.java | 16 +++++++++++----- .../settings/SettingsServiceBeanTest.java | 19 ++++++++++++------- 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index c29f94e2672..c555b059523 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -15,6 +15,7 @@ import jakarta.json.JsonArrayBuilder; import jakarta.json.JsonObject; import jakarta.json.JsonObjectBuilder; +import jakarta.json.JsonString; import jakarta.json.JsonValue; import jakarta.persistence.EntityManager; import jakarta.persistence.PersistenceContext; @@ -1168,11 +1169,16 @@ static Set convertJsonToSettings(JsonObject settings) { return settings.entrySet().stream() .map(entry -> { String key = entry.getKey(); - String value = entry.getValue().toString() - // This is necessary to avoid storing the quotes inthe DB when a setting is a simple value. - // JsonValue will escape any JsonString with such quotes. 
- .replaceFirst("^\"", "") - .replaceFirst("\"$", ""); + + String value; + JsonValue jsonValue = entry.getValue(); + if (jsonValue.getValueType() == JsonValue.ValueType.STRING) { + // For string values, get the actual string content (unescaped) + value = ((JsonString) jsonValue).getString(); + } else { + // For objects, arrays, numbers, booleans, null - use JSON representation + value = jsonValue.toString(); + } if (key.contains(L10N_KEY_SEPARATOR)) { // Handle localized settings diff --git a/src/test/java/edu/harvard/iq/dataverse/settings/SettingsServiceBeanTest.java b/src/test/java/edu/harvard/iq/dataverse/settings/SettingsServiceBeanTest.java index ed6d5417c06..eb4d67d1835 100644 --- a/src/test/java/edu/harvard/iq/dataverse/settings/SettingsServiceBeanTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/settings/SettingsServiceBeanTest.java @@ -251,18 +251,23 @@ void testConvertJsonToSettings_simpleKeyValues() { // The REST API endpoint presents a JsonObject, which may have number literals in it. // Check that we can cope with that. .add(":Key3", 123456) + // Make sure we deal with quotes + .add(":Key4", " Dataverse © 2014-2025") .build(); // When Set result = SettingsServiceBean.convertJsonToSettings(input); // Then - assertEquals(3, result.size()); - assertEquals( - Set.of(new Setting(":Key1", "Value1"), - new Setting(":Key2", "123456"), - new Setting(":Key3", "123456") - ), result); + Map expectedResults = Map.of( + ":Key1", "Value1", + ":Key2", "123456", + ":Key3", "123456", + ":Key4", " Dataverse © 2014-2025" + ); + for (Setting setting : result) { + assertEquals(expectedResults.get(setting.getName()), setting.getContent()); + } } @Test @@ -277,7 +282,7 @@ void testConvertJsonToSettings_localizedKeysWithSimpleValues() { Set result = SettingsServiceBean.convertJsonToSettings(input); // Then - assertEquals(2, result.size()); + // Note: we do not verify the content with Setting.equals() - but we are not really interested in it as well. 
assertEquals( Set.of(new Setting(":LocalizedKey", "en", "EnglishValue"), new Setting(":LocalizedKey", "fr", "FrenchValue") From 292855bb0a9a9445e7628d4426b6a3103305cc14 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 1 Aug 2025 16:50:06 -0400 Subject: [PATCH 51/69] doc suggestions for settings API #11639 --- doc/sphinx-guides/source/api/changelog.rst | 10 + doc/sphinx-guides/source/api/native-api.rst | 186 +++++++++++++++--- .../source/installation/config.rst | 26 ++- 3 files changed, 187 insertions(+), 35 deletions(-) diff --git a/doc/sphinx-guides/source/api/changelog.rst b/doc/sphinx-guides/source/api/changelog.rst index 5be6c78adce..7a9bd446fb8 100644 --- a/doc/sphinx-guides/source/api/changelog.rst +++ b/doc/sphinx-guides/source/api/changelog.rst @@ -7,6 +7,16 @@ This API changelog is experimental and we would love feedback on its usefulness. :local: :depth: 1 +v6.9 +---- + +- The way to set per-format size limits for tabular ingest has changed. + JSON input is now used. + See :ref:`:TabularIngestSizeLimit`. +- In the past, the settings API would accept any key and value. + This is no longer the case because validation has been added. + See :ref:`settings_put_single`, for example. + v6.8 ---- diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index c55fc424496..a972894eb57 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -6777,61 +6777,191 @@ Admin This is the administrative part of the API. For security reasons, it is absolutely essential that you block it before allowing public access to a Dataverse installation. -Blocking can be done using settings. -See the ``post-install-api-block.sh`` script in the ``scripts/api`` folder for details. -See :ref:`blocking-api-endpoints` in Securing Your Installation section of the Configuration page of the Installation Guide. +See :ref:`blocking-api-endpoints` in the Installation Guide for details. + +.. 
note:: See :ref:`curl-examples-and-environment-variables` if you are unfamiliar with the use of export below. + +.. _admin-api-db-settings: + +Manage Database Settings +~~~~~~~~~~~~~~~~~~~~~~~~ + +These are the API endpoints for managing the :ref:`database-settings` listed in the Installation Guide. + +.. _settings_get_all: List All Database Settings -~~~~~~~~~~~~~~~~~~~~~~~~~~ +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: bash -List all settings:: + export SERVER_URL="http://localhost:8080" + + curl "$SERVER_URL/api/admin/settings" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl http://localhost:8080/api/admin/settings + +.. _settings_get_single: + +Get Single Database Setting +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: bash + + export SERVER_URL="http://localhost:8080" + export NAME=":UploadMethods" + + curl "$SERVER_URL/api/admin/settings/$NAME" + +The fully expanded example above (without environment variables) looks like this: - GET http://$SERVER/api/admin/settings +.. code-block:: bash + + curl http://localhost:8080/api/admin/settings/:UploadMethods + +.. _settings_get_single_lang: + +Get Single Database Setting With Language/Locale +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A small number of settings, most notably :ref:`:ApplicationTermsOfUse`, can be saved in multiple languages. + +Use two character language codes. + +.. code-block:: bash + + export SERVER_URL="http://localhost:8080" + export NAME=":ApplicationTermsOfUse" + export LANG="en" + + curl "$SERVER_URL/api/admin/settings/$NAME/lang/$LANG" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl http://localhost:8080/api/admin/settings/:ApplicationTermsOfUse/lang/en + +.. _settings_put_single: + +Configure Single Database Setting +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. 
code-block:: bash + + export SERVER_URL="http://localhost:8080" + export NAME=":InstallationName" + export VALUE="LibreScholar" + + curl -X PUT "$SERVER_URL/api/admin/settings/$NAME" -d "$VALUE" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -X PUT http://localhost:8080/api/admin/settings/:InstallationName -d LibreScholar + +Note: ``NAME`` values are validated for existence and compliance. + +.. _settings_put_single_lang: + +Configure Single Database Setting With Language/Locale +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A small number of settings, most notably :ref:`:ApplicationTermsOfUse`, can be saved in multiple languages. + +Use two character language codes. + +.. code-block:: bash + + export SERVER_URL="http://localhost:8080" + export NAME=":ApplicationTermsOfUse" + export LANG="fr" + + curl -X PUT "$SERVER_URL/api/admin/settings/$NAME/lang/$LANG" --upload-file /tmp/apptou_fr.html + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -X PUT http://localhost:8080/api/admin/settings/:ApplicationTermsOfUse/lang/fr --upload-file /tmp/apptou_fr.html + +Note: ``NAME`` and ``LANG`` values are validated for existence and compliance. .. _settings_put_bulk: Configure All Database Settings -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Using a JSON file, replace all settings in a single idempotent and atomic operation and delete any settings not present in that JSON file. + +Use the JSON ``data`` object in output of ``GET /api/admin/settings`` (:ref:`settings_get_all`) for the JSON input structure for this endpoint. +To put this concretely, you can save just the ``data`` object for your existing settings to disk by filtering them through ``jq`` like this: -Replace all settings in a single idempotent and atomic operation:: +..
code-block:: bash - PUT http://$SERVER/api/admin/settings + curl http://localhost:8080/api/admin/settings | jq '.data' > /tmp/all-settings.json -See JSON ``data`` object in output of ``GET /api/admin/settings`` for the JSON input structure for this endpoint. +Then you can use this "all-settings.json" file as a starting point for your input file. The :doc:`../installation/config` page of the Installation Guide has a :ref:`complete list of all the available settings `. +Note that settings in the JSON file are validated for existence and compliance. -Configure Single Database Setting -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. code-block:: bash -Sets setting ``name`` to the body of the request:: + export SERVER_URL="http://localhost:8080" + + curl -X PUT -H "Content-type:application/json" "$SERVER_URL/api/admin/settings" --upload-file /tmp/all-settings.json - PUT http://$SERVER/api/admin/settings/$name +The fully expanded example above (without environment variables) looks like this: -Sets a localized setting ``name`` for locale/language ``lang`` to the body of the request:: +.. code-block:: bash - PUT http://$SERVER/api/admin/settings/$name/lang/$lang + curl -X PUT -H "Content-type:application/json" http://localhost:8080/api/admin/settings --upload-file /tmp/all-settings.json -Get Single Database Setting -~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. _settings_delete_single: -Get the setting under ``name``:: +Delete Single Database Setting +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - GET http://$SERVER/api/admin/settings/$name +.. code-block:: bash -Gets a localized setting under ``name`` for locale/language ``lang``:: + export SERVER_URL="http://localhost:8080" + export NAME=":InstallationName" + + curl -X DELETE "$SERVER_URL/api/admin/settings/$NAME" - GET http://$SERVER/api/admin/settings/$name/lang/$lang +The fully expanded example above (without environment variables) looks like this: -Delete Single Database Setting -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. 
code-block:: bash + + curl -X DELETE http://localhost:8080/api/admin/settings/:InstallationName + +.. _settings_delete_single_lang: + +Delete Single Database Setting With Language/Locale +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A small number of settings, most notably :ref:`:ApplicationTermsOfUse`, can be saved in multiple languages. -Delete the setting under ``name``:: +Use two character language codes. - DELETE http://$SERVER/api/admin/settings/$name +.. code-block:: bash + + export SERVER_URL="http://localhost:8080" + export NAME=":ApplicationTermsOfUse" + export LANG="fr" + + curl -X DELETE "$SERVER_URL/api/admin/settings/$NAME/lang/$LANG" -Delete a localized setting under ``name`` for locale/language ``lang``:: +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash - DELETE http://$SERVER/api/admin/settings/$name/lang/$lang + curl -X DELETE http://localhost:8080/api/admin/settings/:ApplicationTermsOfUse/lang/fr .. _list-all-feature-flags: diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index f064cc51a06..c7f76c04754 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -3873,13 +3873,13 @@ You might also create your own profiles and use these, please refer to the upstr Database Settings ----------------- -These settings are stored in the ``setting`` database table but can be read and modified via the "admin" endpoint of the :doc:`/api/native-api` for easy scripting. +These settings are stored in the ``setting`` database table but we recommend using the Admin API (:ref:`admin-api-db-settings`) to view and modify them, as shown below. -The most commonly used configuration options are listed first. +In short: -The pattern you will observe in curl examples below is that an HTTP ``PUT`` is used to add or modify a setting. 
If you perform an HTTP ``GET`` (the default when using curl), the output will contain the value of the setting, if it has been set. You can also do a ``GET`` of all settings with ``curl http://localhost:8080/api/admin/settings`` which you may want to pretty-print by piping the output through a tool such as jq by appending ``| jq .``. If you want to remove a setting, use an HTTP ``DELETE`` such as ``curl -X DELETE http://localhost:8080/api/admin/settings/:GuidesBaseUrl`` . - -For your convenience, there is also an Admin API endpoint to :ref:`bulk manage database settings in an atomic, idempotent fashion `. +- HTTP ``GET`` is used to show settings. +- HTTP ``PUT`` is used to add or modify settings. +- HTTP ``DELETE`` is used to delete settings. .. _:BlockedApiPolicy: @@ -4374,10 +4374,14 @@ For performance reasons, your Dataverse installation will only allow creation of In the UI, users trying to download a zip file larger than the Dataverse installation's :ZipDownloadLimit will receive messaging that the zip file is too large, and the user will be presented with alternate access options. +.. _:TabularIngestSizeLimit: + :TabularIngestSizeLimit +++++++++++++++++++++++ Threshold in bytes for limiting whether or not "ingest" is attempted for an uploaded tabular file (which can be resource intensive). +For more on the ingest features, see :doc:`/user/tabulardataingest/index` in the User Guide. + For example, with the below in place, files greater than 2 GB in size will not go through the ingest process: ``curl -X PUT -d 2000000000 http://localhost:8080/api/admin/settings/:TabularIngestSizeLimit`` @@ -4394,6 +4398,8 @@ Using a JSON-based setting, you can override this global setting on a per-format - CSV - XLSX +(In previous releases of Dataverse, a colon-separated form was used to specify per-format limits, such as ``:TabularIngestSizeLimit:Rdata``, but this is no longer supported. Now JSON is used.) + The JSON follows this form, all fields optional: .. 
code:: json @@ -4405,8 +4411,14 @@ The JSON follows this form, all fields optional: "formatZ": "100" } -The ``default`` key represents the global default (with it being absent meaning the implicit global default of ``-1`` applies). -Add a format name (as listed above) to change the limit for this particular format. +Whatever JSON you send will overwrite existing values. If you have any current settings, you can use the following command to see them in the proper format (and then add the new settings you want): + +``curl http://localhost:8080/api/admin/settings/:TabularIngestSizeLimit | jq -r '.data.message'`` + +The ``default`` key is optional and can be used to give limits to formats that are not specified in the JSON. If you omit the ``default`` key or set it to ``-1``, no limits are applied to formats not specified in the JSON. + +Add a format name (DTA, POR, etc., as listed above) to change the limit for this particular format. + Any size limits must be provided as string literals (in quotes), not number literals! Examples: From b7e541d5cdfc2a4d0f15e3f7de81ddb565423217 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 4 Aug 2025 15:56:32 -0400 Subject: [PATCH 52/69] doc tweaks #11639 --- doc/sphinx-guides/source/api/native-api.rst | 8 ++++---- doc/sphinx-guides/source/installation/config.rst | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index a972894eb57..30fb4c24d70 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -6830,7 +6830,7 @@ Get Single Database Setting With Language/Locale A small number of settings, most notably :ref:`:ApplicationTermsOfUse`, can be saved in multiple languages. -Use two character language codes. +Use two-character ISO 639-1 language codes. .. 
code-block:: bash @@ -6874,7 +6874,7 @@ Configure Single Database Setting With Language/Locale A small number of settings, most notably :ref:`:ApplicationTermsOfUse`, can be saved in multiple languages. -Use two character language codes. +Use two-character ISO 639-1 language codes. .. code-block:: bash @@ -6900,7 +6900,7 @@ Configure All Database Settings Using a JSON file, replace all settings in a single idempotent and atomic operation and delete any settings not present in that JSON file. Use the JSON ``data`` object in output of ``GET /api/admin/settings`` (:ref:`settings_get_all`) for the JSON input structure for this endpoint. -To put this concretely, you can save just the ``data`` object for your existing settings to disk by filtering them through ``.jq`` like this: +To put this concretely, you can save just the ``data`` object for your existing settings to disk by filtering them through ``jq`` like this: .. code-block:: bash @@ -6947,7 +6947,7 @@ Delete Single Database Setting With Language/Locale A small number of settings, most notably :ref:`:ApplicationTermsOfUse`, can be saved in multiple languages. -Use two character language codes. +Use two-character ISO 639-1 language codes. .. code-block:: bash diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index c7f76c04754..7e4d85b360f 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -4389,7 +4389,7 @@ For example, with the below in place, files greater than 2 GB in size will not g You can set this value to ``0`` to prevent files from being ingested at all. The default is ``-1``, meaning no file size limit is applied. 
-Using a JSON-based setting, you can override this global setting on a per-format basis for the following formats: +Using a JSON-based setting, you can override this global setting on a per-format basis for the following formats (case-insensitive): - DTA - POR @@ -4417,13 +4417,13 @@ Whatever JSON you send will overwrite existing values. If you have any current s The ``default`` key is optional and can be used to give limits to formats that are not specified in the JSON. If you omit the ``default`` key or set it to ``-1``, no limits are applied to formats not specified in the JSON. -Add a format name (DTA, POR, etc., as listed above) to change the limit for this particular format. +Add a format name (DTA, POR, etc., as listed above) to change the limit for that particular format. Any size limits must be provided as string literals (in quotes), not number literals! Examples: -1. If you want your Dataverse installation to not attempt to ingest Rdata files larger than 1 MB but otherwise unlimited: +1. If you want your Dataverse installation to not attempt to ingest Rdata files larger than 1 MB but otherwise be unlimited: ``curl -X PUT -d '{"Rdata":"1000000"}' http://localhost:8080/api/admin/settings/:TabularIngestSizeLimit`` 2. 
If you want your Dataverse installation to not attempt to ingest XLSX files at all and apply a global limit of 512 MiB, use this setting: From 071f695f13b6516aada232cac5e234407798f710 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 17 Sep 2025 08:26:26 +0200 Subject: [PATCH 53/69] chore: move migrations to be part of v6.9 instead of v6.8 #11639 --- src/main/resources/db/migration/{V6.7.0.1.sql => V6.8.0.1.sql} | 0 src/main/resources/db/migration/{V6.7.0.2.sql => V6.8.0.2.sql} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename src/main/resources/db/migration/{V6.7.0.1.sql => V6.8.0.1.sql} (100%) rename src/main/resources/db/migration/{V6.7.0.2.sql => V6.8.0.2.sql} (100%) diff --git a/src/main/resources/db/migration/V6.7.0.1.sql b/src/main/resources/db/migration/V6.8.0.1.sql similarity index 100% rename from src/main/resources/db/migration/V6.7.0.1.sql rename to src/main/resources/db/migration/V6.8.0.1.sql diff --git a/src/main/resources/db/migration/V6.7.0.2.sql b/src/main/resources/db/migration/V6.8.0.2.sql similarity index 100% rename from src/main/resources/db/migration/V6.7.0.2.sql rename to src/main/resources/db/migration/V6.8.0.2.sql From efd7bbfc3096479eb2a02cd32704a719b6385200 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Tue, 5 Aug 2025 16:25:48 -0400 Subject: [PATCH 54/69] create test methods for getting and setting all settings (and use them) #11639 --- .../edu/harvard/iq/dataverse/api/AdminIT.java | 39 ++++--------------- .../edu/harvard/iq/dataverse/api/UtilIT.java | 14 +++++++ 2 files changed, 22 insertions(+), 31 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java b/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java index 83a62d41b9f..6f3ffaa83b8 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java @@ -80,9 +80,7 @@ void testSettingsRoundTrip() { UtilIT.setSetting(harmlessSetting, harmlessL10nValue, language); // Step 
1: Get current settings state - Response getResponse = given() - .when() - .get("/api/admin/settings"); + Response getResponse = UtilIT.getSettings(); getResponse.then() .assertThat() @@ -111,12 +109,7 @@ void testSettingsRoundTrip() { .body("data.message", equalTo(harmlessValue)); // Step 4: Put back the original settings (this is what we're testing) - Response putResponse = given() - //.header("X-Dataverse-key", "") - .header("Content-Type", "application/json") - .body(originalSettings.toString()) - .when() - .put("/api/admin/settings"); + Response putResponse = UtilIT.setSettings(originalSettings.toString()); putResponse.then() .assertThat() @@ -125,20 +118,14 @@ void testSettingsRoundTrip() { .body("message.message", containsString("successfully updated")); // Step 5: Verify the harmless setting is gone (restored to original state) - Response verifyRestoredResponse = given() - //.header("X-Dataverse-key", "") - .when() - .get("/api/admin/settings" + harmlessSetting.toString()); + Response verifyRestoredResponse = UtilIT.getSetting(harmlessSetting); verifyRestoredResponse.then() .assertThat() .statusCode(NOT_FOUND.getStatusCode()); // Should not exist anymore // Step 6: Verify overall settings state matches original - Response finalGetResponse = given() - //.header("X-Dataverse-key", "") - .when() - .get("/api/admin/settings"); + Response finalGetResponse = UtilIT.getSettings(); finalGetResponse.then() .assertThat() @@ -165,9 +152,7 @@ void testGetAllSettingsWithLocalization() { UtilIT.setSetting(harmlessSetting, harmlessL10nValue, language); // When - Response getResponse = given() - .when() - .get("/api/admin/settings"); + Response getResponse = UtilIT.getSettings(); // Then getResponse.then() @@ -184,11 +169,7 @@ void testGetAllSettingsWithLocalization() { @Test void testPutAllSettingsWithEmptyJson() { // Test error handling for empty JSON - Response response = given() - .header("Content-Type", "application/json") - .body("{}") - .when() - 
.put("/api/admin/settings"); + Response response = UtilIT.setSettings("{}"); response.then() .assertThat() @@ -199,11 +180,7 @@ void testPutAllSettingsWithEmptyJson() { @Test void testPutAllSettingsWithInvalidSetting() { // Test error handling for empty JSON - Response response = given() - .header("Content-Type", "application/json") - .body("{\":Test1\": \"Foobar\", \":Test2\": \"Foobar\" }") - .when() - .put("/api/admin/settings"); + Response response = UtilIT.setSettings("{\":Test1\": \"Foobar\", \":Test2\": \"Foobar\" }"); response.then() .assertThat() @@ -242,7 +219,7 @@ public void testListAuthenticatedUsers() throws Exception { Response deleteSuperuser = UtilIT.deleteUser(superuserUsername); assertEquals(200, deleteSuperuser.getStatusCode()); - } +} @Test public void testFilterAuthenticatedUsersForbidden() throws Exception { diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index aa11ceb0d18..548627554d2 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -2507,6 +2507,11 @@ static Response deleteSetting(String settingKey) { return response; } + static Response getSettings() { + Response response = given().when().get("/api/admin/settings"); + return response; + } + static Response getSetting(SettingsServiceBean.Key settingKey) { Response response = given().when().get("/api/admin/settings/" + settingKey); return response; @@ -2535,6 +2540,15 @@ public static Response setSetting(String settingKey, String value) { return response; } + public static Response setSettings(String value) { + Response response = given() + .header("Content-Type", "application/json") + .body(value) + .when() + .put("/api/admin/settings"); + return response; + } + static Response getFeatureFlags() { Response response = given().when().get("/api/admin/featureFlags"); return response; From 95533c68486dc8fcb5451c23f7d935c8b268bb7e Mon Sep 
17 00:00:00 2001 From: Philip Durbin Date: Thu, 7 Aug 2025 13:10:27 -0400 Subject: [PATCH 55/69] the "not found" message now ends in a period #11639 Make the test comply with the change. --- src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index b273502e6ed..6d160c038ff 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -2401,7 +2401,7 @@ public void testFileChecksum() { Response getDefaultSetting = UtilIT.getSetting(SettingsServiceBean.Key.FileFixityChecksumAlgorithm); getDefaultSetting.prettyPrint(); getDefaultSetting.then().assertThat() - .body("message", equalTo("Setting :FileFixityChecksumAlgorithm not found")); + .body("message", equalTo("Setting :FileFixityChecksumAlgorithm not found.")); Response uploadMd5File = UtilIT.uploadRandomFile(dataset1PersistentId, apiToken); uploadMd5File.prettyPrint(); From 30f4603521154428a22ff55665f99239b284345d Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 7 Aug 2025 15:30:57 -0400 Subject: [PATCH 56/69] add to the enum many archive-related settings #11639 Attempts to set these settings were failing with a 400 and "The name of the setting is invalid." By adding them to the enum, they pass validation and are able to work again. These settings are really subsettings of :ArchiverSettings. (See installation/config.rst.) The exception is :BagGeneratorThreads, which is not a subsetting of :ArchiverSettings. Rather, it is a related setting used by multiple archivers. :DRSArchiverConfig was added to the enum. DRS is a system specific to Harvard which is why we don't document the setting in the guides, but we plan to mention it in the release notes. 
See also https://github.com/IQSS/dataverse.harvard.edu/issues/177 --- .../11639-db-opts-idempotency.md | 14 ++++++++ .../impl/DRSSubmitToArchiveCommand.java | 3 +- .../impl/DuraCloudSubmitToArchiveCommand.java | 9 ++++-- .../GoogleCloudSubmitToArchiveCommand.java | 6 ++-- .../impl/LocalSubmitToArchiveCommand.java | 3 +- .../impl/S3SubmitToArchiveCommand.java | 3 +- .../settings/SettingsServiceBean.java | 32 +++++++++++++++++++ .../iq/dataverse/util/bagit/BagGenerator.java | 3 +- .../edu/harvard/iq/dataverse/api/BagIT.java | 8 +++-- 9 files changed, 70 insertions(+), 11 deletions(-) diff --git a/doc/release-notes/11639-db-opts-idempotency.md b/doc/release-notes/11639-db-opts-idempotency.md index 2af82f4ffda..2a87a81eff8 100644 --- a/doc/release-notes/11639-db-opts-idempotency.md +++ b/doc/release-notes/11639-db-opts-idempotency.md @@ -19,3 +19,17 @@ As an administrator of a Dataverse instance, you can now make use of enhanced Bu It allows control over all Database Settings from a single source without risking an undefined state. Note: Despite the validation of setting names and languages, the content of any database setting is still not being validated when using the Settings Admin API! 
+ +### Updated Database Settings + +The following database settings are were added to the official list within the code (to remain valid with the settings cleanup mentioned above): + +- :BagGeneratorThreads +- :BagItLocalPath +- :BuiltinUsersKey - formerly BuiltinUsers.KEY +- :DRSArchiverConfig - a Harvard-specific setting +- :DuraCloudContext +- :DuraCloudHost +- :DuraCloudPort +- :GoogleCloudBucket +- :GoogleCloudProject diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index 594d4fe25ba..78e8454255b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -56,12 +56,13 @@ import com.auth0.jwt.JWT; import com.auth0.jwt.algorithms.Algorithm; import com.auth0.jwt.exceptions.JWTCreationException; +import static edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key.DRSArchiverConfig; @RequiredPermissions(Permission.PublishDataset) public class DRSSubmitToArchiveCommand extends S3SubmitToArchiveCommand implements Command { private static final Logger logger = Logger.getLogger(DRSSubmitToArchiveCommand.class.getName()); - private static final String DRS_CONFIG = ":DRSArchiverConfig"; + private static final String DRS_CONFIG = DRSArchiverConfig.toString(); private static final String ADMIN_METADATA = "admin_metadata"; private static final String S3_BUCKET_NAME = "s3_bucket_name"; private static final String S3_PATH = "s3_path"; diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java index 94f983f0c13..fe4a25091d7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java @@ -7,6 +7,9 @@ import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; +import static edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key.DuraCloudContext; +import static edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key.DuraCloudHost; +import static edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key.DuraCloudPort; import edu.harvard.iq.dataverse.workflow.step.Failure; import edu.harvard.iq.dataverse.workflow.step.WorkflowStepResult; @@ -36,9 +39,9 @@ public class DuraCloudSubmitToArchiveCommand extends AbstractSubmitToArchiveComm private static final Logger logger = Logger.getLogger(DuraCloudSubmitToArchiveCommand.class.getName()); private static final String DEFAULT_PORT = "443"; private static final String DEFAULT_CONTEXT = "durastore"; - private static final String DURACLOUD_PORT = ":DuraCloudPort"; - private static final String DURACLOUD_HOST = ":DuraCloudHost"; - private static final String DURACLOUD_CONTEXT = ":DuraCloudContext"; + private static final String DURACLOUD_PORT = DuraCloudPort.toString(); + private static final String DURACLOUD_HOST = DuraCloudHost.toString(); + private static final String DURACLOUD_CONTEXT = DuraCloudContext.toString(); public DuraCloudSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion version) { diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java index 7d749262b87..7dfb9f07e19 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java @@ -14,6 +14,8 
@@ import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.settings.JvmSettings; +import static edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key.GoogleCloudBucket; +import static edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key.GoogleCloudProject; import edu.harvard.iq.dataverse.workflow.step.Failure; import edu.harvard.iq.dataverse.workflow.step.WorkflowStepResult; import org.apache.commons.codec.binary.Hex; @@ -35,8 +37,8 @@ public class GoogleCloudSubmitToArchiveCommand extends AbstractSubmitToArchiveCommand { private static final Logger logger = Logger.getLogger(GoogleCloudSubmitToArchiveCommand.class.getName()); - private static final String GOOGLECLOUD_BUCKET = ":GoogleCloudBucket"; - private static final String GOOGLECLOUD_PROJECT = ":GoogleCloudProject"; + private static final String GOOGLECLOUD_BUCKET = GoogleCloudBucket.toString(); + private static final String GOOGLECLOUD_PROJECT = GoogleCloudProject.toString(); public GoogleCloudSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion version) { super(aRequest, version); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java index d2f061b6e70..462879f2ec9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java @@ -8,6 +8,7 @@ import edu.harvard.iq.dataverse.engine.command.Command; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; +import static edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key.BagItLocalPath; import edu.harvard.iq.dataverse.util.bagit.BagGenerator; import 
edu.harvard.iq.dataverse.util.bagit.OREMap; import edu.harvard.iq.dataverse.workflow.step.Failure; @@ -38,7 +39,7 @@ public LocalSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion ver public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token, Map requestedSettings) { logger.fine("In LocalCloudSubmitToArchive..."); - String localPath = requestedSettings.get(":BagItLocalPath"); + String localPath = requestedSettings.get(BagItLocalPath.toString()); String zipName = null; //Set a failure status that will be updated if we succeed diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java index 4f93e88de5e..65531d775c8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java @@ -7,6 +7,7 @@ import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; +import static edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key.S3ArchiverConfig; import edu.harvard.iq.dataverse.util.bagit.BagGenerator; import edu.harvard.iq.dataverse.util.bagit.OREMap; import edu.harvard.iq.dataverse.util.json.JsonUtil; @@ -64,7 +65,7 @@ public class S3SubmitToArchiveCommand extends AbstractSubmitToArchiveCommand { private ManagedExecutorService executorService; private static final Logger logger = Logger.getLogger(S3SubmitToArchiveCommand.class.getName()); - private static final String S3_CONFIG = ":S3ArchiverConfig"; + private static final String S3_CONFIG = S3ArchiverConfig.toString(); private static final Config config = ConfigProvider.getConfig(); protected S3AsyncClient s3 = null; diff --git 
a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index c555b059523..b97de383391 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -466,7 +466,39 @@ Whether Harvesting (OAI) service is enabled */ ArchiverClassName, + /** + * Custom settings for each archiver. See list below. + */ ArchiverSettings, + /** + * :ArchiverSettings used by DRSSubmitToArchiveCommand. DRS is a system + * specific to Harvard which is why we don't document it in the guides. + * See also https://github.com/IQSS/dataverse.harvard.edu/issues/177 + */ + DRSArchiverConfig, + /** + * :ArchiverSettings used by DuraCloudSubmitToArchiveCommand. + */ + DuraCloudPort, + DuraCloudHost, + DuraCloudContext, + /** + * :ArchiverSettings used by GoogleCloudSubmitToArchiveCommand. + */ + GoogleCloudBucket, + GoogleCloudProject, + /** + * :ArchiverSettings used by LocalSubmitToArchiveCommand. + */ + BagItLocalPath, + /** + * :ArchiverSettings used by S3SubmitToArchiveCommand. + */ + S3ArchiverConfig, + /** + * :ArchiverSettings used by multiple archive commands. + */ + BagGeneratorThreads, /** * A comma-separated list of roles for which new dataverses should inherit the * corresponding role assignments from the parent dataverse. 
Also affects diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java index f6b12d5f904..f24ebdb8655 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java @@ -75,6 +75,7 @@ import edu.harvard.iq.dataverse.DataFile.ChecksumType; import edu.harvard.iq.dataverse.pidproviders.PidUtil; import edu.harvard.iq.dataverse.settings.JvmSettings; +import static edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key.BagGeneratorThreads; import edu.harvard.iq.dataverse.util.json.JsonLDTerm; import java.util.Optional; @@ -120,7 +121,7 @@ public class BagGenerator { private boolean usetemp = false; private int numConnections = 8; - public static final String BAG_GENERATOR_THREADS = ":BagGeneratorThreads"; + public static final String BAG_GENERATOR_THREADS = BagGeneratorThreads.toString(); private OREMap oremap; diff --git a/src/test/java/edu/harvard/iq/dataverse/api/BagIT.java b/src/test/java/edu/harvard/iq/dataverse/api/BagIT.java index c80e321b228..16c44003f35 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/BagIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/BagIT.java @@ -2,6 +2,8 @@ import edu.harvard.iq.dataverse.engine.command.impl.LocalSubmitToArchiveCommand; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import static edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key.BagGeneratorThreads; +import static edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key.BagItLocalPath; import io.restassured.RestAssured; import static io.restassured.RestAssured.given; import io.restassured.response.Response; @@ -36,11 +38,13 @@ public static void setUpClass() { setArchiverClassName.then().assertThat() .statusCode(OK.getStatusCode()); - Response setArchiverSettings = UtilIT.setSetting(SettingsServiceBean.Key.ArchiverSettings, ":BagItLocalPath, 
:BagGeneratorThreads"); + // BagGeneratorThreads isn't used. Consider setting it or removing it. + Response setArchiverSettings = UtilIT.setSetting(SettingsServiceBean.Key.ArchiverSettings, + String.join(", ", BagItLocalPath.toString(), BagGeneratorThreads.toString())); setArchiverSettings.then().assertThat() .statusCode(OK.getStatusCode()); - Response setBagItLocalPath = UtilIT.setSetting(":BagItLocalPath", bagitExportDir); + Response setBagItLocalPath = UtilIT.setSetting(BagItLocalPath.toString(), bagitExportDir); setBagItLocalPath.then().assertThat() .statusCode(OK.getStatusCode()); From 4d72fcbf32b1e014913d87fed9b9e33c4f228dca Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 8 Aug 2025 10:19:08 -0400 Subject: [PATCH 57/69] add basic test for :TabularIngestSizeLimit #11639 --- .../edu/harvard/iq/dataverse/api/FilesIT.java | 80 ++++++++++++++++++- 1 file changed, 79 insertions(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java index 38d89f782dd..7848314a834 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java @@ -60,6 +60,8 @@ public static void setUpClass() { Response removePublicInstall = UtilIT.deleteSetting(SettingsServiceBean.Key.PublicInstall); removePublicInstall.then().assertThat().statusCode(200); + Response removeLimit = UtilIT.deleteSetting(SettingsServiceBean.Key.TabularIngestSizeLimit); + removeLimit.then().assertThat().statusCode(OK.getStatusCode()); } @AfterAll @@ -1208,7 +1210,83 @@ public void test_AddFileBadUploadFormat() { } } - + + @Test + public void testIngestSizeLimits() throws InterruptedException, IOException { + Response createUser = UtilIT.createRandomUser(); + createUser.then().assertThat().statusCode(OK.getStatusCode()); + String username = UtilIT.getUsernameFromResponse(createUser); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + Response 
makeSuperUser = UtilIT.setSuperuserStatus(username, true); + makeSuperUser.then().assertThat().statusCode(OK.getStatusCode()); + + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.prettyPrint(); + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + Response createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken); + createDatasetResponse.prettyPrint(); + Integer datasetId = JsonPath.from(createDatasetResponse.body().asString()).getInt("data.id"); + + String tinyCsvOnly = """ +{ + "csv": "50" +} +"""; + + Response setLimit = UtilIT.setSetting(SettingsServiceBean.Key.TabularIngestSizeLimit, tinyCsvOnly); + setLimit.then().assertThat().statusCode(OK.getStatusCode()); + + Path pathToDataFile = Paths.get(java.nio.file.Files.createTempDirectory(null) + File.separator + "data.csv"); + String contentOfCsv = "" + + "name,pounds,species,treats\n" + + "Midnight,15,dog,milkbones\n" + + "Tiger,17,cat,cat grass\n" + + "Panther,21,cat,cat nip\n"; + java.nio.file.Files.write(pathToDataFile, contentOfCsv.getBytes()); + + Response uploadFile = UtilIT.uploadFileViaNative(datasetId.toString(), pathToDataFile.toString(), apiToken); + uploadFile.prettyPrint(); + uploadFile.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.files[0].label", equalTo("data.csv")); + + String fileId1 = JsonPath.from(uploadFile.body().asString()).getString("data.files[0].dataFile.id"); + + Response getFileDataTablesForNonTabularFileResponse = UtilIT.getFileDataTables(fileId1, apiToken); + getFileDataTablesForNonTabularFileResponse.prettyPrint(); + getFileDataTablesForNonTabularFileResponse.then().assertThat() + .statusCode(BAD_REQUEST.getStatusCode()) + .body("message", equalTo(BundleUtil.getStringFromBundle("files.api.only.tabular.supported"))); + + String largerCsv = """ +{ + "csv": "123456" +} +"""; + + setLimit = UtilIT.setSetting(SettingsServiceBean.Key.TabularIngestSizeLimit, 
largerCsv); + setLimit.then().assertThat().statusCode(OK.getStatusCode()); + + uploadFile = UtilIT.uploadFileViaNative(datasetId.toString(), pathToDataFile.toString(), apiToken); + uploadFile.prettyPrint(); + uploadFile.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.files[0].label", equalTo("data-1.csv")); + + assertTrue(UtilIT.sleepForLock(datasetId.longValue(), "Ingest", apiToken, UtilIT.MAXIMUM_INGEST_LOCK_DURATION), "Failed test if Ingest Lock exceeds max duration " + pathToDataFile); + + String fileId2 = JsonPath.from(uploadFile.body().asString()).getString("data.files[0].dataFile.id"); + + Response getFileDataTablesForTabularFileResponse = UtilIT.getFileDataTables(fileId2, apiToken); + getFileDataTablesForTabularFileResponse.prettyPrint(); + getFileDataTablesForTabularFileResponse.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data[0].varQuantity", equalTo(4)); + + Response removeLimit = UtilIT.deleteSetting(SettingsServiceBean.Key.TabularIngestSizeLimit); + removeLimit.then().assertThat().statusCode(OK.getStatusCode()); + } + @Test public void testUningestFileViaApi() throws InterruptedException { Response createUser = UtilIT.createRandomUser(); From 159b189b417d38be4930dcbb6662e081c3fd8f0c Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 8 Aug 2025 12:19:47 -0400 Subject: [PATCH 58/69] more ingest size limit tests #11639 --- .../edu/harvard/iq/dataverse/api/FilesIT.java | 113 +++++++++++++++++- 1 file changed, 107 insertions(+), 6 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java index 7848314a834..c0b8543ff09 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java @@ -67,6 +67,7 @@ public static void setUpClass() { @AfterAll public static void tearDownClass() { UtilIT.deleteSetting(SettingsServiceBean.Key.PublicInstall); + 
UtilIT.deleteSetting(SettingsServiceBean.Key.TabularIngestSizeLimit); } /** @@ -1252,9 +1253,9 @@ public void testIngestSizeLimits() throws InterruptedException, IOException { String fileId1 = JsonPath.from(uploadFile.body().asString()).getString("data.files[0].dataFile.id"); - Response getFileDataTablesForNonTabularFileResponse = UtilIT.getFileDataTables(fileId1, apiToken); - getFileDataTablesForNonTabularFileResponse.prettyPrint(); - getFileDataTablesForNonTabularFileResponse.then().assertThat() + Response getTabularFails = UtilIT.getFileDataTables(fileId1, apiToken); + getTabularFails.prettyPrint(); + getTabularFails.then().assertThat() .statusCode(BAD_REQUEST.getStatusCode()) .body("message", equalTo(BundleUtil.getStringFromBundle("files.api.only.tabular.supported"))); @@ -1277,9 +1278,109 @@ public void testIngestSizeLimits() throws InterruptedException, IOException { String fileId2 = JsonPath.from(uploadFile.body().asString()).getString("data.files[0].dataFile.id"); - Response getFileDataTablesForTabularFileResponse = UtilIT.getFileDataTables(fileId2, apiToken); - getFileDataTablesForTabularFileResponse.prettyPrint(); - getFileDataTablesForTabularFileResponse.then().assertThat() + Response getTabularWorks = UtilIT.getFileDataTables(fileId2, apiToken); + getTabularWorks.prettyPrint(); + getTabularWorks.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data[0].varQuantity", equalTo(4)); + + String tinyDefaultSize = """ +{ + "default": "50" +} +"""; + + setLimit = UtilIT.setSetting(SettingsServiceBean.Key.TabularIngestSizeLimit, tinyDefaultSize); + setLimit.then().assertThat().statusCode(OK.getStatusCode()); + + uploadFile = UtilIT.uploadFileViaNative(datasetId.toString(), pathToDataFile.toString(), apiToken); + uploadFile.prettyPrint(); + uploadFile.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.files[0].label", equalTo("data-2.csv")); + + String fileId3 = 
JsonPath.from(uploadFile.body().asString()).getString("data.files[0].dataFile.id"); + + getTabularFails = UtilIT.getFileDataTables(fileId3, apiToken); + getTabularFails.prettyPrint(); + getTabularFails.then().assertThat() + .statusCode(BAD_REQUEST.getStatusCode()) + .body("message", equalTo(BundleUtil.getStringFromBundle("files.api.only.tabular.supported"))); + + // The behavior of `"default": "-2"` is not documented in the guides + // but it acts like `"default": "0"` which disables ingest. + String unexpectedNegativeDefault = """ +{ + "default": "-2" +} +"""; + + setLimit = UtilIT.setSetting(SettingsServiceBean.Key.TabularIngestSizeLimit, unexpectedNegativeDefault); + setLimit.then().assertThat().statusCode(OK.getStatusCode()); + + uploadFile = UtilIT.uploadFileViaNative(datasetId.toString(), pathToDataFile.toString(), apiToken); + uploadFile.prettyPrint(); + uploadFile.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.files[0].label", equalTo("data-3.csv")); + + String fileId4 = JsonPath.from(uploadFile.body().asString()).getString("data.files[0].dataFile.id"); + + getTabularFails = UtilIT.getFileDataTables(fileId4, apiToken); + getTabularFails.prettyPrint(); + getTabularFails.then().assertThat() + .statusCode(BAD_REQUEST.getStatusCode()) + .body("message", equalTo(BundleUtil.getStringFromBundle("files.api.only.tabular.supported"))); + + // As the guides say, you MUST provide a string, not a JSON number. + // That is, `"123"` in quotes rather than `123` with no quotes. + // If you provide a number (no quotes) rather than a string, + // all ingest will be disabled and you'll see an error in server.log + // about how the system is misconfigured. 
+ String invalidNonString = """ +{ + "default": 987654321 +} +"""; + + setLimit = UtilIT.setSetting(SettingsServiceBean.Key.TabularIngestSizeLimit, invalidNonString); + setLimit.then().assertThat().statusCode(OK.getStatusCode()); + + uploadFile = UtilIT.uploadFileViaNative(datasetId.toString(), pathToDataFile.toString(), apiToken); + uploadFile.prettyPrint(); + uploadFile.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.files[0].label", equalTo("data-4.csv")); + + String fileId5 = JsonPath.from(uploadFile.body().asString()).getString("data.files[0].dataFile.id"); + + getTabularFails = UtilIT.getFileDataTables(fileId5, apiToken); + getTabularFails.prettyPrint(); + getTabularFails.then().assertThat() + .statusCode(BAD_REQUEST.getStatusCode()) + .body("message", equalTo(BundleUtil.getStringFromBundle("files.api.only.tabular.supported"))); + + String defaultDisabledAndLargeCsvLimit = """ +{ + "default": "0", + "csv": "123456" +} +"""; + + setLimit = UtilIT.setSetting(SettingsServiceBean.Key.TabularIngestSizeLimit, defaultDisabledAndLargeCsvLimit); + setLimit.then().assertThat().statusCode(OK.getStatusCode()); + + uploadFile = UtilIT.uploadFileViaNative(datasetId.toString(), pathToDataFile.toString(), apiToken); + uploadFile.prettyPrint(); + uploadFile.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.files[0].label", equalTo("data-5.csv")); + + String fileId6 = JsonPath.from(uploadFile.body().asString()).getString("data.files[0].dataFile.id"); + + getTabularWorks = UtilIT.getFileDataTables(fileId2, apiToken); + getTabularWorks.prettyPrint(); + getTabularWorks.then().assertThat() .statusCode(OK.getStatusCode()) .body("data[0].varQuantity", equalTo(4)); From 839dd79fa2a4d390c009ac4e02ffbf5f043419e3 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 8 Aug 2025 12:20:11 -0400 Subject: [PATCH 59/69] improve docs based on ingest size limit testing #11639 --- .../source/installation/config.rst | 34 +++++++++++-------- 1 file changed, 
19 insertions(+), 15 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 7e4d85b360f..f33c04eada8 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -4380,46 +4380,47 @@ In the UI, users trying to download a zip file larger than the Dataverse install +++++++++++++++++++++++ Threshold in bytes for limiting whether or not "ingest" is attempted for an uploaded tabular file (which can be resource intensive). -For more on the ingest features, see :doc:`/user/tabulardataingest/index` in the User Guide. +For more on the ingest feature, see :doc:`/user/tabulardataingest/index` in the User Guide. -For example, with the below in place, files greater than 2 GB in size will not go through the ingest process: +There are two ways to specify ingest size limits. You can set a global limit for all file types or you can use a JSON file for more granularity. We'll cover the global limit first. + +With the following value in place (again, expressed in bytes), files greater than 2 GB in size will not go through the ingest process: ``curl -X PUT -d 2000000000 http://localhost:8080/api/admin/settings/:TabularIngestSizeLimit`` You can set this value to ``0`` to prevent files from being ingested at all. -The default is ``-1``, meaning no file size limit is applied. -Using a JSON-based setting, you can override this global setting on a per-format basis for the following formats (case-insensitive): +Out of the box, the ``:TabularIngestSizeLimit`` setting is absent, which results in ingest being attempted no matter how large the file is. You can specify this "no size limit" default explicitly with the value ``-1``. 
+ +Using a JSON-based setting, you can set a global default and per-format limits for the following formats: +- CSV - DTA - POR -- SAV - Rdata -- CSV +- SAV - XLSX (In previous releases of Dataverse, a colon-separated form was used to specify per-format limits, such as ``:TabularIngestSizeLimit:Rdata``, but this is no longer supported. Now JSON is used.) -The JSON follows this form, all fields optional: +The expected JSON is an object with key/value pairs like the following. Format names are case-insensitive, and all fields are optional. The size limits must be strings with double quotes around them (e.g. ``"10"``) rather than numbers (e.g. ``10``). .. code:: json { "default": "-1", - "formatX": "0", - "formatY": "10", - "formatZ": "100" + "csv": "0", + "dta": "10", + "por": "100" } -Whatever JSON you send will overwrite existing values. If you have any current settings, you can use the following command to see them in the proper format (and then add the new settings you want): +Whatever JSON you send will overwrite existing values. If you have any existing ``:TabularIngestSizeLimit`` settings, you can use the following command to see them in the expected input format above (and then add the new settings you want): ``curl http://localhost:8080/api/admin/settings/:TabularIngestSizeLimit | jq -r '.data.message'`` -The ``default`` key is optional and can be used to give limits to formats that are not specified in the JSON. If you omit the ``default`` key or set it to ``-1``, no limits are applied to formats not specified in the JSON. +The ``default`` key is optional and can be used to give limits to formats that are not specified in the JSON. If you omit the ``default`` key or set it to ``"-1"``, no limits are applied to formats not specified in the JSON. If you set it to ``"0"``, ingest will be disabled (but you can override this per-format). -Add a format name (DTA, POR, etc., as listed above) to change the limit for that particular format. 
- -Any size limits must be provided as string literals (in quotes), not number literals! +Add a format name (``csv``, ``dta``, etc., as listed above) to change the limit for that particular format. Examples: @@ -4429,6 +4430,9 @@ Examples: 2. If you want your Dataverse installation to not attempt to ingest XLSX files at all and apply a global limit of 512 MiB, use this setting: ``curl -X PUT -d '{"default":"536870912", "XSLX":"0"}' http://localhost:8080/api/admin/settings/:TabularIngestSizeLimit`` +3. If you want your Dataverse installation to not attempt to ingest files at all except for CSV files that are 256 MiB or smaller, use this setting: + + ``curl -X PUT -d '{"default":"0", "CSV":"268435456"}' http://localhost:8080/api/admin/settings/:TabularIngestSizeLimit`` :ZipUploadFilesLimit ++++++++++++++++++++ From 2450ecd0345377619de25f0990f564b8b52e23b5 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 8 Aug 2025 16:15:39 -0400 Subject: [PATCH 60/69] add more settings to enum to pass validation #11639 - :BagItHandlerEnabled - :BagValidatorJobPoolSize - :BagValidatorJobWaitInterval - :BagValidatorMaxErrors - :CreateDataFilesMaxErrorsToDisplay - :FileCategories - :LDNAnnounceRequiredFields - :LDNTarget These settings were originally added (not to the enum) in the following PRs: - https://github.com/IQSS/dataverse/pull/8478 - https://github.com/IQSS/dataverse/pull/8775 - https://github.com/IQSS/dataverse/pull/8677 --- doc/release-notes/11639-db-opts-idempotency.md | 8 ++++++++ .../iq/dataverse/DataFileCategoryServiceBean.java | 3 ++- .../harvard/iq/dataverse/EditDataFilesPageHelper.java | 3 ++- .../iq/dataverse/settings/SettingsServiceBean.java | 11 +++++++++++ .../harvard/iq/dataverse/util/bagit/BagValidator.java | 9 ++++++--- .../dataverse/util/file/BagItFileHandlerFactory.java | 3 ++- .../internalspi/LDNAnnounceDatasetVersionStep.java | 6 ++++-- 7 files changed, 35 insertions(+), 8 deletions(-) diff --git a/doc/release-notes/11639-db-opts-idempotency.md 
b/doc/release-notes/11639-db-opts-idempotency.md index 2a87a81eff8..55ed07c998c 100644 --- a/doc/release-notes/11639-db-opts-idempotency.md +++ b/doc/release-notes/11639-db-opts-idempotency.md @@ -25,11 +25,19 @@ Note: Despite the validation of setting names and languages, the content of any The following database settings are were added to the official list within the code (to remain valid with the settings cleanup mentioned above): - :BagGeneratorThreads +- :BagItHandlerEnabled - :BagItLocalPath +- :BagValidatorJobPoolSize +- :BagValidatorJobWaitInterval +- :BagValidatorMaxErrors - :BuiltinUsersKey - formerly BuiltinUsers.KEY +- :CreateDataFilesMaxErrorsToDisplay - :DRSArchiverConfig - a Harvard-specific setting - :DuraCloudContext - :DuraCloudHost - :DuraCloudPort +- :FileCategories - :GoogleCloudBucket - :GoogleCloudProject +- :LDNAnnounceRequiredFields +- :LDNTarget diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFileCategoryServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataFileCategoryServiceBean.java index 29dcb22c3ec..d29b5670952 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFileCategoryServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFileCategoryServiceBean.java @@ -1,6 +1,7 @@ package edu.harvard.iq.dataverse; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import static edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key.FileCategories; import edu.harvard.iq.dataverse.util.BundleUtil; import jakarta.ejb.EJB; @@ -21,7 +22,7 @@ @Stateless public class DataFileCategoryServiceBean { - public static final String FILE_CATEGORIES_KEY = ":FileCategories"; + public static final String FILE_CATEGORIES_KEY = FileCategories.toString(); @EJB private SettingsServiceBean settingsService; diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDataFilesPageHelper.java b/src/main/java/edu/harvard/iq/dataverse/EditDataFilesPageHelper.java index 883baeedef4..7b5c3aa0857 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/EditDataFilesPageHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/EditDataFilesPageHelper.java @@ -1,5 +1,6 @@ package edu.harvard.iq.dataverse; +import static edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key.CreateDataFilesMaxErrorsToDisplay; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.file.CreateDataFileResult; import org.apache.commons.text.StringEscapeUtils; @@ -18,7 +19,7 @@ @Stateless public class EditDataFilesPageHelper { - public static final String MAX_ERRORS_TO_DISPLAY_SETTING = ":CreateDataFilesMaxErrorsToDisplay"; + public static final String MAX_ERRORS_TO_DISPLAY_SETTING = CreateDataFilesMaxErrorsToDisplay.toString(); public static final Integer MAX_ERRORS_TO_DISPLAY = 5; @Inject diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index b97de383391..7b6580068c0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -499,6 +499,13 @@ Whether Harvesting (OAI) service is enabled * :ArchiverSettings used by multiple archive commands. */ BagGeneratorThreads, + /** + * Various BagIt settings. + */ + BagValidatorJobPoolSize, + BagValidatorMaxErrors, + BagValidatorJobWaitInterval, + BagItHandlerEnabled, /** * A comma-separated list of roles for which new dataverses should inherit the * corresponding role assignments from the parent dataverse. Also affects @@ -675,6 +682,8 @@ Whether Harvesting (OAI) service is enabled * LDN Inbox Allowed Hosts - a comma separated list of IP addresses allowed to submit messages to the inbox */ LDNMessageHosts, + LDNAnnounceRequiredFields, + LDNTarget, /* * Allow a custom JavaScript to control values of specific fields. 
@@ -731,6 +740,8 @@ Whether Harvesting (OAI) service is enabled * files *with* the variable names line up top. */ StoreIngestedTabularFilesWithVarHeaders, + FileCategories, + CreateDataFilesMaxErrorsToDisplay, ContactFeedbackMessageSizeLimit, //Experimental setting to allow connecting to a GET external search service expecting a GET request with query parameter mirroring the search API query parameters (without search_service) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagValidator.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagValidator.java index a9052bf4c80..85a2f3f09ff 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagValidator.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagValidator.java @@ -1,5 +1,8 @@ package edu.harvard.iq.dataverse.util.bagit; +import static edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key.BagValidatorJobPoolSize; +import static edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key.BagValidatorJobWaitInterval; +import static edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key.BagValidatorMaxErrors; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.bagit.BagValidation.FileValidationResult; import edu.harvard.iq.dataverse.util.bagit.ManifestReader.ManifestChecksum; @@ -27,9 +30,9 @@ public class BagValidator { private static final Logger logger = Logger.getLogger(BagValidator.class.getCanonicalName()); public static enum BagValidatorSettings { - JOB_POOL_SIZE(":BagValidatorJobPoolSize", 4), - MAX_ERRORS(":BagValidatorMaxErrors", 5), - JOB_WAIT_INTERVAL(":BagValidatorJobWaitInterval", 10); + JOB_POOL_SIZE(BagValidatorJobPoolSize.toString(), 4), + MAX_ERRORS(BagValidatorMaxErrors.toString(), 5), + JOB_WAIT_INTERVAL(BagValidatorJobWaitInterval.toString(), 10); private String settingsKey; private Integer defaultValue; diff --git a/src/main/java/edu/harvard/iq/dataverse/util/file/BagItFileHandlerFactory.java 
b/src/main/java/edu/harvard/iq/dataverse/util/file/BagItFileHandlerFactory.java index 4b0263030dc..1d1b6b5b7aa 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/file/BagItFileHandlerFactory.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/file/BagItFileHandlerFactory.java @@ -1,6 +1,7 @@ package edu.harvard.iq.dataverse.util.file; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import static edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key.BagItHandlerEnabled; import edu.harvard.iq.dataverse.util.bagit.BagValidator; import edu.harvard.iq.dataverse.util.bagit.BagValidator.BagValidatorSettings; import edu.harvard.iq.dataverse.util.bagit.ManifestReader; @@ -25,7 +26,7 @@ public class BagItFileHandlerFactory implements Serializable { private static final Logger logger = Logger.getLogger(BagItFileHandlerFactory.class.getCanonicalName()); - public static final String BAGIT_HANDLER_ENABLED_SETTING = ":BagItHandlerEnabled"; + public static final String BAGIT_HANDLER_ENABLED_SETTING = BagItHandlerEnabled.toString(); @EJB private SettingsServiceBean settingsService; diff --git a/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/LDNAnnounceDatasetVersionStep.java b/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/LDNAnnounceDatasetVersionStep.java index 124eea801d9..d96c4db1305 100644 --- a/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/LDNAnnounceDatasetVersionStep.java +++ b/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/LDNAnnounceDatasetVersionStep.java @@ -5,6 +5,8 @@ import edu.harvard.iq.dataverse.DatasetFieldType; import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.branding.BrandingUtil; +import static edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key.LDNAnnounceRequiredFields; +import static edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key.LDNTarget; import edu.harvard.iq.dataverse.util.SystemConfig; import 
edu.harvard.iq.dataverse.util.bagit.OREMap; import edu.harvard.iq.dataverse.util.json.JsonLDTerm; @@ -52,8 +54,8 @@ public class LDNAnnounceDatasetVersionStep implements WorkflowStep { private static final Logger logger = Logger.getLogger(LDNAnnounceDatasetVersionStep.class.getName()); - private static final String REQUIRED_FIELDS = ":LDNAnnounceRequiredFields"; - private static final String LDN_TARGET = ":LDNTarget"; + private static final String REQUIRED_FIELDS = LDNAnnounceRequiredFields.toString(); + private static final String LDN_TARGET = LDNTarget.toString(); private static final String RELATED_PUBLICATION = "publication"; JsonLDTerm publicationIDType = null; From 692be46ed6f68d9e8a00e8af7028324bda4b3fec Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 8 Aug 2025 16:58:51 -0400 Subject: [PATCH 61/69] add TODOs for WorkflowsAdmin#IP_WHITELIST_KEY db setting #11639 --- .../harvard/iq/dataverse/api/Workflows.java | 1 + .../iq/dataverse/api/WorkflowsAdmin.java | 1 + .../edu/harvard/iq/dataverse/api/UtilIT.java | 19 ++++++ .../harvard/iq/dataverse/api/WorkflowsIT.java | 62 +++++++++++++++++++ tests/integration-tests.txt | 2 +- 5 files changed, 84 insertions(+), 1 deletion(-) create mode 100644 src/test/java/edu/harvard/iq/dataverse/api/WorkflowsIT.java diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Workflows.java b/src/main/java/edu/harvard/iq/dataverse/api/Workflows.java index 4eadcedf71a..f4bac8e58cd 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Workflows.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Workflows.java @@ -60,6 +60,7 @@ private boolean isAllowed(IpAddress addr) { private void updateWhitelist() { IpGroup updatedList = new IpGroup(); + // TODO: Investigate if this still works now that validateSettingName is in place. 
String[] ips = settingsSvc.get(WorkflowsAdmin.IP_WHITELIST_KEY, "127.0.0.1;::1").split(";"); Arrays.stream(ips) .forEach( str -> updatedList.add( diff --git a/src/main/java/edu/harvard/iq/dataverse/api/WorkflowsAdmin.java b/src/main/java/edu/harvard/iq/dataverse/api/WorkflowsAdmin.java index 15478aacff7..bf48ee660ad 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/WorkflowsAdmin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/WorkflowsAdmin.java @@ -31,6 +31,7 @@ @Path("admin/workflows") public class WorkflowsAdmin extends AbstractApiBean { + // TODO: To comply with validateSettingName, prepend with a colon? public static final String IP_WHITELIST_KEY="WorkflowsAdmin#IP_WHITELIST_KEY"; @EJB diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index 548627554d2..b836d62492c 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -4641,6 +4641,25 @@ static Response updateDatasetTypeAvailableLicense(String idOrName, String jsonAr .put("/api/datasets/datasetTypes/" + idOrName + "/licenses"); } + public static Response getWorkflowIpWhitelist() { + Response response = given() + .get("/api/admin/workflows/ip-whitelist"); + return response; + } + + public static Response setWorkflowIpWhitelist(String iPWhitelist) { + Response response = given() + .body(iPWhitelist) + .put("/api/admin/workflows/ip-whitelist"); + return response; + } + + public static Response deleteWorkflowIpWhitelist() { + Response response = given() + .delete("/api/admin/workflows/ip-whitelist"); + return response; + } + static Response registerOidcUser(String jsonIn, String bearerToken) { return given() .header(HttpHeaders.AUTHORIZATION, bearerToken) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/WorkflowsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/WorkflowsIT.java new file mode 100644 index 00000000000..9e8c36b9e18 --- 
/dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/api/WorkflowsIT.java @@ -0,0 +1,62 @@ +package edu.harvard.iq.dataverse.api; + +import static edu.harvard.iq.dataverse.api.WorkflowsAdmin.IP_WHITELIST_KEY; +import io.restassured.RestAssured; +import static io.restassured.RestAssured.given; +import io.restassured.response.Response; +import static jakarta.ws.rs.core.Response.Status.BAD_REQUEST; +import static jakarta.ws.rs.core.Response.Status.INTERNAL_SERVER_ERROR; +import static jakarta.ws.rs.core.Response.Status.OK; +import static org.hamcrest.CoreMatchers.equalTo; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +public class WorkflowsIT { + + @BeforeAll + public static void setUpClass() { + RestAssured.baseURI = UtilIT.getRestAssuredBaseUri(); + + UtilIT.deleteWorkflowIpWhitelist(); + } + + @AfterAll + public static void afterClass() { + } + + @Test + public void testIpWhitelist() { + Response response = null; + + response = UtilIT.getWorkflowIpWhitelist(); + response.prettyPrint(); + response.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.message", equalTo("127.0.0.1;::1")); + + response = UtilIT.setWorkflowIpWhitelist("junk"); + response.prettyPrint(); + response.then().assertThat() + .statusCode(BAD_REQUEST.getStatusCode()) + .body("message", equalTo("Request contains illegal IP addresses.")); + + response = UtilIT.setWorkflowIpWhitelist("192.168.0.1;192.168.0.2"); + response.prettyPrint(); + response.then().assertThat() + .statusCode(OK.getStatusCode()); + + response = UtilIT.getWorkflowIpWhitelist(); + response.prettyPrint(); + response.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.message", equalTo("192.168.0.1;192.168.0.2")); + + // FIXME fix 500 error + response = given().when().get("/api/admin/settings/" + IP_WHITELIST_KEY); + response.prettyPrint(); + response.then().assertThat() + .statusCode(INTERNAL_SERVER_ERROR.getStatusCode()); + } 
+ +} diff --git a/tests/integration-tests.txt b/tests/integration-tests.txt index 2a15ac3ce74..33d137c893a 100644 --- a/tests/integration-tests.txt +++ b/tests/integration-tests.txt @@ -1 +1 @@ -DataversesIT,DatasetsIT,SwordIT,AdminIT,BuiltinUsersIT,UsersIT,UtilIT,ConfirmEmailIT,FileMetadataIT,FilesIT,SearchIT,InReviewWorkflowIT,HarvestingServerIT,HarvestingClientsIT,MoveIT,MakeDataCountApiIT,FileTypeDetectionIT,EditDDIIT,ExternalToolsIT,AccessIT,DuplicateFilesIT,DownloadFilesIT,LinkIT,DeleteUsersIT,DeactivateUsersIT,AuxiliaryFilesIT,InvalidCharactersIT,LicensesIT,NotificationsIT,BagIT,MetadataBlocksIT,NetcdfIT,SignpostingIT,FitsIT,LogoutIT,DataRetrieverApiIT,ProvIT,S3AccessIT,OpenApiIT,InfoIT,DatasetFieldsIT,SavedSearchIT,DatasetTypesIT,DataverseFeaturedItemsIT,SendFeedbackApiIT,CustomizationIT,JsonLDExportIT +DataversesIT,DatasetsIT,SwordIT,AdminIT,BuiltinUsersIT,UsersIT,UtilIT,ConfirmEmailIT,FileMetadataIT,FilesIT,SearchIT,InReviewWorkflowIT,HarvestingServerIT,HarvestingClientsIT,MoveIT,MakeDataCountApiIT,FileTypeDetectionIT,EditDDIIT,ExternalToolsIT,AccessIT,DuplicateFilesIT,DownloadFilesIT,LinkIT,DeleteUsersIT,DeactivateUsersIT,AuxiliaryFilesIT,InvalidCharactersIT,LicensesIT,NotificationsIT,BagIT,MetadataBlocksIT,NetcdfIT,SignpostingIT,FitsIT,LogoutIT,DataRetrieverApiIT,ProvIT,S3AccessIT,OpenApiIT,InfoIT,DatasetFieldsIT,SavedSearchIT,DatasetTypesIT,DataverseFeaturedItemsIT,SendFeedbackApiIT,CustomizationIT,JsonLDExportIT,WorkflowsIT From a43da296892718fd38ded56ff6f4cfd976a0757b Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 15 Sep 2025 14:15:56 +0200 Subject: [PATCH 62/69] style(settings): rename `WorkflowsAdmin#IP_WHITELIST_KEY` to `:WorkflowsAdminIpWhitelist` #11639 Updated `SettingsServiceBean`, `WorkflowsAdmin API`, and Flyway migration script to reflect the new name, enforcing alignment with the standardized naming pattern. Adjusted default value logic and separator constants. 
--- .../11639-db-opts-idempotency.md | 42 ++++++++++--------- .../harvard/iq/dataverse/api/Workflows.java | 5 ++- .../iq/dataverse/api/WorkflowsAdmin.java | 19 +++++---- .../settings/SettingsServiceBean.java | 5 +++ src/main/resources/db/migration/V6.8.0.1.sql | 11 ++++- .../harvard/iq/dataverse/api/WorkflowsIT.java | 16 +++---- 6 files changed, 59 insertions(+), 39 deletions(-) diff --git a/doc/release-notes/11639-db-opts-idempotency.md b/doc/release-notes/11639-db-opts-idempotency.md index 55ed07c998c..f73cbdebf83 100644 --- a/doc/release-notes/11639-db-opts-idempotency.md +++ b/doc/release-notes/11639-db-opts-idempotency.md @@ -5,9 +5,10 @@ With this release, we remove some legacy specialties around Database Settings an Most important changes: 1. Setting `BuiltinUsers.KEY` was renamed to `:BuiltinUsersKey`, aligned with our general naming pattern for options. -2. Setting `:TabularIngestSizeLimit` no longer uses suffixes for formats and becomes a JSON-based setting instead. -3. If set, both settings will be migrated to their new form automatically for you (Flyway migration). -4. You can no longer (accidentally) create or use arbitrary setting names or languages. +2. Setting `WorkflowsAdmin#IP_WHITELIST_KEY` was renamed to `:WorkflowsAdminIpWhitelist`, aligned with our general naming pattern for options. +3. Setting `:TabularIngestSizeLimit` no longer uses suffixes for formats and becomes a JSON-based setting instead. +4. If set, all three settings will be migrated to their new form automatically for you (Flyway migration). +5. You can no longer (accidentally) create or use arbitrary setting names or languages. All Admin API endpoints for settings now validate setting names and languages for existence and compliance. 
As an administrator of a Dataverse instance, you can now make use of enhanced Bulk Operations on the Settings Admin API: @@ -24,20 +25,21 @@ Note: Despite the validation of setting names and languages, the content of any The following database settings are were added to the official list within the code (to remain valid with the settings cleanup mentioned above): -- :BagGeneratorThreads -- :BagItHandlerEnabled -- :BagItLocalPath -- :BagValidatorJobPoolSize -- :BagValidatorJobWaitInterval -- :BagValidatorMaxErrors -- :BuiltinUsersKey - formerly BuiltinUsers.KEY -- :CreateDataFilesMaxErrorsToDisplay -- :DRSArchiverConfig - a Harvard-specific setting -- :DuraCloudContext -- :DuraCloudHost -- :DuraCloudPort -- :FileCategories -- :GoogleCloudBucket -- :GoogleCloudProject -- :LDNAnnounceRequiredFields -- :LDNTarget +- `:BagGeneratorThreads` +- `:BagItHandlerEnabled` +- `:BagItLocalPath` +- `:BagValidatorJobPoolSize` +- `:BagValidatorJobWaitInterval` +- `:BagValidatorMaxErrors` +- `:BuiltinUsersKey` - formerly `BuiltinUsers.KEY` +- `:CreateDataFilesMaxErrorsToDisplay` +- `:DRSArchiverConfig` - a Harvard-specific setting +- `:DuraCloudContext` +- `:DuraCloudHost` +- `:DuraCloudPort` +- `:FileCategories` +- `:GoogleCloudBucket` +- `:GoogleCloudProject` +- `:LDNAnnounceRequiredFields` +- `:LDNTarget` +- `:WorkflowsAdminIpWhitelist` - formerly `WorkflowsAdmin#IP_WHITELIST_KEY` diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Workflows.java b/src/main/java/edu/harvard/iq/dataverse/api/Workflows.java index f4bac8e58cd..7bd19b3a403 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Workflows.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Workflows.java @@ -3,6 +3,8 @@ import edu.harvard.iq.dataverse.authorization.groups.impl.ipaddress.IpGroup; import edu.harvard.iq.dataverse.authorization.groups.impl.ipaddress.ip.IpAddress; import edu.harvard.iq.dataverse.authorization.groups.impl.ipaddress.ip.IpAddressRange; +import 
edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key; import edu.harvard.iq.dataverse.workflow.PendingWorkflowInvocation; import edu.harvard.iq.dataverse.workflow.WorkflowServiceBean; import java.util.Arrays; @@ -60,8 +62,7 @@ private boolean isAllowed(IpAddress addr) { private void updateWhitelist() { IpGroup updatedList = new IpGroup(); - // TODO: Investigate if this still works now that validateSettingName is in place. - String[] ips = settingsSvc.get(WorkflowsAdmin.IP_WHITELIST_KEY, "127.0.0.1;::1").split(";"); + String[] ips = settingsSvc.getValueForKey(Key.WorkflowsAdminIpWhitelist, WorkflowsAdmin.DEFAULT_IP_ALLOWLIST).split(WorkflowsAdmin.IP_SEPARATOR); Arrays.stream(ips) .forEach( str -> updatedList.add( IpAddressRange.makeSingle( diff --git a/src/main/java/edu/harvard/iq/dataverse/api/WorkflowsAdmin.java b/src/main/java/edu/harvard/iq/dataverse/api/WorkflowsAdmin.java index bf48ee660ad..ecb7248cae9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/WorkflowsAdmin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/WorkflowsAdmin.java @@ -3,6 +3,8 @@ import edu.harvard.iq.dataverse.authorization.groups.impl.ipaddress.ip.IpAddress; import edu.harvard.iq.dataverse.util.json.JsonParseException; import edu.harvard.iq.dataverse.util.json.JsonParser; + +import static edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key.WorkflowsAdminIpWhitelist; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.brief; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.toJsonArray; @@ -30,9 +32,9 @@ */ @Path("admin/workflows") public class WorkflowsAdmin extends AbstractApiBean { - - // TODO: To comply with validateSettingName, prepend with a colon? 
- public static final String IP_WHITELIST_KEY="WorkflowsAdmin#IP_WHITELIST_KEY"; + + public static final String IP_SEPARATOR = ";"; + public static final String DEFAULT_IP_ALLOWLIST = "127.0.0.1" + IP_SEPARATOR + "::1"; @EJB WorkflowServiceBean workflows; @@ -154,14 +156,14 @@ public Response deleteWorkflow(@PathParam("id") String id ) { @Path("/ip-whitelist") @GET public Response getIpWhitelist() { - return ok( settingsSvc.get(IP_WHITELIST_KEY, "127.0.0.1;::1") ); + return ok( settingsSvc.getValueForKey(WorkflowsAdminIpWhitelist, DEFAULT_IP_ALLOWLIST) ); } @Path("/ip-whitelist") @PUT public Response setIpWhitelist(String body) { String ipList = body.trim(); - String[] ips = ipList.split(";"); + String[] ips = ipList.split(IP_SEPARATOR); boolean allIpsOk = Arrays.stream(ips).allMatch(ip->{ try { IpAddress.valueOf(ip); @@ -171,18 +173,17 @@ public Response setIpWhitelist(String body) { } } ); if (allIpsOk) { - settingsSvc.set(IP_WHITELIST_KEY, ipList); - return ok( settingsSvc.get(IP_WHITELIST_KEY, "127.0.0.1;::1") ); + settingsSvc.setValueForKey(WorkflowsAdminIpWhitelist, ipList); + return ok( settingsSvc.getValueForKey(WorkflowsAdminIpWhitelist, DEFAULT_IP_ALLOWLIST) ); } else { return badRequest("Request contains illegal IP addresses."); } - } @Path("/ip-whitelist") @DELETE public Response deleteIpWhitelist() { - settingsSvc.delete(IP_WHITELIST_KEY); + settingsSvc.deleteValueForKey(WorkflowsAdminIpWhitelist); return ok( "Restored whitelist to default (127.0.0.1;::1)" ); } diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index 7b6580068c0..e23727ed7eb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -171,6 +171,11 @@ public enum Key { @Deprecated(forRemoval = true, since = "2025-04-29") BlockedApiPolicy, + /** + * Semicolon separated 
allowlist of IP addresses allowed administrative access to workflows. + */ + WorkflowsAdminIpWhitelist, + /** * A special secret that, if set, needs to be given when trying to manage internal users. * This key was formerly known as "BuiltinUsers.KEY", which never was a setting name aligning with the others. diff --git a/src/main/resources/db/migration/V6.8.0.1.sql b/src/main/resources/db/migration/V6.8.0.1.sql index 656ef04d572..8e810270b06 100644 --- a/src/main/resources/db/migration/V6.8.0.1.sql +++ b/src/main/resources/db/migration/V6.8.0.1.sql @@ -85,4 +85,13 @@ DO $$ INSERT INTO setting (name, lang, content) VALUES (':BuiltinUsersKey', NULL, (SELECT content FROM setting WHERE name = 'BuiltinUsers.KEY')); DELETE FROM setting WHERE name = 'BuiltinUsers.KEY'; END IF; - END $$; \ No newline at end of file + END $$; + +-- 3. Migrate WorkflowsAdmin#IP_WHITELIST_KEY to the new setting name +DO $$ + BEGIN + IF EXISTS (SELECT 1 FROM setting WHERE name = 'WorkflowsAdmin#IP_WHITELIST_KEY') THEN + INSERT INTO setting (name, lang, content) VALUES (':WorkflowsAdminIpWhitelist', NULL, (SELECT content FROM setting WHERE name = 'WorkflowsAdmin#IP_WHITELIST_KEY')); + DELETE FROM setting WHERE name = 'WorkflowsAdmin#IP_WHITELIST_KEY'; + END IF; + END $$; diff --git a/src/test/java/edu/harvard/iq/dataverse/api/WorkflowsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/WorkflowsIT.java index 9e8c36b9e18..4b94fe6ee68 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/WorkflowsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/WorkflowsIT.java @@ -1,6 +1,6 @@ package edu.harvard.iq.dataverse.api; -import static edu.harvard.iq.dataverse.api.WorkflowsAdmin.IP_WHITELIST_KEY; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import io.restassured.RestAssured; import static io.restassured.RestAssured.given; import io.restassured.response.Response; @@ -35,13 +35,15 @@ public void testIpWhitelist() { .statusCode(OK.getStatusCode()) .body("data.message", 
equalTo("127.0.0.1;::1")); + String testIp = "192.168.0.1;192.168.0.2"; + response = UtilIT.setWorkflowIpWhitelist("junk"); response.prettyPrint(); response.then().assertThat() .statusCode(BAD_REQUEST.getStatusCode()) .body("message", equalTo("Request contains illegal IP addresses.")); - response = UtilIT.setWorkflowIpWhitelist("192.168.0.1;192.168.0.2"); + response = UtilIT.setWorkflowIpWhitelist(testIp); response.prettyPrint(); response.then().assertThat() .statusCode(OK.getStatusCode()); @@ -50,13 +52,13 @@ public void testIpWhitelist() { response.prettyPrint(); response.then().assertThat() .statusCode(OK.getStatusCode()) - .body("data.message", equalTo("192.168.0.1;192.168.0.2")); - - // FIXME fix 500 error - response = given().when().get("/api/admin/settings/" + IP_WHITELIST_KEY); + .body("data.message", equalTo(testIp)); + + response = given().when().get("/api/admin/settings/" + SettingsServiceBean.Key.WorkflowsAdminIpWhitelist); response.prettyPrint(); response.then().assertThat() - .statusCode(INTERNAL_SERVER_ERROR.getStatusCode()); + .statusCode(OK.getStatusCode()) + .body("data.message", equalTo(testIp)); } } From 2a97dd74e043dce8fc89544aabddecc64a03e5ad Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 15 Sep 2025 14:22:46 +0200 Subject: [PATCH 63/69] chore(settings): mark `SettingsServiceBean` methods using String for lookup as deprecated for removal In the future, we should entirely remove lookup of arbitrary strings in the database for settings. Instead, we need a discovery mechanism for keys that allow plugins to register and lookup their own database settings. 
--- .../iq/dataverse/settings/SettingsServiceBean.java | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index e23727ed7eb..e5fce1a2349 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -817,6 +817,7 @@ public static SettingsServiceBean.Key parse(String key) { * @param name of the setting * @return the actual setting, or {@code null}. */ + @Deprecated(since = "6.9", forRemoval = true) public String get( String name ) { List tokens = em.createNamedQuery("Setting.findByName", Setting.class) .setParameter("name", name ) @@ -961,11 +962,13 @@ public Boolean getValueForCompoundKeyAsBoolean(Key key, String param) { * @param defaultValue The value to return if no setting is found in the DB. * @return Either the stored value, or the default value. */ + @Deprecated(since = "6.9", forRemoval = true) public String get( String name, String defaultValue ) { String val = get(name); return (val!=null) ? 
val : defaultValue; } + @Deprecated(since = "6.9", forRemoval = true) public String get(String name, String lang, String defaultValue ) { // Database safeguard, as the default is an empty string if (lang == null) lang = ""; @@ -991,7 +994,8 @@ public String getValueForKey( Key key, String lang, String defaultValue ) { return get( key.toString(), lang, defaultValue ); } - + + @Deprecated(since = "6.9", forRemoval = true) public Setting set( String name, String content ) { Setting s = null; @@ -1015,6 +1019,7 @@ public Setting set( String name, String content ) { return s; } + @Deprecated(since = "6.9", forRemoval = true) public Setting set( String name, String lang, String content ) { // Database safeguard, as the default is an empty string if (lang == null) lang = ""; @@ -1053,6 +1058,7 @@ public Setting setValueForKey( Key key, String content ) { * @param defaultValue logical value of {@code null}. * @return boolean value of the setting. */ + @Deprecated(since = "6.9", forRemoval = true) public boolean isTrue( String name, boolean defaultValue ) { String val = get(name); return ( val==null ) ? 
defaultValue : StringUtil.isTrue(val); @@ -1079,6 +1085,7 @@ public void deleteValueForKey( Key name ) { delete( name.toString() ); } + @Deprecated(since = "6.9", forRemoval = true) public void delete( String name ) { actionLogSvc.log( new ActionLogRecord(ActionLogRecord.ActionType.Setting, "delete") .setInfo(name)); @@ -1087,6 +1094,7 @@ public void delete( String name ) { .executeUpdate(); } + @Deprecated(since = "6.9", forRemoval = true) public void delete( String name, String lang ) { // Database safeguard, as the default is an empty string if (lang == null) lang = ""; From c667b18efa01e44b11abb406b108805d859a547f Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 15 Sep 2025 14:22:58 +0200 Subject: [PATCH 64/69] doc(settings): clarify database changes require app reload for ORM --- doc/sphinx-guides/source/installation/config.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index f33c04eada8..c3f97904ead 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -3873,7 +3873,8 @@ You might also create your own profiles and use these, please refer to the upstr Database Settings ----------------- -These settings are stored in the ``setting`` database table but we recommend using the Admin API (:ref:`admin-api-db-settings`) to view and modify them, as shown below. +These settings are stored in the ``setting`` database table but we recommend using the Settings Admin API (:ref:`admin-api-db-settings`) to view and modify them, as shown below. +If changed in the database directly, you need to reload the application to make the ORM pick up the changes.
In short: From c4d2f4ad44307991eb813d0ad04aa6504c65cad8 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 30 Oct 2025 14:24:35 +0100 Subject: [PATCH 65/69] chore(settings): add deprecation notices for String-based methods in `SettingsServiceBean` #11654 Requested by @qqmyers via review comment. Expanded deprecation tags across String-based methods, encouraging migration to key-based alternatives for future removal in v6.9. --- .../settings/SettingsServiceBean.java | 33 +++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index e5fce1a2349..2811b4e10c4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -816,6 +816,9 @@ public static SettingsServiceBean.Key parse(String key) { * Basic functionality - get the name, return the setting, or {@code null}. * @param name of the setting * @return the actual setting, or {@code null}. + * + * @deprecated This will be removed in a future version of Dataverse. Please refrain from using it and migrate + * any code doing so to use a {@link Key} and the {@link #getValueForKey(Key)} variants instead. */ @Deprecated(since = "6.9", forRemoval = true) public String get( String name ) { @@ -961,6 +964,9 @@ public Boolean getValueForCompoundKeyAsBoolean(Key key, String param) { * @param name Name of the setting. * @param defaultValue The value to return if no setting is found in the DB. * @return Either the stored value, or the default value. + * + * @deprecated This will be removed in a future version of Dataverse. Please refrain from using it and migrate + * any code doing so to use a {@link Key} and the {@link #getValueForKey(Key)} variants instead. 
*/ @Deprecated(since = "6.9", forRemoval = true) public String get( String name, String defaultValue ) { @@ -968,6 +974,10 @@ public String get( String name, String defaultValue ) { return (val!=null) ? val : defaultValue; } + /** + * @deprecated This will be removed in a future version of Dataverse. Please refrain from using it and migrate + * any code doing so to use a {@link Key} and the {@link #getValueForKey(Key)} variants instead. + */ @Deprecated(since = "6.9", forRemoval = true) public String get(String name, String lang, String defaultValue ) { // Database safeguard, as the default is an empty string @@ -995,6 +1005,10 @@ public String getValueForKey( Key key, String lang, String defaultValue ) { return get( key.toString(), lang, defaultValue ); } + /** + * @deprecated This will be removed in a future version of Dataverse. Please refrain from using it and migrate + * any code doing so to use a {@link Key} and the {@link #setValueForKey(Key, String)} variants instead. + */ @Deprecated(since = "6.9", forRemoval = true) public Setting set( String name, String content ) { Setting s = null; @@ -1018,7 +1032,11 @@ public Setting set( String name, String content ) { .setInfo(name + ": " + content)); return s; } - + + /** + * @deprecated This will be removed in a future version of Dataverse. Please refrain from using it and migrate + * any code doing so to use a {@link Key} and the {@link #setValueForKey(Key, String)} variants instead. + */ @Deprecated(since = "6.9", forRemoval = true) public Setting set( String name, String lang, String content ) { // Database safeguard, as the default is an empty string @@ -1057,6 +1075,9 @@ public Setting setValueForKey( Key key, String content ) { * @param name name of the setting. * @param defaultValue logical value of {@code null}. * @return boolean value of the setting. + * + * @deprecated This will be removed in a future version of Dataverse. 
Please refrain from using it and migrate + * any code doing so to use a {@link Key} and {@link #isTrueForKey(Key, boolean)} instead. */ @Deprecated(since = "6.9", forRemoval = true) public boolean isTrue( String name, boolean defaultValue ) { @@ -1085,6 +1106,10 @@ public void deleteValueForKey( Key name ) { delete( name.toString() ); } + /** + * @deprecated This will be removed in a future version of Dataverse. Please refrain from using it and migrate + * any code doing so to use a {@link Key} and {@link #deleteValueForKey(Key)} instead. + */ @Deprecated(since = "6.9", forRemoval = true) public void delete( String name ) { actionLogSvc.log( new ActionLogRecord(ActionLogRecord.ActionType.Setting, "delete") @@ -1093,7 +1118,11 @@ public void delete( String name ) { .setParameter("name", name) .executeUpdate(); } - + + /** + * @deprecated This will be removed in a future version of Dataverse. Please refrain from using it and migrate + * any code doing so to use a {@link Key} and {@link #deleteValueForKey(Key)} instead. + */ @Deprecated(since = "6.9", forRemoval = true) public void delete( String name, String lang ) { // Database safeguard, as the default is an empty string From 3ce58754d523745ab4ae49c19de6072c865593ae Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 30 Oct 2025 14:34:59 +0100 Subject: [PATCH 66/69] style(settings): make `Op` enum public to resolve IDE warnings #11654 Addressed visibility concerns by changing the `Op` enum to public, aligning it with a public method that leverages it. Added `@implNote` for clarification. 
Requested by @pdurbin in review comment --- .../harvard/iq/dataverse/settings/SettingsServiceBean.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index 2811b4e10c4..adfd6bc99d6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -1279,8 +1279,10 @@ static Set convertJsonToSettings(JsonObject settings) { /** * Enum representing the types of operations that are performed on a bulk operation with settings. + * @implNote Although this is only meant for internal use, we use it in a public method (which needs to stay public). + * To avoid IDE warnings about exposure, let's make it public, too. */ - static enum Op { + public enum Op { UPDATED, CREATED, DELETED, @@ -1328,6 +1330,8 @@ static JsonObjectBuilder convertToJson(Map operationalDetails) { * @return a map tracking the operations performed on each setting. The map's keys * are the settings involved, and the values are the types of operations * performed (CREATED, UPDATED, DELETED). + * + * @implNote Must be a public method to ensure proper transaction management. */ @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) public Map replaceAllSettings(Set newSettings) { From 5eea9d5fb945dd35dfde2fc29c611c09a62d8c75 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 31 Oct 2025 09:16:50 +0100 Subject: [PATCH 67/69] feat(util): add `getJsonValue` method for robust JSON parsing #11654 Introduced `getJsonValue` to parse serialized JSON strings into `JsonValue`, supporting `JsonObject`, `JsonArray`, and primitives. Includes comprehensive unit tests for valid and invalid JSON scenarios. Note: a primitive (number, string, ...) is not a valid JSON document on its own.
The exception thrown is intended to be used to handle this situation. As this is related to the Settings Service, this may be used to return the raw value instead of some JSON object/array. --- .../iq/dataverse/util/json/JsonUtil.java | 32 ++++++++++++++++ .../iq/dataverse/util/json/JsonUtilTest.java | 37 ++++++++++++++++++- 2 files changed, 68 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java index 72a1cd2e1eb..737d67d8245 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java @@ -8,12 +8,14 @@ import java.io.StringWriter; import java.util.HashMap; import java.util.Map; +import java.util.Objects; import java.util.logging.Logger; import jakarta.json.Json; import jakarta.json.JsonArray; import jakarta.json.JsonException; import jakarta.json.JsonObject; import jakarta.json.JsonReader; +import jakarta.json.JsonValue; import jakarta.json.JsonWriter; import jakarta.json.JsonWriterFactory; import jakarta.json.stream.JsonGenerator; @@ -131,4 +133,34 @@ public static JsonArray getJsonArray(String serializedJson) { } } } + + + /** + * Parses a serialized JSON string and returns it as a JsonValue. + * The returned JsonValue can be a JsonObject, JsonArray, or another type + * based on the structure of the provided serialized JSON string. + * This method closes its resources but does not catch any exceptions. + * + * @param serializedJson The JSON content serialized as a String + * @return The parsed content as a JsonValue which could be a JsonObject, JsonArray, or another JsonValue type + * @throws JsonException If an error occurs during parsing (null, invalid JSON, not trimmed, etc.) 
+ */ + public static JsonValue getJsonValue(String serializedJson) { + if (serializedJson == null) { + throw new JsonException("The serialized JSON string cannot be null."); + } + + try (StringReader rdr = new StringReader(serializedJson)) { + try (JsonReader jsonReader = Json.createReader(rdr)) { + JsonValue jsonValue = jsonReader.read(); + if (jsonValue.getValueType() == JsonValue.ValueType.OBJECT) { + return jsonValue.asJsonObject(); + } else if (jsonValue.getValueType() == JsonValue.ValueType.ARRAY) { + return jsonValue.asJsonArray(); + } else { + return jsonValue; + } + } + } + } } diff --git a/src/test/java/edu/harvard/iq/dataverse/util/json/JsonUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/util/json/JsonUtilTest.java index 3e4f9a690d2..b703597a91c 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/json/JsonUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/json/JsonUtilTest.java @@ -1,7 +1,15 @@ package edu.harvard.iq.dataverse.util.json; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import jakarta.json.JsonException; +import jakarta.json.JsonValue; +import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.NullAndEmptySource; +import org.junit.jupiter.params.provider.ValueSource; class JsonUtilTest { @@ -15,5 +23,32 @@ void testPrettyPrint() { assertEquals("[\n \"junk\"\n]", JsonUtil.prettyPrint("[\"junk\"]")); assertEquals("{\n" + " \"foo\": \"bar\"\n" + "}", JsonUtil.prettyPrint("{\"foo\": \"bar\"}")); } - + + @Nested + class JsonValues { + @Test + void testGetJsonValueWithJsonObject() { + String jsonObject = "{\"key\": \"value\"}"; + JsonValue result = JsonUtil.getJsonValue(jsonObject); + assertEquals(JsonValue.ValueType.OBJECT, result.getValueType()); + assertEquals("value", result.asJsonObject().getString("key")); + } + + @Test + void 
testGetJsonValueWithJsonArray() { + String jsonArray = "[\"element1\", \"element2\"]"; + JsonValue result = JsonUtil.getJsonValue(jsonArray); + assertEquals(JsonValue.ValueType.ARRAY, result.getValueType()); + assertEquals("element1", result.asJsonArray().getString(0)); + assertEquals("element2", result.asJsonArray().getString(1)); + } + + @ParameterizedTest + @NullAndEmptySource + @ValueSource(strings = {" ", " \"\"", "\"primitive\"", "{invalid}", "[invalid]", "[1234, invalid]"}) + void testGetJsonValueWithInvalidJson(String sut) { + assertThrows(JsonException.class, () -> JsonUtil.getJsonValue(sut)); + } + } + } From 9497acd19dfbe2cb8f4e6c058a4724a6b310f849 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 31 Oct 2025 09:19:02 +0100 Subject: [PATCH 68/69] fix(settings): improve JSON handling in `SettingsServiceBean` and add tests for arrays #11654 Enhanced `listAllAsJson` to robustly handle JSON parsing with proper fallback for invalid JSON, which means it is some primitive setting. Added new unit tests to ensure accurate processing of JSON arrays and objects in settings. 
--- .../settings/SettingsServiceBean.java | 12 +++++---- .../settings/SettingsServiceBeanTest.java | 25 ++++++++++++++++++- 2 files changed, 31 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index adfd6bc99d6..1cdac02a013 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -13,6 +13,7 @@ import jakarta.json.Json; import jakarta.json.JsonArray; import jakarta.json.JsonArrayBuilder; +import jakarta.json.JsonException; import jakarta.json.JsonObject; import jakarta.json.JsonObjectBuilder; import jakarta.json.JsonString; @@ -1188,13 +1189,14 @@ public JsonObject listAllAsJson() { settings.forEach(setting -> { String name = convertToJsonKey(setting); - // In case the setting is a JSON object, treat it a such in the output (so the API can return valid JSON) - if (setting.getContent().trim().startsWith("{")) - response.add(name, Json.createObjectBuilder(JsonUtil.getJsonObject(setting.getContent()))); - else + try { + // In case the setting is JSON, treat it as such in the output (so the API can return valid JSON) + response.add(name, JsonUtil.getJsonValue(setting.getContent())); + } catch (JsonException e) { + // This wasn't valid JSON, so we just add it as a string response.add(name, setting.getContent()); } - ); + }); return response.build(); } diff --git a/src/test/java/edu/harvard/iq/dataverse/settings/SettingsServiceBeanTest.java b/src/test/java/edu/harvard/iq/dataverse/settings/SettingsServiceBeanTest.java index eb4d67d1835..c4881257374 100644 --- a/src/test/java/edu/harvard/iq/dataverse/settings/SettingsServiceBeanTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/settings/SettingsServiceBeanTest.java @@ -1,6 +1,7 @@ package edu.harvard.iq.dataverse.settings; import jakarta.json.Json; +import 
jakarta.json.JsonArray; import jakarta.json.JsonObject; import jakarta.persistence.EntityManager; import jakarta.persistence.TypedQuery; @@ -197,7 +198,7 @@ void testListAllAsJson_nonLocalizedSettings() { } @Test - void testListAllAsJson_jsonSetting() { + void testListAllAsJson_jsonObjectSetting() { // Given JsonObject expected = Json.createObjectBuilder() .add("default", "2147483648") @@ -218,6 +219,28 @@ void testListAllAsJson_jsonSetting() { assertEquals(expected.toString(), result.getJsonObject(SettingsServiceBean.Key.MaxFileUploadSizeInBytes.toString()).toString()); } + @Test + void testListAllAsJson_jsonArraySetting() { + // Given + JsonArray expected = Json.createArrayBuilder() + .add(2147483648L) + .add("4000000000") + .add("8000000000") + .build(); + + List resultList = List.of( + new Setting(SettingsServiceBean.Key.MaxFileUploadSizeInBytes.toString(), "[2147483648, \"4000000000\", \"8000000000\"]") + ); + when(typedQuery.getResultList()).thenReturn(resultList); + + // When + JsonObject result = settingsServiceBean.listAllAsJson(); + + // Then + assertEquals(1, result.size()); + assertEquals(expected.toString(), result.getJsonArray(SettingsServiceBean.Key.MaxFileUploadSizeInBytes.toString()).toString()); + } + @Test void testListAllAsJson_localizedSettings() { // Given From 886a9cfe4bfb8c220a5b0ccb8615e3942f68e920 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 14 Nov 2025 22:51:48 +0100 Subject: [PATCH 69/69] feat(db): add `SettingsCleanupCallback` to remove invalid settings after migrations #11654 Introduced a Flyway callback to clean up entries in the `setting` table with unknown keys post-migration. Updated `StartupFlywayMigrator` to register this callback. 
--- .../flyway/SettingsCleanupCallback.java | 103 ++++++++++++++++++ .../flyway/StartupFlywayMigrator.java | 8 ++ 2 files changed, 111 insertions(+) create mode 100644 src/main/java/edu/harvard/iq/dataverse/flyway/SettingsCleanupCallback.java diff --git a/src/main/java/edu/harvard/iq/dataverse/flyway/SettingsCleanupCallback.java b/src/main/java/edu/harvard/iq/dataverse/flyway/SettingsCleanupCallback.java new file mode 100644 index 00000000000..4b02f07a810 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/flyway/SettingsCleanupCallback.java @@ -0,0 +1,103 @@ +package edu.harvard.iq.dataverse.flyway; + +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import org.flywaydb.core.api.FlywayException; +import org.flywaydb.core.api.callback.Callback; +import org.flywaydb.core.api.callback.Context; +import org.flywaydb.core.api.callback.Event; + +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.List; +import java.util.logging.Level; +import java.util.logging.Logger; + +/** + * Flyway callback that runs after all migrations and removes any settings + * whose "name" column does not correspond to a SettingsServiceBean.Key. + * + * This enforces that the settings table contains only keys known to the + * current application version. + */ +public class SettingsCleanupCallback implements Callback { + + private static final Logger logger = Logger.getLogger(SettingsCleanupCallback.class.getName()); + + @Override + public boolean supports(Event event, Context context) { + // Only run after all migrations have completed successfully. 
+ return event == Event.AFTER_MIGRATE; + } + + @Override + public boolean canHandleInTransaction(Event event, Context context) { + // Prefer to run inside the same transaction + return true; + } + + @Override + public void handle(Event event, Context context) { + if (event != Event.AFTER_MIGRATE) { + return; + } + + logger.info("Starting settings cleanup: removing entries with unknown keys"); + + try { + cleanupInvalidSettings(context.getConnection()); + } catch (SQLException e) { + logger.log(Level.SEVERE, "Error while cleaning up settings table", e); + throw new FlywayException("Failed to clean up invalid settings", e); + } + + logger.info("Finished cleaning up settings"); + } + + @Override + public String getCallbackName() { + return "SettingsCleanup"; + } + + private void cleanupInvalidSettings(Connection connection) throws SQLException { + // Collect IDs of rows to delete + List idsToDelete = new ArrayList<>(); + + String selectSql = "SELECT id, name FROM setting"; + try (PreparedStatement ps = connection.prepareStatement(selectSql); + ResultSet rs = ps.executeQuery()) { + + while (rs.next()) { + long id = rs.getLong("id"); + String name = rs.getString("name"); + + // We expect names like ":KeyName". Anything that does not parse + // to a SettingsServiceBean.Key is considered invalid and will be removed. 
+ SettingsServiceBean.Key key = SettingsServiceBean.Key.parse(name); + if (key == null) { + idsToDelete.add(id); + } + } + } + + if (idsToDelete.isEmpty()) { + logger.fine("Settings cleanup: no invalid settings found"); + return; + } + + logger.info(() -> "Settings cleanup: found " + idsToDelete.size() + + " invalid settings; deleting them"); + + String deleteSql = "DELETE FROM setting WHERE id = ?"; + try (PreparedStatement delete = connection.prepareStatement(deleteSql)) { + for (Long id : idsToDelete) { + delete.setLong(1, id); + delete.addBatch(); + } + int[] counts = delete.executeBatch(); + logger.info(() -> "Settings cleanup: deleted " + counts.length + " rows with invalid keys"); + } + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/flyway/StartupFlywayMigrator.java b/src/main/java/edu/harvard/iq/dataverse/flyway/StartupFlywayMigrator.java index 39bc46216ca..06c6048c65a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/flyway/StartupFlywayMigrator.java +++ b/src/main/java/edu/harvard/iq/dataverse/flyway/StartupFlywayMigrator.java @@ -27,6 +27,14 @@ void migrateDatabase() { Flyway flyway = Flyway.configure() .dataSource(dataSource) + .locations( + // Path where to find normal SQL migrations + "classpath:db/migration", + // Path where to find compiled Java migrations + "classpath:edu/harvard/iq/dataverse/flyway" + ) + // Java-based callbacks are not auto-discovered (unlike migrations) + .callbacks(new SettingsCleanupCallback()) .baselineOnMigrate(true) .load();