diff --git a/doc/release-notes/11639-db-opts-idempotency.md b/doc/release-notes/11639-db-opts-idempotency.md index f73cbdebf83..b01e3d955cb 100644 --- a/doc/release-notes/11639-db-opts-idempotency.md +++ b/doc/release-notes/11639-db-opts-idempotency.md @@ -43,3 +43,13 @@ The following database settings are were added to the official list within the c - `:LDNAnnounceRequiredFields` - `:LDNTarget` - `:WorkflowsAdminIpWhitelist` - formerly `WorkflowsAdmin#IP_WHITELIST_KEY` +- `:PrePublishDatasetWorkflowId` - formerly `WorkflowServiceBean.WorkflowId:PrePublishDataset` +- `:PostPublishDatasetWorkflowId` - formerly `WorkflowServiceBean.WorkflowId:PostPublishDataset` + +### Important Considerations During Upgrade Of Your Installation + +1. Running a customized fork? Make sure to add any custom settings to the SettingsServiceBean.Key enum before deploying! +2. Any database settings not contained in the `SettingServiceBean.Key` will be removed from your database during each deployment cycle. +3. As always when upgrading, make sure to backup your database beforehand! + You can also use the existing API endpoint `/api/admin/settings` to retrieve all settings as JSONish data for a quick backup before upgrading. + diff --git a/doc/sphinx-guides/source/developers/workflows.rst b/doc/sphinx-guides/source/developers/workflows.rst index bc894a7dfeb..6f562cd9dd1 100644 --- a/doc/sphinx-guides/source/developers/workflows.rst +++ b/doc/sphinx-guides/source/developers/workflows.rst @@ -27,6 +27,8 @@ If a step in a workflow fails, the Dataverse installation makes an effort to rol provider offers two steps for sending and receiving customizable HTTP requests. *http/sr* and *http/authExt*, detailed below, with the latter able to use the API to make changes to the dataset being processed. (Both lock the dataset to prevent other processes from changing the dataset between the time the step is launched to when the external process responds to the Dataverse instance.) +.. _workflow_admin: + Administration ~~~~~~~~~~~~~~ @@ -36,6 +38,8 @@ At the moment, defining a workflow for each trigger is done for the entire insta In order to prevent unauthorized resuming of workflows, the Dataverse installation maintains a "white list" of IP addresses from which resume requests are honored. This list is maintained using the ``/api/admin/workflows/ip-whitelist`` endpoint of the :doc:`/api/native-api`. By default, the Dataverse installation honors resume requests from localhost only (``127.0.0.1;::1``), so set-ups that use a single server work with no additional configuration. +Note: these settings are also exposed and manageable via the Settings API. +See :ref:`:WorkflowsAdminIpWhitelist`, :ref:`:PrePublishDatasetWorkflowId` and :ref:`:PostPublishDatasetWorkflowId` Available Steps ~~~~~~~~~~~~~~~ diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 0ac3e74bfa9..db968826718 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -2396,6 +2396,9 @@ The workflow id returned in this call (or available by doing a GET of /api/admin Once these steps are taken, new publication requests will automatically trigger submission of an archival copy to the specified archiver, Chronopolis' DuraCloud component in this example. For Chronopolis, as when using the API, it is currently the admin's responsibility to snap-shot the DuraCloud space and monitor the result. Failure of the workflow, (e.g. if DuraCloud is unavailable, the configuration is wrong, or the space for this dataset already exists due to a prior publication action or use of the API), will create a failure message but will not affect publication itself. +Note: setting the default workflow is also available via the Settings API. +See :ref:`:WorkflowsAdminIpWhitelist`, :ref:`:PrePublishDatasetWorkflowId` and :ref:`:PostPublishDatasetWorkflowId` + .. _bag-info.txt: Configuring bag-info.txt @@ -4515,7 +4518,11 @@ Using a JSON-based setting, you can set a global default and per-format limits f (In previous releases of Dataverse, a colon-separated form was used to specify per-format limits, such as ``:TabularIngestSizeLimit:Rdata``, but this is no longer supported. Now JSON is used.) -The expected JSON is an object with key/value pairs like the following. Format names are case-insensitive, and all fields are optional. The size limits must be strings with double quotes around them (e.g. ``"10"``) rather than numbers (e.g. ``10``). +The expected JSON is an object with key/value pairs like the following. +Format names are case-insensitive, and all fields are optional (an empty JSON object equals not restricted). +The size limits must be whole numbers, either presented as strings with double quotes around them (e.g. ``"10"``) or numeric values (e.g. ``10`` or ``10.0``). +Note that decimal numbers like ``10.5`` are invalid. +Any invalid setting will temporarily disable tabular ingest until corrected. .. code:: json @@ -5134,6 +5141,43 @@ Number of errors to display to the user when creating DataFiles from a file uplo ``curl -X PUT -d '1' http://localhost:8080/api/admin/settings/:CreateDataFilesMaxErrorsToDisplay`` +.. _:WorkflowsAdminIpWhitelist: + +:WorkflowsAdminIpWhitelist +++++++++++++++++++++++++++ + +A semicolon-separated list of IP addresses from which workflow resume requests are honored. +By default, the Dataverse installation honors resume requests from localhost only (``127.0.0.1;::1``). +This setting allows for preventing unauthorized resuming of workflows. + +``curl -X PUT -d '127.0.0.1;::1;192.168.0.1' http://localhost:8080/api/admin/settings/:WorkflowsAdminIpWhitelist`` + +See :ref:`Workflow Admin section ` for more details and context. + +.. _:PrePublishDatasetWorkflowId: + +:PrePublishDatasetWorkflowId +++++++++++++++++++++++++++++ + +The identifier of the workflow to be executed prior to dataset publication. +This pre-publish workflow is useful for preparing a dataset for public access (e.g., moving files, checking metadata) or starting an approval process. + +``curl -X PUT -d '1' http://localhost:8080/api/admin/settings/:PrePublishDatasetWorkflowId`` + +See :ref:`Workflow Admin section ` for more details and context. + +.. _:PostPublishDatasetWorkflowId: + +:PostPublishDatasetWorkflowId ++++++++++++++++++++++++++++++ + +The identifier of the workflow to be executed after a dataset has been successfully published. +This post-publish workflow is useful for actions such as sending notifications about the newly published dataset or archiving. + +``curl -X PUT -d '2' http://localhost:8080/api/admin/settings/:PostPublishDatasetWorkflowId`` + +See :ref:`Workflow Admin section ` for more details and context. + .. _:BagItHandlerEnabled: :BagItHandlerEnabled diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index 398d40ee5be..9b909f0c147 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -152,6 +152,7 @@ 6.2025.10 42.7.7 9.8.0 + 16 2.33.0 26.30.0 @@ -168,7 +169,7 @@ 5.3.0 - 1.19.7 + 2.0.2 3.7.1 5.10.2 5.11.0 diff --git a/pom.xml b/pom.xml index 26542196cf2..b844017d8c8 100644 --- a/pom.xml +++ b/pom.xml @@ -20,7 +20,7 @@ false false - integration + integration,migration -Ddummy.jacoco.property=true @@ -748,36 +748,36 @@ + + org.dbunit + dbunit + 3.0.0 + test + org.testcontainers testcontainers test - - - junit - junit - - org.testcontainers - junit-jupiter + testcontainers-junit-jupiter test org.testcontainers - postgresql + testcontainers-postgresql test com.github.dasniko testcontainers-keycloak - 3.6.0 + 4.0.0 test org.testcontainers - localstack + testcontainers-localstack test @{failsafe.jacoco.args} ${argLine} ${skipIntegrationTests} + + ${postgresql.server.version} + diff --git a/src/main/java/edu/harvard/iq/dataverse/flyway/SettingsCleanupCallback.java b/src/main/java/edu/harvard/iq/dataverse/flyway/SettingsCleanupCallback.java index 4b02f07a810..c83472f4b17 100644 --- a/src/main/java/edu/harvard/iq/dataverse/flyway/SettingsCleanupCallback.java +++ b/src/main/java/edu/harvard/iq/dataverse/flyway/SettingsCleanupCallback.java @@ -11,7 +11,9 @@ import java.sql.ResultSet; import java.sql.SQLException; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.logging.Level; import java.util.logging.Logger; @@ -40,6 +42,7 @@ public boolean canHandleInTransaction(Event event, Context context) { @Override public void handle(Event event, Context context) { + // Failsafe - we only run _after_ all migrations are done. if (event != Event.AFTER_MIGRATE) { return; } @@ -61,10 +64,19 @@ public String getCallbackName() { return "SettingsCleanup"; } + /** + * Cleans up invalid settings from the database by identifying and removing + * rows in the `setting` table where the `name` attribute does not correspond + * to a valid SettingsServiceBean.Key. + * + * @param connection the database connection to use for querying and updating the `setting` table + * @throws SQLException if a database access error occurs or the query fails + */ private void cleanupInvalidSettings(Connection connection) throws SQLException { - // Collect IDs of rows to delete - List idsToDelete = new ArrayList<>(); + // Collect IDs of rows to delete, together with the setting's "name" attribute. + Map entriesToDelete = new HashMap<>(); + // IMPORTANT: as we cannot use JPQL mid-Flyway, this query needs to be carefully aligned with the Setting class! String selectSql = "SELECT id, name FROM setting"; try (PreparedStatement ps = connection.prepareStatement(selectSql); ResultSet rs = ps.executeQuery()) { @@ -77,24 +89,25 @@ private void cleanupInvalidSettings(Connection connection) throws SQLException { // to a SettingsServiceBean.Key is considered invalid and will be removed. SettingsServiceBean.Key key = SettingsServiceBean.Key.parse(name); if (key == null) { - idsToDelete.add(id); + entriesToDelete.put(id, name); } } } - if (idsToDelete.isEmpty()) { + if (entriesToDelete.isEmpty()) { logger.fine("Settings cleanup: no invalid settings found"); return; } - logger.info(() -> "Settings cleanup: found " + idsToDelete.size() - + " invalid settings; deleting them"); + logger.info(() -> "Settings cleanup: found " + entriesToDelete.size() + + " invalid/obsolete settings; deleting them."); String deleteSql = "DELETE FROM setting WHERE id = ?"; try (PreparedStatement delete = connection.prepareStatement(deleteSql)) { - for (Long id : idsToDelete) { - delete.setLong(1, id); + for (Map.Entry entry : entriesToDelete.entrySet()) { + delete.setLong(1, entry.getKey()); delete.addBatch(); + logger.info("Settings cleanup: deleting \"" + entry.getValue() + "\""); } int[] counts = delete.executeBatch(); logger.info(() -> "Settings cleanup: deleted " + counts.length + " rows with invalid keys"); diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index ade961b3b92..37d26995017 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -177,6 +177,18 @@ public enum Key { */ WorkflowsAdminIpWhitelist, + /** + * Represents the workflow identifier for the "pre-publish dataset" operation. + * This identifier is used to manage and define the specific workflow + * triggered before a dataset is published within the application. + */ + PrePublishDatasetWorkflowId, + /** + * Represents the configuration key for specifying the workflow identifier that + * will be executed after a dataset has been published. + */ + PostPublishDatasetWorkflowId, + /** * A special secret that, if set, needs to be given when trying to manage internal users. * This key was formerly known as "BuiltinUsers.KEY", which never was a setting name aligning with the others. @@ -291,13 +303,14 @@ public enum Key { */ @Deprecated(since = "6.2", forRemoval = true) SystemEmail, - /* size limit for Tabular data file ingests */ - /* (can be set separately for specific ingestable formats; in which - case the actual stored option will be TabularIngestSizeLimit:{FORMAT_NAME} - where {FORMAT_NAME} is the format identification tag returned by the - getFormatName() method in the format-specific plugin; "sav" for the - SPSS/sav format, "RData" for R, etc. - for example: :TabularIngestSizeLimit:RData */ + + /** +

Size limit (in bytes) for tabular file ingest. Accepts either a single numeric value or JSON for per-format control.

+

Values: -1 (or absent) = no limit, 0 = disable ingest, >0 = byte threshold, or JSON object.

+

JSON object allows setting a "default" (same as single byte value) and override limits per-format for: CSV, DTA, POR, Rdata, SAV, XLSX. + Example: {"default": "536870912", "CSV": "0", "Rdata": "1000000"}

+

Format names are case-insensitive. Invalid settings disable ingest until corrected.

+ */ TabularIngestSizeLimit, /* Validate physical files in the dataset when publishing, if the dataset size less than the threshold limit */ DatasetChecksumValidationSizeLimit, diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index c1d61378f42..2883212be79 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -519,27 +519,41 @@ public Map getTabularIngestSizeLimits() { if (limitEntry != null) { // Case A: the setting is using JSON to support multiple formats if (limitEntry.trim().startsWith("{")) { - try { - JsonObject limits = Json.createReader(new StringReader(limitEntry)).readObject(); + try (JsonReader reader = Json.createReader(new StringReader(limitEntry))) { + JsonObject limits = reader.readObject(); Map limitsMap = new HashMap<>(); // We add the default in case the JSON does not contain the default (which is optional). limitsMap.put(TABULAR_INGEST_SIZE_LIMITS_DEFAULT_KEY, -1L); - for (String formatName : limits.keySet()) { - // We deliberatly do not validate the formatNames here for backward compatibility. - // But we transform to lowercase here, so the casing doesn't matter for lookups. + for (Map.Entry format : limits.entrySet()) { + String formatName = format.getKey(); String lowercaseFormatName = formatName.toLowerCase(); try { - Long sizeOption = Long.valueOf(limits.getString(formatName)); + JsonValue value = format.getValue(); + long sizeOption; + + // We want to be able to use either numbers or string values, so detect which one it is. + // This is necessary as we need to tell the JSON parser what to do, it doesn't automatically handle this for us. + if (value.getValueType() == JsonValue.ValueType.STRING) { + sizeOption = Long.parseLong(limits.getString(formatName)); + } else if (value.getValueType() == JsonValue.ValueType.NUMBER) { + // Will throw if not a whole number! + sizeOption = limits.getJsonNumber(formatName).longValueExact(); + } else { + logger.warning(() -> "Invalid value type for format " + formatName + ": expected string or number"); + logger.warning("Disabling all tabular ingest completely until fixed!"); + return Map.of(TABULAR_INGEST_SIZE_LIMITS_DEFAULT_KEY, 0L); + } + limitsMap.put(lowercaseFormatName, sizeOption); - } catch (ClassCastException cce) { - logger.warning("Could not convert " + SettingsServiceBean.Key.TabularIngestSizeLimit + " to long from JSON integer. You must provide the long number as string (use quotes) for format " + formatName); + } catch (NumberFormatException nfe) { + logger.warning(() -> "Could not convert " + SettingsServiceBean.Key.TabularIngestSizeLimit + " to long for format " + formatName + " (not a valid number)"); logger.warning("Disabling all tabular ingest completely until fixed!"); return Map.of(TABULAR_INGEST_SIZE_LIMITS_DEFAULT_KEY, 0L); - } catch (NumberFormatException nfe) { - logger.warning("Could not convert " + SettingsServiceBean.Key.TabularIngestSizeLimit + " to long for format " + formatName + " (not a number)"); + } catch (ArithmeticException ae) { + logger.warning(() -> "Number too large or has fractional part for format " + formatName); logger.warning("Disabling all tabular ingest completely until fixed!"); return Map.of(TABULAR_INGEST_SIZE_LIMITS_DEFAULT_KEY, 0L); } @@ -547,7 +561,7 @@ public Map getTabularIngestSizeLimits() { return Collections.unmodifiableMap(limitsMap); } catch (JsonParsingException e) { - logger.warning("Invalid TabularIngestSizeLimit option found, cannot parse JSON: " + e.getMessage()); + logger.warning(() -> "Invalid TabularIngestSizeLimit option found, cannot parse JSON: " + e.getMessage()); logger.warning("Disabling all tabular ingest completely until fixed!"); return Map.of(TABULAR_INGEST_SIZE_LIMITS_DEFAULT_KEY, 0L); } @@ -557,7 +571,7 @@ public Map getTabularIngestSizeLimits() { Long limit = Long.valueOf(limitEntry); return Map.of(TABULAR_INGEST_SIZE_LIMITS_DEFAULT_KEY, limit); } catch (NumberFormatException nfe) { - logger.warning("Could not convert " + SettingsServiceBean.Key.TabularIngestSizeLimit + " to long: " + nfe); + logger.warning(() -> "Could not convert " + SettingsServiceBean.Key.TabularIngestSizeLimit + " to long: " + nfe); logger.warning("Disabling all tabular ingest completely until fixed!"); return Map.of(TABULAR_INGEST_SIZE_LIMITS_DEFAULT_KEY, 0L); } diff --git a/src/main/java/edu/harvard/iq/dataverse/workflow/WorkflowContext.java b/src/main/java/edu/harvard/iq/dataverse/workflow/WorkflowContext.java index a0aee4cb4c9..178028d4241 100644 --- a/src/main/java/edu/harvard/iq/dataverse/workflow/WorkflowContext.java +++ b/src/main/java/edu/harvard/iq/dataverse/workflow/WorkflowContext.java @@ -3,6 +3,7 @@ import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key; import edu.harvard.iq.dataverse.workflow.step.WorkflowStep; import java.util.Map; @@ -20,7 +21,17 @@ public class WorkflowContext { public enum TriggerType { - PrePublishDataset, PostPublishDataset + PrePublishDataset(Key.PrePublishDatasetWorkflowId), + PostPublishDataset(Key.PostPublishDatasetWorkflowId); + + final Key key; + TriggerType(Key key) { + this.key = key; + } + + Key getKey() { + return key; + } } private final DataverseRequest request; diff --git a/src/main/java/edu/harvard/iq/dataverse/workflow/WorkflowServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/workflow/WorkflowServiceBean.java index 757d447b60a..ae1175f0e1d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/workflow/WorkflowServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/workflow/WorkflowServiceBean.java @@ -53,7 +53,6 @@ public class WorkflowServiceBean { private static final Logger logger = Logger.getLogger(WorkflowServiceBean.class.getName()); - private static final String WORKFLOW_ID_KEY = "WorkflowServiceBean.WorkflowId:"; @PersistenceContext(unitName = "VDCNet-ejbPU") EntityManager em; @@ -452,7 +451,7 @@ public boolean deleteWorkflow(long workflowId) { if (doomedOpt.isPresent()) { // validate that this is not the default workflow for ( WorkflowContext.TriggerType tp : WorkflowContext.TriggerType.values() ) { - String defaultWorkflowId = settings.get(workflowSettingKey(tp)); + String defaultWorkflowId = settings.getValueForKey(tp.getKey()); if (defaultWorkflowId != null && Long.parseLong(defaultWorkflowId) == doomedOpt.get().getId()) { throw new IllegalArgumentException("Workflow " + workflowId + " cannot be deleted as it is the default workflow for trigger " + tp.name() ); @@ -476,7 +475,7 @@ public PendingWorkflowInvocation getPendingWorkflow(String invocationId) { } public Optional getDefaultWorkflow( WorkflowContext.TriggerType type ) { - String defaultWorkflowId = settings.get(workflowSettingKey(type)); + String defaultWorkflowId = settings.getValueForKey(type.getKey()); if (defaultWorkflowId == null) { return Optional.empty(); } @@ -491,18 +490,13 @@ public Optional getDefaultWorkflow( WorkflowContext.TriggerType type ) * @param type type of the workflow. */ public void setDefaultWorkflowId(WorkflowContext.TriggerType type, Long id) { - String workflowKey = workflowSettingKey(type); if (id == null) { - settings.delete(workflowKey); + settings.deleteValueForKey(type.getKey()); } else { - settings.set(workflowKey, id.toString()); + settings.setValueForKey(type.getKey(), id.toString()); } } - private String workflowSettingKey(WorkflowContext.TriggerType type) { - return WORKFLOW_ID_KEY+type.name(); - } - private WorkflowStep createStep(WorkflowStepData wsd) { WorkflowStepSPI provider = providers.get(wsd.getProviderId()); if (provider == null) { diff --git a/src/main/resources/db/migration/V6.8.0.1.sql b/src/main/resources/db/migration/V6.8.0.1.sql index 8e810270b06..1c3fbf8ffd7 100644 --- a/src/main/resources/db/migration/V6.8.0.1.sql +++ b/src/main/resources/db/migration/V6.8.0.1.sql @@ -67,7 +67,7 @@ DO $$ -- Insert or update the new JSON-based setting INSERT INTO setting (name, content, lang) VALUES (':TabularIngestSizeLimit', json_object::TEXT, NULL) - ON CONFLICT (name) WHERE lang IS NULL + ON CONFLICT (name, (coalesce(lang, ''))) DO UPDATE SET content = EXCLUDED.content; -- Delete all format-specific settings @@ -79,19 +79,11 @@ DO $$ END $$; -- 2. Migrate BuiltinUsers.KEY to the new setting name -DO $$ - BEGIN - IF EXISTS (SELECT 1 FROM setting WHERE name = 'BuiltinUsers.KEY') THEN - INSERT INTO setting (name, lang, content) VALUES (':BuiltinUsersKey', NULL, (SELECT content FROM setting WHERE name = 'BuiltinUsers.KEY')); - DELETE FROM setting WHERE name = 'BuiltinUsers.KEY'; - END IF; - END $$; +UPDATE setting SET name = ':BuiltinUsersKey' WHERE name = 'BuiltinUsers.KEY'; -- 3. Migrate WorkflowsAdmin#IP_WHITELIST_KEY to the new setting name -DO $$ - BEGIN - IF EXISTS (SELECT 1 FROM setting WHERE name = 'WorkflowsAdmin#IP_WHITELIST_KEY') THEN - INSERT INTO setting (name, lang, content) VALUES (':WorkflowsAdminIpWhitelist', NULL, (SELECT content FROM setting WHERE name = 'WorkflowsAdmin#IP_WHITELIST_KEY')); - DELETE FROM setting WHERE name = 'WorkflowsAdmin#IP_WHITELIST_KEY'; - END IF; - END $$; +UPDATE setting SET name = ':WorkflowsAdminIpWhitelist' WHERE name = 'WorkflowsAdmin#IP_WHITELIST_KEY'; + +-- 4. Migrate WorkflowServiceBean.WorkflowId specials to new PrePublishDatasetWorkflowId and PostPublishDatasetWorkflowId +UPDATE setting SET name = ':PrePublishDatasetWorkflowId' WHERE name = 'WorkflowServiceBean.WorkflowId:PrePublishDataset'; +UPDATE setting SET name = ':PostPublishDatasetWorkflowId' WHERE name = 'WorkflowServiceBean.WorkflowId:PostPublishDataset'; diff --git a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java index a911e010ed0..262f3252f9d 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java @@ -9,17 +9,22 @@ import io.restassured.RestAssured; import io.restassured.response.Response; +import java.nio.charset.StandardCharsets; import java.util.*; import java.util.logging.Logger; import edu.harvard.iq.dataverse.api.auth.ApiKeyAuthMechanism; import jakarta.json.JsonObject; +import jakarta.ws.rs.core.Response.Status; import org.assertj.core.util.Lists; +import org.hamcrest.Matcher; +import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.BeforeAll; import io.restassured.path.json.JsonPath; import static edu.harvard.iq.dataverse.api.ApiConstants.*; +import static edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key; import static io.restassured.path.json.JsonPath.with; import io.restassured.path.xml.XmlPath; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; @@ -45,6 +50,10 @@ import org.hamcrest.CoreMatchers; import org.hamcrest.Matchers; import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.io.TempDir; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; import static org.hamcrest.CoreMatchers.*; import static org.junit.jupiter.api.Assertions.*; @@ -59,15 +68,11 @@ public static void setUpClass() { Response removePublicInstall = UtilIT.deleteSetting(SettingsServiceBean.Key.PublicInstall); removePublicInstall.then().assertThat().statusCode(200); - - Response removeLimit = UtilIT.deleteSetting(SettingsServiceBean.Key.TabularIngestSizeLimit); - removeLimit.then().assertThat().statusCode(OK.getStatusCode()); } @AfterAll public static void tearDownClass() { UtilIT.deleteSetting(SettingsServiceBean.Key.PublicInstall); - UtilIT.deleteSetting(SettingsServiceBean.Key.TabularIngestSizeLimit); } /** @@ -1211,183 +1216,107 @@ public void test_AddFileBadUploadFormat() { } } - - @Test - public void testIngestSizeLimits() throws InterruptedException, IOException { - Response createUser = UtilIT.createRandomUser(); - createUser.then().assertThat().statusCode(OK.getStatusCode()); - String username = UtilIT.getUsernameFromResponse(createUser); - String apiToken = UtilIT.getApiTokenFromResponse(createUser); - Response makeSuperUser = UtilIT.setSuperuserStatus(username, true); - makeSuperUser.then().assertThat().statusCode(OK.getStatusCode()); - - Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); - createDataverseResponse.prettyPrint(); - String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); - Response createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken); - createDatasetResponse.prettyPrint(); - Integer datasetId = JsonPath.from(createDatasetResponse.body().asString()).getInt("data.id"); - - String tinyCsvOnly = """ -{ - "csv": "50" -} -"""; - - Response setLimit = UtilIT.setSetting(SettingsServiceBean.Key.TabularIngestSizeLimit, tinyCsvOnly); - setLimit.then().assertThat().statusCode(OK.getStatusCode()); - - Path pathToDataFile = Paths.get(java.nio.file.Files.createTempDirectory(null) + File.separator + "data.csv"); - String contentOfCsv = "" - + "name,pounds,species,treats\n" - + "Midnight,15,dog,milkbones\n" - + "Tiger,17,cat,cat grass\n" - + "Panther,21,cat,cat nip\n"; - java.nio.file.Files.write(pathToDataFile, contentOfCsv.getBytes()); - - Response uploadFile = UtilIT.uploadFileViaNative(datasetId.toString(), pathToDataFile.toString(), apiToken); - uploadFile.prettyPrint(); - uploadFile.then().assertThat() - .statusCode(OK.getStatusCode()) - .body("data.files[0].label", equalTo("data.csv")); - - String fileId1 = JsonPath.from(uploadFile.body().asString()).getString("data.files[0].dataFile.id"); - - Response getTabularFails = UtilIT.getFileDataTables(fileId1, apiToken); - getTabularFails.prettyPrint(); - getTabularFails.then().assertThat() - .statusCode(BAD_REQUEST.getStatusCode()) - .body("message", equalTo(BundleUtil.getStringFromBundle("files.api.only.tabular.supported"))); - - String largerCsv = """ -{ - "csv": "123456" -} -"""; - - setLimit = UtilIT.setSetting(SettingsServiceBean.Key.TabularIngestSizeLimit, largerCsv); - setLimit.then().assertThat().statusCode(OK.getStatusCode()); - - uploadFile = UtilIT.uploadFileViaNative(datasetId.toString(), pathToDataFile.toString(), apiToken); - uploadFile.prettyPrint(); - uploadFile.then().assertThat() - .statusCode(OK.getStatusCode()) - .body("data.files[0].label", equalTo("data-1.csv")); - - assertTrue(UtilIT.sleepForLock(datasetId.longValue(), "Ingest", apiToken, UtilIT.MAXIMUM_INGEST_LOCK_DURATION), "Failed test if Ingest Lock exceeds max duration " + pathToDataFile); - - String fileId2 = JsonPath.from(uploadFile.body().asString()).getString("data.files[0].dataFile.id"); - - Response getTabularWorks = UtilIT.getFileDataTables(fileId2, apiToken); - getTabularWorks.prettyPrint(); - getTabularWorks.then().assertThat() - .statusCode(OK.getStatusCode()) - .body("data[0].varQuantity", equalTo(4)); - - String tinyDefaultSize = """ -{ - "default": "50" -} -"""; - - setLimit = UtilIT.setSetting(SettingsServiceBean.Key.TabularIngestSizeLimit, tinyDefaultSize); - setLimit.then().assertThat().statusCode(OK.getStatusCode()); - - uploadFile = UtilIT.uploadFileViaNative(datasetId.toString(), pathToDataFile.toString(), apiToken); - uploadFile.prettyPrint(); - uploadFile.then().assertThat() - .statusCode(OK.getStatusCode()) - .body("data.files[0].label", equalTo("data-2.csv")); - - String fileId3 = JsonPath.from(uploadFile.body().asString()).getString("data.files[0].dataFile.id"); - - getTabularFails = UtilIT.getFileDataTables(fileId3, apiToken); - getTabularFails.prettyPrint(); - getTabularFails.then().assertThat() - .statusCode(BAD_REQUEST.getStatusCode()) - .body("message", equalTo(BundleUtil.getStringFromBundle("files.api.only.tabular.supported"))); - - // The behavior of `"default": "-2"` is not documented in the guides - // but it acts like `"default": "0"` which disables ingest. - String unexpectedNegativeDefault = """ -{ - "default": "-2" -} -"""; - - setLimit = UtilIT.setSetting(SettingsServiceBean.Key.TabularIngestSizeLimit, unexpectedNegativeDefault); - setLimit.then().assertThat().statusCode(OK.getStatusCode()); - - uploadFile = UtilIT.uploadFileViaNative(datasetId.toString(), pathToDataFile.toString(), apiToken); - uploadFile.prettyPrint(); - uploadFile.then().assertThat() - .statusCode(OK.getStatusCode()) - .body("data.files[0].label", equalTo("data-3.csv")); - - String fileId4 = JsonPath.from(uploadFile.body().asString()).getString("data.files[0].dataFile.id"); - - getTabularFails = UtilIT.getFileDataTables(fileId4, apiToken); - getTabularFails.prettyPrint(); - getTabularFails.then().assertThat() - .statusCode(BAD_REQUEST.getStatusCode()) - .body("message", equalTo(BundleUtil.getStringFromBundle("files.api.only.tabular.supported"))); - - // As the guides say, you MUST provide a string, not a JSON number. - // That is, `"123"` in quotes rather than `123` with no quotes. - // If you provide a number (no quotes) rather than a string, - // all ingest will be disabled and you'll see an error in server.log - // about how the system is misconfigured. - String invalidNonString = """ -{ - "default": 987654321 -} -"""; - - setLimit = UtilIT.setSetting(SettingsServiceBean.Key.TabularIngestSizeLimit, invalidNonString); - setLimit.then().assertThat().statusCode(OK.getStatusCode()); - - uploadFile = UtilIT.uploadFileViaNative(datasetId.toString(), pathToDataFile.toString(), apiToken); - uploadFile.prettyPrint(); - uploadFile.then().assertThat() - .statusCode(OK.getStatusCode()) - .body("data.files[0].label", equalTo("data-4.csv")); - - String fileId5 = JsonPath.from(uploadFile.body().asString()).getString("data.files[0].dataFile.id"); - - getTabularFails = UtilIT.getFileDataTables(fileId5, apiToken); - getTabularFails.prettyPrint(); - getTabularFails.then().assertThat() - .statusCode(BAD_REQUEST.getStatusCode()) - .body("message", equalTo(BundleUtil.getStringFromBundle("files.api.only.tabular.supported"))); - - String defaultDisabledAndLargeCsvLimit = """ -{ - "default": "0", - "csv": "123456" -} -"""; - - setLimit = UtilIT.setSetting(SettingsServiceBean.Key.TabularIngestSizeLimit, defaultDisabledAndLargeCsvLimit); - setLimit.then().assertThat().statusCode(OK.getStatusCode()); - - uploadFile = UtilIT.uploadFileViaNative(datasetId.toString(), pathToDataFile.toString(), apiToken); - uploadFile.prettyPrint(); - uploadFile.then().assertThat() - .statusCode(OK.getStatusCode()) - .body("data.files[0].label", equalTo("data-5.csv")); - - String fileId6 = JsonPath.from(uploadFile.body().asString()).getString("data.files[0].dataFile.id"); - - getTabularWorks = UtilIT.getFileDataTables(fileId2, apiToken); - getTabularWorks.prettyPrint(); - getTabularWorks.then().assertThat() + + @Nested + class IngestSizeLimits { + + static String apiToken; + static int datasetId; + @TempDir + static Path tempDir; + static String csvFileName = "data.csv"; + static Path csvFile; + static final String csvData = + """ + name,pounds,species,treats + Midnight,15,dog,milkbones + Tiger,17,cat,cat grass + Panther,21,cat,cat nip + """; + + @BeforeAll + static void setup() throws IOException { + // Create random user, collection and dataset to work with + Response createUser = UtilIT.createRandomUser(); + createUser.then().assertThat().statusCode(OK.getStatusCode()); + String username = UtilIT.getUsernameFromResponse(createUser); + apiToken = UtilIT.getApiTokenFromResponse(createUser); + Response makeSuperUser = UtilIT.setSuperuserStatus(username, true); + makeSuperUser.then().assertThat().statusCode(OK.getStatusCode()); + + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.prettyPrint(); + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + Response createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken); + createDatasetResponse.prettyPrint(); + datasetId = JsonPath.from(createDatasetResponse.body().asString()).getInt("data.id"); + + // Create CSV datafile to work with + csvFile = tempDir.resolve(csvFileName); + java.nio.file.Files.writeString(csvFile, csvData, StandardCharsets.UTF_8); + } + + @AfterAll + static void teardown() { + // Remove the setting for test isolation purposes + Response removeLimit = UtilIT.deleteSetting(Key.TabularIngestSizeLimit); + removeLimit.then().assertThat().statusCode(OK.getStatusCode()); + } + + static List configurations() { + return List.of( + // Too small for 134 chars + Arguments.of("{\"csv\": 50}", BAD_REQUEST, "message", equalTo(BundleUtil.getStringFromBundle("files.api.only.tabular.supported"))), + Arguments.of("{\"default\": 50.0}", BAD_REQUEST, "message", equalTo(BundleUtil.getStringFromBundle("files.api.only.tabular.supported"))), + + // The behavior of `"default": "-2"` is not documented in the guides + // but it acts like `"default": "0"` which disables ingest. + Arguments.of("{\"default\": -2}", BAD_REQUEST, "message", equalTo(BundleUtil.getStringFromBundle("files.api.only.tabular.supported"))), + + // Large enough :-) + Arguments.of("-1", OK, "data[0].varQuantity", equalTo(4)), + Arguments.of("123456", OK, "data[0].varQuantity", equalTo(4)), + Arguments.of("{\"default\": 123456}", OK, "data[0].varQuantity", equalTo(4)), + Arguments.of("{\"csv\": 123456}", OK, "data[0].varQuantity", equalTo(4)), + Arguments.of("{\"csv\": \"123457\"}", OK, "data[0].varQuantity", equalTo(4)), + Arguments.of("{\"csv\": 123458.0}", OK, "data[0].varQuantity", equalTo(4)), + // Default is disabled, but exception for CSV + Arguments.of("{\"default\": 0,\"csv\": 123456}", OK, "data[0].varQuantity", equalTo(4)) + ); + } + + @ParameterizedTest + @MethodSource("configurations") + void testIngestSizeLimits(String ingestSizeLimitConfig, Status expectedStatus, String jsonPath, Matcher matcher) { + // given + Response setLimit = UtilIT.setSetting(Key.TabularIngestSizeLimit, ingestSizeLimitConfig); + setLimit.then().assertThat().statusCode(OK.getStatusCode()); + + // when + Response uploadResponse = UtilIT.uploadFileViaNative(Integer.toString(datasetId), csvFile.toString(), apiToken); + //uploadResponse.prettyPrint(); + uploadResponse.then().assertThat() .statusCode(OK.getStatusCode()) - .body("data[0].varQuantity", equalTo(4)); - - Response removeLimit = UtilIT.deleteSetting(SettingsServiceBean.Key.TabularIngestSizeLimit); - removeLimit.then().assertThat().statusCode(OK.getStatusCode()); + .body("data.files[0].label", equalTo(csvFileName)); + String fileId = JsonPath.from(uploadResponse.body().asString()).getString("data.files[0].dataFile.id"); + + // Wait for ingest to complete + assertTrue(UtilIT.sleepForLock(datasetId, "Ingest", apiToken, UtilIT.MAXIMUM_INGEST_LOCK_DURATION), + "Failed test if Ingest Lock exceeds max duration"); + + // then + Response getTabularFails = UtilIT.getFileDataTables(fileId, apiToken); + //getTabularFails.prettyPrint(); + getTabularFails.then().assertThat() + .statusCode(expectedStatus.getStatusCode()) + .body(jsonPath, matcher); + + // delete the file for the next test + UtilIT.deleteFile(Integer.valueOf(fileId), apiToken); + } } + @Test public void testUningestFileViaApi() throws InterruptedException { Response createUser = UtilIT.createRandomUser(); diff --git a/src/test/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/oidc/OIDCAuthenticationProviderFactoryIT.java b/src/test/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/oidc/OIDCAuthenticationProviderFactoryIT.java index 0ca74ab2f2b..91160b0ad77 100644 --- a/src/test/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/oidc/OIDCAuthenticationProviderFactoryIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/oidc/OIDCAuthenticationProviderFactoryIT.java @@ -72,11 +72,12 @@ class OIDCAuthenticationProviderFactoryIT { .withAdminPassword(adminPassword); // simple method to retrieve the issuer URL, referenced to by @JvmSetting annotations (do no delete) + @SuppressWarnings("unused") private static String getAuthUrl() { return keycloakContainer.getAuthServerUrl() + "/realms/" + realm; } - OIDCAuthProvider getProvider() throws Exception { + private OIDCAuthProvider getProvider() throws Exception { OIDCAuthProvider oidcAuthProvider = (OIDCAuthProvider) OIDCAuthenticationProviderFactory.buildFromSettings(); assumeTrue(oidcAuthProvider.getMetadata().getTokenEndpointURI().toString() @@ -86,8 +87,9 @@ OIDCAuthProvider getProvider() throws Exception { } // NOTE: This requires the "direct access grants" for the client to be enabled! - String getBearerTokenViaKeycloakAdminClient() throws Exception { - try (Keycloak keycloak = KeycloakBuilder.builder() + private String getBearerTokenViaKeycloakAdminClient() { + // NOTE: While Keycloak implements AutoClosable, don't use a try-with-resources with it to avoid logging out. + Keycloak keycloak = KeycloakBuilder.builder() .serverUrl(keycloakContainer.getAuthServerUrl()) .grantType(OAuth2Constants.PASSWORD) .realm(realm) @@ -96,9 +98,8 @@ String getBearerTokenViaKeycloakAdminClient() throws Exception { .username(realmAdminUser) .password(realmAdminPassword) .scope("openid") - .build()) { - return keycloak.tokenManager().getAccessTokenString(); - } + .build(); + return keycloak.tokenManager().getAccessTokenString(); } /** diff --git a/src/test/java/edu/harvard/iq/dataverse/db/migration/DBUnitHelper.java b/src/test/java/edu/harvard/iq/dataverse/db/migration/DBUnitHelper.java new file mode 100644 index 00000000000..17beaabaad0 --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/db/migration/DBUnitHelper.java @@ -0,0 +1,207 @@ +package edu.harvard.iq.dataverse.db.migration; + +import org.dbunit.Assertion; +import org.dbunit.IDatabaseTester; +import org.dbunit.assertion.DiffCollectingFailureHandler; +import org.dbunit.assertion.Difference; +import org.dbunit.dataset.DefaultDataSet; +import org.dbunit.dataset.IDataSet; +import org.dbunit.dataset.ITable; +import org.dbunit.dataset.ReplacementDataSet; +import org.dbunit.dataset.SortedTable; +import org.dbunit.dataset.filter.DefaultColumnFilter; +import org.dbunit.dataset.xml.FlatXmlDataSet; +import org.dbunit.dataset.xml.FlatXmlDataSetBuilder; +import org.dbunit.operation.DatabaseOperation; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.sql.Connection; +import java.sql.Statement; + +class DBUnitHelper { + /** + * Loads dataset from the provided XML string into the database associated with the given + * {@link IDatabaseTester} instance. It performs the following steps: + * - Parses the XML string into an {@link IDataSet}. + * - Replaces occurrences of "[null]" with actual null values in the dataset. + * - Inserts the data into the database, performing a CLEAN_INSERT operation. + * - Updates database sequences to avoid primary key collisions. + * + * @param tester the {@link IDatabaseTester} instance used for database operations. + * @param inputXml the XML string representing the dataset to be loaded into the database. + * @throws Exception if an error occurs during dataset parsing, database setup, or sequence update. + */ + public static void loadData(IDatabaseTester tester, String inputXml) throws Exception { + try (InputStream xml = new ByteArrayInputStream(inputXml.getBytes(StandardCharsets.UTF_8))) { + IDataSet inputDataSet = new FlatXmlDataSetBuilder().build(xml); + + // Handle NULL values by replacing [null] string with actual NULL in the input dataset + ReplacementDataSet dataSet = new ReplacementDataSet(inputDataSet); + dataSet.addReplacementObject("[null]", null); + + tester.setDataSet(dataSet); + tester.setSetUpOperation(DatabaseOperation.CLEAN_INSERT); + tester.onSetup(); + } + + // Update the sequence to avoid primary key collisions with the data just inserted by DBUnit + try (Connection conn = tester.getConnection().getConnection(); Statement stmt = conn.createStatement()) { + stmt.execute("SELECT setval('setting_id_seq', (SELECT MAX(id) FROM setting))"); + } + } + + /** + * Retrieves and processes the actual data from a specified table in the database. + * The method performs the following steps: + * - Fetches the entire dataset from the database connection. + * - Retrieves the specific table from the dataset. + * - Filters out specified columns from the table. + * - Sorts the table based on the provided column. + * + * @param tester the {@link IDatabaseTester} instance used to access the database. + * @param table the name of the table to retrieve from the dataset. + * @param orderBy the column name by which the table should be sorted. + * @param ignoreCols optional columns to exclude from the resulting table. + * @return a processed and sorted {@link ITable} object representing the actual data from the database table. + * @throws Exception if an error occurs during database access, table filtering, or sorting. + */ + public static ITable getActualData(IDatabaseTester tester, String table, String orderBy, String... ignoreCols) throws Exception { + IDataSet actualDataSet = tester.getConnection().createDataSet(); + ITable actualTable = actualDataSet.getTable(table); + ITable filteredActual = DefaultColumnFilter.excludedColumnsTable(actualTable, ignoreCols); + SortedTable sortedActual = new SortedTable(filteredActual, new String[]{orderBy}); + sortedActual.setUseComparable(true); + return sortedActual; + } + + /** + * Processes the given XML data to produce an expected dataset table for comparison purposes. + * The method performs the following steps: + * - Parses the XML data into an {@link IDataSet}. + * - Replaces "[null]" strings with actual null values in the dataset. + * - Filters out specified columns from the table. + * - Sorts the resulting table based on the provided column(s). + * + * @param expectedXml the XML string representing the expected dataset. + * @param table the name of the table to retrieve from the dataset. + * @param orderBy the column name by which the table should be sorted. + * @param ignoreCols optional columns to exclude from the resulting table. + * @return a processed and sorted {@link ITable} object representing the expected state of the dataset. + * @throws Exception if an error occurs during XML parsing, dataset processing, or table filtering. + */ + public static ITable getExpectedData(String expectedXml, String table, String orderBy, String... ignoreCols) throws Exception { + try (InputStream xml = new ByteArrayInputStream(expectedXml.getBytes(StandardCharsets.UTF_8))) { + IDataSet rawDataSet = new FlatXmlDataSetBuilder().build(xml); + + // Handle NULL values by replacing [null] string with actual NULL in the expected dataset + // This is necessary to deal with the lang column, which may contain NULL values. + ReplacementDataSet expectedDataSet = new ReplacementDataSet(rawDataSet); + expectedDataSet.addReplacementObject("[null]", null); + ITable expectedTable = expectedDataSet.getTable(table); + + // Filter out the id column (potentially contains auto-generated values, not comparable) + ITable filteredExpected = DefaultColumnFilter.excludedColumnsTable(expectedTable, ignoreCols); + + // Sort the resulting table by name (using comparable to avoid byte-wise sorting!) + SortedTable sortedExpected = new SortedTable(filteredExpected, new String[]{orderBy}); + sortedExpected.setUseComparable(true); + + return sortedExpected; + } + } + + + /** + * Executes a database migration script located at the specified resource path. + * This method loads the SQL script from the given path, establishes a connection + * to the database via the {@link SharedPostgresContainer}, and executes the script. + * + * @param migrationResourcePath the path to the migration script resource to be executed + * @throws Exception if an error occurs while loading the resource, establishing the database connection, + * or executing the SQL script + */ + public static void runMigrationScript(String migrationResourcePath) throws Exception { + String migrationSql = loadResource(migrationResourcePath); + try (Connection conn = SharedPostgresContainer.getConnection(); Statement stmt = conn.createStatement()) { + stmt.execute(migrationSql); + } + } + + /** + * Loads the content of a resource file located at the given path into a string. + * The resource is read with UTF-8 encoding, and if the resource is not found, an IOException + * is thrown. Proper handling of the InputStream is ensured using try-with-resources. + * + * @param path the path to the resource file to be loaded + * (Note: a path starting with "/" is absolute (root of the classpath). Without it is relative to {@link DBUnitHelper}) + * @return the content of the resource as a string + * @throws IOException if the resource cannot be found or an error occurs during reading + */ + public static String loadResource(String path) throws IOException { + try (InputStream is = DBUnitHelper.class.getResourceAsStream(path)) { + if (is == null) { + throw new IOException("Resource not found: " + path); + } + return new String(is.readAllBytes(), StandardCharsets.UTF_8); + } + } + + /** + * Asserts that two database tables are equal. The method compares the + * expected and actual tables row by row and column by column. If differences + * are found, an {@link AssertionError} is thrown with detailed information + * about the discrepancies, including the expected and actual values and + * their corresponding XML representations. + * + * @param expected the expected {@link ITable} object, representing the + * expected state of the database table. + * @param actual the actual {@link ITable} object, representing the actual + * state of the database table to compare against the expected table. + * @throws Exception if an error occurs during the comparison or while + * generating the XML representation of the tables. + */ + public static void assertTablesEqual(ITable expected, ITable actual) throws Exception { + DiffCollectingFailureHandler failureHandler = new DiffCollectingFailureHandler(); + Assertion.assertEquals(expected, actual, failureHandler); + + if (!failureHandler.getDiffList().isEmpty()) { + StringBuilder sb = new StringBuilder(); + sb.append("=== DIFFERENCES FOUND ===\n"); + + for (Object diff : failureHandler.getDiffList()) { + Difference d = (Difference) diff; + sb.append(String.format("Row %d, Column '%s': expected <%s> but was <%s>%n", + d.getRowIndex(), + d.getColumnName(), + d.getExpectedValue(), + d.getActualValue())); + } + + sb.append("\n=== EXPECTED DATA ===\n"); + sb.append(tableToXmlString(expected)); + + sb.append("\n=== ACTUAL DATA ===\n"); + sb.append(tableToXmlString(actual)); + + throw new AssertionError(sb.toString()); + } + } + + /** + * Converts the data in the given {@link ITable} to an XML string representation. + * This method serializes the specified table into a flat XML format using UTF-8 encoding. + * + * @param table the {@link ITable} to be converted to an XML string + * @return a string containing the XML representation of the specified table + * @throws Exception if an error occurs during the serialization process + */ + public static String tableToXmlString(ITable table) throws Exception { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + FlatXmlDataSet.write(new DefaultDataSet(table), baos); + return baos.toString(StandardCharsets.UTF_8); + } +} \ No newline at end of file diff --git a/src/test/java/edu/harvard/iq/dataverse/db/migration/SharedPostgresContainer.java b/src/test/java/edu/harvard/iq/dataverse/db/migration/SharedPostgresContainer.java new file mode 100644 index 00000000000..5970e36069a --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/db/migration/SharedPostgresContainer.java @@ -0,0 +1,89 @@ +package edu.harvard.iq.dataverse.db.migration; + +import org.dbunit.IDatabaseTester; +import org.dbunit.JdbcDatabaseTester; +import org.dbunit.database.DatabaseConfig; +import org.dbunit.database.IDatabaseConnection; +import org.dbunit.ext.postgresql.PostgresqlDataTypeFactory; +import org.testcontainers.postgresql.PostgreSQLContainer; + +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.SQLException; + +/** + * Singleton container that is started once and reused across all tests. + */ +public class SharedPostgresContainer { + + private static volatile PostgreSQLContainer instance; + + /** + * Provides a singleton instance of {@link PostgreSQLContainer} configured with + * a specific PostgreSQL version, database name, username, and password. The + * container is created and started on first invocation and reused across + * subsequent calls. + * + * @return the singleton {@link PostgreSQLContainer} instance for use in tests + */ + public static PostgreSQLContainer getInstance() { + if (instance == null) { + synchronized (SharedPostgresContainer.class) { + if (instance == null) { + String version = System.getProperty("postgresql.server.version"); + System.out.println("Creating singleton Postgres container with version: " + version); + + instance = new PostgreSQLContainer("postgres:" + version) + .withDatabaseName("testdb") + .withUsername("test") + .withPassword("test"); + + instance.start(); + + // Ensure container is stopped when JVM shuts down + Runtime.getRuntime().addShutdownHook(new Thread(instance::stop)); + } + } + } + return instance; + } + + /** + * Retrieves a database connection using the credentials and JDBC URL + * provided by the singleton PostgreSQL container instance. + * + * @return a {@link Connection} object to interact with the PostgreSQL database + * @throws SQLException if a database access error occurs + */ + public static Connection getConnection() throws SQLException { + return DriverManager.getConnection( + getInstance().getJdbcUrl(), + getInstance().getUsername(), + getInstance().getPassword() + ); + } + + /** + * Creates and returns an initialized instance of IDatabaseTester for testing + * database interactions. The tester is configured to work with a PostgreSQL + * database by using a PostgresqlDataTypeFactory. + * + * @return an IDatabaseTester instance configured for the current PostgreSQL container + * @throws ClassNotFoundException if there is an issue creating the database tester or connection + */ + public static IDatabaseTester getTester() throws ClassNotFoundException { + return new JdbcDatabaseTester( + getInstance().getDriverClassName(), + getInstance().getJdbcUrl(), + getInstance().getUsername(), + getInstance().getPassword() + ) { + @Override + public IDatabaseConnection getConnection() throws Exception { + IDatabaseConnection connection = super.getConnection(); + connection.getConfig().setProperty(DatabaseConfig.PROPERTY_DATATYPE_FACTORY, new PostgresqlDataTypeFactory()); + return connection; + } + }; + } +} diff --git a/src/test/java/edu/harvard/iq/dataverse/db/migration/V6_8_0_1__SettingsDataMigrationIT.java b/src/test/java/edu/harvard/iq/dataverse/db/migration/V6_8_0_1__SettingsDataMigrationIT.java new file mode 100644 index 00000000000..309c4779c0e --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/db/migration/V6_8_0_1__SettingsDataMigrationIT.java @@ -0,0 +1,184 @@ +package edu.harvard.iq.dataverse.db.migration; + +import edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key; +import edu.harvard.iq.dataverse.util.testing.Tags; +import org.dbunit.IDatabaseTester; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.Test; +import org.testcontainers.junit.jupiter.Testcontainers; + +import java.sql.Connection; +import java.sql.Statement; + +import static org.junit.jupiter.api.Assertions.assertTrue; + +@Tag(Tags.DB_MIGRATION_TEST) +@Tag(Tags.USES_TESTCONTAINERS) +@Testcontainers(disabledWithoutDocker = true) +class V6_8_0_1__SettingsDataMigrationIT { + + static final String MIGRATION_RESOURCE = "/db/migration/V6.8.0.1.sql"; + static final String TABLE_NAME = "setting"; + + private IDatabaseTester databaseTester; + + @BeforeAll + static void initSchema() throws Exception { + // Create the table structure (the DDL that already exists in your database) + String schemaDDL = """ + create table %s + ( + id serial primary key, + content text, + lang text constraint non_empty_lang check (lang <> ''::text), + name text + ); + + create unique index unique_settings on setting (name, (COALESCE(lang, ''::text))); + """.formatted(TABLE_NAME); + + try (Connection conn = SharedPostgresContainer.getConnection(); Statement stmt = conn.createStatement()) { + stmt.execute(schemaDDL); + } + } + + @BeforeEach + void setUp() throws Exception { + databaseTester = SharedPostgresContainer.getTester(); + } + + @AfterEach + void tearDown() throws Exception { + if (databaseTester != null) { + databaseTester.onTearDown(); + } + // Clean up for the next test + try (Connection conn = SharedPostgresContainer.getConnection(); Statement stmt = conn.createStatement()) { + stmt.execute("TRUNCATE TABLE " + TABLE_NAME + " RESTART IDENTITY CASCADE"); + } + } + + @Test + @DisplayName("Test migrating BuiltinUsers.KEY and Workflow settings") + void testMigrationConvertSimpleSettings() throws Exception { + // GIVEN + var inputXml = """ + + + + + + + + """; + DBUnitHelper.loadData(databaseTester, inputXml); + + // WHEN + DBUnitHelper.runMigrationScript(MIGRATION_RESOURCE); + var actualData = DBUnitHelper.getActualData(databaseTester, TABLE_NAME, "name", "id"); + + // THEN + // Notes: We don't specify 'id' in expected because they are auto-generated. + // We use single quotes for XML attributes containing JSON with double quotes. + var expectedXml = """ + + + + + + + + """.formatted(Key.BuiltinUsersKey, Key.WorkflowsAdminIpWhitelist, Key.PrePublishDatasetWorkflowId, Key.PostPublishDatasetWorkflowId); + var expectedData = DBUnitHelper.getExpectedData(expectedXml, TABLE_NAME, "name", "id"); + + DBUnitHelper.assertTablesEqual(expectedData, actualData); + } + + @Test + @DisplayName("Test migrating TabularIngestSizeLimit to JSON") + void testMigrationConvertTabularIngestSizeLimitToJsonFormat() throws Exception { + // GIVEN + var inputXml = """ + + + + + + + """; + DBUnitHelper.loadData(databaseTester, inputXml); + + // WHEN + DBUnitHelper.runMigrationScript(MIGRATION_RESOURCE); + var actualData = DBUnitHelper.getActualData(databaseTester, TABLE_NAME, "name", "id"); + + // THEN + // Notes: We don't specify 'id' in expected because they are auto-generated. + // We use single quotes for XML attributes containing JSON with double quotes. + var expectedXml = """ + + + + + """; + var expectedData = DBUnitHelper.getExpectedData(expectedXml, TABLE_NAME, "name", "id"); + + DBUnitHelper.assertTablesEqual(expectedData, actualData); + } + + @Test + @DisplayName("Test migrating TabularIngestSizeLimit handles NULL lang values") + void testMigrationTabularIngestSizeLimitToJsonHandlesNullLangValues() throws Exception { + // GIVEN + var inputXml = """ + + + + + """; + DBUnitHelper.loadData(databaseTester, inputXml); + + // WHEN + DBUnitHelper.runMigrationScript(MIGRATION_RESOURCE); + var actualTable = DBUnitHelper.getActualData(databaseTester, TABLE_NAME, "name", "id"); + + // THEN + // Just verify we have at least one row (the migrated JSON setting) + assertTrue(actualTable.getRowCount() > 0); + } + + @Test + @DisplayName("Test migrating TabularIngestSizeLimit handles non-numeric values") + void testMigrationTabularIngestSizeLimitToJsonHandlesNonNumericValuesGracefully() throws Exception { + // GIVEN + var inputXml = """ + + + + + """; + DBUnitHelper.loadData(databaseTester, inputXml); + + // WHEN + DBUnitHelper.runMigrationScript(MIGRATION_RESOURCE); + var actualData = DBUnitHelper.getActualData(databaseTester, TABLE_NAME, "name", "id"); + + // THEN + // Invalid values should be set to 0 (at least that's what the migration *should* do) + // Notes: We don't specify 'id' in expected because they are auto-generated. + // We use single quotes for XML attributes containing JSON with double quotes. + var expectedXml = """ + + + + + """; + var expectedData = DBUnitHelper.getExpectedData(expectedXml, TABLE_NAME, "name", "id"); + + DBUnitHelper.assertTablesEqual(expectedData, actualData); + } +} \ No newline at end of file diff --git a/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java b/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java index 06026962d2c..123aacc601d 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java @@ -8,6 +8,7 @@ import org.junit.jupiter.api.extension.ExtendWith; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.CsvSource; +import org.junit.jupiter.params.provider.ValueSource; import org.mockito.InjectMocks; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; @@ -202,10 +203,10 @@ void testGetTabularIngestSizeLimitsWithSingleInvalidValue() { assertEquals(0L, (long) result.get(SystemConfig.TABULAR_INGEST_SIZE_LIMITS_DEFAULT_KEY)); } - @Test - void testGetTabularIngestSizeLimitsWithJsonButUnsupportedJsonInt() { + @ParameterizedTest + @ValueSource(strings = {"", "{ invalid: }"}) + void testGetTabularIngestSizeLimitsWithInvalidJson(String invalidJson) { // given - String invalidJson = "{\"default\": 0}"; doReturn(invalidJson).when(settingsService).getValueForKey(SettingsServiceBean.Key.TabularIngestSizeLimit); // when @@ -216,10 +217,10 @@ void testGetTabularIngestSizeLimitsWithJsonButUnsupportedJsonInt() { assertEquals(0L, (long) result.get(SystemConfig.TABULAR_INGEST_SIZE_LIMITS_DEFAULT_KEY)); } - @Test - void testGetTabularIngestSizeLimitsWithInvalidJson() { + @ParameterizedTest + @ValueSource(strings = {"[]", "{[{\"tsv\": 1}]}", "{ \"invalid\": \"foobar\" }", "{ \"tsv\": 1.5}", "{ \"tsv\": \"1.0\"}"}) + void testGetTabularIngestSizeLimitsWithInvalidNumbersInValidJson(String invalidJson) { // given - String invalidJson = "{invalid:}"; doReturn(invalidJson).when(settingsService).getValueForKey(SettingsServiceBean.Key.TabularIngestSizeLimit); // when @@ -230,17 +231,33 @@ void testGetTabularIngestSizeLimitsWithInvalidJson() { assertEquals(0L, (long) result.get(SystemConfig.TABULAR_INGEST_SIZE_LIMITS_DEFAULT_KEY)); } - @Test - void testGetTabularIngestSizeLimitsWithInvalidNumberInValidJson() { + @ParameterizedTest + @ValueSource(strings = {"{ \"tSv\": 1.0 }", "{ \"tsv\": \"1\"}", "{ \"tsv\": 1 }"}) + void testGetTabularIngestSizeLimitsWithValidNumbersInValidJson(String validConfig) { // given - String invalidJson = "{\"csv\": \"this-is-not-a-number\", \"tSv\": \"10000\"}"; - doReturn(invalidJson).when(settingsService).getValueForKey(SettingsServiceBean.Key.TabularIngestSizeLimit); + doReturn(validConfig).when(settingsService).getValueForKey(SettingsServiceBean.Key.TabularIngestSizeLimit); // when Map result = systemConfig.getTabularIngestSizeLimits(); // then - assertEquals(1, result.size()); - assertEquals(0L, (long) result.get(SystemConfig.TABULAR_INGEST_SIZE_LIMITS_DEFAULT_KEY)); + assertEquals(2, result.size()); + assertEquals(-1L, (long) result.get(SystemConfig.TABULAR_INGEST_SIZE_LIMITS_DEFAULT_KEY)); + assertEquals(1L, (long) result.get("tsv")); + } + + @ParameterizedTest + @ValueSource(strings = {"{ \"tSv\": 429496729600.0 }", "{ \"tsv\": \"429496729600\"}", "{ \"tsv\": 429496729600 }"}) + void testGetTabularIngestSizeLimitsWithValidLongNumbers(String validConfig) { + // given + doReturn(validConfig).when(settingsService).getValueForKey(SettingsServiceBean.Key.TabularIngestSizeLimit); + + // when + Map result = systemConfig.getTabularIngestSizeLimits(); + + // then + assertEquals(2, result.size()); + assertEquals(-1L, (long) result.get(SystemConfig.TABULAR_INGEST_SIZE_LIMITS_DEFAULT_KEY)); + assertEquals(429496729600L, (long) result.get("tsv")); } } diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/Tags.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/Tags.java index 1544d393896..22e13f08665 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/testing/Tags.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/Tags.java @@ -4,4 +4,5 @@ public class Tags { public static final String NOT_ESSENTIAL_UNITTESTS = "not-essential-unittests"; public static final String INTEGRATION_TEST = "integration"; public static final String USES_TESTCONTAINERS = "testcontainers"; + public static final String DB_MIGRATION_TEST = "migration"; }