From 8e1da69a5e1d0ceccc6e7be440af9d074896ccbe Mon Sep 17 00:00:00 2001
From: Oliver Bertuch <o.bertuch@fz-juelich.de>
Date: Tue, 25 Nov 2025 00:46:52 +0100
Subject: [PATCH 01/20] chore(deps): update Testcontainers testing dependencies
 (breaking change upstream)

Upgraded Testcontainers to `2.0.2` and updated related artifact names for consistency (`testcontainers-junit-jupiter`, `testcontainers-postgresql`, etc.). Bumped `testcontainers-keycloak` to `4.0.0` which is compatible with TC2.
---
 modules/dataverse-parent/pom.xml |  2 +-
 pom.xml                          | 14 ++++----------
 2 files changed, 5 insertions(+), 11 deletions(-)
diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml
index d61e2eff795..1792a3c169c 100644
--- a/modules/dataverse-parent/pom.xml
+++ b/modules/dataverse-parent/pom.xml
@@ -168,7 +168,7 @@
         <gdcc.xoai.version>5.3.0</gdcc.xoai.version>
     
         <!-- Testing dependencies -->
-        <testcontainers.version>1.19.7</testcontainers.version>
+        <testcontainers.version>2.0.2</testcontainers.version>
         <smallrye-mpconfig.version>3.7.1</smallrye-mpconfig.version>
         <junit.jupiter.version>5.10.2</junit.jupiter.version>
         <mockito.version>5.11.0</mockito.version>
diff --git a/pom.xml b/pom.xml
index 9a199c714e7..7eb6279dfed 100644
--- a/pom.xml
+++ b/pom.xml
@@ -752,32 +752,26 @@
             <groupId>org.testcontainers</groupId>
             <artifactId>testcontainers</artifactId>
             <scope>test</scope>
-            <exclusions>
-                <exclusion>
-                    <groupId>junit</groupId>
-                    <artifactId>junit</artifactId>
-                </exclusion>
-            </exclusions>
         </dependency>
         <dependency>
             <groupId>org.testcontainers</groupId>
-            <artifactId>junit-jupiter</artifactId>
+            <artifactId>testcontainers-junit-jupiter</artifactId>
             <scope>test</scope>
         </dependency>
         <dependency>
             <groupId>org.testcontainers</groupId>
-            <artifactId>postgresql</artifactId>
+            <artifactId>testcontainers-postgresql</artifactId>
             <scope>test</scope>
         </dependency>
         <dependency>
             <groupId>com.github.dasniko</groupId>
             <artifactId>testcontainers-keycloak</artifactId>
-            <version>3.6.0</version>
+            <version>4.0.0</version>
             <scope>test</scope>
         </dependency>
         <dependency>
             <groupId>org.testcontainers</groupId>
-            <artifactId>localstack</artifactId>
+            <artifactId>testcontainers-localstack</artifactId>
             <scope>test</scope>
         </dependency>
         <!--

From 4d47ef03504aa7d00952bc59bc49d5f690277fd3 Mon Sep 17 00:00:00 2001
From: Oliver Bertuch <o.bertuch@fz-juelich.de>
Date: Tue, 25 Nov 2025 00:49:10 +0100
Subject: [PATCH 02/20] test,chore(db): add PostgreSQL server version property
 to Maven configuration

Before, the Postgres Server version was only present in the container profile. Now moving it into the parent POM and aligning with our documented settings allows reuse in tests, too.

Using the caps of Maven Failsafe to expose the server version in JUnit Tests will allow to use it with Testcontainers, deploying a Postgres Container for migration tests as documented in our guides.
---
 modules/dataverse-parent/pom.xml | 1 +
 pom.xml                          | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml
index 1792a3c169c..24a5578d2a8 100644
--- a/modules/dataverse-parent/pom.xml
+++ b/modules/dataverse-parent/pom.xml
@@ -152,6 +152,7 @@
         <payara.version>6.2025.3</payara.version>
         <postgresql.version>42.7.7</postgresql.version>
         <solr.version>9.8.0</solr.version>
+        <postgresql.server.version>16</postgresql.server.version>
         <aws.version>2.33.0</aws.version>
         <google.library.version>26.30.0</google.library.version>
     
diff --git a/pom.xml b/pom.xml
index 7eb6279dfed..1dce73c8db4 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1060,6 +1060,9 @@
                     -->
                     <argLine>@{failsafe.jacoco.args} ${argLine}</argLine>
                     <skip>${skipIntegrationTests}</skip>
+                    <systemPropertyVariables>
+                        <postgresql.server.version>${postgresql.server.version}</postgresql.server.version>
+                    </systemPropertyVariables>
                 </configuration>
                 <executions>
                     <execution>

From 11984e87446e45a2e9d20cc20ee4a7b01edaba32 Mon Sep 17 00:00:00 2001
From: Oliver Bertuch <o.bertuch@fz-juelich.de>
Date: Tue, 25 Nov 2025 00:51:27 +0100
Subject: [PATCH 03/20] test(migrations): introducing migration tests to the
 codebase

Expanded integration test tags in `pom.xml` to include `migration` and added `DBunit` as a test-scoped dependency to support database migration testing. Updated `Tags.java` to reflect the new tag, to be used in tests that are migration tests (so they can be easily excluded or run as necessary).
---
 pom.xml                                                   | 8 +++++++-
 .../java/edu/harvard/iq/dataverse/util/testing/Tags.java  | 1 +
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/pom.xml b/pom.xml
index 1dce73c8db4..a4b640ef768 100644
--- a/pom.xml
+++ b/pom.xml
@@ -20,7 +20,7 @@
     <properties>
         <skipUnitTests>false</skipUnitTests>
         <skipIntegrationTests>false</skipIntegrationTests>
-        <it.groups>integration</it.groups>
+        <it.groups>integration,migration</it.groups>
         <!-- Provide a fallback value that won't break things if JaCoCo prepare-agent steps don't set it. -->
         <!-- Note: you must use @{} style late variable binding in argLine, otherwise JaCoCo cannot inject the right settings! -->
         <surefire.jacoco.args>-Ddummy.jacoco.property=true</surefire.jacoco.args>
@@ -748,6 +748,12 @@
                 </exclusion>
             </exclusions>
         </dependency>
+        <dependency>
+            <groupId>org.dbunit</groupId>
+            <artifactId>dbunit</artifactId>
+            <version>3.0.0</version>
+            <scope>test</scope>
+        </dependency>
         <dependency>
             <groupId>org.testcontainers</groupId>
             <artifactId>testcontainers</artifactId>
diff --git a/src/test/java/edu/harvard/iq/dataverse/util/testing/Tags.java b/src/test/java/edu/harvard/iq/dataverse/util/testing/Tags.java
index 1544d393896..22e13f08665 100644
--- a/src/test/java/edu/harvard/iq/dataverse/util/testing/Tags.java
+++ b/src/test/java/edu/harvard/iq/dataverse/util/testing/Tags.java
@@ -4,4 +4,5 @@ public class Tags {
     public static final String NOT_ESSENTIAL_UNITTESTS = "not-essential-unittests";
     public static final String INTEGRATION_TEST = "integration";
     public static final String USES_TESTCONTAINERS = "testcontainers";
+    public static final String DB_MIGRATION_TEST = "migration";
 }

From 83f3d37ad868a394f4d7bdebbf6c59560f2a2610 Mon Sep 17 00:00:00 2001
From: Oliver Bertuch <o.bertuch@fz-juelich.de>
Date: Tue, 25 Nov 2025 00:51:57 +0100
Subject: [PATCH 04/20] test(migration): introduce `SharedPostgresContainer`
 singleton for migration tests

Added a reusable PostgresContainer to optimize resource usage in database migration tests. Ensures a single container instance is shared across all migration test cases, with automatic cleanup on JVM shutdown.
---
 .../db/migration/SharedPostgresContainer.java | 44 +++++++++++++++++++
 1 file changed, 44 insertions(+)
 create mode 100644 src/test/java/edu/harvard/iq/dataverse/db/migration/SharedPostgresContainer.java

diff --git a/src/test/java/edu/harvard/iq/dataverse/db/migration/SharedPostgresContainer.java b/src/test/java/edu/harvard/iq/dataverse/db/migration/SharedPostgresContainer.java
new file mode 100644
index 00000000000..47a995621f7
--- /dev/null
+++ b/src/test/java/edu/harvard/iq/dataverse/db/migration/SharedPostgresContainer.java
@@ -0,0 +1,44 @@
+package edu.harvard.iq.dataverse.db.migration;
+
+import org.testcontainers.postgresql.PostgreSQLContainer;
+
+import java.sql.Connection;
+import java.sql.DriverManager;
+
+/**
+ * Singleton container that is started once and reused across all tests.
+ */
+public class SharedPostgresContainer {
+    
+    private static volatile PostgreSQLContainer instance;
+    
+    public static PostgreSQLContainer getInstance() {
+        if (instance == null) {
+            synchronized (SharedPostgresContainer.class) {
+                if (instance == null) {
+                    String version = System.getProperty("postgresql.server.version");
+                    System.out.println("Creating singleton Postgres container with version: " + version);
+                    
+                    instance = new PostgreSQLContainer("postgres:" + version)
+                        .withDatabaseName("testdb")
+                        .withUsername("test")
+                        .withPassword("test");
+                    
+                    instance.start();
+                    
+                    // Ensure container is stopped when JVM shuts down
+                    Runtime.getRuntime().addShutdownHook(new Thread(instance::stop));
+                }
+            }
+        }
+        return instance;
+    }
+    
+    public static Connection getConnection() throws Exception {
+        return DriverManager.getConnection(
+            getInstance().getJdbcUrl(),
+            getInstance().getUsername(),
+            getInstance().getPassword()
+        );
+    }
+}

From 664b2943b61575223fb6f06a55d7ccdda9062e5d Mon Sep 17 00:00:00 2001
From: Oliver Bertuch <o.bertuch@fz-juelich.de>
Date: Tue, 25 Nov 2025 00:53:08 +0100
Subject: [PATCH 05/20] test(migration): add integration tests for
 `V6_8_0_1__SettingsDataMigration`

Introduced comprehensive tests for `V6_8_0_1__SettingsDataMigration` using Testcontainers and DBUnit. Includes scenarios for migrating settings to new formats, handling null and invalid values, and verifying JSON transformations.

This is a reproducer for an issue discovered after merging PR #11654 by @landreev. See also https://github.com/IQSS/dataverse/pull/11654#issuecomment-3570855865
---
 .../V6_8_0_1__SettingsDataMigrationIT.java    | 275 ++++++++++++++++++
 1 file changed, 275 insertions(+)
 create mode 100644 src/test/java/edu/harvard/iq/dataverse/db/migration/V6_8_0_1__SettingsDataMigrationIT.java

diff --git a/src/test/java/edu/harvard/iq/dataverse/db/migration/V6_8_0_1__SettingsDataMigrationIT.java b/src/test/java/edu/harvard/iq/dataverse/db/migration/V6_8_0_1__SettingsDataMigrationIT.java
new file mode 100644
index 00000000000..f432aa7ed26
--- /dev/null
+++ b/src/test/java/edu/harvard/iq/dataverse/db/migration/V6_8_0_1__SettingsDataMigrationIT.java
@@ -0,0 +1,275 @@
+package edu.harvard.iq.dataverse.db.migration;
+
+
+import edu.harvard.iq.dataverse.util.testing.Tags;
+import org.dbunit.Assertion;
+import org.dbunit.IDatabaseTester;
+import org.dbunit.JdbcDatabaseTester;
+import org.dbunit.dataset.IDataSet;
+import org.dbunit.dataset.ITable;
+import org.dbunit.dataset.SortedTable;
+import org.dbunit.dataset.filter.DefaultColumnFilter;
+import org.dbunit.dataset.xml.FlatXmlDataSetBuilder;
+import org.dbunit.operation.DatabaseOperation;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.DisplayName;
+import org.junit.jupiter.api.Tag;
+import org.junit.jupiter.api.Test;
+import org.testcontainers.junit.jupiter.Container;
+import org.testcontainers.junit.jupiter.Testcontainers;
+import org.testcontainers.postgresql.PostgreSQLContainer;
+
+import java.io.ByteArrayInputStream;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.sql.Connection;
+import java.sql.Statement;
+
+import static org.dbunit.Assertion.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+@Tag(Tags.DB_MIGRATION_TEST)
+@Tag(Tags.USES_TESTCONTAINERS)
+@Testcontainers(disabledWithoutDocker = true)
+public class V6_8_0_1__SettingsDataMigrationIT {
+    
+    static final String MIGRATION_RESOURCE = "/db/migration/V6.8.0.1.sql";
+    static final String TABLE_NAME = "setting";
+    
+    @Container
+    static PostgreSQLContainer POSTGRES = SharedPostgresContainer.getInstance();
+    
+    private IDatabaseTester databaseTester;
+    
+    @BeforeAll
+    static void initSchema() throws Exception {
+        // Create the table structure (the DDL that already exists in your database)
+        String schemaDDL = """
+            create table %s
+            (
+                id      serial primary key,
+                content text,
+                lang    text constraint non_empty_lang check (lang <> ''::text),
+                name    text
+            );
+            
+            create unique index unique_settings on setting (name, (COALESCE(lang, ''::text)));
+            """.formatted(TABLE_NAME);
+        
+        try (Connection conn = SharedPostgresContainer.getConnection(); Statement stmt = conn.createStatement()) {
+            stmt.execute(schemaDDL);
+        }
+    }
+    
+    @BeforeEach
+    void setUp() throws Exception {
+        // Create a new database tester instance, get the connection details from Testcontainers
+        databaseTester = new JdbcDatabaseTester(
+            POSTGRES.getDriverClassName(),
+            POSTGRES.getJdbcUrl(),
+            POSTGRES.getUsername(),
+            POSTGRES.getPassword()
+        );
+    }
+    
+    @AfterEach
+    void tearDown() throws Exception {
+        if (databaseTester != null) {
+            databaseTester.onTearDown();
+        }
+        // Clean up for the next test
+        try (Connection conn = SharedPostgresContainer.getConnection(); Statement stmt = conn.createStatement()) {
+            stmt.execute("TRUNCATE TABLE setting RESTART IDENTITY CASCADE");
+        }
+    }
+    
+    @Test
+    @DisplayName("Test migrating BuiltinUsers.KEY and WorkflowsAdmin")
+    void testMigrationConvertSimpleSettings() throws Exception {
+        // GIVEN
+        var inputXml = """
+            <?xml version='1.0' encoding='UTF-8'?>
+            <dataset>
+                <setting id="1" name="BuiltinUsers.KEY" content="secret-key-123" />
+                <setting id="2" name="WorkflowsAdmin#IP_WHITELIST_KEY" content="127.0.0.1" />
+            </dataset>
+            """;
+        loadData(inputXml);
+        
+        // WHEN
+        runMigrationScript();
+        var actualData = getActualData();
+        
+        // THEN
+        // Notes: We don't specify 'id' in expected because they are auto-generated.
+        //        We use single quotes for XML attributes containing JSON with double quotes.
+        var expectedXml = """
+            <?xml version='1.0' encoding='UTF-8'?>
+            <dataset>
+                <setting name=":BuiltinUsersKey" content="secret-key-123" />
+                <setting name=":WorkflowsAdminIpWhitelist" content="127.0.0.1" />
+            </dataset>
+            """;
+        var expectedData = getExpectedData(expectedXml);
+        
+        Assertion.assertEquals(expectedData, actualData);
+    }
+    
+    @Test
+    @DisplayName("Test migrating TabularIngestSizeLimit to JSON")
+    void testMigrationConvertTabularIngestSizeLimitToJsonFormat() throws Exception {
+        // GIVEN
+        var inputXml = """
+            <?xml version='1.0' encoding='UTF-8'?>
+            <dataset>
+                <setting id="3" name=":TabularIngestSizeLimit" content="2000" />
+                <setting id="4" name=":TabularIngestSizeLimit:csv" content="5000" />
+                <setting id="5" name=":TabularIngestSizeLimit:tab" content="1000" />
+            </dataset>
+            """;
+        loadData(inputXml);
+        
+        // WHEN
+        runMigrationScript();
+        var actualData = getActualData();
+        
+        // THEN
+        // Notes: We don't specify 'id' in expected because they are auto-generated.
+        //        We use single quotes for XML attributes containing JSON with double quotes.
+        var expectedXml = """
+            <?xml version='1.0' encoding='UTF-8'?>
+            <dataset>
+                <setting name=":TabularIngestSizeLimit" content='{"csv": 5000, "tab": 1000, "default": 2000}' />
+            </dataset>
+            """;
+        var expectedData = getExpectedData(expectedXml);
+        
+        Assertion.assertEquals(expectedData, actualData);
+    }
+    
+    @Test
+    @DisplayName("Test migrating TabularIngestSizeLimit handles NULL lang values")
+    void testMigrationTabularIngestSizeLimitToJsonHandlesNullLangValues() throws Exception {
+        // GIVEN
+        var inputXml = """
+            <?xml version='1.0' encoding='UTF-8'?>
+            <dataset>
+                <setting id="1" name=":TabularIngestSizeLimit:csv" content="3000" lang="[null]" />
+            </dataset>
+            """;
+        loadData(inputXml);
+        
+        // WHEN
+        runMigrationScript();
+        
+        // THEN
+        // Verify the migration completed...
+        IDataSet actualDataSet = databaseTester.getConnection().createDataSet();
+        ITable actualTable = actualDataSet.getTable("setting");
+        
+        // Just verify we have at least one row (the migrated JSON setting)
+        assertTrue(actualTable.getRowCount() > 0);
+    }
+    
+    @Test
+    @DisplayName("Test migrating TabularIngestSizeLimit handles non-numeric values")
+    void testMigrationTabularIngestSizeLimitToJsonHandlesNonNumericValuesGracefully() throws Exception {
+        // GIVEN
+        var inputXml = """
+            <?xml version='1.0' encoding='UTF-8'?>
+            <dataset>
+                <setting id="1" name=":TabularIngestSizeLimit:csv" content="not-a-number" />
+            </dataset>
+            """;
+        loadData(inputXml);
+        
+        // WHEN
+        runMigrationScript();
+        var actualData = getActualData();
+        
+        // THEN
+        // Invalid values should be set to 0 (at least that's what the migration *should* do)
+        // Notes: We don't specify 'id' in expected because they are auto-generated.
+        //        We use single quotes for XML attributes containing JSON with double quotes.
+        var expectedXml = """
+            <?xml version='1.0' encoding='UTF-8'?>
+            <dataset>
+                <setting name=":TabularIngestSizeLimit" content='{"csv": 0}' />
+            </dataset>
+            """;
+        var expectedData = getExpectedData(expectedXml);
+        
+        assertEquals(expectedData, actualData);
+    }
+    
+    // --- Helper Methods ---
+    private void loadData(String inputXml) throws Exception {
+        try (InputStream xml = new ByteArrayInputStream(inputXml.getBytes(StandardCharsets.UTF_8))) {
+            IDataSet inputDataSet = new FlatXmlDataSetBuilder().build(xml);
+            databaseTester.setDataSet(inputDataSet);
+            databaseTester.setSetUpOperation(DatabaseOperation.CLEAN_INSERT);
+            databaseTester.onSetup();
+        }
+        
+        // Update the sequence to avoid primary key collisions with the data just inserted by DBUnit
+        try (Connection conn = SharedPostgresContainer.getConnection(); Statement stmt = conn.createStatement()) {
+            stmt.execute("SELECT setval('setting_id_seq', (SELECT MAX(id) FROM setting))");
+        }
+    }
+    
+    /**
+     * Retrieves and processes the actual data from the database for verification purposes.
+     * The method performs the following steps:
+     * - Obtains the current dataset from the database connection.
+     * - Filters out the id and lang column from the data.
+     * - Sorts the resulting table based on predefined sorting criteria.
+     *
+     * @return a processed and sorted {@link ITable} object representing the actual state of the database.
+     * @throws Exception if an error occurs during dataset retrieval, column filtering, or table sorting.
+     */
+    private ITable getActualData() throws Exception {
+        IDataSet actualDataSet = databaseTester.getConnection().createDataSet();
+        ITable actualTable = actualDataSet.getTable(TABLE_NAME);
+        ITable filteredActual = DefaultColumnFilter.excludedColumnsTable(actualTable, new String[]{"id", "lang"});
+        SortedTable sortedActual = new SortedTable(filteredActual, new String[]{"name"});
+        return sortedActual;
+    }
+    
+    /**
+     * Constructs and returns an expected data table for testing database contents, based on the provided XML input.
+     * The method parses the given XML string into a dataset, filters out the id and lang column,
+     * and sorts the table based on predefined sorting criteria.
+     *
+     * @param expectedXml the XML string representing the expected dataset to be used for verification
+     * @return a processed and sorted {@link ITable} object representing the expected database state
+     * @throws Exception if an error occurs during XML parsing, dataset creation, or table manipulation
+     */
+    private ITable getExpectedData(String expectedXml) throws Exception {
+        try (InputStream xml = new ByteArrayInputStream(expectedXml.getBytes(StandardCharsets.UTF_8))) {
+            IDataSet expectedDataSet = new FlatXmlDataSetBuilder().build(xml);
+            ITable expectedTable = expectedDataSet.getTable(TABLE_NAME);
+            ITable filteredExpected = DefaultColumnFilter.excludedColumnsTable(expectedTable, new String[]{"id", "lang"});
+            SortedTable sortedExpected = new SortedTable(filteredExpected, new String[]{"name"});
+            return sortedExpected;
+        }
+    }
+    
+    private void runMigrationScript() throws Exception {
+        String migrationSql = loadResource(MIGRATION_RESOURCE);
+        try (Connection conn = SharedPostgresContainer.getConnection(); Statement stmt = conn.createStatement()) {
+            stmt.execute(migrationSql);
+        }
+    }
+    
+    private String loadResource(String path) throws Exception {
+        try (InputStream is = getClass().getResourceAsStream(path)) {
+            if (is == null) {
+                throw new RuntimeException("Resource not found: " + path);
+            }
+            return new String(is.readAllBytes(), StandardCharsets.UTF_8);
+        }
+    }
+
+}
\ No newline at end of file

From 9c258bd3e634520e708e9bee59d0a1642fcb1f17 Mon Sep 17 00:00:00 2001
From: Oliver Bertuch <o.bertuch@fz-juelich.de>
Date: Tue, 25 Nov 2025 01:28:01 +0100
Subject: [PATCH 06/20] fix(db): align ON CONFLICT clause with existing
 functional index

The migration script `V6.8.0.1.sql` failed with a `PSQLException` stating "there is no unique or exclusion constraint matching the ON CONFLICT specification".

This occurred because the script used a partial index syntax for the conflict target:
`ON CONFLICT (name) WHERE lang IS NULL`

However, PostgreSQL's `ON CONFLICT` inference requires the target to syntactically match an existing unique index or constraint. The actual index on the `setting` table is a functional index defined as:
`CREATE UNIQUE INDEX unique_settings ON setting (name, (COALESCE(lang, '')))`

While `WHERE lang IS NULL` and `COALESCE(lang, '')` logically handle nulls similarly for uniqueness in this context, Postgres does not treat them as interchangeable for arbiter inference.

This commit updates the migration script to explicitly target the functional index expression:
`ON CONFLICT (name, (coalesce(lang, '')))`

References:
- PostgreSQL 16 Documentation on INSERT: https://www.postgresql.org/docs/16/sql-insert.html#SQL-ON-CONFLICT
---
 src/main/resources/db/migration/V6.8.0.1.sql | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/main/resources/db/migration/V6.8.0.1.sql b/src/main/resources/db/migration/V6.8.0.1.sql
index 8e810270b06..63df0a44717 100644
--- a/src/main/resources/db/migration/V6.8.0.1.sql
+++ b/src/main/resources/db/migration/V6.8.0.1.sql
@@ -67,7 +67,7 @@ DO $$
         -- Insert or update the new JSON-based setting
         INSERT INTO setting (name, content, lang)
         VALUES (':TabularIngestSizeLimit', json_object::TEXT, NULL)
-        ON CONFLICT (name) WHERE lang IS NULL
+        ON CONFLICT (name, (coalesce(lang, '')))
             DO UPDATE SET content = EXCLUDED.content;
 
         -- Delete all format-specific settings

From b18716c7d550380fa8f6502ae434b2f04e9d378e Mon Sep 17 00:00:00 2001
From: Oliver Bertuch <o.bertuch@fz-juelich.de>
Date: Wed, 26 Nov 2025 09:11:44 +0100
Subject: [PATCH 07/20] test,fix(auth): avoid flapping test results due to
 logout when Keycloak client is autoclosed

Removing the try-with-resources avoids autoclosing the resource, which triggers a logout. As we reuse the token in multiple tests, we don't want that.
---
 .../oidc/OIDCAuthenticationProviderFactoryIT.java   | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/src/test/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/oidc/OIDCAuthenticationProviderFactoryIT.java b/src/test/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/oidc/OIDCAuthenticationProviderFactoryIT.java
index 0ca74ab2f2b..91160b0ad77 100644
--- a/src/test/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/oidc/OIDCAuthenticationProviderFactoryIT.java
+++ b/src/test/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/oidc/OIDCAuthenticationProviderFactoryIT.java
@@ -72,11 +72,12 @@ class OIDCAuthenticationProviderFactoryIT {
         .withAdminPassword(adminPassword);
     
     // simple method to retrieve the issuer URL, referenced to by @JvmSetting annotations (do no delete)
+    @SuppressWarnings("unused")
     private static String getAuthUrl() {
         return keycloakContainer.getAuthServerUrl() + "/realms/" + realm;
     }
     
-    OIDCAuthProvider getProvider() throws Exception {
+    private OIDCAuthProvider getProvider() throws Exception {
         OIDCAuthProvider oidcAuthProvider = (OIDCAuthProvider) OIDCAuthenticationProviderFactory.buildFromSettings();
         
         assumeTrue(oidcAuthProvider.getMetadata().getTokenEndpointURI().toString()
@@ -86,8 +87,9 @@ OIDCAuthProvider getProvider() throws Exception {
     }
     
     // NOTE: This requires the "direct access grants" for the client to be enabled!
-    String getBearerTokenViaKeycloakAdminClient() throws Exception {
-        try (Keycloak keycloak = KeycloakBuilder.builder()
+    private String getBearerTokenViaKeycloakAdminClient() {
+        // NOTE: While Keycloak implements AutoClosable, don't use a try-with-resources with it to avoid logging out.
+        Keycloak keycloak = KeycloakBuilder.builder()
             .serverUrl(keycloakContainer.getAuthServerUrl())
             .grantType(OAuth2Constants.PASSWORD)
             .realm(realm)
@@ -96,9 +98,8 @@ String getBearerTokenViaKeycloakAdminClient() throws Exception {
             .username(realmAdminUser)
             .password(realmAdminPassword)
             .scope("openid")
-            .build()) {
-            return keycloak.tokenManager().getAccessTokenString();
-        }
+            .build();
+        return keycloak.tokenManager().getAccessTokenString();
     }
     
     /**

From c63e6953d47fc8e0fdca87bf714856f8d8195dc7 Mon Sep 17 00:00:00 2001
From: Oliver Bertuch <o.bertuch@fz-juelich.de>
Date: Thu, 27 Nov 2025 14:32:42 +0100
Subject: [PATCH 08/20] refactor(migration): centralize DBUnit helpers and
 reuse shared Postgres container in migration tests

Extracted DBUnit helper methods to a new `DBUnitHelper` class for reusability and cleaner test code. Updated `V6_8_0_1__SettingsDataMigrationIT` to utilize these helpers and the shared Postgres container for better maintainability and reduced redundancy in test setup.
---
 .../dataverse/db/migration/DBUnitHelper.java  | 207 ++++++++++++++++++
 .../db/migration/SharedPostgresContainer.java |  47 +++-
 .../V6_8_0_1__SettingsDataMigrationIT.java    | 143 ++----------
 3 files changed, 277 insertions(+), 120 deletions(-)
 create mode 100644 src/test/java/edu/harvard/iq/dataverse/db/migration/DBUnitHelper.java

diff --git a/src/test/java/edu/harvard/iq/dataverse/db/migration/DBUnitHelper.java b/src/test/java/edu/harvard/iq/dataverse/db/migration/DBUnitHelper.java
new file mode 100644
index 00000000000..17beaabaad0
--- /dev/null
+++ b/src/test/java/edu/harvard/iq/dataverse/db/migration/DBUnitHelper.java
@@ -0,0 +1,207 @@
+package edu.harvard.iq.dataverse.db.migration;
+
+import org.dbunit.Assertion;
+import org.dbunit.IDatabaseTester;
+import org.dbunit.assertion.DiffCollectingFailureHandler;
+import org.dbunit.assertion.Difference;
+import org.dbunit.dataset.DefaultDataSet;
+import org.dbunit.dataset.IDataSet;
+import org.dbunit.dataset.ITable;
+import org.dbunit.dataset.ReplacementDataSet;
+import org.dbunit.dataset.SortedTable;
+import org.dbunit.dataset.filter.DefaultColumnFilter;
+import org.dbunit.dataset.xml.FlatXmlDataSet;
+import org.dbunit.dataset.xml.FlatXmlDataSetBuilder;
+import org.dbunit.operation.DatabaseOperation;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.sql.Connection;
+import java.sql.Statement;
+
+class DBUnitHelper {
+    /**
+     * Loads dataset from the provided XML string into the database associated with the given
+     * {@link IDatabaseTester} instance. It performs the following steps:
+     * - Parses the XML string into an {@link IDataSet}.
+     * - Replaces occurrences of "[null]" with actual null values in the dataset.
+     * - Inserts the data into the database, performing a CLEAN_INSERT operation.
+     * - Updates database sequences to avoid primary key collisions.
+     *
+     * @param tester the {@link IDatabaseTester} instance used for database operations.
+     * @param inputXml the XML string representing the dataset to be loaded into the database.
+     * @throws Exception if an error occurs during dataset parsing, database setup, or sequence update.
+     */
+    public static void loadData(IDatabaseTester tester, String inputXml) throws Exception {
+        try (InputStream xml = new ByteArrayInputStream(inputXml.getBytes(StandardCharsets.UTF_8))) {
+            IDataSet inputDataSet = new FlatXmlDataSetBuilder().build(xml);
+            
+            // Handle NULL values by replacing [null] string with actual NULL in the input dataset
+            ReplacementDataSet dataSet = new ReplacementDataSet(inputDataSet);
+            dataSet.addReplacementObject("[null]", null);
+            
+            tester.setDataSet(dataSet);
+            tester.setSetUpOperation(DatabaseOperation.CLEAN_INSERT);
+            tester.onSetup();
+        }
+        
+        // Update the sequence to avoid primary key collisions with the data just inserted by DBUnit
+        try (Connection conn = tester.getConnection().getConnection(); Statement stmt = conn.createStatement()) {
+            stmt.execute("SELECT setval('setting_id_seq', (SELECT MAX(id) FROM setting))");
+        }
+    }
+    
+    /**
+     * Retrieves and processes the actual data from a specified table in the database.
+     * The method performs the following steps:
+     * - Fetches the entire dataset from the database connection.
+     * - Retrieves the specific table from the dataset.
+     * - Filters out specified columns from the table.
+     * - Sorts the table based on the provided column.
+     *
+     * @param tester the {@link IDatabaseTester} instance used to access the database.
+     * @param table the name of the table to retrieve from the dataset.
+     * @param orderBy the column name by which the table should be sorted.
+     * @param ignoreCols optional columns to exclude from the resulting table.
+     * @return a processed and sorted {@link ITable} object representing the actual data from the database table.
+     * @throws Exception if an error occurs during database access, table filtering, or sorting.
+     */
+    public static ITable getActualData(IDatabaseTester tester, String table, String orderBy, String... ignoreCols) throws Exception {
+        IDataSet actualDataSet = tester.getConnection().createDataSet();
+        ITable actualTable = actualDataSet.getTable(table);
+        ITable filteredActual = DefaultColumnFilter.excludedColumnsTable(actualTable, ignoreCols);
+        SortedTable sortedActual = new SortedTable(filteredActual, new String[]{orderBy});
+        sortedActual.setUseComparable(true);
+        return sortedActual;
+    }
+    
+    /**
+     * Processes the given XML data to produce an expected dataset table for comparison purposes.
+     * The method performs the following steps:
+     * - Parses the XML data into an {@link IDataSet}.
+     * - Replaces "[null]" strings with actual null values in the dataset.
+     * - Filters out specified columns from the table.
+     * - Sorts the resulting table based on the provided column(s).
+     *
+     * @param expectedXml the XML string representing the expected dataset.
+     * @param table the name of the table to retrieve from the dataset.
+     * @param orderBy the column name by which the table should be sorted.
+     * @param ignoreCols optional columns to exclude from the resulting table.
+     * @return a processed and sorted {@link ITable} object representing the expected state of the dataset.
+     * @throws Exception if an error occurs during XML parsing, dataset processing, or table filtering.
+     */
+    public static ITable getExpectedData(String expectedXml, String table, String orderBy, String... ignoreCols) throws Exception {
+        try (InputStream xml = new ByteArrayInputStream(expectedXml.getBytes(StandardCharsets.UTF_8))) {
+            IDataSet rawDataSet = new FlatXmlDataSetBuilder().build(xml);
+            
+            // Handle NULL values by replacing [null] string with actual NULL in the expected dataset
+            // This is necessary to deal with the lang column, which may contain NULL values.
+            ReplacementDataSet expectedDataSet = new ReplacementDataSet(rawDataSet);
+            expectedDataSet.addReplacementObject("[null]", null);
+            ITable expectedTable = expectedDataSet.getTable(table);
+            
+            // Filter out the id column (potentially contains auto-generated values, not comparable)
+            ITable filteredExpected = DefaultColumnFilter.excludedColumnsTable(expectedTable, ignoreCols);
+            
+            // Sort the resulting table by name (using comparable to avoid byte-wise sorting!)
+            SortedTable sortedExpected = new SortedTable(filteredExpected, new String[]{orderBy});
+            sortedExpected.setUseComparable(true);
+            
+            return sortedExpected;
+        }
+    }
+    
+    
+    /**
+     * Executes a database migration script located at the specified resource path.
+     * This method loads the SQL script from the given path, establishes a connection
+     * to the database via the {@link SharedPostgresContainer}, and executes the script.
+     *
+     * @param migrationResourcePath the path to the migration script resource to be executed
+     * @throws Exception if an error occurs while loading the resource, establishing the database connection,
+     *         or executing the SQL script
+     */
+    public static void runMigrationScript(String migrationResourcePath) throws Exception {
+        String migrationSql = loadResource(migrationResourcePath);
+        try (Connection conn = SharedPostgresContainer.getConnection(); Statement stmt = conn.createStatement()) {
+            stmt.execute(migrationSql);
+        }
+    }
+    
+    /**
+     * Loads the content of a resource file located at the given path into a string.
+     * The resource is read with UTF-8 encoding, and if the resource is not found, an IOException
+     * is thrown. Proper handling of the InputStream is ensured using try-with-resources.
+     *
+     * @param path the path to the resource file to be loaded
+     *             (Note: a path starting with "/" is absolute (root of the classpath). Without it is relative to {@link DBUnitHelper})
+     * @return the content of the resource as a string
+     * @throws IOException if the resource cannot be found or an error occurs during reading
+     */
+    public static String loadResource(String path) throws IOException {
+        try (InputStream is = DBUnitHelper.class.getResourceAsStream(path)) {
+            if (is == null) {
+                throw new IOException("Resource not found: " + path);
+            }
+            return new String(is.readAllBytes(), StandardCharsets.UTF_8);
+        }
+    }
+    
+    /**
+     * Asserts that two database tables are equal. The method compares the
+     * expected and actual tables row by row and column by column. If differences
+     * are found, an {@link AssertionError} is thrown with detailed information
+     * about the discrepancies, including the expected and actual values and
+     * their corresponding XML representations.
+     *
+     * @param expected the expected {@link ITable} object, representing the
+     *                 expected state of the database table.
+     * @param actual   the actual {@link ITable} object, representing the actual
+     *                 state of the database table to compare against the expected table.
+     * @throws Exception if an error occurs during the comparison or while
+     *                   generating the XML representation of the tables.
+     */
+    public static void assertTablesEqual(ITable expected, ITable actual) throws Exception {
+        DiffCollectingFailureHandler failureHandler = new DiffCollectingFailureHandler();
+        Assertion.assertEquals(expected, actual, failureHandler);
+        
+        if (!failureHandler.getDiffList().isEmpty()) {
+            StringBuilder sb = new StringBuilder();
+            sb.append("=== DIFFERENCES FOUND ===\n");
+            
+            for (Object diff : failureHandler.getDiffList()) {
+                Difference d = (Difference) diff;
+                sb.append(String.format("Row %d, Column '%s': expected <%s> but was <%s>%n",
+                    d.getRowIndex(),
+                    d.getColumnName(),
+                    d.getExpectedValue(),
+                    d.getActualValue()));
+            }
+            
+            sb.append("\n=== EXPECTED DATA ===\n");
+            sb.append(tableToXmlString(expected));
+            
+            sb.append("\n=== ACTUAL DATA ===\n");
+            sb.append(tableToXmlString(actual));
+            
+            throw new AssertionError(sb.toString());
+        }
+    }
+    
+    /**
+     * Converts the data in the given {@link ITable} to an XML string representation.
+     * This method serializes the specified table into a flat XML format using UTF-8 encoding.
+     *
+     * @param table the {@link ITable} to be converted to an XML string
+     * @return a string containing the XML representation of the specified table
+     * @throws Exception if an error occurs during the serialization process
+     */
+    public static String tableToXmlString(ITable table) throws Exception {
+        ByteArrayOutputStream baos = new ByteArrayOutputStream();
+        FlatXmlDataSet.write(new DefaultDataSet(table), baos);
+        return baos.toString(StandardCharsets.UTF_8);
+    }
+}
\ No newline at end of file
diff --git a/src/test/java/edu/harvard/iq/dataverse/db/migration/SharedPostgresContainer.java b/src/test/java/edu/harvard/iq/dataverse/db/migration/SharedPostgresContainer.java
index 47a995621f7..5970e36069a 100644
--- a/src/test/java/edu/harvard/iq/dataverse/db/migration/SharedPostgresContainer.java
+++ b/src/test/java/edu/harvard/iq/dataverse/db/migration/SharedPostgresContainer.java
@@ -1,9 +1,15 @@
 package edu.harvard.iq.dataverse.db.migration;
 
+import org.dbunit.IDatabaseTester;
+import org.dbunit.JdbcDatabaseTester;
+import org.dbunit.database.DatabaseConfig;
+import org.dbunit.database.IDatabaseConnection;
+import org.dbunit.ext.postgresql.PostgresqlDataTypeFactory;
 import org.testcontainers.postgresql.PostgreSQLContainer;
 
 import java.sql.Connection;
 import java.sql.DriverManager;
+import java.sql.SQLException;
 
 /**
  * Singleton container that is started once and reused across all tests.
@@ -12,6 +18,14 @@ public class SharedPostgresContainer {
     
     private static volatile PostgreSQLContainer instance;
     
+    /**
+     * Provides a singleton instance of {@link PostgreSQLContainer} configured with
+     * a specific PostgreSQL version, database name, username, and password. The
+     * container is created and started on first invocation and reused across
+     * subsequent calls.
+     *
+     * @return the singleton {@link PostgreSQLContainer} instance for use in tests
+     */
     public static PostgreSQLContainer getInstance() {
         if (instance == null) {
             synchronized (SharedPostgresContainer.class) {
@@ -34,11 +48,42 @@ public static PostgreSQLContainer getInstance() {
         return instance;
     }
     
-    public static Connection getConnection() throws Exception {
+    /**
+     * Retrieves a database connection using the credentials and JDBC URL
+     * provided by the singleton PostgreSQL container instance.
+     *
+     * @return a {@link Connection} object to interact with the PostgreSQL database
+     * @throws SQLException if a database access error occurs
+     */
+    public static Connection getConnection() throws SQLException {
         return DriverManager.getConnection(
             getInstance().getJdbcUrl(),
             getInstance().getUsername(),
             getInstance().getPassword()
         );
     }
+    
+    /**
+     * Creates and returns an initialized instance of IDatabaseTester for testing
+     * database interactions. The tester is configured to work with a PostgreSQL
+     * database by using a PostgresqlDataTypeFactory.
+     *
+     * @return an IDatabaseTester instance configured for the current PostgreSQL container
+     * @throws ClassNotFoundException if there is an issue creating the database tester or connection
+     */
+    public static IDatabaseTester getTester() throws ClassNotFoundException {
+        return new JdbcDatabaseTester(
+            getInstance().getDriverClassName(),
+            getInstance().getJdbcUrl(),
+            getInstance().getUsername(),
+            getInstance().getPassword()
+        ) {
+            @Override
+            public IDatabaseConnection getConnection() throws Exception {
+                IDatabaseConnection connection = super.getConnection();
+                connection.getConfig().setProperty(DatabaseConfig.PROPERTY_DATATYPE_FACTORY, new PostgresqlDataTypeFactory());
+                return connection;
+            }
+        };
+    }
 }
diff --git a/src/test/java/edu/harvard/iq/dataverse/db/migration/V6_8_0_1__SettingsDataMigrationIT.java b/src/test/java/edu/harvard/iq/dataverse/db/migration/V6_8_0_1__SettingsDataMigrationIT.java
index f432aa7ed26..6c442d86bd4 100644
--- a/src/test/java/edu/harvard/iq/dataverse/db/migration/V6_8_0_1__SettingsDataMigrationIT.java
+++ b/src/test/java/edu/harvard/iq/dataverse/db/migration/V6_8_0_1__SettingsDataMigrationIT.java
@@ -2,45 +2,28 @@
 
 
 import edu.harvard.iq.dataverse.util.testing.Tags;
-import org.dbunit.Assertion;
 import org.dbunit.IDatabaseTester;
-import org.dbunit.JdbcDatabaseTester;
-import org.dbunit.dataset.IDataSet;
-import org.dbunit.dataset.ITable;
-import org.dbunit.dataset.SortedTable;
-import org.dbunit.dataset.filter.DefaultColumnFilter;
-import org.dbunit.dataset.xml.FlatXmlDataSetBuilder;
-import org.dbunit.operation.DatabaseOperation;
 import org.junit.jupiter.api.AfterEach;
 import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.DisplayName;
 import org.junit.jupiter.api.Tag;
 import org.junit.jupiter.api.Test;
-import org.testcontainers.junit.jupiter.Container;
 import org.testcontainers.junit.jupiter.Testcontainers;
-import org.testcontainers.postgresql.PostgreSQLContainer;
 
-import java.io.ByteArrayInputStream;
-import java.io.InputStream;
-import java.nio.charset.StandardCharsets;
 import java.sql.Connection;
 import java.sql.Statement;
 
-import static org.dbunit.Assertion.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 
 @Tag(Tags.DB_MIGRATION_TEST)
 @Tag(Tags.USES_TESTCONTAINERS)
 @Testcontainers(disabledWithoutDocker = true)
-public class V6_8_0_1__SettingsDataMigrationIT {
+class V6_8_0_1__SettingsDataMigrationIT {
     
     static final String MIGRATION_RESOURCE = "/db/migration/V6.8.0.1.sql";
     static final String TABLE_NAME = "setting";
     
-    @Container
-    static PostgreSQLContainer POSTGRES = SharedPostgresContainer.getInstance();
-    
     private IDatabaseTester databaseTester;
     
     @BeforeAll
@@ -65,13 +48,7 @@ lang    text constraint non_empty_lang check (lang <> ''::text),
     
     @BeforeEach
     void setUp() throws Exception {
-        // Create a new database tester instance, get the connection details from Testcontainers
-        databaseTester = new JdbcDatabaseTester(
-            POSTGRES.getDriverClassName(),
-            POSTGRES.getJdbcUrl(),
-            POSTGRES.getUsername(),
-            POSTGRES.getPassword()
-        );
+        databaseTester = SharedPostgresContainer.getTester();
     }
     
     @AfterEach
@@ -81,7 +58,7 @@ void tearDown() throws Exception {
         }
         // Clean up for the next test
         try (Connection conn = SharedPostgresContainer.getConnection(); Statement stmt = conn.createStatement()) {
-            stmt.execute("TRUNCATE TABLE setting RESTART IDENTITY CASCADE");
+            stmt.execute("TRUNCATE TABLE " + TABLE_NAME + " RESTART IDENTITY CASCADE");
         }
     }
     
@@ -96,11 +73,11 @@ void testMigrationConvertSimpleSettings() throws Exception {
                 <setting id="2" name="WorkflowsAdmin#IP_WHITELIST_KEY" content="127.0.0.1" />
             </dataset>
             """;
-        loadData(inputXml);
+        DBUnitHelper.loadData(databaseTester, inputXml);
         
         // WHEN
-        runMigrationScript();
-        var actualData = getActualData();
+        DBUnitHelper.runMigrationScript(MIGRATION_RESOURCE);
+        var actualData = DBUnitHelper.getActualData(databaseTester, TABLE_NAME, "name", "id");
         
         // THEN
         // Notes: We don't specify 'id' in expected because they are auto-generated.
@@ -112,9 +89,9 @@ void testMigrationConvertSimpleSettings() throws Exception {
                 <setting name=":WorkflowsAdminIpWhitelist" content="127.0.0.1" />
             </dataset>
             """;
-        var expectedData = getExpectedData(expectedXml);
+        var expectedData = DBUnitHelper.getExpectedData(expectedXml, TABLE_NAME, "name", "id");
         
-        Assertion.assertEquals(expectedData, actualData);
+        DBUnitHelper.assertTablesEqual(expectedData, actualData);
     }
     
     @Test
@@ -129,11 +106,11 @@ void testMigrationConvertTabularIngestSizeLimitToJsonFormat() throws Exception {
                 <setting id="5" name=":TabularIngestSizeLimit:tab" content="1000" />
             </dataset>
             """;
-        loadData(inputXml);
+        DBUnitHelper.loadData(databaseTester, inputXml);
         
         // WHEN
-        runMigrationScript();
-        var actualData = getActualData();
+        DBUnitHelper.runMigrationScript(MIGRATION_RESOURCE);
+        var actualData = DBUnitHelper.getActualData(databaseTester, TABLE_NAME, "name", "id");
         
         // THEN
         // Notes: We don't specify 'id' in expected because they are auto-generated.
@@ -141,12 +118,12 @@ void testMigrationConvertTabularIngestSizeLimitToJsonFormat() throws Exception {
         var expectedXml = """
             <?xml version='1.0' encoding='UTF-8'?>
             <dataset>
-                <setting name=":TabularIngestSizeLimit" content='{"csv": 5000, "tab": 1000, "default": 2000}' />
+                <setting name=":TabularIngestSizeLimit" content='{"csv": 5000, "tab": 1000, "default": 2000}' lang="[null]" />
             </dataset>
             """;
-        var expectedData = getExpectedData(expectedXml);
+        var expectedData = DBUnitHelper.getExpectedData(expectedXml, TABLE_NAME, "name", "id");
         
-        Assertion.assertEquals(expectedData, actualData);
+        DBUnitHelper.assertTablesEqual(expectedData, actualData);
     }
     
     @Test
@@ -156,19 +133,16 @@ void testMigrationTabularIngestSizeLimitToJsonHandlesNullLangValues() throws Exc
         var inputXml = """
             <?xml version='1.0' encoding='UTF-8'?>
             <dataset>
-                <setting id="1" name=":TabularIngestSizeLimit:csv" content="3000" lang="[null]" />
+                <setting id="1" name=":TabularIngestSizeLimit:csv" content="3000" />
             </dataset>
             """;
-        loadData(inputXml);
+        DBUnitHelper.loadData(databaseTester, inputXml);
         
         // WHEN
-        runMigrationScript();
+        DBUnitHelper.runMigrationScript(MIGRATION_RESOURCE);
+        var actualTable = DBUnitHelper.getActualData(databaseTester, TABLE_NAME, "name", "id");
         
         // THEN
-        // Verify the migration completed...
-        IDataSet actualDataSet = databaseTester.getConnection().createDataSet();
-        ITable actualTable = actualDataSet.getTable("setting");
-        
         // Just verify we have at least one row (the migrated JSON setting)
         assertTrue(actualTable.getRowCount() > 0);
     }
@@ -183,11 +157,11 @@ void testMigrationTabularIngestSizeLimitToJsonHandlesNonNumericValuesGracefully(
                 <setting id="1" name=":TabularIngestSizeLimit:csv" content="not-a-number" />
             </dataset>
             """;
-        loadData(inputXml);
+        DBUnitHelper.loadData(databaseTester, inputXml);
         
         // WHEN
-        runMigrationScript();
-        var actualData = getActualData();
+        DBUnitHelper.runMigrationScript(MIGRATION_RESOURCE);
+        var actualData = DBUnitHelper.getActualData(databaseTester, TABLE_NAME, "name", "id");
         
         // THEN
         // Invalid values should be set to 0 (at least that's what the migration *should* do)
@@ -196,80 +170,11 @@ void testMigrationTabularIngestSizeLimitToJsonHandlesNonNumericValuesGracefully(
         var expectedXml = """
             <?xml version='1.0' encoding='UTF-8'?>
             <dataset>
-                <setting name=":TabularIngestSizeLimit" content='{"csv": 0}' />
+                <setting name=":TabularIngestSizeLimit" content='{"csv": 0}' lang="[null]" />
             </dataset>
             """;
-        var expectedData = getExpectedData(expectedXml);
+        var expectedData = DBUnitHelper.getExpectedData(expectedXml, TABLE_NAME, "name", "id");
         
-        assertEquals(expectedData, actualData);
+        DBUnitHelper.assertTablesEqual(expectedData, actualData);
     }
-    
-    // --- Helper Methods ---
-    private void loadData(String inputXml) throws Exception {
-        try (InputStream xml = new ByteArrayInputStream(inputXml.getBytes(StandardCharsets.UTF_8))) {
-            IDataSet inputDataSet = new FlatXmlDataSetBuilder().build(xml);
-            databaseTester.setDataSet(inputDataSet);
-            databaseTester.setSetUpOperation(DatabaseOperation.CLEAN_INSERT);
-            databaseTester.onSetup();
-        }
-        
-        // Update the sequence to avoid primary key collisions with the data just inserted by DBUnit
-        try (Connection conn = SharedPostgresContainer.getConnection(); Statement stmt = conn.createStatement()) {
-            stmt.execute("SELECT setval('setting_id_seq', (SELECT MAX(id) FROM setting))");
-        }
-    }
-    
-    /**
-     * Retrieves and processes the actual data from the database for verification purposes.
-     * The method performs the following steps:
-     * - Obtains the current dataset from the database connection.
-     * - Filters out the id and lang column from the data.
-     * - Sorts the resulting table based on predefined sorting criteria.
-     *
-     * @return a processed and sorted {@link ITable} object representing the actual state of the database.
-     * @throws Exception if an error occurs during dataset retrieval, column filtering, or table sorting.
-     */
-    private ITable getActualData() throws Exception {
-        IDataSet actualDataSet = databaseTester.getConnection().createDataSet();
-        ITable actualTable = actualDataSet.getTable(TABLE_NAME);
-        ITable filteredActual = DefaultColumnFilter.excludedColumnsTable(actualTable, new String[]{"id", "lang"});
-        SortedTable sortedActual = new SortedTable(filteredActual, new String[]{"name"});
-        return sortedActual;
-    }
-    
-    /**
-     * Constructs and returns an expected data table for testing database contents, based on the provided XML input.
-     * The method parses the given XML string into a dataset, filters out the id and lang column,
-     * and sorts the table based on predefined sorting criteria.
-     *
-     * @param expectedXml the XML string representing the expected dataset to be used for verification
-     * @return a processed and sorted {@link ITable} object representing the expected database state
-     * @throws Exception if an error occurs during XML parsing, dataset creation, or table manipulation
-     */
-    private ITable getExpectedData(String expectedXml) throws Exception {
-        try (InputStream xml = new ByteArrayInputStream(expectedXml.getBytes(StandardCharsets.UTF_8))) {
-            IDataSet expectedDataSet = new FlatXmlDataSetBuilder().build(xml);
-            ITable expectedTable = expectedDataSet.getTable(TABLE_NAME);
-            ITable filteredExpected = DefaultColumnFilter.excludedColumnsTable(expectedTable, new String[]{"id", "lang"});
-            SortedTable sortedExpected = new SortedTable(filteredExpected, new String[]{"name"});
-            return sortedExpected;
-        }
-    }
-    
-    private void runMigrationScript() throws Exception {
-        String migrationSql = loadResource(MIGRATION_RESOURCE);
-        try (Connection conn = SharedPostgresContainer.getConnection(); Statement stmt = conn.createStatement()) {
-            stmt.execute(migrationSql);
-        }
-    }
-    
-    private String loadResource(String path) throws Exception {
-        try (InputStream is = getClass().getResourceAsStream(path)) {
-            if (is == null) {
-                throw new RuntimeException("Resource not found: " + path);
-            }
-            return new String(is.readAllBytes(), StandardCharsets.UTF_8);
-        }
-    }
-
 }
\ No newline at end of file

From 85e94eec400126df887127f2730a5725640a340d Mon Sep 17 00:00:00 2001
From: Oliver Bertuch <o.bertuch@fz-juelich.de>
Date: Thu, 27 Nov 2025 14:37:28 +0100
Subject: [PATCH 09/20] fix(settings,workflow): migrate workflow keys to
 structured enums and simplify access methods #11996

Replaced hard-coded workflow keys with structured enum-based keys in `TriggerType`. Updated `WorkflowServiceBean` and `SettingsServiceBean` to use consistent key resolution methods, improving readability and maintainability. Updated related database migration script to align with new key naming schema.

This adds the missing keys after introduction of naming restrictions in #11654. The config keys for the default workflows will no longer be cleansed from the database during deployment.
---
 .../iq/dataverse/settings/SettingsServiceBean.java | 12 ++++++++++++
 .../iq/dataverse/workflow/WorkflowContext.java     | 13 ++++++++++++-
 .../iq/dataverse/workflow/WorkflowServiceBean.java | 14 ++++----------
 src/main/resources/db/migration/V6.8.0.1.sql       |  4 ++++
 4 files changed, 32 insertions(+), 11 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java
index 1cdac02a013..174264eabee 100644
--- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java
@@ -177,6 +177,18 @@ public enum Key {
          */
         WorkflowsAdminIpWhitelist,
         
+        /**
+         * Represents the workflow identifier for the "pre-publish dataset" operation.
+         * This identifier is used to manage and define the specific workflow
+         * triggered before a dataset is published within the application.
+         */
+        PrePublishDatasetWorkflowId,
+        /**
+         * Represents the configuration key for specifying the workflow identifier that
+         * will be executed after a dataset has been published.
+         */
+        PostPublishDatasetWorkflowId,
+        
         /**
          * A special secret that, if set, needs to be given when trying to manage internal users.
          * This key was formerly known as "BuiltinUsers.KEY", which never was a setting name aligning with the others.
diff --git a/src/main/java/edu/harvard/iq/dataverse/workflow/WorkflowContext.java b/src/main/java/edu/harvard/iq/dataverse/workflow/WorkflowContext.java
index a0aee4cb4c9..178028d4241 100644
--- a/src/main/java/edu/harvard/iq/dataverse/workflow/WorkflowContext.java
+++ b/src/main/java/edu/harvard/iq/dataverse/workflow/WorkflowContext.java
@@ -3,6 +3,7 @@
 import edu.harvard.iq.dataverse.Dataset;
 import edu.harvard.iq.dataverse.authorization.users.ApiToken;
 import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
+import edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key;
 import edu.harvard.iq.dataverse.workflow.step.WorkflowStep;
 
 import java.util.Map;
@@ -20,7 +21,17 @@
 public class WorkflowContext {
     
     public enum TriggerType {
-        PrePublishDataset, PostPublishDataset
+        PrePublishDataset(Key.PrePublishDatasetWorkflowId),
+        PostPublishDataset(Key.PostPublishDatasetWorkflowId);
+        
+        final Key key;
+        TriggerType(Key key) {
+            this.key = key;
+        }
+        
+        Key getKey() {
+            return key;
+        }
     }
     
     private final DataverseRequest request;
diff --git a/src/main/java/edu/harvard/iq/dataverse/workflow/WorkflowServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/workflow/WorkflowServiceBean.java
index 757d447b60a..ae1175f0e1d 100644
--- a/src/main/java/edu/harvard/iq/dataverse/workflow/WorkflowServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/workflow/WorkflowServiceBean.java
@@ -53,7 +53,6 @@
 public class WorkflowServiceBean {
 
     private static final Logger logger = Logger.getLogger(WorkflowServiceBean.class.getName());
-    private static final String WORKFLOW_ID_KEY = "WorkflowServiceBean.WorkflowId:";
 
     @PersistenceContext(unitName = "VDCNet-ejbPU")
     EntityManager em;
@@ -452,7 +451,7 @@ public boolean deleteWorkflow(long workflowId) {
         if (doomedOpt.isPresent()) {
             // validate that this is not the default workflow
             for ( WorkflowContext.TriggerType tp : WorkflowContext.TriggerType.values() ) {
-                String defaultWorkflowId = settings.get(workflowSettingKey(tp));
+                String defaultWorkflowId = settings.getValueForKey(tp.getKey());
                 if (defaultWorkflowId != null
                         && Long.parseLong(defaultWorkflowId) == doomedOpt.get().getId()) {
                     throw new IllegalArgumentException("Workflow " + workflowId + " cannot be deleted as it is the default workflow for trigger " + tp.name() );
@@ -476,7 +475,7 @@ public PendingWorkflowInvocation getPendingWorkflow(String invocationId) {
     }
 
     public Optional<Workflow> getDefaultWorkflow( WorkflowContext.TriggerType type ) {
-        String defaultWorkflowId = settings.get(workflowSettingKey(type));
+        String defaultWorkflowId = settings.getValueForKey(type.getKey());
         if (defaultWorkflowId == null) {
             return Optional.empty();
         }
@@ -491,18 +490,13 @@ public Optional<Workflow> getDefaultWorkflow( WorkflowContext.TriggerType type )
      * @param type type of the workflow.
      */
     public void setDefaultWorkflowId(WorkflowContext.TriggerType type, Long id) {
-        String workflowKey = workflowSettingKey(type);
         if (id == null) {
-            settings.delete(workflowKey);
+            settings.deleteValueForKey(type.getKey());
         } else {
-            settings.set(workflowKey, id.toString());
+            settings.setValueForKey(type.getKey(), id.toString());
         }
     }
 
-    private String workflowSettingKey(WorkflowContext.TriggerType type) {
-        return WORKFLOW_ID_KEY+type.name();
-    }
-
     private WorkflowStep createStep(WorkflowStepData wsd) {
         WorkflowStepSPI provider = providers.get(wsd.getProviderId());
         if (provider == null) {
diff --git a/src/main/resources/db/migration/V6.8.0.1.sql b/src/main/resources/db/migration/V6.8.0.1.sql
index 63df0a44717..e00fe993de9 100644
--- a/src/main/resources/db/migration/V6.8.0.1.sql
+++ b/src/main/resources/db/migration/V6.8.0.1.sql
@@ -95,3 +95,7 @@ DO $$
             DELETE FROM setting WHERE name = 'WorkflowsAdmin#IP_WHITELIST_KEY';
         END IF;
     END $$;
+
+-- 4. Migrate WorkflowServiceBean.WorkflowId specials to new PrePublishDatasetWorkflowId and PostPublishDatasetWorkflowId
+UPDATE setting SET name = ':PrePublishDatasetWorkflowId' WHERE name = 'WorkflowServiceBean.WorkflowId:PrePublishDataset';
+UPDATE setting SET name = ':PostPublishDatasetWorkflowId' WHERE name = 'WorkflowServiceBean.WorkflowId:PostPublishDataset';

From 58a12372ae5eff074131b0c153bb8d2f9b7f5a17 Mon Sep 17 00:00:00 2001
From: Oliver Bertuch <o.bertuch@fz-juelich.de>
Date: Thu, 27 Nov 2025 14:37:51 +0100
Subject: [PATCH 10/20] docs(workflow): add details on workflow configuration
 via Settings API

Expanded documentation to include new Settings API options for managing workflows. Added references for `WorkflowsAdminIpWhitelist`, `PrePublishDatasetWorkflowId`, and `PostPublishDatasetWorkflowId` with usage examples. Enhanced developer guide with a new "Administration" section for workflow-related settings.
---
 .../source/developers/workflows.rst           |  4 ++
 .../source/installation/config.rst            | 40 +++++++++++++++++++
 2 files changed, 44 insertions(+)

diff --git a/doc/sphinx-guides/source/developers/workflows.rst b/doc/sphinx-guides/source/developers/workflows.rst
index 38ca6f4e141..80830a1822d 100644
--- a/doc/sphinx-guides/source/developers/workflows.rst
+++ b/doc/sphinx-guides/source/developers/workflows.rst
@@ -27,6 +27,8 @@ If a step in a workflow fails, the Dataverse installation makes an effort to rol
   provider offers two steps for sending and receiving customizable HTTP requests.
   *http/sr* and *http/authExt*, detailed below, with the latter able to use the API to make changes to the dataset being processed. (Both lock the dataset to prevent other processes from changing the dataset between the time the step is launched to when the external process responds to the Dataverse instance.)
 
+.. _workflow_admin:
+
 Administration
 ~~~~~~~~~~~~~~
 
@@ -36,6 +38,8 @@ At the moment, defining a workflow for each trigger is done for the entire insta
 
 In order to prevent unauthorized resuming of workflows, the Dataverse installation maintains a "white list" of IP addresses from which resume requests are honored. This list is maintained using the ``/api/admin/workflows/ip-whitelist`` endpoint of the :doc:`/api/native-api`. By default, the Dataverse installation honors resume requests from localhost only (``127.0.0.1;::1``), so set-ups that use a single server work with no additional configuration.
 
+Note: these settings are also exposed and manageable via the Settings API.
+See :ref:`:WorkflowsAdminIpWhitelist`, :ref:`:PrePublishDatasetWorkflowId` and :ref:`:PostPublishDatasetWorkflowId`
 
 Available Steps
 ~~~~~~~~~~~~~~~
diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst
index 6c19464489d..d661b14f9c5 100644
--- a/doc/sphinx-guides/source/installation/config.rst
+++ b/doc/sphinx-guides/source/installation/config.rst
@@ -2403,6 +2403,9 @@ The workflow id returned in this call (or available by doing a GET of /api/admin
 
 Once these steps are taken, new publication requests will automatically trigger submission of an archival copy to the specified archiver, Chronopolis' DuraCloud component in this example. For Chronopolis, as when using the API, it is currently the admin's responsibility to snap-shot the DuraCloud space and monitor the result. Failure of the workflow, (e.g. if DuraCloud is unavailable, the configuration is wrong, or the space for this dataset already exists due to a prior publication action or use of the API), will create a failure message but will not affect publication itself.
 
+Note: setting the default workflow is also available via the Settings API.
+See :ref:`:WorkflowsAdminIpWhitelist`, :ref:`:PrePublishDatasetWorkflowId` and :ref:`:PostPublishDatasetWorkflowId`
+
 .. _bag-info.txt:
 
 Configuring bag-info.txt
@@ -5085,6 +5088,43 @@ Number of errors to display to the user when creating DataFiles from a file uplo
 
 ``curl -X PUT -d '1' http://localhost:8080/api/admin/settings/:CreateDataFilesMaxErrorsToDisplay``
 
+.. _:WorkflowsAdminIpWhitelist:
+
+:WorkflowsAdminIpWhitelist
+++++++++++++++++++++++++++
+
+A semicolon-separated list of IP addresses from which workflow resume requests are honored.
+By default, the Dataverse installation honors resume requests from localhost only (``127.0.0.1;::1``).
+This setting allows for preventing unauthorized resuming of workflows.
+
+``curl -X PUT -d '127.0.0.1;::1;192.168.0.1' http://localhost:8080/api/admin/settings/:WorkflowsAdminIpWhitelist``
+
+See :ref:`Workflow Admin section <workflow_admin>` for more details and context.
+
+.. _:PrePublishDatasetWorkflowId:
+
+:PrePublishDatasetWorkflowId
+++++++++++++++++++++++++++++
+
+The identifier of the workflow to be executed prior to dataset publication.
+This pre-publish workflow is useful for preparing a dataset for public access (e.g., moving files, checking metadata) or starting an approval process.
+
+``curl -X PUT -d '1' http://localhost:8080/api/admin/settings/:PrePublishDatasetWorkflowId``
+
+See :ref:`Workflow Admin section <workflow_admin>` for more details and context.
+
+.. _:PostPublishDatasetWorkflowId:
+
+:PostPublishDatasetWorkflowId
++++++++++++++++++++++++++++++
+
+The identifier of the workflow to be executed after a dataset has been successfully published.
+This post-publish workflow is useful for actions such as sending notifications about the newly published dataset or archiving.
+
+``curl -X PUT -d '2' http://localhost:8080/api/admin/settings/:PostPublishDatasetWorkflowId``
+
+See :ref:`Workflow Admin section <workflow_admin>` for more details and context.
+
 .. _:BagItHandlerEnabled:
 
 :BagItHandlerEnabled

From a05fcc868d8329e4c6ccac720351528fd3705f0f Mon Sep 17 00:00:00 2001
From: Oliver Bertuch <o.bertuch@fz-juelich.de>
Date: Thu, 27 Nov 2025 14:38:19 +0100
Subject: [PATCH 11/20] test(migration): update tests to account for additional
 workflow settings

Enhanced `V6_8_0_1__SettingsDataMigrationIT` to include tests for new workflow settings (`PrePublishDatasetWorkflowId` and `PostPublishDatasetWorkflowId`). Updated expected data assertions and display names for better clarity and completeness.
---
 .../V6_8_0_1__SettingsDataMigrationIT.java         | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/src/test/java/edu/harvard/iq/dataverse/db/migration/V6_8_0_1__SettingsDataMigrationIT.java b/src/test/java/edu/harvard/iq/dataverse/db/migration/V6_8_0_1__SettingsDataMigrationIT.java
index 6c442d86bd4..309c4779c0e 100644
--- a/src/test/java/edu/harvard/iq/dataverse/db/migration/V6_8_0_1__SettingsDataMigrationIT.java
+++ b/src/test/java/edu/harvard/iq/dataverse/db/migration/V6_8_0_1__SettingsDataMigrationIT.java
@@ -1,6 +1,6 @@
 package edu.harvard.iq.dataverse.db.migration;
 
-
+import edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key;
 import edu.harvard.iq.dataverse.util.testing.Tags;
 import org.dbunit.IDatabaseTester;
 import org.junit.jupiter.api.AfterEach;
@@ -63,7 +63,7 @@ void tearDown() throws Exception {
     }
     
     @Test
-    @DisplayName("Test migrating BuiltinUsers.KEY and WorkflowsAdmin")
+    @DisplayName("Test migrating BuiltinUsers.KEY and Workflow settings")
     void testMigrationConvertSimpleSettings() throws Exception {
         // GIVEN
         var inputXml = """
@@ -71,6 +71,8 @@ void testMigrationConvertSimpleSettings() throws Exception {
             <dataset>
                 <setting id="1" name="BuiltinUsers.KEY" content="secret-key-123" />
                 <setting id="2" name="WorkflowsAdmin#IP_WHITELIST_KEY" content="127.0.0.1" />
+                <setting id="3" name="WorkflowServiceBean.WorkflowId:PrePublishDataset" content="1" />
+                <setting id="4" name="WorkflowServiceBean.WorkflowId:PostPublishDataset" content="2" />
             </dataset>
             """;
         DBUnitHelper.loadData(databaseTester, inputXml);
@@ -85,10 +87,12 @@ void testMigrationConvertSimpleSettings() throws Exception {
         var expectedXml = """
             <?xml version='1.0' encoding='UTF-8'?>
             <dataset>
-                <setting name=":BuiltinUsersKey" content="secret-key-123" />
-                <setting name=":WorkflowsAdminIpWhitelist" content="127.0.0.1" />
+                <setting name="%s" content="secret-key-123" lang="[null]" />
+                <setting name="%s" content="127.0.0.1" lang="[null]" />
+                <setting name="%s" content="1" lang="[null]" />
+                <setting name="%s" content="2" lang="[null]" />
             </dataset>
-            """;
+            """.formatted(Key.BuiltinUsersKey, Key.WorkflowsAdminIpWhitelist, Key.PrePublishDatasetWorkflowId, Key.PostPublishDatasetWorkflowId);
         var expectedData = DBUnitHelper.getExpectedData(expectedXml, TABLE_NAME, "name", "id");
         
         DBUnitHelper.assertTablesEqual(expectedData, actualData);

From a81859633a5cd09e9c9b86fac0ff9e3a96d1594a Mon Sep 17 00:00:00 2001
From: Oliver Bertuch <o.bertuch@fz-juelich.de>
Date: Thu, 27 Nov 2025 14:38:33 +0100
Subject: [PATCH 12/20] refactor(migration): simplify setting name migrations
 with direct `UPDATE` statements

---
 src/main/resources/db/migration/V6.8.0.1.sql | 16 ++--------------
 1 file changed, 2 insertions(+), 14 deletions(-)

diff --git a/src/main/resources/db/migration/V6.8.0.1.sql b/src/main/resources/db/migration/V6.8.0.1.sql
index e00fe993de9..1c3fbf8ffd7 100644
--- a/src/main/resources/db/migration/V6.8.0.1.sql
+++ b/src/main/resources/db/migration/V6.8.0.1.sql
@@ -79,22 +79,10 @@ DO $$
     END $$;
 
 -- 2. Migrate BuiltinUsers.KEY to the new setting name
-DO $$
-    BEGIN
-        IF EXISTS (SELECT 1 FROM setting WHERE name = 'BuiltinUsers.KEY') THEN
-            INSERT INTO setting (name, lang, content) VALUES (':BuiltinUsersKey', NULL, (SELECT content FROM setting WHERE name = 'BuiltinUsers.KEY'));
-            DELETE FROM setting WHERE name = 'BuiltinUsers.KEY';
-        END IF;
-    END $$;
+UPDATE setting SET name = ':BuiltinUsersKey' WHERE name = 'BuiltinUsers.KEY';
 
 -- 3. Migrate WorkflowsAdmin#IP_WHITELIST_KEY to the new setting name
-DO $$
-    BEGIN
-        IF EXISTS (SELECT 1 FROM setting WHERE name = 'WorkflowsAdmin#IP_WHITELIST_KEY') THEN
-            INSERT INTO setting (name, lang, content) VALUES (':WorkflowsAdminIpWhitelist', NULL, (SELECT content FROM setting WHERE name = 'WorkflowsAdmin#IP_WHITELIST_KEY'));
-            DELETE FROM setting WHERE name = 'WorkflowsAdmin#IP_WHITELIST_KEY';
-        END IF;
-    END $$;
+UPDATE setting SET name = ':WorkflowsAdminIpWhitelist' WHERE name = 'WorkflowsAdmin#IP_WHITELIST_KEY';
 
 -- 4. Migrate WorkflowServiceBean.WorkflowId specials to new PrePublishDatasetWorkflowId and PostPublishDatasetWorkflowId
 UPDATE setting SET name = ':PrePublishDatasetWorkflowId' WHERE name = 'WorkflowServiceBean.WorkflowId:PrePublishDataset';

From c385abb7b5ad5179f7a4105dc0f1985c9cdf39b7 Mon Sep 17 00:00:00 2001
From: Oliver Bertuch <o.bertuch@fz-juelich.de>
Date: Wed, 3 Dec 2025 16:48:03 +0100
Subject: [PATCH 13/20] refactor(settings): enhance cleanup logic with detailed
 logging of keys deleted from the database #11639

---
 .../flyway/SettingsCleanupCallback.java       | 29 ++++++++++++++-----
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/flyway/SettingsCleanupCallback.java b/src/main/java/edu/harvard/iq/dataverse/flyway/SettingsCleanupCallback.java
index 4b02f07a810..c83472f4b17 100644
--- a/src/main/java/edu/harvard/iq/dataverse/flyway/SettingsCleanupCallback.java
+++ b/src/main/java/edu/harvard/iq/dataverse/flyway/SettingsCleanupCallback.java
@@ -11,7 +11,9 @@
 import java.sql.ResultSet;
 import java.sql.SQLException;
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 import java.util.logging.Level;
 import java.util.logging.Logger;
 
@@ -40,6 +42,7 @@ public boolean canHandleInTransaction(Event event, Context context) {
 
     @Override
     public void handle(Event event, Context context) {
+        // Failsafe - we only run _after_ all migrations are done.
         if (event != Event.AFTER_MIGRATE) {
             return;
         }
@@ -61,10 +64,19 @@ public String getCallbackName() {
         return "SettingsCleanup";
     }
     
+    /**
+     * Cleans up invalid settings from the database by identifying and removing
+     * rows in the `setting` table where the `name` attribute does not correspond
+     * to a valid SettingsServiceBean.Key.
+     *
+     * @param connection the database connection to use for querying and updating the `setting` table
+     * @throws SQLException if a database access error occurs or the query fails
+     */
     private void cleanupInvalidSettings(Connection connection) throws SQLException {
-        // Collect IDs of rows to delete
-        List<Long> idsToDelete = new ArrayList<>();
+        // Collect IDs of rows to delete, together with the setting's "name" attribute.
+        Map<Long, String> entriesToDelete = new HashMap<>();
 
+        // IMPORTANT: as we cannot use JPQL mid-Flyway, this query needs to be carefully aligned with the Setting class!
         String selectSql = "SELECT id, name FROM setting";
         try (PreparedStatement ps = connection.prepareStatement(selectSql);
              ResultSet rs = ps.executeQuery()) {
@@ -77,24 +89,25 @@ private void cleanupInvalidSettings(Connection connection) throws SQLException {
                 // to a SettingsServiceBean.Key is considered invalid and will be removed.
                 SettingsServiceBean.Key key = SettingsServiceBean.Key.parse(name);
                 if (key == null) {
-                    idsToDelete.add(id);
+                    entriesToDelete.put(id, name);
                 }
             }
         }
 
-        if (idsToDelete.isEmpty()) {
+        if (entriesToDelete.isEmpty()) {
             logger.fine("Settings cleanup: no invalid settings found");
             return;
         }
 
-        logger.info(() -> "Settings cleanup: found " + idsToDelete.size()
-                + " invalid settings; deleting them");
+        logger.info(() -> "Settings cleanup: found " + entriesToDelete.size()
+                + " invalid/obsolete settings; deleting them.");
 
         String deleteSql = "DELETE FROM setting WHERE id = ?";
         try (PreparedStatement delete = connection.prepareStatement(deleteSql)) {
-            for (Long id : idsToDelete) {
-                delete.setLong(1, id);
+            for (Map.Entry<Long, String> entry : entriesToDelete.entrySet()) {
+                delete.setLong(1, entry.getKey());
                 delete.addBatch();
+                logger.info("Settings cleanup: deleting \"" + entry.getValue() + "\"");
             }
             int[] counts = delete.executeBatch();
             logger.info(() -> "Settings cleanup: deleted " + counts.length + " rows with invalid keys");

From 984cd27383441ea5b455f5f6f50c050d470f0e6d Mon Sep 17 00:00:00 2001
From: Oliver Bertuch <o.bertuch@fz-juelich.de>
Date: Wed, 3 Dec 2025 16:48:54 +0100
Subject: [PATCH 14/20] docs: expand release notes with workflow keys and
 upgrade considerations #11639

Added details on `PrePublishDatasetWorkflowId` and `PostPublishDatasetWorkflowId` to release notes. Included important upgrade guidelines for customized forks and database backup tips.
---
 doc/release-notes/11639-db-opts-idempotency.md | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/doc/release-notes/11639-db-opts-idempotency.md b/doc/release-notes/11639-db-opts-idempotency.md
index f73cbdebf83..b01e3d955cb 100644
--- a/doc/release-notes/11639-db-opts-idempotency.md
+++ b/doc/release-notes/11639-db-opts-idempotency.md
@@ -43,3 +43,13 @@ The following database settings are were added to the official list within the c
 - `:LDNAnnounceRequiredFields`
 - `:LDNTarget`
 - `:WorkflowsAdminIpWhitelist` - formerly `WorkflowsAdmin#IP_WHITELIST_KEY`
+- `:PrePublishDatasetWorkflowId` - formerly `WorkflowServiceBean.WorkflowId:PrePublishDataset`
+- `:PostPublishDatasetWorkflowId` - formerly `WorkflowServiceBean.WorkflowId:PostPublishDataset`
+
+### Important Considerations During Upgrade Of Your Installation
+
+1. Running a customized fork? Make sure to add any custom settings to the SettingsServiceBean.Key enum before deploying!
+2. Any database settings not contained in the `SettingServiceBean.Key` will be removed from your database during each deployment cycle.
+3. As always when upgrading, make sure to backup your database beforehand!
+   You can also use the existing API endpoint `/api/admin/settings` to retrieve all settings as JSONish data for a quick backup before upgrading.
+

From 0ca703aaee483375fa5117e6cef48e2712d232bc Mon Sep 17 00:00:00 2001
From: Oliver Bertuch <o.bertuch@fz-juelich.de>
Date: Thu, 4 Dec 2025 23:14:54 +0100
Subject: [PATCH 15/20] fix(config): support numeric values for
 TabularIngestSizeLimit, improve validation and logging

Enhanced `getTabularIngestSizeLimits` to accept numeric values in addition to strings for size limits. Improved validation to handle invalid types, numbers, or decimal values. Updated related tests and configuration documentation.

This was done because the limitation to long numbers as string was artificial. Users can choose which way they like best. Also, the data migration uses numbers, so this lead to errors.
---
 .../source/installation/config.rst            |  6 ++-
 .../iq/dataverse/util/SystemConfig.java       | 25 +++++++----
 .../iq/dataverse/util/SystemConfigTest.java   | 42 +++++++++++++------
 3 files changed, 53 insertions(+), 20 deletions(-)

diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst
index de40b9a0033..3e3799f46d7 100644
--- a/doc/sphinx-guides/source/installation/config.rst
+++ b/doc/sphinx-guides/source/installation/config.rst
@@ -4517,7 +4517,11 @@ Using a JSON-based setting, you can set a global default and per-format limits f
 
 (In previous releases of Dataverse, a colon-separated form was used to specify per-format limits, such as ``:TabularIngestSizeLimit:Rdata``, but this is no longer supported. Now JSON is used.)
 
-The expected JSON is an object with key/value pairs like the following. Format names are case-insensitive, and all fields are optional. The size limits must be strings with double quotes around them (e.g. ``"10"``) rather than numbers (e.g. ``10``).
+The expected JSON is an object with key/value pairs like the following.
+Format names are case-insensitive, and all fields are optional (an empty JSON object equals not restricted).
+The size limits must be whole numbers, either presented as strings with double quotes around them (e.g. ``"10"``) or numeric values (e.g. ``10`` or ``10.0``).
+Note that decimal numbers like ``10.5`` are invalid.
+Any invalid setting will temporarily disable tabular ingest until corrected.
 
 .. code:: json
 
diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java
index c1d61378f42..b8f62bf027a 100644
--- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java
+++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java
@@ -527,19 +527,30 @@ public Map<String, Long> getTabularIngestSizeLimits() {
                     limitsMap.put(TABULAR_INGEST_SIZE_LIMITS_DEFAULT_KEY, -1L);
                     
                     for (String formatName : limits.keySet()) {
-                        // We deliberatly do not validate the formatNames here for backward compatibility.
-                        // But we transform to lowercase here, so the casing doesn't matter for lookups.
                         String lowercaseFormatName = formatName.toLowerCase();
                         
                         try {
-                            Long sizeOption = Long.valueOf(limits.getString(formatName));
+                            JsonValue value = limits.get(formatName);
+                            Long sizeOption;
+                            
+                            // We want to be able to use either numbers or string values, so detect which one it is.
+                            if (value.getValueType() == JsonValue.ValueType.STRING) {
+                                sizeOption = Long.valueOf(limits.getString(formatName));
+                            } else if (value.getValueType() == JsonValue.ValueType.NUMBER) {
+                                sizeOption = limits.getJsonNumber(formatName).longValueExact();
+                            } else {
+                                logger.warning("Invalid value type for format " + formatName + ": expected string or number");
+                                logger.warning("Disabling all tabular ingest completely until fixed!");
+                                return Map.of(TABULAR_INGEST_SIZE_LIMITS_DEFAULT_KEY, 0L);
+                            }
+                            
                             limitsMap.put(lowercaseFormatName, sizeOption);
-                        } catch (ClassCastException cce) {
-                            logger.warning("Could not convert " + SettingsServiceBean.Key.TabularIngestSizeLimit + " to long from JSON integer. You must provide the long number as string (use quotes) for format " + formatName);
+                        } catch (NumberFormatException nfe) {
+                            logger.warning("Could not convert " + SettingsServiceBean.Key.TabularIngestSizeLimit + " to long for format " + formatName + " (not a valid number)");
                             logger.warning("Disabling all tabular ingest completely until fixed!");
                             return Map.of(TABULAR_INGEST_SIZE_LIMITS_DEFAULT_KEY, 0L);
-                        } catch (NumberFormatException nfe) {
-                            logger.warning("Could not convert " + SettingsServiceBean.Key.TabularIngestSizeLimit + " to long for format " + formatName + " (not a number)");
+                        } catch (ArithmeticException ae) {
+                            logger.warning("Number too large or has fractional part for format " + formatName);
                             logger.warning("Disabling all tabular ingest completely until fixed!");
                             return Map.of(TABULAR_INGEST_SIZE_LIMITS_DEFAULT_KEY, 0L);
                         }
diff --git a/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java b/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java
index 06026962d2c..0bf14ec5b59 100644
--- a/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java
+++ b/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java
@@ -8,9 +8,11 @@
 import org.junit.jupiter.api.extension.ExtendWith;
 import org.junit.jupiter.params.ParameterizedTest;
 import org.junit.jupiter.params.provider.CsvSource;
+import org.junit.jupiter.params.provider.ValueSource;
 import org.mockito.InjectMocks;
 import org.mockito.Mock;
 import org.mockito.junit.jupiter.MockitoExtension;
+import org.springframework.beans.factory.annotation.Value;
 
 import java.util.Map;
 
@@ -202,10 +204,10 @@ void testGetTabularIngestSizeLimitsWithSingleInvalidValue() {
         assertEquals(0L, (long) result.get(SystemConfig.TABULAR_INGEST_SIZE_LIMITS_DEFAULT_KEY));
     }
     
-    @Test
-    void testGetTabularIngestSizeLimitsWithJsonButUnsupportedJsonInt() {
+    @ParameterizedTest
+    @ValueSource(strings = {"", "{ invalid: }"})
+    void testGetTabularIngestSizeLimitsWithInvalidJson(String invalidJson) {
         // given
-        String invalidJson = "{\"default\": 0}";
         doReturn(invalidJson).when(settingsService).getValueForKey(SettingsServiceBean.Key.TabularIngestSizeLimit);
         
         // when
@@ -216,10 +218,10 @@ void testGetTabularIngestSizeLimitsWithJsonButUnsupportedJsonInt() {
         assertEquals(0L, (long) result.get(SystemConfig.TABULAR_INGEST_SIZE_LIMITS_DEFAULT_KEY));
     }
     
-    @Test
-    void testGetTabularIngestSizeLimitsWithInvalidJson() {
+    @ParameterizedTest
+    @ValueSource(strings = {"[]", "{[{\"tsv\": 1}]}", "{ \"invalid\": \"foobar\" }", "{ \"tsv\": 1.5}", "{ \"tsv\": \"1.0\"}"})
+    void testGetTabularIngestSizeLimitsWithInvalidNumbersInValidJson(String invalidJson) {
         // given
-        String invalidJson = "{invalid:}";
         doReturn(invalidJson).when(settingsService).getValueForKey(SettingsServiceBean.Key.TabularIngestSizeLimit);
         
         // when
@@ -230,17 +232,33 @@ void testGetTabularIngestSizeLimitsWithInvalidJson() {
         assertEquals(0L, (long) result.get(SystemConfig.TABULAR_INGEST_SIZE_LIMITS_DEFAULT_KEY));
     }
     
-    @Test
-    void testGetTabularIngestSizeLimitsWithInvalidNumberInValidJson() {
+    @ParameterizedTest
+    @ValueSource(strings = {"{ \"tSv\": 1.0 }", "{ \"tsv\": \"1\"}", "{ \"tsv\": 1 }"})
+    void testGetTabularIngestSizeLimitsWithValidNumbersInValidJson(String validConfig) {
         // given
-        String invalidJson = "{\"csv\": \"this-is-not-a-number\", \"tSv\": \"10000\"}";
-        doReturn(invalidJson).when(settingsService).getValueForKey(SettingsServiceBean.Key.TabularIngestSizeLimit);
+        doReturn(validConfig).when(settingsService).getValueForKey(SettingsServiceBean.Key.TabularIngestSizeLimit);
         
         // when
         Map<String, Long> result = systemConfig.getTabularIngestSizeLimits();
         
         // then
-        assertEquals(1, result.size());
-        assertEquals(0L, (long) result.get(SystemConfig.TABULAR_INGEST_SIZE_LIMITS_DEFAULT_KEY));
+        assertEquals(2, result.size());
+        assertEquals(-1L, (long) result.get(SystemConfig.TABULAR_INGEST_SIZE_LIMITS_DEFAULT_KEY));
+        assertEquals(1L, (long) result.get("tsv"));
+    }
+    
+    @ParameterizedTest
+    @ValueSource(strings = {"{ \"tSv\": 429496729600.0 }", "{ \"tsv\": \"429496729600\"}", "{ \"tsv\": 429496729600 }"})
+    void testGetTabularIngestSizeLimitsWithValidLongNumbers(String validConfig) {
+        // given
+        doReturn(validConfig).when(settingsService).getValueForKey(SettingsServiceBean.Key.TabularIngestSizeLimit);
+        
+        // when
+        Map<String, Long> result = systemConfig.getTabularIngestSizeLimits();
+        
+        // then
+        assertEquals(2, result.size());
+        assertEquals(-1L, (long) result.get(SystemConfig.TABULAR_INGEST_SIZE_LIMITS_DEFAULT_KEY));
+        assertEquals(429496729600L, (long) result.get("tsv"));
     }
 }

From 270d0394880afc557cd860be28ac50394ebce143 Mon Sep 17 00:00:00 2001
From: Oliver Bertuch <o.bertuch@fz-juelich.de>
Date: Thu, 4 Dec 2025 23:24:30 +0100
Subject: [PATCH 16/20] refactor(config): improve JSON parsing and logging in
 getTabularIngestSizeLimits

Refactored `getTabularIngestSizeLimits` to utilize try-with-resources for safer JSON parsing. Enhanced logging with lambdas and improved iteration over JSON entries.
---
 .../iq/dataverse/util/SystemConfig.java       | 25 +++++++++++--------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java
index b8f62bf027a..2883212be79 100644
--- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java
+++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java
@@ -519,38 +519,41 @@ public Map<String, Long> getTabularIngestSizeLimits() {
         if (limitEntry != null) {
             // Case A: the setting is using JSON to support multiple formats
             if (limitEntry.trim().startsWith("{")) {
-                try {
-                    JsonObject limits = Json.createReader(new StringReader(limitEntry)).readObject();
+                try (JsonReader reader = Json.createReader(new StringReader(limitEntry))) {
+                    JsonObject limits = reader.readObject();
                     
                     Map<String, Long> limitsMap = new HashMap<>();
                     // We add the default in case the JSON does not contain the default (which is optional).
                     limitsMap.put(TABULAR_INGEST_SIZE_LIMITS_DEFAULT_KEY, -1L);
                     
-                    for (String formatName : limits.keySet()) {
+                    for (Map.Entry<String, JsonValue> format : limits.entrySet()) {
+                        String formatName = format.getKey();
                         String lowercaseFormatName = formatName.toLowerCase();
                         
                         try {
-                            JsonValue value = limits.get(formatName);
-                            Long sizeOption;
+                            JsonValue value = format.getValue();
+                            long sizeOption;
                             
                             // We want to be able to use either numbers or string values, so detect which one it is.
+                            // This is necessary as we need to tell the JSON parser what to do, it doesn't automatically handle this for us.
                             if (value.getValueType() == JsonValue.ValueType.STRING) {
-                                sizeOption = Long.valueOf(limits.getString(formatName));
+                                sizeOption = Long.parseLong(limits.getString(formatName));
                             } else if (value.getValueType() == JsonValue.ValueType.NUMBER) {
+                                // Will throw if not a whole number!
                                 sizeOption = limits.getJsonNumber(formatName).longValueExact();
                             } else {
-                                logger.warning("Invalid value type for format " + formatName + ": expected string or number");
+                                logger.warning(() -> "Invalid value type for format " + formatName + ": expected string or number");
                                 logger.warning("Disabling all tabular ingest completely until fixed!");
                                 return Map.of(TABULAR_INGEST_SIZE_LIMITS_DEFAULT_KEY, 0L);
                             }
                             
                             limitsMap.put(lowercaseFormatName, sizeOption);
                         } catch (NumberFormatException nfe) {
-                            logger.warning("Could not convert " + SettingsServiceBean.Key.TabularIngestSizeLimit + " to long for format " + formatName + " (not a valid number)");
+                            logger.warning(() -> "Could not convert " + SettingsServiceBean.Key.TabularIngestSizeLimit + " to long for format " + formatName + " (not a valid number)");
                             logger.warning("Disabling all tabular ingest completely until fixed!");
                             return Map.of(TABULAR_INGEST_SIZE_LIMITS_DEFAULT_KEY, 0L);
                         } catch (ArithmeticException ae) {
-                            logger.warning("Number too large or has fractional part for format " + formatName);
+                            logger.warning(() -> "Number too large or has fractional part for format " + formatName);
                             logger.warning("Disabling all tabular ingest completely until fixed!");
                             return Map.of(TABULAR_INGEST_SIZE_LIMITS_DEFAULT_KEY, 0L);
                         }
@@ -558,7 +561,7 @@ public Map<String, Long> getTabularIngestSizeLimits() {
                     
                     return Collections.unmodifiableMap(limitsMap);
                 } catch (JsonParsingException e) {
-                    logger.warning("Invalid TabularIngestSizeLimit option found, cannot parse JSON: " + e.getMessage());
+                    logger.warning(() -> "Invalid TabularIngestSizeLimit option found, cannot parse JSON: " + e.getMessage());
                     logger.warning("Disabling all tabular ingest completely until fixed!");
                     return Map.of(TABULAR_INGEST_SIZE_LIMITS_DEFAULT_KEY, 0L);
                 }
@@ -568,7 +571,7 @@ public Map<String, Long> getTabularIngestSizeLimits() {
                     Long limit = Long.valueOf(limitEntry);
                     return Map.of(TABULAR_INGEST_SIZE_LIMITS_DEFAULT_KEY, limit);
                 } catch (NumberFormatException nfe) {
-                    logger.warning("Could not convert " + SettingsServiceBean.Key.TabularIngestSizeLimit + " to long: " + nfe);
+                    logger.warning(() -> "Could not convert " + SettingsServiceBean.Key.TabularIngestSizeLimit + " to long: " + nfe);
                     logger.warning("Disabling all tabular ingest completely until fixed!");
                     return Map.of(TABULAR_INGEST_SIZE_LIMITS_DEFAULT_KEY, 0L);
                 }

From 37df10396daf6edea3066b03640dcdffefc9ea4d Mon Sep 17 00:00:00 2001
From: Oliver Bertuch <o.bertuch@fz-juelich.de>
Date: Fri, 5 Dec 2025 08:26:06 +0100
Subject: [PATCH 17/20] style: remove unused (& illegal) Spring `@Value` import
 from `SystemConfigTest`

---
 .../java/edu/harvard/iq/dataverse/util/SystemConfigTest.java     | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java b/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java
index 0bf14ec5b59..123aacc601d 100644
--- a/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java
+++ b/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java
@@ -12,7 +12,6 @@
 import org.mockito.InjectMocks;
 import org.mockito.Mock;
 import org.mockito.junit.jupiter.MockitoExtension;
-import org.springframework.beans.factory.annotation.Value;
 
 import java.util.Map;
 

From 28d90a7e85c9a0cd26e25543eb86656e1580fc1a Mon Sep 17 00:00:00 2001
From: Oliver Bertuch <o.bertuch@fz-juelich.de>
Date: Mon, 8 Dec 2025 14:43:19 +0100
Subject: [PATCH 18/20] docs(settings): clarify Javadocs on
 TabularIngestSizeLimit usage #11639

---
 .../dataverse/settings/SettingsServiceBean.java   | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java
index 8e09088c30f..37d26995017 100644
--- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java
@@ -303,13 +303,14 @@ public enum Key {
          */
         @Deprecated(since = "6.2", forRemoval = true)
         SystemEmail, 
-        /* size limit for Tabular data file ingests */
-        /* (can be set separately for specific ingestable formats; in which 
-        case the actual stored option will be TabularIngestSizeLimit:{FORMAT_NAME}
-        where {FORMAT_NAME} is the format identification tag returned by the 
-        getFormatName() method in the format-specific plugin; "sav" for the 
-        SPSS/sav format, "RData" for R, etc.
-        for example: :TabularIngestSizeLimit:RData */
+        
+        /**
+        <p>Size limit (in bytes) for tabular file ingest. Accepts either a single numeric value or JSON for per-format control.</p>
+        <p>Values: -1 (or absent) = no limit, 0 = disable ingest, >0 = byte threshold, or JSON object.</p>
+        <p>JSON object allows setting a "default" (same as single byte value) and override limits per-format for: CSV, DTA, POR, Rdata, SAV, XLSX.
+        Example: <code>{"default": "536870912", "CSV": "0", "Rdata": "1000000"}</code></p>
+        <p>Format names are case-insensitive. Invalid settings disable ingest until corrected.</p>
+        */
         TabularIngestSizeLimit,
         /* Validate physical files in the dataset when publishing, if the dataset size less than the threshold limit */
         DatasetChecksumValidationSizeLimit,

From 2f662ec895e21d1dcfe18fde8495a34786c44146 Mon Sep 17 00:00:00 2001
From: Oliver Bertuch <o.bertuch@fz-juelich.de>
Date: Mon, 8 Dec 2025 14:47:00 +0100
Subject: [PATCH 19/20] test(files): refactor tabular ingest size limit tests
 with parameterized approach #11639

Simplified and improved test structure by introducing nested classes, parameterized tests, and utility methods for TabularIngestSizeLimit configurations. Enhanced code readability and reusability.
---
 .../edu/harvard/iq/dataverse/api/FilesIT.java | 280 +++++++-----------
 1 file changed, 102 insertions(+), 178 deletions(-)

diff --git a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java
index 83eb80104f3..24ccfd11bed 100644
--- a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java
+++ b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java
@@ -9,17 +9,22 @@
 import io.restassured.RestAssured;
 import io.restassured.response.Response;
 
+import java.nio.charset.StandardCharsets;
 import java.util.*;
 import java.util.logging.Logger;
 
 import edu.harvard.iq.dataverse.api.auth.ApiKeyAuthMechanism;
 import jakarta.json.JsonObject;
+import jakarta.ws.rs.core.Response.Status;
 import org.assertj.core.util.Lists;
+import org.hamcrest.Matcher;
+import org.junit.jupiter.api.Nested;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.BeforeAll;
 import io.restassured.path.json.JsonPath;
 
 import static edu.harvard.iq.dataverse.api.ApiConstants.*;
+import static edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key;
 import static io.restassured.path.json.JsonPath.with;
 import io.restassured.path.xml.XmlPath;
 import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
@@ -45,6 +50,10 @@
 import org.hamcrest.CoreMatchers;
 import org.hamcrest.Matchers;
 import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.io.TempDir;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
 
 import static org.hamcrest.CoreMatchers.*;
 import static org.junit.jupiter.api.Assertions.*;
@@ -59,15 +68,11 @@ public static void setUpClass() {
 
         Response removePublicInstall = UtilIT.deleteSetting(SettingsServiceBean.Key.PublicInstall);
         removePublicInstall.then().assertThat().statusCode(200);
-
-        Response removeLimit = UtilIT.deleteSetting(SettingsServiceBean.Key.TabularIngestSizeLimit);
-        removeLimit.then().assertThat().statusCode(OK.getStatusCode());
     }
 
     @AfterAll
     public static void tearDownClass() {
         UtilIT.deleteSetting(SettingsServiceBean.Key.PublicInstall);
-        UtilIT.deleteSetting(SettingsServiceBean.Key.TabularIngestSizeLimit);
     }
 
     /**
@@ -1211,183 +1216,102 @@ public void test_AddFileBadUploadFormat() {
         }
 
     }
-
-    @Test
-    public void testIngestSizeLimits() throws InterruptedException, IOException {
-        Response createUser = UtilIT.createRandomUser();
-        createUser.then().assertThat().statusCode(OK.getStatusCode());
-        String username = UtilIT.getUsernameFromResponse(createUser);
-        String apiToken = UtilIT.getApiTokenFromResponse(createUser);
-        Response makeSuperUser = UtilIT.setSuperuserStatus(username, true);
-        makeSuperUser.then().assertThat().statusCode(OK.getStatusCode());
-
-        Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken);
-        createDataverseResponse.prettyPrint();
-        String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse);
-        Response createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken);
-        createDatasetResponse.prettyPrint();
-        Integer datasetId = JsonPath.from(createDatasetResponse.body().asString()).getInt("data.id");
-
-        String tinyCsvOnly = """
-{
-  "csv": "50"
-}
-""";
-
-        Response setLimit = UtilIT.setSetting(SettingsServiceBean.Key.TabularIngestSizeLimit, tinyCsvOnly);
-        setLimit.then().assertThat().statusCode(OK.getStatusCode());
-
-        Path pathToDataFile = Paths.get(java.nio.file.Files.createTempDirectory(null) + File.separator + "data.csv");
-        String contentOfCsv = ""
-                + "name,pounds,species,treats\n"
-                + "Midnight,15,dog,milkbones\n"
-                + "Tiger,17,cat,cat grass\n"
-                + "Panther,21,cat,cat nip\n";
-        java.nio.file.Files.write(pathToDataFile, contentOfCsv.getBytes());
-
-        Response uploadFile = UtilIT.uploadFileViaNative(datasetId.toString(), pathToDataFile.toString(), apiToken);
-        uploadFile.prettyPrint();
-        uploadFile.then().assertThat()
-                .statusCode(OK.getStatusCode())
-                .body("data.files[0].label", equalTo("data.csv"));
-
-        String fileId1 = JsonPath.from(uploadFile.body().asString()).getString("data.files[0].dataFile.id");
-
-        Response getTabularFails = UtilIT.getFileDataTables(fileId1, apiToken);
-        getTabularFails.prettyPrint();
-        getTabularFails.then().assertThat()
-                .statusCode(BAD_REQUEST.getStatusCode())
-                .body("message", equalTo(BundleUtil.getStringFromBundle("files.api.only.tabular.supported")));
-
-        String largerCsv = """
-{
-  "csv": "123456"
-}
-""";
-
-        setLimit = UtilIT.setSetting(SettingsServiceBean.Key.TabularIngestSizeLimit, largerCsv);
-        setLimit.then().assertThat().statusCode(OK.getStatusCode());
-
-        uploadFile = UtilIT.uploadFileViaNative(datasetId.toString(), pathToDataFile.toString(), apiToken);
-        uploadFile.prettyPrint();
-        uploadFile.then().assertThat()
-                .statusCode(OK.getStatusCode())
-                .body("data.files[0].label", equalTo("data-1.csv"));
-
-        assertTrue(UtilIT.sleepForLock(datasetId.longValue(), "Ingest", apiToken, UtilIT.MAXIMUM_INGEST_LOCK_DURATION), "Failed test if Ingest Lock exceeds max duration " + pathToDataFile);
-
-        String fileId2 = JsonPath.from(uploadFile.body().asString()).getString("data.files[0].dataFile.id");
-
-        Response getTabularWorks = UtilIT.getFileDataTables(fileId2, apiToken);
-        getTabularWorks.prettyPrint();
-        getTabularWorks.then().assertThat()
-                .statusCode(OK.getStatusCode())
-                .body("data[0].varQuantity", equalTo(4));
-
-        String tinyDefaultSize = """
-{
-  "default": "50"
-}
-""";
-
-        setLimit = UtilIT.setSetting(SettingsServiceBean.Key.TabularIngestSizeLimit, tinyDefaultSize);
-        setLimit.then().assertThat().statusCode(OK.getStatusCode());
-
-        uploadFile = UtilIT.uploadFileViaNative(datasetId.toString(), pathToDataFile.toString(), apiToken);
-        uploadFile.prettyPrint();
-        uploadFile.then().assertThat()
-                .statusCode(OK.getStatusCode())
-                .body("data.files[0].label", equalTo("data-2.csv"));
-
-        String fileId3 = JsonPath.from(uploadFile.body().asString()).getString("data.files[0].dataFile.id");
-
-        getTabularFails = UtilIT.getFileDataTables(fileId3, apiToken);
-        getTabularFails.prettyPrint();
-        getTabularFails.then().assertThat()
-                .statusCode(BAD_REQUEST.getStatusCode())
-                .body("message", equalTo(BundleUtil.getStringFromBundle("files.api.only.tabular.supported")));
-
-        // The behavior of `"default": "-2"` is not documented in the guides
-        // but it acts like `"default": "0"` which disables ingest.
-        String unexpectedNegativeDefault = """
-{
-  "default": "-2"
-}
-""";
-
-        setLimit = UtilIT.setSetting(SettingsServiceBean.Key.TabularIngestSizeLimit, unexpectedNegativeDefault);
-        setLimit.then().assertThat().statusCode(OK.getStatusCode());
-
-        uploadFile = UtilIT.uploadFileViaNative(datasetId.toString(), pathToDataFile.toString(), apiToken);
-        uploadFile.prettyPrint();
-        uploadFile.then().assertThat()
-                .statusCode(OK.getStatusCode())
-                .body("data.files[0].label", equalTo("data-3.csv"));
-
-        String fileId4 = JsonPath.from(uploadFile.body().asString()).getString("data.files[0].dataFile.id");
-
-        getTabularFails = UtilIT.getFileDataTables(fileId4, apiToken);
-        getTabularFails.prettyPrint();
-        getTabularFails.then().assertThat()
-                .statusCode(BAD_REQUEST.getStatusCode())
-                .body("message", equalTo(BundleUtil.getStringFromBundle("files.api.only.tabular.supported")));
-
-        // As the guides say, you MUST provide a string, not a JSON number.
-        // That is, `"123"` in quotes rather than `123` with no quotes.
-        // If you provide a number (no quotes) rather than a string,
-        // all ingest will be disabled and you'll see an error in server.log
-        // about how the system is misconfigured.
-        String invalidNonString = """
-{
-  "default": 987654321
-}
-""";
-
-        setLimit = UtilIT.setSetting(SettingsServiceBean.Key.TabularIngestSizeLimit, invalidNonString);
-        setLimit.then().assertThat().statusCode(OK.getStatusCode());
-
-        uploadFile = UtilIT.uploadFileViaNative(datasetId.toString(), pathToDataFile.toString(), apiToken);
-        uploadFile.prettyPrint();
-        uploadFile.then().assertThat()
-                .statusCode(OK.getStatusCode())
-                .body("data.files[0].label", equalTo("data-4.csv"));
-
-        String fileId5 = JsonPath.from(uploadFile.body().asString()).getString("data.files[0].dataFile.id");
-
-        getTabularFails = UtilIT.getFileDataTables(fileId5, apiToken);
-        getTabularFails.prettyPrint();
-        getTabularFails.then().assertThat()
-                .statusCode(BAD_REQUEST.getStatusCode())
-                .body("message", equalTo(BundleUtil.getStringFromBundle("files.api.only.tabular.supported")));
-
-        String defaultDisabledAndLargeCsvLimit = """
-{
-  "default": "0",
-  "csv": "123456"
-}
-""";
-
-        setLimit = UtilIT.setSetting(SettingsServiceBean.Key.TabularIngestSizeLimit, defaultDisabledAndLargeCsvLimit);
-        setLimit.then().assertThat().statusCode(OK.getStatusCode());
-
-        uploadFile = UtilIT.uploadFileViaNative(datasetId.toString(), pathToDataFile.toString(), apiToken);
-        uploadFile.prettyPrint();
-        uploadFile.then().assertThat()
-                .statusCode(OK.getStatusCode())
-                .body("data.files[0].label", equalTo("data-5.csv"));
-
-        String fileId6 = JsonPath.from(uploadFile.body().asString()).getString("data.files[0].dataFile.id");
-
-        getTabularWorks = UtilIT.getFileDataTables(fileId2, apiToken);
-        getTabularWorks.prettyPrint();
-        getTabularWorks.then().assertThat()
+    
+    @Nested
+    class IngestSizeLimits {
+        
+        static String apiToken;
+        static int datasetId;
+        @TempDir
+        static Path tempDir;
+        static String csvFileName = "data.csv";
+        static Path csvFile;
+        static final String csvData =
+            """
+            name,pounds,species,treats
+            Midnight,15,dog,milkbones
+            Tiger,17,cat,cat grass
+            Panther,21,cat,cat nip
+            """;
+        
+        @BeforeAll
+        static void setup() throws IOException {
+            // Create random user, collection and dataset to work with
+            Response createUser = UtilIT.createRandomUser();
+            createUser.then().assertThat().statusCode(OK.getStatusCode());
+            String username = UtilIT.getUsernameFromResponse(createUser);
+            apiToken = UtilIT.getApiTokenFromResponse(createUser);
+            Response makeSuperUser = UtilIT.setSuperuserStatus(username, true);
+            makeSuperUser.then().assertThat().statusCode(OK.getStatusCode());
+            
+            Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken);
+            createDataverseResponse.prettyPrint();
+            String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse);
+            Response createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken);
+            createDatasetResponse.prettyPrint();
+            datasetId = JsonPath.from(createDatasetResponse.body().asString()).getInt("data.id");
+            
+            // Create CSV datafile to work with
+            csvFile = tempDir.resolve(csvFileName);
+            java.nio.file.Files.writeString(csvFile, csvData, StandardCharsets.UTF_8);
+        }
+        
+        @AfterAll
+        static void teardown() {
+            // Remove the setting for test isolation purposes
+            Response removeLimit = UtilIT.deleteSetting(Key.TabularIngestSizeLimit);
+            removeLimit.then().assertThat().statusCode(OK.getStatusCode());
+        }
+        
+        static List<Arguments> configurations() {
+            return List.of(
+                // Too small for 134 chars
+                Arguments.of("{\"csv\": 50}", BAD_REQUEST, "message", equalTo(BundleUtil.getStringFromBundle("files.api.only.tabular.supported"))),
+                Arguments.of("{\"default\": 50.0}", BAD_REQUEST, "message", equalTo(BundleUtil.getStringFromBundle("files.api.only.tabular.supported"))),
+                
+                // The behavior of `"default": "-2"` is not documented in the guides
+                // but it acts like `"default": "0"` which disables ingest.
+                Arguments.of("{\"default\": -2}", BAD_REQUEST, "message", equalTo(BundleUtil.getStringFromBundle("files.api.only.tabular.supported"))),
+                
+                // Large enough :-)
+                Arguments.of("{\"csv\": 123456}", OK, "data[0].varQuantity", equalTo(4)),
+                // Default is disabled, but exception for CSV
+                Arguments.of("{\"default\": 0,\"csv\": 123456}", OK, "data[0].varQuantity", equalTo(4))
+            );
+        }
+        
+        @ParameterizedTest
+        @MethodSource("configurations")
+        void testIngestSizeLimits(String ingestSizeLimitConfig, Status expectedStatus, String jsonPath, Matcher matcher) throws IOException {
+            // given
+            Response setLimit = UtilIT.setSetting(Key.TabularIngestSizeLimit, ingestSizeLimitConfig);
+            setLimit.then().assertThat().statusCode(OK.getStatusCode());
+            
+            // when
+            Response uploadResponse = UtilIT.uploadFileViaNative(Integer.toString(datasetId), csvFile.toString(), apiToken);
+            //uploadResponse.prettyPrint();
+            uploadResponse.then().assertThat()
                 .statusCode(OK.getStatusCode())
-                .body("data[0].varQuantity", equalTo(4));
-
-        Response removeLimit = UtilIT.deleteSetting(SettingsServiceBean.Key.TabularIngestSizeLimit);
-        removeLimit.then().assertThat().statusCode(OK.getStatusCode());
+                .body("data.files[0].label", equalTo(csvFileName));
+            String fileId = JsonPath.from(uploadResponse.body().asString()).getString("data.files[0].dataFile.id");
+            
+            // Wait for ingest to complete
+            assertTrue(UtilIT.sleepForLock(datasetId, "Ingest", apiToken, UtilIT.MAXIMUM_INGEST_LOCK_DURATION),
+                "Failed test if Ingest Lock exceeds max duration");
+            
+            // then
+            Response getTabularFails = UtilIT.getFileDataTables(fileId, apiToken);
+            //getTabularFails.prettyPrint();
+            getTabularFails.then().assertThat()
+                .statusCode(expectedStatus.getStatusCode())
+                .body(jsonPath, matcher);
+            
+            // delete the file for the next test
+            UtilIT.deleteFile(Integer.valueOf(fileId), apiToken);
+        }
     }
 
+
     @Test
     public void testUningestFileViaApi() throws InterruptedException {
         Response createUser = UtilIT.createRandomUser();

From cd5f33d00a42d15fb0e27334a4c40e6806533cbe Mon Sep 17 00:00:00 2001
From: Oliver Bertuch <o.bertuch@fz-juelich.de>
Date: Mon, 8 Dec 2025 14:51:00 +0100
Subject: [PATCH 20/20] test(files): expand TabularIngestSizeLimit tests with
 additional numeric value cases #11639

Added more test cases to validate support for various numeric formats, including integers, strings, and decimals, in TabularIngestSizeLimit configurations.
---
 src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java
index 24ccfd11bed..0e693f207e8 100644
--- a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java
+++ b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java
@@ -1274,7 +1274,12 @@ static List<Arguments> configurations() {
                 Arguments.of("{\"default\": -2}", BAD_REQUEST, "message", equalTo(BundleUtil.getStringFromBundle("files.api.only.tabular.supported"))),
                 
                 // Large enough :-)
+                Arguments.of("-1", OK, "data[0].varQuantity", equalTo(4)),
+                Arguments.of("123456", OK, "data[0].varQuantity", equalTo(4)),
+                Arguments.of("{\"default\": 123456}", OK, "data[0].varQuantity", equalTo(4)),
                 Arguments.of("{\"csv\": 123456}", OK, "data[0].varQuantity", equalTo(4)),
+                Arguments.of("{\"csv\": \"123457\"}", OK, "data[0].varQuantity", equalTo(4)),
+                Arguments.of("{\"csv\": 123458.0}", OK, "data[0].varQuantity", equalTo(4)),
                 // Default is disabled, but exception for CSV
                 Arguments.of("{\"default\": 0,\"csv\": 123456}", OK, "data[0].varQuantity", equalTo(4))
             );
@@ -1282,7 +1287,7 @@ static List<Arguments> configurations() {
         
         @ParameterizedTest
         @MethodSource("configurations")
-        void testIngestSizeLimits(String ingestSizeLimitConfig, Status expectedStatus, String jsonPath, Matcher matcher) throws IOException {
+        void testIngestSizeLimits(String ingestSizeLimitConfig, Status expectedStatus, String jsonPath, Matcher matcher) {
             // given
             Response setLimit = UtilIT.setSetting(Key.TabularIngestSizeLimit, ingestSizeLimitConfig);
             setLimit.then().assertThat().statusCode(OK.getStatusCode());