Meaningful-Data · javihern98 · May 5, 2026 · Feb 3, 2026 · Feb 3, 2026 · Feb 6, 2026
diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml
@@ -2,9 +2,9 @@ name: Testing
 
 on:
   push:
-    branches: [ "main", "dev" ]
+    branches: [ "main", "duckdb/main", "dev" ]
   pull_request:
-    branches: [ "main", "dev" ]
+    branches: [ "main", "duckdb/main", "dev" ]
   workflow_dispatch:
 
 permissions:
@@ -53,7 +53,13 @@ jobs:
       run: poetry run ruff check --output-format=github
     - name: Run type checks
       run: poetry run mypy --show-error-codes --pretty
-    - name: Run tests
+    - name: Run tests with pandas backend
+      env:
+        VTL_ENGINE_BACKEND: pandas  # no --cov here; coverage is collected by the duckdb run below
+      run: poetry run pytest -n auto --verbose --tb=short --strict-markers --strict-config --durations=10
+    - name: Run tests with duckdb backend
+      env:
+        VTL_ENGINE_BACKEND: duckdb
       run: poetry run pytest --cov=vtlengine -n auto --verbose --tb=short --strict-markers --strict-config --durations=10
     - name: Check coverage
-      run: poetry run coverage report --fail-under=90
+      run: poetry run coverage report --fail-under=85
diff --git a/.github/workflows/ubuntu_test_24_04.yml b/.github/workflows/ubuntu_test_24_04.yml
@@ -2,9 +2,9 @@ name: Ubuntu 24.04 Tests
 
 on:
   push:
-    branches: [ "main", "dev" ]
+    branches: [ "main", "duckdb/main", "dev" ]
   pull_request:
-    branches: [ "main", "dev" ]
+    branches: [ "main", "duckdb/main", "dev" ]
 
 permissions:
   contents: read
@@ -37,6 +37,7 @@ jobs:
             python3-jsonschema \
             python3-networkx \
             python3-sqlglot \
+            python3-psutil \
             python3-pytest \
             cmake \
             g++ \
@@ -49,7 +50,7 @@ jobs:
             sdmxschemas==1.0.0 \
             parsy==2.2 \
             msgspec==0.19.0 \
-            duckdb==1.1 \
+            duckdb==1.4.1 \
             pysdmx==1.9.0
 
       - name: Download ANTLR4 C++ runtime
@@ -65,5 +66,12 @@ jobs:
       - name: Install C++ parser
         run: pip install --break-system-packages --no-deps .cpp-wheel/*.whl
 
-      - name: Run tests
+      - name: Run tests (pandas backend)
+        env:
+          VTL_ENGINE_BACKEND: pandas
         run: pytest --verbose --tb=short --strict-markers --strict-config --durations=10
+
+      - name: Run tests (duckdb backend)
+        env:
+          VTL_ENGINE_BACKEND: duckdb
+        run: pytest --verbose --tb=short --strict-markers --strict-config --durations=10
diff --git a/.github/workflows/version.yml b/.github/workflows/version.yml
@@ -2,9 +2,9 @@ name: Version Consistency Check
 
 on:
   push:
-    branches: [ main ]
+    branches: [ main, "duckdb/main" ]
   pull_request:
-    branches: [ main ]
+    branches: [ main, "duckdb/main" ]
 
 permissions:
   contents: read

diff --git a/.gitignore b/.gitignore
@@ -194,3 +194,6 @@ build/
 # Claude Code settings
 .claude/*
 !.claude/CLAUDE.md
+
+# Third-party files that we want to ignore
+third_party/*
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -42,12 +42,9 @@ dependencies = [
     "pyarrow>=14.0,<25.0",
     "numpy>=2.0.2,<2.1; python_version < '3.10'",
     "numpy>=2.2.0,<2.5; python_version >= '3.10'",
+    "psutil>=7.2,<8.0"
 ]
 
-[project.optional-dependencies]
-s3 = ["s3fs>=2022.11.0"]
-all = ["s3fs>=2022.11.0"]
-
 [project.urls]
 Repository = 'https://github.com/Meaningful-Data/vtlengine'
 Documentation = 'https://docs.vtlengine.meaningfuldata.eu'

diff --git a/src/vtlengine/API/_InternalApi.py b/src/vtlengine/API/_InternalApi.py
@@ -17,7 +17,6 @@
 )
 
 from vtlengine import AST as AST
-from vtlengine.__extras_check import __check_s3_extra
 from vtlengine.AST import Assignment, DPRuleset, HRuleset, Operator, PersistentAssignment, Start
 from vtlengine.AST.ASTString import ASTString
 from vtlengine.DataTypes import SCALAR_TYPES
@@ -77,15 +76,9 @@ def _extract_data_type(component: Dict[str, Any]) -> Tuple[str, Any]:
     Raises:
         InputValidationException: If the data type key or value is invalid
     """
-    if "type" in component:
-        key = "type"
-        value = component["type"]
-    else:
-        key = "data_type"
-        value = component["data_type"]
-
-    check_key(key, _SCALAR_TYPE_KEYS, value)
-    return key, SCALAR_TYPES[value]
+    key = "type" if "type" in component else "data_type"
+    check_key(key, _SCALAR_TYPE_KEYS, component[key])
+    return key, SCALAR_TYPES[component[key]]
 
 
 def _load_dataset_from_structure(
@@ -211,25 +204,27 @@ def _load_single_datapoint(
     plain CSV, SDMX-CSV, and SDMX-ML file formats.
 
     Args:
-        datapoint: Path or S3 URI to the datapoint file.
+        datapoint: Path to the datapoint file.
         sdmx_mappings: Optional mapping from SDMX URNs to VTL dataset names.
     """
     if not isinstance(datapoint, (str, Path)):
         raise InputValidationException(
-            code="0-1-1-2", input=datapoint, message="Input must be a Path or an S3 URI"
+            code="0-1-1-2", input=datapoint, message="Input must be a Path"
         )
     # Handling of str values
     if isinstance(datapoint, str):
         if "s3://" in datapoint:
-            __check_s3_extra()
-            dataset_name = datapoint.split("/")[-1].removesuffix(".csv")
-            return {dataset_name: datapoint}
-        # Converting to Path object if it is not an S3 URI
+            raise InputValidationException(
+                code="0-1-1-2",
+                input=datapoint,
+                message="S3 URIs are only supported with use_duckdb=True.",
+            )
+        # Converting to Path object
         try:
             datapoint = Path(datapoint)
         except Exception:
             raise InputValidationException(
-                code="0-1-1-2", input=datapoint, message="Input must refer to a Path or an S3 URI"
+                code="0-1-1-2", input=datapoint, message="Input must refer to a Path"
             )
     # Validation of Path object
     if not datapoint.exists():
@@ -274,7 +269,7 @@ def _load_datapoints_path(
     happens in load_datapoints() which supports both formats.
 
     Args:
-        datapoints: Dict, List, or single Path/S3 URI with datapoints.
+        datapoints: Dict, List, or single Path with datapoints.
         sdmx_mappings: Optional mapping from SDMX URNs to VTL dataset names.
 
     Returns:
@@ -294,11 +289,17 @@ def _load_datapoints_path(
                 raise InputValidationException(
                     code="0-1-1-2",
                     input=datapoint,
-                    message="Datapoints dictionary values must be Paths or S3 URIs.",
+                    message="Datapoints dictionary values must be Paths.",
                 )
 
             # Convert string to Path if not S3 or URL
-            if isinstance(datapoint, str) and "s3://" not in datapoint and not _is_url(datapoint):
+            if isinstance(datapoint, str) and _is_s3_uri(datapoint):
+                raise InputValidationException(
+                    code="0-1-1-2",
+                    input=datapoint,
+                    message="S3 URIs are only supported with use_duckdb=True.",
+                )
+            if isinstance(datapoint, str) and not _is_url(datapoint):
                 datapoint = Path(datapoint)
 
             # Validate file exists
@@ -522,14 +523,14 @@ def load_datasets_with_data(
         not isinstance(v, (str, Path)) for v in datapoints.values()
     ):
         raise InputValidationException(
-            "Invalid datapoints. All values in the dictionary must be Paths or S3 URIs, "
+            "Invalid datapoints. All values in the dictionary must be Paths, "
             "or all values must be Pandas Dataframes."
         )
 
-    # Handling Individual, List or Dict of Paths, S3 URIs, or URLs
+    # Handling Individual, List or Dict of Paths or URLs
     # At this point, datapoints is narrowed to exclude None and Dict[str, DataFrame]
     # All file types (CSV, SDMX) are returned as paths for lazy loading
-    # URLs are preserved as strings (like S3 URIs)
+    # URLs are preserved as strings
     datapoints_paths = _load_datapoints_path(
         cast(Union[Dict[str, Union[str, Path]], List[Union[str, Path]], str, Path], datapoints),
         sdmx_mappings=sdmx_mappings,
@@ -741,10 +742,11 @@ def _check_output_folder(output_folder: Union[str, Path]) -> None:
     """
     if isinstance(output_folder, str):
         if "s3://" in output_folder:
-            __check_s3_extra()
-            if not output_folder.endswith("/"):
-                raise DataLoadError("0-3-1-2", folder=str(output_folder))
-            return
+            raise InputValidationException(
+                code="0-1-1-2",
+                input=output_folder,
+                message="S3 URIs are only supported with use_duckdb=True.",
+            )
         try:
             output_folder = Path(output_folder)
         except Exception:
@@ -900,6 +902,11 @@ def ast_to_sdmx(ast: AST.Start, agency_id: str, id: str, version: str) -> Transf
     return transformation_scheme
 
 
+def _is_s3_uri(value: Any) -> bool:
+    """Check if a value is a string containing "s3://" (substring match)."""
+    return isinstance(value, str) and "s3://" in value
+
+
 def _is_url(value: Any) -> bool:
     """
     Check if a value is an HTTP/HTTPS URL.