Skip to content

Commit ed7db2c

Browse files
ivorbosloper and m-mohr authored
Improve converter template usability (#157)
* Improve converter template usability * Update fiboa_cli/datasets/template.py Co-authored-by: Matthias Mohr <matthias@mohr.ws> * Update fiboa_cli/datasets/template.py Co-authored-by: Matthias Mohr <matthias@mohr.ws> * Apply suggestions from code review Co-authored-by: Matthias Mohr <matthias@mohr.ws> * Add Changelog * Formatting * Update fiboa_cli/datasets/template.py Co-authored-by: Matthias Mohr <matthias@mohr.ws> * formatting --------- Co-authored-by: Matthias Mohr <matthias@mohr.ws>
1 parent f70a20c commit ed7db2c

3 files changed

Lines changed: 23 additions & 193 deletions

File tree

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
77

88
## [Unreleased]
99

10+
- Improve Converter template.py usability
11+
1012
## [v0.10.0] - 2025-03-11
1113

1214
### Added

fiboa_cli/datasets/template.py

Lines changed: 21 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,18 @@
66

77
from ..convert_utils import BaseConverter
88

9+
# You can remove attributes that you don't need.
10+
# Also, please remove all comments that you didn't add yourself from the template.
911

10-
class TemplateConverter(BaseConverter): # change this class name to your use case
12+
13+
class Converter(BaseConverter):
1114
# File(s) to read the data from, usually publicly accessible URLs.
1215
# Can read any (zipped) tabular data format that GeoPandas can read through read_file() or read_parquet().
1316
# Supported protocols: HTTP(S), GCS, S3, or the local file system
14-
#
17+
1518
# Multiple options are possible:
1619
# 1. a single URL (filename must be in the URL). The file is read as is.
17-
sources = "https://fiboa.example/file.xyz"
20+
sources = "https://fiboa.example/data.shp.zip"
1821

1922
# 2. a dictionary with a mapping of URLs (where the filename can't necessarily be determined from the URL) to filenames.
2023
# sources = {
@@ -26,7 +29,7 @@ class TemplateConverter(BaseConverter): # change this class name to your use ca
2629
# "https://fiboa.example/north_america.zip": ["us.gpkg", "canaga.gpkg"]
2730
# }
2831

29-
# If multiple years are available, you can replace sources by years.
32+
# 4. if multiple years are available, you can replace sources by years.
3033
# The dict-key can be used on the cli command line, the value will be used as 'sources'
3134
#
3235
# years = {
@@ -54,7 +57,7 @@ class TemplateConverter(BaseConverter): # change this class name to your use ca
5457
{"name": "ABC Corp", "url": "https://abc.example", "roles": ["producer", "licensor"]}
5558
]
5659

57-
# Attribution (e.g. copyright or citation statement as requested by provider).
60+
# Attribution (e.g. copyright or citation statement as requested by provider) as a string.
5861
# The attribution is usually shown on the map, in the lower right corner.
5962
# Can be None if not applicable
6063
attribution = "© 2024 ABC Corp."
@@ -68,20 +71,22 @@ class TemplateConverter(BaseConverter): # change this class name to your use ca
6871
# Map original column names to fiboa property names
6972
# You also need to list any column that you may have added in the MIGRATION function (see below).
7073
# GeoJSON: Nested objects can be accessed using a dot, e.g. "area.value" for {"area": {"value": 123}}
71-
columns = {"area_m": "area"}
74+
columns = {
75+
"some_are_col": "area",
76+
"geom": "geometry",
77+
}
7278

7379
# Add columns with constant values.
7480
# The key is the column name, the value is a constant value that's used for all rows.
7581
column_additions = {}
7682

77-
# A list of implemented extension identifiers
78-
extensions = set()
83+
# A set of implemented extension identifiers
84+
extensions = {"https://fiboa.github.io/crop-extension/v0.1.0/schema.yaml"}
7985

8086
# Functions to migrate data in columns to match the fiboa specification.
8187
# Example: You have a column area_m in square meters and want to convert
8288
# to hectares as required for the area field in fiboa.
83-
# Function signature:
84-
# func(column: pd.Series) -> pd.Series
89+
# requires: func(column: pd.Series) -> pd.Series
8590
column_migrations = {"area_m": lambda column: column * 0.0001}
8691

8792
# Filter columns to only include the ones that are relevant for the collection,
@@ -92,6 +97,7 @@ class TemplateConverter(BaseConverter): # change this class name to your use ca
9297
# Override to migrate the full GeoDataFrame if the other options are not sufficient
9398
# This should be the last resort!
9499
# def migrate(self, gdf) -> gpd.GeoDataFrame:
100+
# gdf["column"] *= 10
95101
# return gdf
96102

97103
# Custom function to execute actions on the the GeoDataFrame that are loaded from individual file or layers.
@@ -103,29 +109,12 @@ class TemplateConverter(BaseConverter): # change this class name to your use ca
103109
# def file_migration(self, gdf: gpd.GeoDataFrame, path: str, uri: str, layer: str = None) -> gpd.GeoDataFrame:
104110
# return data
105111

106-
# Schemas for the fields that are not defined in fiboa
112+
# Schemas for the fields that are not defined in the core or the used extensions
107113
# Keys must be the values from the COLUMNS dict, not the keys
108114
missing_schemas = {
109115
"required": ["my_id"], # i.e. non-nullable properties
110-
"properties": {"my_id": {"type": "string"}},
116+
"properties": {
117+
"some_col": {"type": "string"},
118+
"category": {"type": "string", "enum": ["A", "B"]},
119+
},
111120
}
112-
113-
"""
114-
def convert(self, *args, **kwargs):
115-
Converts the field boundary datasets to fiboa.
116-
117-
For reference, this is the order in which the conversion steps are applied:
118-
0. Read GeoDataFrame from file(s) / layer(s) and run self.file_migration()
119-
1. Run global migration self.migrate()
120-
2. Run filters to remove rows that shall not be in the final data
121-
(if provided through self.column_filters)
122-
3. Add columns with constant values
123-
4. Run column migrations (if provided through self.column_migrations)
124-
5. Duplicate columns (if an array is provided as the value in self.columns)
125-
6. Rename columns (as provided in self.columns)
126-
7. Remove columns (if column is not present as value in self.columns)
127-
8. Create the collection
128-
9. Change data types of the columns based on the provided schemas
129-
(fiboa spec, extensions, and self.missing_schemas)
130-
10. Write the data to the Parquet file
131-
"""

fiboa_cli/datasets/template_convert_function.py

Lines changed: 0 additions & 161 deletions
This file was deleted.

0 commit comments

Comments (0)