Skip to content

Commit ed7db2c

Browse files
ivorbosloper and m-mohr authored
Improve converter template usability (#157)
* Improve converter template usability * Update fiboa_cli/datasets/template.py Co-authored-by: Matthias Mohr <matthias@mohr.ws> * Update fiboa_cli/datasets/template.py Co-authored-by: Matthias Mohr <matthias@mohr.ws> * Apply suggestions from code review Co-authored-by: Matthias Mohr <matthias@mohr.ws> * Add Changelog * Formatting * Update fiboa_cli/datasets/template.py Co-authored-by: Matthias Mohr <matthias@mohr.ws> * formatting --------- Co-authored-by: Matthias Mohr <matthias@mohr.ws>
1 parent f70a20c commit ed7db2c

3 files changed

Lines changed: 23 additions & 193 deletions

File tree

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
77

88
## [Unreleased]
99

10+
- Improve Converter template.py usability
11+
1012
## [v0.10.0] - 2025-03-11
1113

1214
### Added

fiboa_cli/datasets/template.py

Lines changed: 21 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,18 @@
66

77
from ..convert_utils import BaseConverter
88

9+
# You can remove attributes that you don't need.
10+
# Also, please remove all comments that you didn't add yourself from the template.
911

10-
class TemplateConverter(BaseConverter): # change this class name to your use case
12+
13+
class Converter(BaseConverter):
1114
# File(s) to read the data from, usually publicly accessible URLs.
1215
# Can read any (zipped) tabular data format that GeoPandas can read through read_file() or read_parquet().
1316
# Supported protocols: HTTP(S), GCS, S3, or the local file system
14-
#
17+
1518
# Multiple options are possible:
1619
# 1. a single URL (filename must be in the URL). The file is read as is.
17-
sources = "https://fiboa.example/file.xyz"
20+
sources = "https://fiboa.example/data.shp.zip"
1821

1922
# 2. a dictionary with a mapping of URLs (where the filename can't necessarily be determined from the URL) to filenames.
2023
# sources = {
@@ -26,7 +29,7 @@ class TemplateConverter(BaseConverter): # change this class name to your use ca
2629
# "https://fiboa.example/north_america.zip": ["us.gpkg", "canaga.gpkg"]
2730
# }
2831

29-
# If multiple years are available, you can replace sources by years.
32+
# 4. if multiple years are available, you can replace sources by years.
3033
# The dict-key can be used on the cli command line, the value will be used as 'sources'
3134
#
3235
# years = {
@@ -54,7 +57,7 @@ class TemplateConverter(BaseConverter): # change this class name to your use ca
5457
{"name": "ABC Corp", "url": "https://abc.example", "roles": ["producer", "licensor"]}
5558
]
5659

57-
# Attribution (e.g. copyright or citation statement as requested by provider).
60+
# Attribution (e.g. copyright or citation statement as requested by provider) as a string.
5861
# The attribution is usually shown on the map, in the lower right corner.
5962
# Can be None if not applicable
6063
attribution = "© 2024 ABC Corp."
@@ -68,20 +71,22 @@ class TemplateConverter(BaseConverter): # change this class name to your use ca
6871
# Map original column names to fiboa property names
6972
# You also need to list any column that you may have added in the MIGRATION function (see below).
7073
# GeoJSON: Nested objects can be accessed using a dot, e.g. "area.value" for {"area": {"value": 123}}
71-
columns = {"area_m": "area"}
74+
columns = {
75+
"some_are_col": "area",
76+
"geom": "geometry",
77+
}
7278

7379
# Add columns with constant values.
7480
# The key is the column name, the value is a constant value that's used for all rows.
7581
column_additions = {}
7682

77-
# A list of implemented extension identifiers
78-
extensions = set()
83+
# A set of implemented extension identifiers
84+
extensions = {"https://fiboa.github.io/crop-extension/v0.1.0/schema.yaml"}
7985

8086
# Functions to migrate data in columns to match the fiboa specification.
8187
# Example: You have a column area_m in square meters and want to convert
8288
# to hectares as required for the area field in fiboa.
83-
# Function signature:
84-
# func(column: pd.Series) -> pd.Series
89+
# requires: func(column: pd.Series) -> pd.Series
8590
column_migrations = {"area_m": lambda column: column * 0.0001}
8691

8792
# Filter columns to only include the ones that are relevant for the collection,
@@ -92,6 +97,7 @@ class TemplateConverter(BaseConverter): # change this class name to your use ca
9297
# Override to migrate the full GeoDataFrame if the other options are not sufficient
9398
# This should be the last resort!
9499
# def migrate(self, gdf) -> gpd.GeoDataFrame:
100+
# gdf["column"] *= 10
95101
# return gdf
96102

97103
# Custom function to execute actions on the the GeoDataFrame that are loaded from individual file or layers.
@@ -103,29 +109,12 @@ class TemplateConverter(BaseConverter): # change this class name to your use ca
103109
# def file_migration(self, gdf: gpd.GeoDataFrame, path: str, uri: str, layer: str = None) -> gpd.GeoDataFrame:
104110
# return data
105111

106-
# Schemas for the fields that are not defined in fiboa
112+
# Schemas for the fields that are not defined in the core or the used extensions
107113
# Keys must be the values from the COLUMNS dict, not the keys
108114
missing_schemas = {
109115
"required": ["my_id"], # i.e. non-nullable properties
110-
"properties": {"my_id": {"type": "string"}},
116+
"properties": {
117+
"some_col": {"type": "string"},
118+
"category": {"type": "string", "enum": ["A", "B"]},
119+
},
111120
}
112-
113-
"""
114-
def convert(self, *args, **kwargs):
115-
Converts the field boundary datasets to fiboa.
116-
117-
For reference, this is the order in which the conversion steps are applied:
118-
0. Read GeoDataFrame from file(s) / layer(s) and run self.file_migration()
119-
1. Run global migration self.migrate()
120-
2. Run filters to remove rows that shall not be in the final data
121-
(if provided through self.column_filters)
122-
3. Add columns with constant values
123-
4. Run column migrations (if provided through self.column_migrations)
124-
5. Duplicate columns (if an array is provided as the value in self.columns)
125-
6. Rename columns (as provided in self.columns)
126-
7. Remove columns (if column is not present as value in self.columns)
127-
8. Create the collection
128-
9. Change data types of the columns based on the provided schemas
129-
(fiboa spec, extensions, and self.missing_schemas)
130-
10. Write the data to the Parquet file
131-
"""

fiboa_cli/datasets/template_convert_function.py

Lines changed: 0 additions & 161 deletions
This file was deleted.

0 commit comments

Comments (0)