Skip to content

Commit 241f3d4

Browse files
m-mohravis-giri
andcommitted
Add ML split converters
Co-authored-by: Aviskar Giri <aviskar.giri@taylorgeospatial.org>
1 parent c676732 commit 241f3d4

21 files changed

Lines changed: 2640 additions & 6 deletions

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
3131
- AddHCATMixin assures hcat-extension validity and csv-based data-conversion if required
3232
- EuroCropsConverterMixin is a BaseClass for EuroCrops-provided datasets
3333
- EuroLandBaseConverter is a BaseClass for Euroland-provided datasets
34+
- Add some converters that contain splits for ML use cases
3435
- Avoid base property schema override
3536
- Add Converter for Bulgaria
3637
- Remove unintended CommonMark formatting (indentation) from descriptions in converters

fiboa_cli/datasets/ai4sf.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from ..conversion.fiboa_converter import FiboaBaseConverter
44

55

6-
class Converter(FiboaBaseConverter):
6+
class Ai4SfConverter(FiboaBaseConverter):
77
sources = {
88
# Cambodia
99
"https://phys-techsciences.datastations.nl/api/access/datafile/100634?gbrecs=true": "2_cambodia_areas.gpkg",
@@ -72,7 +72,7 @@ class Converter(FiboaBaseConverter):
7272
}
7373

7474
id = "ai4sf"
75-
short_name = "Cambodia/Vietnam (AI4SmallFarms)"
75+
short_name = "Cambodia/Vietnam (AI4SF)"
7676
title = "Field boundaries for Cambodia and Vietnam (AI4SmallFarms)"
7777
# from https://research.tudelft.nl/en/publications/ai4smallfarms-a-dataset-for-crop-field-delineation-in-southeast-a
7878
description = """

fiboa_cli/datasets/ai4sf_ml.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
from .ai4sf import Ai4SfConverter
2+
from .commons.ml_splits import MlSplitsMixin
3+
4+
5+
class Ai4SfMlConverter(MlSplitsMixin, Ai4SfConverter):
    """AI4SmallFarms converter variant that attaches an ML ``split`` column."""

    def migrate(self, gdf):
        """Join split labels from the tiles file onto ``gdf``, then run the parent migration.

        The tiles file is downloaded into ``self.cache`` and read with
        ``self.open_options``. Rows are matched on ``id`` and ``country``
        with a left join, so every row of ``gdf`` is kept.
        """
        # Fetch the tiles file that carries the split labels
        tile_sources = {
            "https://phys-techsciences.datastations.nl/api/access/datafile/100418?gbrecs=true": "tiles_asia.gpkg",
        }
        tile_paths = self.download_files(tile_sources, self.cache)
        tile_frame = self.read_data(tile_paths, **self.open_options)

        # Keep a single split label per (id, country) pair before joining
        join_keys = ["id", "country"]
        split_lookup = tile_frame[join_keys + ["split"]].drop_duplicates(subset=join_keys)
        gdf = gdf.merge(split_lookup, on=join_keys, how="left")

        # The source labels the validation split "validate"; rename it to "val"
        gdf["split"] = gdf["split"].replace({"validate": "val"})

        return super().migrate(gdf)
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
class MlSplitsMixin:
    """Mixin that turns a converter into its "with splits" ML variant.

    On construction it suffixes ``id`` with ``_ml`` and ``title`` /
    ``short_name`` with ``" with splits"``, maps a ``split`` column, and
    registers ``split`` as a required string property restricted to
    ``train`` / ``val`` / ``test`` in ``missing_schemas``.

    It must be listed before the concrete converter in the class bases so
    that its ``__init__`` and ``download_files`` wrap the converter's.
    """

    def __init__(self, *args, **kwargs):
        """Initialise the host converter, then layer the split metadata on top."""
        super().__init__(*args, **kwargs)

        # Filled in by download_files() so migrate() can reuse the same folder
        self.cache = None

        self.id = self.id + "_ml"
        self.title += " with splits"
        self.short_name += " with splits"

        # Rebind copies instead of mutating in place: ``columns`` and
        # ``missing_schemas`` may be class-level containers shared with the
        # parent converter, and in-place edits would leak into it.
        self.columns = {**self.columns, "split": "split"}

        schemas = dict(self.missing_schemas or {})

        required = list(schemas.get("required", []))
        if "split" not in required:
            # Guard keeps the list free of duplicates on repeated instantiation
            required.append("split")
        schemas["required"] = required

        properties = dict(schemas.get("properties", {}))
        properties["split"] = {
            "type": "string",
            "enum": ["train", "val", "test"],
        }
        schemas["properties"] = properties

        self.missing_schemas = schemas

    def download_files(self, uris, cache_folder=None, **kwargs):
        """Record ``cache_folder`` on the instance, then delegate to the next ``download_files``.

        Returns whatever the wrapped ``download_files`` returns.
        """
        # Store cache folder for later use in migrate
        self.cache = cache_folder
        return super().download_files(uris, cache_folder, **kwargs)

0 commit comments

Comments
 (0)