-
Notifications
You must be signed in to change notification settings - Fork 9
Expand file tree
/
Copy pathes_nc.py
More file actions
75 lines (66 loc) · 2.67 KB
/
es_nc.py
File metadata and controls
75 lines (66 loc) · 2.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import re
from os import makedirs, path
import pandas as pd
import requests
from ..util import log, name_from_uri
from .es import ESBaseConverter
# SOURCES = "https://filescartografia.navarra.es/2_CARTOGRAFIA_TEMATICA/2_6_SIGPAC/" # FULL Download timeout
class NCConverter(ESBaseConverter):
    """Converter for SIGPAC crop-field data of Navarra (Spain).

    Download URLs are scraped from the official download page. The host
    serves an invalid SSL certificate, so files are prefetched into the
    local cache with certificate verification deliberately disabled.
    """

    id = "es_nc"
    short_name = "Spain Navarra"
    title = "Spain Navarra Crop fields"
    description = """
    SIGPAC Crop fields of Spain - Navarra
    """
    license = "CC-BY-4.0"  # https://sigpac.navarra.es/descargas/
    attribution = "Comunidad Foral de Navarra"
    providers = [
        {
            "name": "Comunidad Foral de Navarra",
            "url": "https://gobiernoabierto.navarra.es/",
            "roles": ["producer", "licensor"],
        }
    ]
    # Mapping from source shapefile attributes to output column names.
    columns = {
        "id": "id",
        "geometry": "geometry",
        "BEGINLIFE": "determination_datetime",
        "IDUSO24": "crop:code",
        "crop:name": "crop:name",
        "crop:name_en": "crop:name_en",
    }
    column_migrations = {
        # Source dates are day-first strings, e.g. "31/12/2024".
        "BEGINLIFE": lambda col: pd.to_datetime(col, format="%d/%m/%Y"),
    }
    use_code_attribute = "IDUSO24"
    index_as_id = True

    def get_urls(self):
        """Scrape the download page and return ``{zip_url: [shapefile_name]}``.

        Returns:
            dict: one entry per municipality zip, mapping the download URL
            to the shapefile name expected inside the archive.

        Raises:
            ValueError: if ``rutaBase`` cannot be found on the page,
                i.e. the page layout changed.
        """
        # verify=False on purpose: the host serves an invalid SSL certificate
        # (see prefill_cache). timeout guards against a stalled server.
        content = requests.get(
            "https://sigpac.navarra.es/descargas/", verify=False, timeout=60
        ).text
        match = re.search('var rutaBase = "(.*?)";', content)
        if match is None:
            # Fail loudly instead of an opaque AttributeError on .group(1).
            raise ValueError(
                "Could not find 'rutaBase' on https://sigpac.navarra.es/descargas/; "
                "the page layout may have changed"
            )
        base = match.group(1)
        last = base.rsplit("/", 1)[-1]
        return {
            f"https://sigpac.navarra.es/descargas/{base}{src}.zip": [f"{last}{src}.shp"]
            for src in re.findall(r'value:"(\d+)"', content)
        }

    def prefill_cache(self, uris, cache_folder=None):
        """Download all ``uris`` into ``cache_folder`` with SSL checks off.

        Files that are already cached are skipped. URLs that do not answer
        with HTTP 200 are logged and removed from ``uris`` (mutated in
        place) so later processing skips them.

        Args:
            uris: dict of download URLs (as produced by ``get_urls``).
            cache_folder: target directory; if ``None``, only a warning is
                logged and nothing is downloaded.
        """
        if cache_folder is None:
            log("Use -c <cache_dir> to prefill the cache dir, working around SSL errors", "warning")
            return
        makedirs(cache_folder, exist_ok=True)
        log("Suppressing SSL-errors, filling cache with unverified SSL requests", "warning")
        requests.packages.urllib3.disable_warnings()  # Suppress InsecureRequestWarning
        # Iterate over a copy so failing entries can be dropped from `uris`.
        for uri in list(uris):
            target = path.join(cache_folder, name_from_uri(uri))
            if path.exists(target):
                continue  # already cached
            r = requests.get(uri, verify=False, timeout=300)
            if r.status_code == 200:
                with open(target, "wb") as f:
                    f.write(r.content)
            else:
                log(f"Skipping url {uri}, status_code={r.status_code}", "error")
                uris.pop(uri)

    def download_files(self, uris, cache_folder=None):
        """Prefill the cache (working around the host's invalid SSL cert),
        then delegate to the regular download logic."""
        self.prefill_cache(uris, cache_folder)
        return super().download_files(uris, cache_folder)