-
Notifications
You must be signed in to change notification settings - Fork 9
Expand file tree
/
Copy pathes_nc.py
More file actions
75 lines (66 loc) · 2.67 KB
/
es_nc.py
File metadata and controls
75 lines (66 loc) · 2.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import re
from os import makedirs, path
import pandas as pd
import requests
from ..util import log, name_from_uri
from .es import ESBaseConverter
# SOURCES = "https://filescartografia.navarra.es/2_CARTOGRAFIA_TEMATICA/2_6_SIGPAC/" # FULL Download timeout
class NCConverter(ESBaseConverter):
    """Converter for SIGPAC crop-field data of Navarra (Spain).

    Download URLs are scraped from the official download page. The host
    serves an invalid SSL certificate, so files are prefetched into the
    local cache with certificate verification deliberately disabled.
    """

    id = "es_nc"
    short_name = "Spain Navarra"
    title = "Spain Navarra Crop fields"
    description = """
    SIGPAC Crop fields of Spain - Navarra
    """
    license = "CC-BY-4.0"  # https://sigpac.navarra.es/descargas/
    attribution = "Comunidad Foral de Navarra"
    providers = [
        {
            "name": "Comunidad Foral de Navarra",
            "url": "https://gobiernoabierto.navarra.es/",
            "roles": ["producer", "licensor"],
        }
    ]
    # Mapping from source shapefile attributes to output column names.
    columns = {
        "id": "id",
        "geometry": "geometry",
        "BEGINLIFE": "determination_datetime",
        "IDUSO24": "crop:code",
        "crop:name": "crop:name",
        "crop:name_en": "crop:name_en",
    }
    column_migrations = {
        # Source dates are day-first strings, e.g. "31/12/2024".
        "BEGINLIFE": lambda col: pd.to_datetime(col, format="%d/%m/%Y"),
    }
    use_code_attribute = "IDUSO24"
    index_as_id = True

    def get_urls(self):
        """Scrape the download page and return ``{zip_url: [shapefile_name]}``.

        Returns:
            dict: one entry per municipality zip, mapping the download URL
            to the shapefile name expected inside the archive.

        Raises:
            ValueError: if ``rutaBase`` cannot be found on the page,
                i.e. the page layout changed.
        """
        # verify=False on purpose: the host serves an invalid SSL certificate
        # (see prefill_cache). timeout guards against a stalled server.
        content = requests.get(
            "https://sigpac.navarra.es/descargas/", verify=False, timeout=60
        ).text
        match = re.search('var rutaBase = "(.*?)";', content)
        if match is None:
            # Fail loudly instead of an opaque AttributeError on .group(1).
            raise ValueError(
                "Could not find 'rutaBase' on https://sigpac.navarra.es/descargas/; "
                "the page layout may have changed"
            )
        base = match.group(1)
        last = base.rsplit("/", 1)[-1]
        return {
            f"https://sigpac.navarra.es/descargas/{base}{src}.zip": [f"{last}{src}.shp"]
            for src in re.findall(r'value:"(\d+)"', content)
        }

    def prefill_cache(self, uris, cache_folder=None):
        """Download all ``uris`` into ``cache_folder`` with SSL checks off.

        Files that are already cached are skipped. URLs that do not answer
        with HTTP 200 are logged and removed from ``uris`` (mutated in
        place) so later processing skips them.

        Args:
            uris: dict of download URLs (as produced by ``get_urls``).
            cache_folder: target directory; if ``None``, only a warning is
                logged and nothing is downloaded.
        """
        if cache_folder is None:
            log("Use -c <cache_dir> to prefill the cache dir, working around SSL errors", "warning")
            return
        makedirs(cache_folder, exist_ok=True)
        log("Suppressing SSL-errors, filling cache with unverified SSL requests", "warning")
        requests.packages.urllib3.disable_warnings()  # Suppress InsecureRequestWarning
        # Iterate over a copy so failing entries can be dropped from `uris`.
        for uri in list(uris):
            target = path.join(cache_folder, name_from_uri(uri))
            if path.exists(target):
                continue  # already cached
            r = requests.get(uri, verify=False, timeout=300)
            if r.status_code == 200:
                with open(target, "wb") as f:
                    f.write(r.content)
            else:
                log(f"Skipping url {uri}, status_code={r.status_code}", "error")
                uris.pop(uri)

    def download_files(self, uris, cache_folder=None):
        """Prefill the cache (working around the host's invalid SSL cert),
        then delegate to the regular download logic."""
        self.prefill_cache(uris, cache_folder)
        return super().download_files(uris, cache_folder)