Skip to content

Commit 7b38fb4

Browse files
committed
update distributed search functionality
1 parent dbbd7e2 commit 7b38fb4

37 files changed

Lines changed: 453 additions & 94 deletions

File tree

default-sample.yml

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# Authors: Tom Kralidis <tomkralidis@gmail.com>
44
# Angelos Tzotsos <tzotsos@gmail.com>
55
#
6-
# Copyright (c) 2024 Tom Kralidis
6+
# Copyright (c) 2026 Tom Kralidis
77
# Copyright (c) 2024 Angelos Tzotsos
88
#
99
# Permission is hereby granted, free of charge, to any person
@@ -55,7 +55,14 @@ profiles:
5555
- apiso
5656

5757
federatedcatalogues:
58-
- http://catalog.data.gov/csw
58+
- id: arctic-sdi-csw
59+
type: CSW
60+
title: Arctic SDI
61+
url: https://catalogue.arctic-sdi.org/csw
62+
- id: pycsw-cite-demo
63+
type: OARec
64+
title: pycsw OGC CITE demo and Reference Implementation
65+
url: https://demo.pycsw.org/cite
5966

6067
manager:
6168
transactions: false

docker/compose/pycsw.yml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# Authors: Tom Kralidis <tomkralidis@gmail.com>
44
# Ricardo Garcia Silva <ricardo.garcia.silva@gmail.com>
55
#
6-
# Copyright (c) 2024 Tom Kralidis
6+
# Copyright (c) 2026 Tom Kralidis
77
# Copyright (c) 2017 Ricardo Garcia Silva
88
#
99
# Permission is hereby granted, free of charge, to any person
@@ -52,7 +52,10 @@ profiles:
5252
- apiso
5353

5454
federatedcatalogues:
55-
- http://catalog.data.gov/csw
55+
- id: fedcat01
56+
type: CSW
57+
title: Arctic SDI
58+
url: https://catalogue.arctic-sdi.org/csw
5659

5760
manager:
5861
transactions: false

docker/helm/values.yaml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,10 @@ pycsw:
4848
profiles:
4949
- apiso
5050
# federatedcatalogues:
51-
# - http://catalog.data.gov/csw
51+
# - id: fedcat01
52+
# type: CSW
53+
# title: Arctic SDI
54+
# url: https://catalogue.arctic-sdi.org/csw
5255
manager:
5356
transactions: "false"
5457
allowed_ips:

docker/kubernetes/pycsw-configmap.yaml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ data:
77
# Ricardo Garcia Silva <ricardo.garcia.silva@gmail.com>
88
# Angelos Tzotsos <tzotsos@gmail.com>
99
#
10-
# Copyright (c) 2024 Tom Kralidis
10+
# Copyright (c) 2026 Tom Kralidis
1111
# Copyright (c) 2017 Ricardo Garcia Silva
1212
# Copyright (c) 2024 Angelos Tzotsos
1313
#
@@ -57,7 +57,10 @@ data:
5757
- apiso
5858
5959
federatedcatalogues:
60-
- http://catalog.data.gov/csw
60+
- id: fedcat01
61+
type: CSW
62+
title: Arctic SDI
63+
url: https://catalogue.arctic-sdi.org/csw
6164
6265
manager:
6366
transactions: false

docker/pycsw.yml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# Authors: Tom Kralidis <tomkralidis@gmail.com>
44
# Ricardo Garcia Silva <ricardo.garcia.silva@gmail.com>
55
#
6-
# Copyright (c) 2024 Tom Kralidis
6+
# Copyright (c) 2026 Tom Kralidis
77
# Copyright (c) 2017 Ricardo Garcia Silva
88
#
99
# Permission is hereby granted, free of charge, to any person
@@ -52,7 +52,10 @@ profiles:
5252
- apiso
5353

5454
federatedcatalogues:
55-
- http://catalog.data.gov/csw
55+
- id: fedcat01
56+
type: CSW
57+
title: Arctic SDI
58+
url: https://catalogue.arctic-sdi.org/csw
5659

5760
manager:
5861
transactions: false

docs/configuration.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ pycsw's runtime configuration is defined by ``default.yml``. pycsw ships with a
1616
- **level**: the logging level (see https://docs.python.org/library/logging.html#logging-levels)
1717
- **logfile**: the full file path to the logfile
1818
- **ogc_schemas_base**: base URL of OGC XML schemas tree file structure (default is http://schemas.opengis.net)
19-
- **federatedcatalogues**: comma delimited list of CSW endpoints to be used for distributed searching, if requested by the client (see :ref:`distributedsearching`)
19+
- **federatedcatalogues**: array of distributed catalogue endpoints (each defined by ``id``, ``type``, ``title`` and ``url``) to be used for distributed searching, if requested by the client (see :ref:`distributedsearching`)
2020
- **pretty_print**: whether to pretty print the output (``true`` or ``false``). Default is ``false``
2121
- **gzip_compresslevel**: gzip compression level, lowest is ``1``, highest is ``9``. Default is off. **NOTE**: if gzip compression is already enabled via your web server, do not enable this directive (or else the server will try to compress the response twice, resulting in degraded performance)
2222
- **domainquerytype**: for GetDomain operations, how to output domain values. Accepted values are ``list`` and ``range`` (min/max). Default is ``list``

pycsw/core/admin.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
# Authors: Tom Kralidis <tomkralidis@gmail.com>
55
# Angelos Tzotsos <tzotsos@gmail.com>
66
#
7-
# Copyright (c) 2024 Tom Kralidis
7+
# Copyright (c) 2026 Tom Kralidis
88
# Copyright (c) 2015 Angelos Tzotsos
99
#
1010
# Permission is hereby granted, free of charge, to any person
@@ -632,7 +632,9 @@ def cli_migrate_config(ctx, config, verbosity):
632632
elif name == 'profiles':
633633
dict_[name] = value.split(',')
634634
elif name == 'federatedcatalogues':
635-
dict_[name] = value.split(',')
635+
dict_[name] = []
636+
for count, fc in enumerate(value.split(',')):
637+
dict_[name].append({'id': f'fedcat{count}', 'url': fc})
636638
else:
637639
dict_['server'][name] = get_typed_value(value)
638640

pycsw/ogc/api/oapi.py

Lines changed: 57 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -199,10 +199,10 @@ def gen_oapi(config, oapi_filepath, mode='ogcapi-records'):
199199
'style': 'form',
200200
'explode': False
201201
}
202-
oapi['components']['parameters']['distributed'] = {
203-
'name': 'distributed',
202+
oapi['components']['parameters']['distributedSearch'] = {
203+
'name': 'distributedSearch',
204204
'in': 'query',
205-
'description': 'Whether to invoke distributed mode',
205+
'description': 'Whether to invoke distributed search',
206206
'schema': {
207207
'type': 'boolean',
208208
'default': False
@@ -390,6 +390,60 @@ def gen_oapi(config, oapi_filepath, mode='ogcapi-records'):
390390
oapi['paths']['/collections/{collectionId}/queryables'] = path2
391391
oapi['components']['parameters']['collectionId']['default'] = 'metadata:main' # noqa
392392

393+
path = {
394+
'get': {
395+
'tags': ['Federated catalogs'],
396+
'summary': 'Federated catalogs page',
397+
'description': 'Federated catalogs page',
398+
'operationId': 'getFederatedCatalogs',
399+
'parameters': [
400+
{'$ref': '#/components/parameters/collectionId'},
401+
{'$ref': '#/components/parameters/f'}
402+
],
403+
'responses': {
404+
'200': {
405+
'$ref': '#/components/responses/FederatedCatalogs'
406+
},
407+
'500': {
408+
'$ref': '#/components/responses/ServerError'
409+
}
410+
}
411+
}
412+
}
413+
414+
oapi['paths']['/collections/{collectionId}/federatedCatalogs'] = path
415+
416+
path = {
417+
'get': {
418+
'tags': ['Federated catalogs'],
419+
'summary': 'Federated catalogs page',
420+
'description': 'Federated catalogs page',
421+
'operationId': 'getFederatedCatalog',
422+
'parameters': [
423+
{'$ref': '#/components/parameters/collectionId'},
424+
{'name': 'catalogId',
425+
'in': 'path',
426+
'description': 'catalog ID',
427+
'required': True,
428+
'schema': {
429+
'type': 'string'
430+
}
431+
},
432+
{'$ref': '#/components/parameters/f'}
433+
],
434+
'responses': {
435+
'200': {
436+
'$ref': '#/components/responses/FederatedCatalog'
437+
},
438+
'500': {
439+
'$ref': '#/components/responses/ServerError'
440+
}
441+
}
442+
}
443+
}
444+
445+
oapi['paths']['/collections/{collectionId}/federatedCatalogs/{catalogId}'] = path
446+
393447
path = {
394448
'get': {
395449
'tags': ['metadata'],

pycsw/ogc/api/records.py

Lines changed: 107 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# Authors: Tom Kralidis <tomkralidis@gmail.com>
44
# Angelos Tzotsos <tzotsos@gmail.com>
55
#
6-
# Copyright (c) 2025 Tom Kralidis
6+
# Copyright (c) 2026 Tom Kralidis
77
# Copyright (c) 2021 Angelos Tzotsos
88
#
99
# Permission is hereby granted, free of charge, to any person
@@ -483,6 +483,23 @@ def collection(self, headers_, args, collection='metadata:main'):
483483
'hreflang': self.config['server']['language']
484484
}]
485485

486+
if collection == 'metadata:main' and 'federatedcatalogues' in self.config:
487+
LOGGER.debug('Adding federated catalogues')
488+
response['links'].append({
489+
'rel': 'http://www.opengis.net/def/rel/ogc/1.0/federatedCatalogues',
490+
'type': 'application/json',
491+
'title': 'Federated catalogs as JSON',
492+
'href': f"{url_base}/federatedCatalogs?f=json",
493+
'hreflang': self.config['server']['language']
494+
})
495+
response['links'].append({
496+
'rel': 'http://www.opengis.net/def/rel/ogc/1.0/federatedCatalogues',
497+
'type': 'text/html',
498+
'title': 'Federated catalogs as HTML',
499+
'href': f"{url_base}/federatedCatalogs?f=html",
500+
'hreflang': self.config['server']['language']
501+
})
502+
486503
return self.get_response(200, headers_, response, 'collection.html')
487504

488505
def queryables(self, headers_, args, collection='metadata:main'):
@@ -550,7 +567,7 @@ def items(self, headers_, json_post_data, args, collection='metadata:main'):
550567
headers_['Content-Type'] = self.get_content_type(headers_, args)
551568

552569
reserved_query_params = [
553-
'distributed',
570+
'distributedsearch',
554571
'f',
555572
'facets',
556573
'filter',
@@ -796,19 +813,27 @@ def items(self, headers_, json_post_data, args, collection='metadata:main'):
796813
for record in records:
797814
response['features'].append(record2json(record, self.config['server']['url'], collection, self.mode))
798815

799-
response['distributedFeatures'] = []
816+
response['federatedSearchResults'] = {}
800817

801-
distributed = str2bool(args.get('distributed', False))
818+
distributed = str2bool(args.get('distributedsearch', False))
802819

803820
if distributed:
804821
for fc in self.config.get('federatedcatalogues', []):
805-
LOGGER.debug(f'Running distributed search against {fc}')
806-
fc_url, _, fc_collection = fc.rsplit('/', 2)
822+
if fc['type'] != 'OARec':
823+
LOGGER.debug(f"Federated catalogue type {fc['type']} not supported; skipping")
824+
continue
825+
LOGGER.debug(f"Running distributed search against {fc['url']}")
826+
fc_url, _, fc_collection = fc['url'].rsplit('/', 2)
827+
response['federatedSearchResults'][fc['id']] = {
828+
'type': 'FeatureCollection',
829+
'features': []
830+
}
807831
try:
808832
w = Records(fc_url)
833+
args.pop('distributedsearch')
809834
fc_results = w.collection_items(fc_collection, **args)
810835
for feature in fc_results['features']:
811-
response['distributedFeatures'].append(feature)
836+
response['federatedSearchResults'][fc['id']]['features'].append(feature)
812837
except Exception as err:
813838
LOGGER.warning(err)
814839

@@ -946,7 +971,10 @@ def item(self, headers_, args, collection, item):
946971

947972
if distributed:
948973
for fc in self.config.get('federatedcatalogues', []):
949-
LOGGER.debug(f'Running distributed item search against {fc}')
974+
if fc['type'] != 'OARec':
975+
LOGGER.debug(f"Federated catalogue type {fc['type']} not supported; skipping")
976+
continue
977+
LOGGER.debug(f"Running distributed item search against {fc['url']}")
950978
fc_url, _, fc_collection = fc.rsplit('/', 2)
951979
try:
952980
w = Records(fc_url)
@@ -1133,18 +1161,79 @@ def get_collection_info(self, collection_name: str = 'metadata:main',
11331161
}]
11341162
}
11351163

1136-
if collection_name == 'metadata:main':
1164+
return collection_info
1165+
1166+
def federated_catalogues(self, headers_, args, collection):
    """
    Provide federated catalogues

    :param headers_: copy of HEADERS object
    :param args: request parameters
    :param collection: name of collection

    :returns: tuple of headers, status code, content
    """

    headers_['Content-Type'] = self.get_content_type(headers_, args)

    fedcats = []

    # federated catalogues are only listed for the main collection
    if collection == 'metadata:main' and 'federatedcatalogues' in self.config:
        LOGGER.debug('Adding federated catalogues')
        # the key may be present but empty (None or '') in the
        # configuration; always hand a list to the response/template
        fedcats = self.config.get('federatedcatalogues') or []

    if headers_['Content-Type'] == 'text/html':
        # the HTML template receives the list along with page context
        response = {
            'title': self.config['metadata']['identification']['title'],
            'collection': collection,
            'fedcats': fedcats
        }
    else:
        # non-HTML representations return the bare list
        response = fedcats

    return self.get_response(200, headers_, response,
                             'federatedcatalogs.html')
1197+
1198+
def federated_catalogue(self, headers_, args, collection, catalogue):
    """
    Provide federated catalogue

    :param headers_: copy of HEADERS object
    :param args: request parameters
    :param collection: name of collection
    :param catalogue: id of catalogue

    :returns: tuple of headers, status code, content
    """

    headers_['Content-Type'] = self.get_content_type(headers_, args)

    fedcat = None

    # federated catalogues are only resolvable on the main collection
    if collection == 'metadata:main':
        # a missing or empty configuration entry yields no candidates,
        # so the request falls through to the 404 below instead of
        # failing while iterating over None
        fedcats = self.config.get('federatedcatalogues') or []
        fedcat = next(
            (fc for fc in fedcats if fc['id'] == catalogue), None)

    if fedcat is None:
        msg = 'Federated catalogue does not exist'
        # not inside an exception handler: log a plain error rather
        # than LOGGER.exception, which would append an empty traceback
        LOGGER.error(msg)
        return self.get_exception(404, headers_, 'InvalidParameterValue', msg)

    if headers_['Content-Type'] == 'text/html':
        # the HTML template receives the catalogue along with page context
        response = {
            'title': self.config['metadata']['identification']['title'],
            'collection': collection,
            'fedcat': fedcat
        }
    else:
        # non-HTML representations return the catalogue definition itself
        response = fedcat

    return self.get_response(200, headers_, response,
                             'federatedcatalog.html')
11481237

11491238
def get_all_collections(self) -> list:
11501239
"""

0 commit comments

Comments
 (0)