Skip to content

Commit 87a6682

Browse files
committed
Merge branch 'main' into qa
2 parents f41143d + a397387 commit 87a6682

5 files changed

Lines changed: 190 additions & 2 deletions

File tree

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
# Changelog
22

3+
## v1.0.2 - 5/18/23
4+
- Add research_catalog_identifier_helper function
5+
36
## v1.0.1 - 4/3/23
47
- Add transaction support to RedshiftClient
58

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ This package contains common Python utility classes and functions.
1717
* Reading a YAML config file and putting the contents in os.environ
1818
* Creating a logger in the appropriate format
1919
* Obfuscating a value using bcrypt
20+
* Parsing/building Research Catalog identifiers
2021

2122
## Usage
2223
```python
@@ -81,6 +82,7 @@ This repo uses the [Main-QA-Production](https://github.com/NYPL/engineering-gene
8182
- Cut a feature branch off of `main`
8283
- Commit changes to your feature branch
8384
- File a pull request against `main` and assign a reviewer (who must be an owner)
85+
- Include relevant updates to pyproject.toml and README
8486
- In order for the PR to be accepted, it must pass all unit tests, have no lint issues, and update the CHANGELOG (or contain the `Skip-Changelog` label in GitHub)
8587
- After the PR is accepted, merge into `main`
8688
- Merge `main` > `qa`

pyproject.toml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
44

55
[project]
66
name = "nypl_py_utils"
7-
version = "1.0.1"
7+
version = "1.0.2"
88
authors = [
99
{ name="Aaron Friedman", email="aaronfriedman@nypl.org" },
1010
]
@@ -63,8 +63,11 @@ config-helper = [
6363
obfuscation-helper = [
6464
"bcrypt>=4.0.1"
6565
]
66+
research-catalog-identifier-helper = [
67+
"requests>=2.28.1"
68+
]
6669
development = [
67-
"nypl_py_utils[avro-encoder,kinesis-client,kms-client,mysql-client,oauth2-api-client,postgresql-client,postgresql-pool-client,redshift-client,s3-client,config-helper,obfuscation-helper]",
70+
"nypl_py_utils[avro-encoder,kinesis-client,kms-client,mysql-client,oauth2-api-client,postgresql-client,postgresql-pool-client,redshift-client,s3-client,config-helper,obfuscation-helper,research-catalog-identifier-helper]",
6871
"flake8>=6.0.0",
6972
"freezegun>=1.2.2",
7073
"mock>=4.0.3",
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
import os
2+
import re
3+
import requests
4+
from requests.exceptions import JSONDecodeError, RequestException
5+
6+
# Module-level cache keyed by name. nypl_core_source_mapping() stores the
# parsed mapping here so that, once retrieved, it is not requested again.
CACHE = {}
7+
8+
9+
def parse_research_catalog_identifier(identifier: str):
    """
    Given a RC identifier (e.g. "b1234", "pb9876", "pi4567"), returns a dict
    defining:
     - nyplSource: One of sierra-nypl, recap-pul, recap-cul, or recap-hl (at
                   the time of writing)
     - nyplType: One of bib, holding, or item
     - id: The identifier with its leading prefix removed (e.g. "1234")

    Raises ResearchCatalogIdentifierError if the identifier is not a string,
    does not start with a lowercase-letter prefix, or has a prefix that is
    not found in the nypl-core source mapping.
    """
    if not isinstance(identifier, str):
        raise ResearchCatalogIdentifierError(
            f'Invalid RC identifier: {identifier}')

    # Extract prefix (the leading run of lowercase letters):
    match = re.match(r'^([a-z]+)', identifier)
    if match is None:
        raise ResearchCatalogIdentifierError(
            f'Invalid RC identifier: {identifier}')
    prefix = match[0]

    # The id is the identifier without the leading prefix. Slice instead of
    # using str.replace, which would also strip any later occurrence of the
    # prefix (e.g. 'b12b34' would become '1234'):
    record_id = identifier[len(prefix):]

    # Look up nyplType and nyplSource in nypl-core based on the prefix. The
    # prefix keys are checked in this order for each source; first match wins:
    prefix_keys = (
        ('bibPrefix', 'bib'),
        ('itemPrefix', 'item'),
        ('holdingPrefix', 'holding'),
    )
    for nypl_source, mapping in nypl_core_source_mapping().items():
        for prefix_key, nypl_type in prefix_keys:
            if mapping.get(prefix_key) == prefix:
                return {
                    'nyplSource': nypl_source,
                    'nyplType': nypl_type,
                    'id': record_id
                }

    # No source in the mapping uses this prefix:
    raise ResearchCatalogIdentifierError(
        f'Invalid RC identifier: {identifier}')
55+
56+
57+
def research_catalog_id_prefix(nyplSource: str, nyplType='bib'):
    """
    Looks up the prefix used in RC identifiers for the given nyplSource
    (e.g. 'sierra-nypl') and nyplType (e.g. 'item', which gives 'i').
    nyplType defaults to 'bib'.

    Raises ResearchCatalogIdentifierError when the nyplSource is not in the
    nypl-core source mapping, when nyplType is not a string, or when the
    source has no prefix for that nyplType.
    """
    source_entry = nypl_core_source_mapping().get(nyplSource)
    if source_entry is None:
        raise ResearchCatalogIdentifierError(
            f'Invalid nyplSource: {nyplSource}')

    if not isinstance(nyplType, str):
        raise ResearchCatalogIdentifierError(
            f'Invalid nyplType: {nyplType}')

    # Mapping entries name their prefixes '<type>Prefix', e.g. 'bibPrefix'
    prefix = source_entry.get(f'{nyplType}Prefix')
    if prefix is None:
        raise ResearchCatalogIdentifierError(f'Invalid nyplType: {nyplType}')

    return prefix
75+
76+
77+
def nypl_core_source_mapping():
    """
    Builds a nypl-source-mapping by retrieving the mapping from NYPL-Core.

    The file is fetched from the URL in the NYPL_CORE_SOURCE_MAPPING_URL
    environment variable, or from the nypl-core GitHub repo when that is not
    set. The parsed JSON is stored in CACHE, so later calls return the cached
    value without making another request.

    Raises ResearchCatalogIdentifierError if the request fails or times out,
    or if the response body cannot be parsed as JSON.
    """
    name = 'nypl-core-source-mapping'
    cached = CACHE.get(name)
    if cached is not None:
        return cached

    url = os.environ.get('NYPL_CORE_SOURCE_MAPPING_URL',
                         'https://raw.githubusercontent.com/NYPL/nypl-core/master/mappings/recap-discovery/nypl-source-mapping.json')  # noqa
    try:
        # requests waits indefinitely unless given a timeout, so without one
        # an unresponsive host would hang the caller.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
    except RequestException as e:
        raise ResearchCatalogIdentifierError(
            'Failed to retrieve nypl-core source-mapping file from {url}:'
            ' {errorType} {errorMessage}'
            .format(url=url, errorType=type(e), errorMessage=e)) from None

    try:
        mapping = response.json()
    except (JSONDecodeError, KeyError) as e:
        raise ResearchCatalogIdentifierError(
            'Failed to parse nypl-core source-mapping file: {errorType}'
            ' {errorMessage}'
            .format(errorType=type(e), errorMessage=e)) from None

    # Only a successfully parsed mapping is cached
    CACHE[name] = mapping
    return mapping
104+
105+
106+
class ResearchCatalogIdentifierError(Exception):
    """
    Raised when a Research Catalog identifier, nyplSource, or nyplType is
    invalid, or when the nypl-core source mapping cannot be retrieved or
    parsed. The message is also available as the `message` attribute.
    """

    def __init__(self, message=None):
        # Exception only records positional constructor arguments on its own,
        # so a message passed by keyword would otherwise be missing from
        # str(e) and e.args. Forward it explicitly when one was given.
        if message is not None:
            super().__init__(message)
        self.message = message
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
import pytest
2+
import json
3+
4+
from nypl_py_utils.functions.research_catalog_identifier_helper import (
5+
parse_research_catalog_identifier, research_catalog_id_prefix,
6+
ResearchCatalogIdentifierError)
7+
8+
# Stand-in for the nypl-core source mapping file served by the mocked request.
# Only the 'sierra-nypl' entry defines a holdingPrefix.
_TEST_MAPPING = {
    'sierra-nypl': {
        'organization': 'nyplOrg:0001',
        'bibPrefix': 'b',
        'holdingPrefix': 'h',
        'itemPrefix': 'i',
    },
    'recap-pul': {
        'organization': 'nyplOrg:0003',
        'bibPrefix': 'pb',
        'itemPrefix': 'pi',
    },
    'recap-cul': {
        'organization': 'nyplOrg:0002',
        'bibPrefix': 'cb',
        'itemPrefix': 'ci',
    },
    'recap-hl': {
        'organization': 'nyplOrg:0004',
        'bibPrefix': 'hb',
        'itemPrefix': 'hi',
    },
}
31+
32+
33+
class TestResearchCatalogIdentifierHelper:
    """Tests for the Research Catalog identifier helper functions."""

    @pytest.fixture(autouse=True)
    def test_instance(self, requests_mock):
        """Serves _TEST_MAPPING from the default nypl-core mapping URL."""
        requests_mock.get(
            'https://raw.githubusercontent.com/NYPL/nypl-core/master/mappings/recap-discovery/nypl-source-mapping.json',  # noqa
            text=json.dumps(_TEST_MAPPING))

    def test_parse_research_catalog_identifier_parses_valid(self):
        """Known prefixes resolve to the expected source, type and id."""
        assert parse_research_catalog_identifier('b1234') == \
            {'id': '1234', 'nyplSource': 'sierra-nypl', 'nyplType': 'bib'}
        assert parse_research_catalog_identifier('cb1234') == \
            {'id': '1234', 'nyplSource': 'recap-cul', 'nyplType': 'bib'}
        assert parse_research_catalog_identifier('pi1234') == \
            {'id': '1234', 'nyplSource': 'recap-pul', 'nyplType': 'item'}
        assert parse_research_catalog_identifier('h1234') == \
            {'id': '1234', 'nyplSource': 'sierra-nypl',
             'nyplType': 'holding'}

    def test_parse_research_catalog_identifier_fails_nonsense(self):
        """Non-strings, unknown prefixes and missing prefixes all raise."""
        for invalidIdentifier in [None, 1234, 'z1234', '1234']:
            with pytest.raises(ResearchCatalogIdentifierError):
                parse_research_catalog_identifier(invalidIdentifier)

    def test_research_catalog_id_prefix_parses_valid(self):
        """Each source/type pair in the mapping yields its prefix."""
        assert research_catalog_id_prefix('sierra-nypl') == 'b'
        assert research_catalog_id_prefix('sierra-nypl', 'bib') == 'b'
        assert research_catalog_id_prefix('sierra-nypl', 'item') == 'i'
        assert research_catalog_id_prefix('sierra-nypl', 'holding') == 'h'
        assert research_catalog_id_prefix('recap-pul', 'bib') == 'pb'
        assert research_catalog_id_prefix('recap-hl', 'bib') == 'hb'
        assert research_catalog_id_prefix('recap-hl', 'item') == 'hi'
        assert research_catalog_id_prefix('recap-pul', 'item') == 'pi'

    def test_research_catalog_id_prefix_fails_nonsense(self):
        """Unknown sources and invalid types both raise."""
        for invalidSource in ['sierra-cul', None, 'recap-nypl']:
            with pytest.raises(ResearchCatalogIdentifierError):
                research_catalog_id_prefix(invalidSource)
        for invalidType in [None, '...']:
            with pytest.raises(ResearchCatalogIdentifierError):
                research_catalog_id_prefix('sierra-nypl', invalidType)

0 commit comments

Comments
 (0)