Skip to content

Commit 139a550

Browse files
Merge branch 'main' into qa
2 parents f41143d + 40a97cc commit 139a550

5 files changed

Lines changed: 210 additions & 4 deletions

File tree

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
# Changelog
22

3+
## v1.0.3 - 5/19/23
4+
- Add research_catalog_identifier_helper function
5+
6+
## v1.0.2 - 5/18/23
7+
- Identical to v1.0.1 -- this was mistakenly deployed to QA without any changes
8+
39
## v1.0.1 - 4/3/23
410
- Add transaction support to RedshiftClient
511

README.md

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ This package contains common Python utility classes and functions.
1717
* Reading a YAML config file and putting the contents in os.environ
1818
* Creating a logger in the appropriate format
1919
* Obfuscating a value using bcrypt
20+
* Parsing/building Research Catalog identifiers
2021

2122
## Usage
2223
```python
@@ -57,7 +58,22 @@ When a new client or helper file is created, a new optional dependency set shoul
5758
The optional dependency sets also give the developer the option to manually list out the dependencies of the clients rather than relying upon what the package thinks is required, which can be beneficial in certain circumstances. For instance, AWS lambda functions come with `boto3` and `botocore` pre-installed, so it's not necessary to include these (rather hefty) dependencies in the lambda deployment package.
5859

5960
### Troubleshooting
60-
If running `main.py` in this virtual environment produces the following error:
61+
#### Using PostgreSQLClient in an AWS Lambda
62+
Because `psycopg` requires a statically linked version of the `libpq` library, the `PostgreSQLClient` cannot be installed as-is in an AWS Lambda function. Instead, it must be packaged as follows:
63+
```bash
64+
pip install --target ./package 'nypl-py-utils[postgresql-client]==1.0.1'
65+
66+
pip install \
67+
--platform manylinux2014_x86_64 \
68+
--target=./package \
69+
--implementation cp \
70+
--python 3.9 \
71+
--only-binary=:all: --upgrade \
72+
'psycopg[binary]'
73+
```
74+
75+
#### Using PostgreSQLClient locally
76+
If using the `PostgreSQLClient` produces the following error locally:
6177
```
6278
ImportError: no pq wrapper available.
6379
Attempts made:
@@ -67,7 +83,7 @@ Attempts made:
6783
```
6884

6985
then try running:
70-
```
86+
```bash
7187
pip uninstall psycopg
7288
pip install "psycopg[c]"
7389
```
@@ -81,6 +97,7 @@ This repo uses the [Main-QA-Production](https://github.com/NYPL/engineering-gene
8197
- Cut a feature branch off of `main`
8298
- Commit changes to your feature branch
8399
- File a pull request against `main` and assign a reviewer (who must be an owner)
100+
- Include relevant updates to pyproject.toml and README
84101
- In order for the PR to be accepted, it must pass all unit tests, have no lint issues, and update the CHANGELOG (or contain the `Skip-Changelog` label in GitHub)
85102
- After the PR is accepted, merge into `main`
86103
- Merge `main` > `qa`

pyproject.toml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
44

55
[project]
66
name = "nypl_py_utils"
7-
version = "1.0.1"
7+
version = "1.0.3"
88
authors = [
99
{ name="Aaron Friedman", email="aaronfriedman@nypl.org" },
1010
]
@@ -63,8 +63,11 @@ config-helper = [
6363
obfuscation-helper = [
6464
"bcrypt>=4.0.1"
6565
]
66+
research-catalog-identifier-helper = [
67+
"requests>=2.28.1"
68+
]
6669
development = [
67-
"nypl_py_utils[avro-encoder,kinesis-client,kms-client,mysql-client,oauth2-api-client,postgresql-client,postgresql-pool-client,redshift-client,s3-client,config-helper,obfuscation-helper]",
70+
"nypl_py_utils[avro-encoder,kinesis-client,kms-client,mysql-client,oauth2-api-client,postgresql-client,postgresql-pool-client,redshift-client,s3-client,config-helper,obfuscation-helper,research-catalog-identifier-helper]",
6871
"flake8>=6.0.0",
6972
"freezegun>=1.2.2",
7073
"mock>=4.0.3",
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
import os
2+
import re
3+
import requests
4+
from requests.exceptions import JSONDecodeError, RequestException
5+
6+
CACHE = {}
7+
8+
9+
def parse_research_catalog_identifier(identifier: str):
    """
    Given a RC identifier (e.g. "b1234", "pb9876", "pi4567"), returns a dict
    defining:
     - nyplSource: One of sierra-nypl, recap-pul, recap-cul, or recap-hl (at
                   writing)
     - nyplType: One of bib, holding, or item
     - id: The identifier with its leading prefix removed

    Raises a ResearchCatalogIdentifierError if the identifier is not a
    string, does not start with a lowercase alphabetic prefix, or has a
    prefix that is not present in the nypl-core source mapping.
    """
    if not isinstance(identifier, str):
        raise ResearchCatalogIdentifierError(
            f'Invalid RC identifier: {identifier}')

    # Extract the leading run of lowercase letters as the prefix:
    match = re.match(r'^([a-z]+)', identifier)
    if match is None:
        raise ResearchCatalogIdentifierError(
            f'Invalid RC identifier: {identifier}')
    prefix = match[0]

    # Strip only the leading prefix. str.replace would also remove any later
    # occurrence of the same characters from the id portion.
    record_id = identifier[len(prefix):]

    # Checked in this order for each source; the first match wins.
    prefix_keys = (
        ('bibPrefix', 'bib'),
        ('itemPrefix', 'item'),
        ('holdingPrefix', 'holding'),
    )

    # Look up nyplType and nyplSource in nypl-core based on the prefix:
    for nypl_source, mapping in nypl_core_source_mapping().items():
        for prefix_key, nypl_type in prefix_keys:
            if mapping.get(prefix_key) == prefix:
                return {
                    'nyplSource': nypl_source,
                    'nyplType': nypl_type,
                    'id': record_id
                }

    raise ResearchCatalogIdentifierError(
        f'Invalid RC identifier: {identifier}')
55+
56+
57+
def research_catalog_id_prefix(nyplSource: str, nyplType='bib'):
    """
    Given a nyplSource (e.g. 'sierra-nypl') and nyplType (e.g. 'item'), returns
    the relevant prefix used in the RC identifier (e.g. 'i')

    Raises a ResearchCatalogIdentifierError if the nyplSource is not present
    in the nypl-core source mapping, if nyplType is not a string, or if the
    source has no prefix defined for the given nyplType.
    """
    # Fetch the mapping once so every check below sees the same data
    source_mapping = nypl_core_source_mapping().get(nyplSource)
    if source_mapping is None:
        raise ResearchCatalogIdentifierError(
            f'Invalid nyplSource: {nyplSource}')

    if not isinstance(nyplType, str):
        raise ResearchCatalogIdentifierError(
            f'Invalid nyplType: {nyplType}')

    # Mapping entries name their prefixes '<type>Prefix' (e.g. 'bibPrefix')
    prefix = source_mapping.get(f'{nyplType}Prefix')
    if prefix is None:
        raise ResearchCatalogIdentifierError(f'Invalid nyplType: {nyplType}')

    return prefix
75+
76+
77+
def nypl_core_source_mapping():
    """
    Builds a nypl-source-mapping by retrieving the mapping from NYPL-Core

    The mapping is fetched from the URL in the NYPL_CORE_SOURCE_MAPPING_URL
    environment variable (defaulting to the file in the NYPL/nypl-core GitHub
    repo) and stored in the module-level CACHE, so later calls return the
    cached value without making another request.

    Raises a ResearchCatalogIdentifierError if the request fails or the
    response body cannot be parsed as JSON.
    """
    name = 'nypl-core-source-mapping'
    cached = CACHE.get(name)
    if cached is not None:
        return cached

    url = os.environ.get('NYPL_CORE_SOURCE_MAPPING_URL',
                         'https://raw.githubusercontent.com/NYPL/nypl-core/master/mappings/recap-discovery/nypl-source-mapping.json')  # noqa
    # Without a timeout requests can block indefinitely on an unresponsive
    # host. Timeouts are RequestExceptions, so they are wrapped below.
    timeout_seconds = 30
    try:
        response = requests.get(url, timeout=timeout_seconds)
        response.raise_for_status()
    except RequestException as e:
        raise ResearchCatalogIdentifierError(
            'Failed to retrieve nypl-core source-mapping file from {url}:'
            ' {errorType} {errorMessage}'
            .format(url=url, errorType=type(e), errorMessage=e)) from None

    try:
        mapping = response.json()
    except (JSONDecodeError, KeyError) as e:
        raise ResearchCatalogIdentifierError(
            'Failed to parse nypl-core source-mapping file: {errorType}'
            ' {errorMessage}'
            .format(errorType=type(e), errorMessage=e)) from None

    CACHE[name] = mapping
    return mapping
104+
105+
106+
class ResearchCatalogIdentifierError(Exception):
    """
    Raised when a Research Catalog identifier, nyplSource, or nyplType is
    invalid, or when the nypl-core source mapping cannot be retrieved or
    parsed. The text is available as both `message` and `str(error)`.
    """

    def __init__(self, message=None):
        # Forward the message to Exception so that str(error) and error.args
        # carry it even when it is passed by keyword.
        if message is None:
            super().__init__()
        else:
            super().__init__(message)
        self.message = message
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
import pytest
2+
import json
3+
4+
from nypl_py_utils.functions.research_catalog_identifier_helper import (
5+
parse_research_catalog_identifier, research_catalog_id_prefix,
6+
ResearchCatalogIdentifierError)
7+
8+
_TEST_MAPPING = {
9+
'sierra-nypl': {
10+
'organization': 'nyplOrg:0001',
11+
'bibPrefix': 'b',
12+
'holdingPrefix': 'h',
13+
'itemPrefix': 'i'
14+
},
15+
'recap-pul': {
16+
'organization': 'nyplOrg:0003',
17+
'bibPrefix': 'pb',
18+
'itemPrefix': 'pi'
19+
},
20+
'recap-cul': {
21+
'organization': 'nyplOrg:0002',
22+
'bibPrefix': 'cb',
23+
'itemPrefix': 'ci'
24+
},
25+
'recap-hl': {
26+
'organization': 'nyplOrg:0004',
27+
'bibPrefix': 'hb',
28+
'itemPrefix': 'hi'
29+
}
30+
}
31+
32+
33+
class TestResearchCatalogIdentifierHelper:
    """Tests for the Research Catalog identifier helper functions."""

    @pytest.fixture(autouse=True)
    def test_instance(self, requests_mock):
        """
        Serves _TEST_MAPPING from the default nypl-core mapping URL and resets
        the helper's module-level cache around every test, so each test reads
        the mocked mapping and does not leak it into later tests.
        """
        # Imported here so the module's CACHE dict can be reset
        from nypl_py_utils.functions import (
            research_catalog_identifier_helper as helper)

        helper.CACHE.clear()
        requests_mock.get(
            'https://raw.githubusercontent.com/NYPL/nypl-core/master/mappings/recap-discovery/nypl-source-mapping.json',  # noqa
            text=json.dumps(_TEST_MAPPING))
        yield
        helper.CACHE.clear()

    def test_parse_research_catalog_identifier_parses_valid(self):
        assert parse_research_catalog_identifier('b1234') == \
            {'id': '1234', 'nyplSource': 'sierra-nypl', 'nyplType': 'bib'}
        assert parse_research_catalog_identifier('cb1234') == \
            {'id': '1234', 'nyplSource': 'recap-cul', 'nyplType': 'bib'}
        assert parse_research_catalog_identifier('pi1234') == \
            {'id': '1234', 'nyplSource': 'recap-pul', 'nyplType': 'item'}
        assert parse_research_catalog_identifier('h1234') == \
            {'id': '1234', 'nyplSource': 'sierra-nypl',
             'nyplType': 'holding'}

    def test_parse_research_catalog_identifier_fails_nonsense(self):
        for invalidIdentifier in [None, 1234, 'z1234', '1234']:
            with pytest.raises(ResearchCatalogIdentifierError):
                parse_research_catalog_identifier(invalidIdentifier)

    def test_research_catalog_id_prefix_parses_valid(self):
        assert research_catalog_id_prefix('sierra-nypl') == 'b'
        assert research_catalog_id_prefix('sierra-nypl', 'bib') == 'b'
        assert research_catalog_id_prefix('sierra-nypl', 'item') == 'i'
        assert research_catalog_id_prefix('sierra-nypl', 'holding') == 'h'
        assert research_catalog_id_prefix('recap-pul', 'bib') == 'pb'
        assert research_catalog_id_prefix('recap-hl', 'bib') == 'hb'
        assert research_catalog_id_prefix('recap-hl', 'item') == 'hi'
        assert research_catalog_id_prefix('recap-pul', 'item') == 'pi'

    def test_research_catalog_id_prefix_fails_nonsense(self):
        for invalidSource in ['sierra-cul', None, 'recap-nypl']:
            with pytest.raises(ResearchCatalogIdentifierError):
                research_catalog_id_prefix(invalidSource)
        for invalidType in [None, '...']:
            with pytest.raises(ResearchCatalogIdentifierError):
                research_catalog_id_prefix('sierra-nypl', invalidType)

0 commit comments

Comments
 (0)