Skip to content

Commit 80defb4

Browse files
committed
Merge branch 'main' into qa
2 parents 8730e6c + 16c94a7 commit 80defb4

16 files changed

Lines changed: 902 additions & 396 deletions

CHANGELOG.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,20 @@
11
# Changelog
2+
## v1.6.3 1/27/25
3+
- Add capability to pull cloudLibrary events by the millisecond
4+
5+
## v1.6.2 12/2/24
6+
- Add record_num capability to patron_data_helper
7+
8+
## v1.6.1 11/26/24
9+
- Accidental duplicate of v1.6.0
10+
11+
## v1.6.0 11/20/24
12+
- Added patron_data_helper functions
13+
- Use executemany instead of execute when appropriate in PostgreSQLClient
14+
- Add capability to retry connecting to a database to the MySQL, PostgreSQL, and Redshift clients
15+
- Automatically close database connection upon error in the MySQL, PostgreSQL, and Redshift clients
16+
- Delete old PostgreSQLPoolClient, which was not production ready
17+
218
## v1.5.0 11/19/24
319
- Added cloudLibrary client
420

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ help:
99
@echo " lint project files using the flake8 linter"
1010

1111
test:
12-
pytest
12+
pytest -W ignore::FutureWarning
1313

1414
lint:
1515
flake8 --exclude *env

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ This package contains common Python utility classes and functions.
1111
* Downloading files from a remote SSH SFTP server
1212
* Connecting to and querying a MySQL database
1313
* Connecting to and querying a PostgreSQL database
14-
* Connecting to and querying a PostgreSQL database using a connection pool
1514
* Connecting to and querying Redshift
1615
* Making requests to OAuth2-authenticated APIs such as the NYPL Platform API and Sierra
1716
* Interacting with vendor APIs such as cloudLibrary
@@ -21,6 +20,7 @@ This package contains common Python utility classes and functions.
2120
* Creating a logger in the appropriate format
2221
* Obfuscating a value using bcrypt
2322
* Parsing/building Research Catalog identifiers
23+
* Mapping between barcodes and Sierra patron ids, plus getting patron data from Sierra and Redshift using those ids or record_nums
2424

2525
## Usage
2626
```python
@@ -38,7 +38,7 @@ kinesis_client = KinesisClient(...)
3838
# Do not use any version below 1.0.0
3939
# All available optional dependencies can be found in pyproject.toml.
4040
# See the "Managing dependencies" section below for more details.
41-
nypl-py-utils[kinesis-client,config-helper]==1.5.0
41+
nypl-py-utils[kinesis-client,config-helper]==1.x.y
4242
```
4343

4444
## Developing locally
@@ -64,7 +64,7 @@ The optional dependency sets also give the developer the option to manually list
6464
### Using PostgreSQLClient in an AWS Lambda
6565
Because `psycopg` requires a statically linked version of the `libpq` library, the `PostgreSQLClient` cannot be installed as-is in an AWS Lambda function. Instead, it must be packaged as follows:
6666
```bash
67-
pip install --target ./package nypl-py-utils[postgresql-client]==1.5.0
67+
pip install --target ./package nypl-py-utils[postgresql-client]==1.x.y
6868

6969
pip install \
7070
--platform manylinux2014_x86_64 \

pyproject.toml

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
44

55
[project]
66
name = "nypl_py_utils"
7-
version = "1.5.0"
7+
version = "1.6.2"
88
authors = [
99
{ name="Aaron Friedman", email="aaronfriedman@nypl.org" },
1010
]
@@ -48,9 +48,6 @@ oauth2-api-client = [
4848
postgresql-client = [
4949
"psycopg[binary]>=3.1.6"
5050
]
51-
postgresql-pool-client = [
52-
"psycopg[binary,pool]>=3.1.6"
53-
]
5451
redshift-client = [
5552
"botocore>=1.29.5",
5653
"redshift-connector>=2.0.909"
@@ -73,11 +70,15 @@ config-helper = [
7370
obfuscation-helper = [
7471
"bcrypt>=4.0.1"
7572
]
73+
patron-data-helper = [
74+
"nypl_py_utils[postgresql-client,redshift-client]>=1.1.5",
75+
"pandas>=2.2.2"
76+
]
7677
research-catalog-identifier-helper = [
7778
"requests>=2.28.1"
7879
]
7980
development = [
80-
"nypl_py_utils[avro-client,kinesis-client,kms-client,mysql-client,oauth2-api-client,postgresql-client,postgresql-pool-client,redshift-client,s3-client,secrets-manager-client,sftp-client,config-helper,obfuscation-helper,research-catalog-identifier-helper]",
81+
"nypl_py_utils[avro-client,kinesis-client,kms-client,mysql-client,oauth2-api-client,postgresql-client,redshift-client,s3-client,secrets-manager-client,sftp-client,config-helper,obfuscation-helper,patron-data-helper,research-catalog-identifier-helper]",
8182
"flake8>=6.0.0",
8283
"freezegun>=1.2.2",
8384
"mock>=4.0.3",

src/nypl_py_utils/classes/cloudlibrary_client.py

Lines changed: 29 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import hmac
44
import requests
55

6-
from datetime import datetime, timedelta, timezone
6+
from datetime import datetime, timezone
77
from nypl_py_utils.functions.log_helper import create_log
88
from requests.adapters import HTTPAdapter, Retry
99

@@ -35,28 +35,25 @@ def get_library_events(self, start_date=None,
3535
optional timeframe. Pulls past 24 hours of events by default.
3636
3737
start_date and end_date are optional parameters, and must be
38-
formatted either YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS
38+
formatted either YYYY-MM-DD, YYYY-MM-DDTHH:MM:SS, or YYYY-MM-DDTHH:MM:SS.fff
3939
"""
40-
date_format = "%Y-%m-%dT%H:%M:%S"
41-
today = datetime.now(timezone.utc)
42-
yesterday = today - timedelta(1)
43-
start_date = datetime.strftime(
44-
yesterday, date_format) if start_date is None else start_date
45-
end_date = datetime.strftime(
46-
today, date_format) if end_date is None else end_date
47-
48-
if (datetime.strptime(start_date, date_format) >
49-
datetime.strptime(end_date, date_format)):
50-
error_message = (f"Start date {start_date} greater than end date "
51-
f"{end_date}, cannot retrieve library events")
52-
self.logger.error(error_message)
53-
raise CloudLibraryClientError(error_message)
54-
55-
self.logger.info(
56-
(f"Fetching all library events in "
57-
f"time frame {start_date} to {end_date}..."))
40+
path = "data/cloudevents"
41+
if None not in (start_date, end_date):
42+
if (self._parse_event_date(start_date) >
43+
self._parse_event_date(end_date)):
44+
error_message = (f"Start date {start_date} greater than end "
45+
f"date {end_date}, cannot retrieve library "
46+
f"events")
47+
self.logger.error(error_message)
48+
raise CloudLibraryClientError(error_message)
49+
50+
path += f"?startdate={start_date}&enddate={end_date}"
51+
self.logger.info(f"Fetching all library events in "
52+
f"time frame {start_date} to {end_date}...")
53+
else:
54+
self.logger.info("Fetching all library events "
55+
"from the past day...")
5856

59-
path = f"data/cloudevents?startdate={start_date}&enddate={end_date}"
6057
response = self.request(path=path, method_type="GET")
6158
return response
6259

@@ -66,7 +63,7 @@ def create_request_body(self, request_type,
6663
Helper function to generate request body when performing item
6764
and/or patron-specific functions (ex. checking out a title).
6865
"""
69-
request_template = "<%(request_type)s><ItemId>%(item_id)s</ItemId><PatronId>%(patron_id)s</PatronId></%(request_type)s>" # noqa
66+
request_template = "<%(request_type)s><ItemId>%(item_id)s</ItemId><PatronId>%(patron_id)s</PatronId></%(request_type)s>" # noqa
7067
return request_template % {
7168
"request_type": request_type,
7269
"item_id": item_id,
@@ -113,6 +110,16 @@ def request(self, path, method_type="GET",
113110

114111
return response
115112

113+
def _parse_event_date(self, event_date) -> datetime:
114+
for fmt in ("%Y-%m-%d", "%Y-%m-%dT%H:%M:%S", "%Y-%m-%dT%H:%M:%S.%f"):
115+
try:
116+
return datetime.strptime(event_date, fmt)
117+
except ValueError:
118+
pass
119+
error_message = (f"Invalid date format found: {event_date}")
120+
self.logger.error(error_message)
121+
raise CloudLibraryClientError(error_message)
122+
116123
def _build_headers(self, method_type, path) -> dict:
117124
time, authorization = self._build_authorization(
118125
method_type, path)

src/nypl_py_utils/classes/mysql_client.py

Lines changed: 41 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import mysql.connector
2+
import time
23

34
from nypl_py_utils.functions.log_helper import create_log
45

@@ -15,35 +16,49 @@ def __init__(self, host, port, database, user, password):
1516
self.user = user
1617
self.password = password
1718

18-
def connect(self, **kwargs):
19+
def connect(self, retry_count=0, backoff_factor=5, **kwargs):
1920
"""
2021
Connects to a MySQL database using the given credentials.
2122
22-
Keyword args can be passed into the connection to set certain options.
23-
All possible arguments can be found here:
24-
https://dev.mysql.com/doc/connector-python/en/connector-python-connectargs.html.
25-
26-
Common arguments include:
27-
autocommit: bool
28-
Whether to automatically commit each query rather than running
29-
them as part of a transaction. By default False.
23+
Parameters
24+
----------
25+
retry_count: int, optional
26+
The number of times to retry connecting before throwing an error.
27+
By default no retry occurs.
28+
backoff_factor: int, optional
29+
The backoff factor when retrying. The amount of time to wait before
30+
retrying is backoff_factor ** number_of_retries_made seconds.
31+
kwargs:
32+
All possible arguments can be found here:
33+
https://dev.mysql.com/doc/connector-python/en/connector-python-connectargs.html
3034
"""
3135
self.logger.info('Connecting to {} database'.format(self.database))
32-
try:
33-
self.conn = mysql.connector.connect(
34-
host=self.host,
35-
port=self.port,
36-
database=self.database,
37-
user=self.user,
38-
password=self.password,
39-
**kwargs)
40-
except mysql.connector.Error as e:
41-
self.logger.error(
42-
'Error connecting to {name} database: {error}'.format(
43-
name=self.database, error=e))
44-
raise MySQLClientError(
45-
'Error connecting to {name} database: {error}'.format(
46-
name=self.database, error=e)) from None
36+
attempt_count = 0
37+
while attempt_count <= retry_count:
38+
try:
39+
try:
40+
self.conn = mysql.connector.connect(
41+
host=self.host,
42+
port=self.port,
43+
database=self.database,
44+
user=self.user,
45+
password=self.password,
46+
**kwargs)
47+
break
48+
except (mysql.connector.Error):
49+
if attempt_count < retry_count:
50+
self.logger.info('Failed to connect -- retrying')
51+
time.sleep(backoff_factor ** attempt_count)
52+
attempt_count += 1
53+
else:
54+
raise
55+
except Exception as e:
56+
self.logger.error(
57+
'Error connecting to {name} database: {error}'.format(
58+
name=self.database, error=e))
59+
raise MySQLClientError(
60+
'Error connecting to {name} database: {error}'.format(
61+
name=self.database, error=e)) from None
4762

4863
def execute_query(self, query, query_params=None, **kwargs):
4964
"""
@@ -83,6 +98,8 @@ def execute_query(self, query, query_params=None, **kwargs):
8398
return cursor.fetchall()
8499
except Exception as e:
85100
self.conn.rollback()
101+
cursor.close()
102+
self.close_connection()
86103
self.logger.error(
87104
('Error executing {name} database query \'{query}\': {error}')
88105
.format(name=self.database, query=query, error=e))

0 commit comments

Comments
 (0)