Skip to content

Commit f9f5746

Browse files
Merge pull request #39 from NYPL/patron-data-methods
Add patron data methods and database connection retries
2 parents 30b18f4 + 9538159 commit f9f5746

14 files changed

Lines changed: 765 additions & 352 deletions

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,11 @@
11
# Changelog
2+
## v1.6.0 11/20/24
3+
- Added patron_data_helper functions
4+
- Switched PostgreSQLClient to use executemany instead of execute when appropriate
5+
- Added the ability to retry database connections in the MySQL, PostgreSQL, and Redshift clients
6+
- The MySQL, PostgreSQL, and Redshift clients now automatically close the database connection upon error
7+
- Deleted the old PostgreSQLPoolClient, which was not production-ready
8+
29
## v1.5.0 11/19/24
310
- Added cloudLibrary client
411

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ help:
99
@echo " lint project files using the flake8 linter"
1010

1111
test:
12-
pytest
12+
pytest -W ignore::FutureWarning
1313

1414
lint:
1515
flake8 --exclude *env

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ This package contains common Python utility classes and functions.
1111
* Downloading files from a remote SSH SFTP server
1212
* Connecting to and querying a MySQL database
1313
* Connecting to and querying a PostgreSQL database
14-
* Connecting to and querying a PostgreSQL database using a connection pool
1514
* Connecting to and querying Redshift
1615
* Making requests to OAuth2-authenticated APIs such as the NYPL Platform API and Sierra
1716
* Interacting with vendor APIs such as cloudLibrary
@@ -21,6 +20,7 @@ This package contains common Python utility classes and functions.
2120
* Creating a logger in the appropriate format
2221
* Obfuscating a value using bcrypt
2322
* Parsing/building Research Catalog identifiers
23+
* Mapping between barcodes and Sierra patron IDs, and retrieving patron data from Sierra and Redshift using those IDs
2424

2525
## Usage
2626
```python
@@ -38,7 +38,7 @@ kinesis_client = KinesisClient(...)
3838
# Do not use any version below 1.0.0
3939
# All available optional dependencies can be found in pyproject.toml.
4040
# See the "Managing dependencies" section below for more details.
41-
nypl-py-utils[kinesis-client,config-helper]==1.5.0
41+
nypl-py-utils[kinesis-client,config-helper]==1.x.y
4242
```
4343

4444
## Developing locally
@@ -64,7 +64,7 @@ The optional dependency sets also give the developer the option to manually list
6464
### Using PostgreSQLClient in an AWS Lambda
6565
Because `psycopg` requires a statically linked version of the `libpq` library, the `PostgreSQLClient` cannot be installed as-is in an AWS Lambda function. Instead, it must be packaged as follows:
6666
```bash
67-
pip install --target ./package nypl-py-utils[postgresql-client]==1.5.0
67+
pip install --target ./package nypl-py-utils[postgresql-client]==1.x.y
6868

6969
pip install \
7070
--platform manylinux2014_x86_64 \

pyproject.toml

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
44

55
[project]
66
name = "nypl_py_utils"
7-
version = "1.5.0"
7+
version = "1.6.0"
88
authors = [
99
{ name="Aaron Friedman", email="aaronfriedman@nypl.org" },
1010
]
@@ -48,9 +48,6 @@ oauth2-api-client = [
4848
postgresql-client = [
4949
"psycopg[binary]>=3.1.6"
5050
]
51-
postgresql-pool-client = [
52-
"psycopg[binary,pool]>=3.1.6"
53-
]
5451
redshift-client = [
5552
"botocore>=1.29.5",
5653
"redshift-connector>=2.0.909"
@@ -73,11 +70,15 @@ config-helper = [
7370
obfuscation-helper = [
7471
"bcrypt>=4.0.1"
7572
]
73+
patron-data-helper = [
74+
"nypl_py_utils[postgresql-client,redshift-client]>=1.1.5",
75+
"pandas>=2.2.2"
76+
]
7677
research-catalog-identifier-helper = [
7778
"requests>=2.28.1"
7879
]
7980
development = [
80-
"nypl_py_utils[avro-client,kinesis-client,kms-client,mysql-client,oauth2-api-client,postgresql-client,postgresql-pool-client,redshift-client,s3-client,secrets-manager-client,sftp-client,config-helper,obfuscation-helper,research-catalog-identifier-helper]",
81+
"nypl_py_utils[avro-client,kinesis-client,kms-client,mysql-client,oauth2-api-client,postgresql-client,redshift-client,s3-client,secrets-manager-client,sftp-client,config-helper,obfuscation-helper,patron-data-helper,research-catalog-identifier-helper]",
8182
"flake8>=6.0.0",
8283
"freezegun>=1.2.2",
8384
"mock>=4.0.3",

src/nypl_py_utils/classes/mysql_client.py

Lines changed: 41 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import mysql.connector
2+
import time
23

34
from nypl_py_utils.functions.log_helper import create_log
45

@@ -15,35 +16,49 @@ def __init__(self, host, port, database, user, password):
1516
self.user = user
1617
self.password = password
1718

18-
def connect(self, **kwargs):
19+
def connect(self, retry_count=0, backoff_factor=5, **kwargs):
1920
"""
2021
Connects to a MySQL database using the given credentials.
2122
22-
Keyword args can be passed into the connection to set certain options.
23-
All possible arguments can be found here:
24-
https://dev.mysql.com/doc/connector-python/en/connector-python-connectargs.html.
25-
26-
Common arguments include:
27-
autocommit: bool
28-
Whether to automatically commit each query rather than running
29-
them as part of a transaction. By default False.
23+
Parameters
24+
----------
25+
retry_count: int, optional
26+
The number of times to retry connecting before throwing an error.
27+
By default no retry occurs.
28+
backoff_factor: int, optional
29+
The backoff factor when retrying. The amount of time to wait before
30+
retrying is backoff_factor ** number_of_retries_made seconds.
31+
kwargs:
32+
All possible arguments can be found here:
33+
https://dev.mysql.com/doc/connector-python/en/connector-python-connectargs.html
3034
"""
3135
self.logger.info('Connecting to {} database'.format(self.database))
32-
try:
33-
self.conn = mysql.connector.connect(
34-
host=self.host,
35-
port=self.port,
36-
database=self.database,
37-
user=self.user,
38-
password=self.password,
39-
**kwargs)
40-
except mysql.connector.Error as e:
41-
self.logger.error(
42-
'Error connecting to {name} database: {error}'.format(
43-
name=self.database, error=e))
44-
raise MySQLClientError(
45-
'Error connecting to {name} database: {error}'.format(
46-
name=self.database, error=e)) from None
36+
attempt_count = 0
37+
while attempt_count <= retry_count:
38+
try:
39+
try:
40+
self.conn = mysql.connector.connect(
41+
host=self.host,
42+
port=self.port,
43+
database=self.database,
44+
user=self.user,
45+
password=self.password,
46+
**kwargs)
47+
break
48+
except (mysql.connector.Error):
49+
if attempt_count < retry_count:
50+
self.logger.info('Failed to connect -- retrying')
51+
time.sleep(backoff_factor ** attempt_count)
52+
attempt_count += 1
53+
else:
54+
raise
55+
except Exception as e:
56+
self.logger.error(
57+
'Error connecting to {name} database: {error}'.format(
58+
name=self.database, error=e))
59+
raise MySQLClientError(
60+
'Error connecting to {name} database: {error}'.format(
61+
name=self.database, error=e)) from None
4762

4863
def execute_query(self, query, query_params=None, **kwargs):
4964
"""
@@ -83,6 +98,8 @@ def execute_query(self, query, query_params=None, **kwargs):
8398
return cursor.fetchall()
8499
except Exception as e:
85100
self.conn.rollback()
101+
cursor.close()
102+
self.close_connection()
86103
self.logger.error(
87104
('Error executing {name} database query \'{query}\': {error}')
88105
.format(name=self.database, query=query, error=e))

src/nypl_py_utils/classes/postgresql_client.py

Lines changed: 58 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,48 +1,59 @@
11
import psycopg
2+
import time
23

34
from nypl_py_utils.functions.log_helper import create_log
45

56

67
class PostgreSQLClient:
78
"""Client for managing individual connections to a PostgreSQL database"""
89

9-
def __init__(self, host, port, db_name, user, password):
10+
def __init__(self, host, port, database, user, password):
1011
self.logger = create_log('postgresql_client')
1112
self.conn = None
1213
self.conn_info = ('postgresql://{user}:{password}@{host}:{port}/'
13-
'{db_name}').format(user=user, password=password,
14-
host=host, port=port,
15-
db_name=db_name)
14+
'{database}').format(user=user, password=password,
15+
host=host, port=port,
16+
database=database)
17+
self.database = database
1618

17-
self.db_name = db_name
18-
19-
def connect(self, **kwargs):
19+
def connect(self, retry_count=0, backoff_factor=5, **kwargs):
2020
"""
2121
Connects to a PostgreSQL database using the given credentials.
2222
23-
Keyword args can be passed into the connection to set certain options.
24-
All possible arguments can be found here:
25-
https://www.psycopg.org/psycopg3/docs/api/connections.html#psycopg.Connection.connect.
26-
27-
Common arguments include:
28-
autocommit: bool
29-
Whether to automatically commit each query rather than running
30-
them as part of a transaction. By default False.
31-
row_factory: RowFactory
32-
A psycopg RowFactory that determines how the data will be
33-
returned. Defaults to tuple_row, which returns the rows as a
34-
list of tuples.
23+
Parameters
24+
----------
25+
retry_count: int, optional
26+
The number of times to retry connecting before throwing an error.
27+
By default no retry occurs.
28+
backoff_factor: int, optional
29+
The backoff factor when retrying. The amount of time to wait before
30+
retrying is backoff_factor ** number_of_retries_made seconds.
31+
kwargs:
32+
All possible arguments (such as the row_factory) can be found here:
33+
https://www.psycopg.org/psycopg3/docs/api/connections.html#psycopg.Connection.connect
3534
"""
36-
self.logger.info('Connecting to {} database'.format(self.db_name))
37-
try:
38-
self.conn = psycopg.connect(self.conn_info, **kwargs)
39-
except psycopg.Error as e:
40-
self.logger.error(
41-
'Error connecting to {name} database: {error}'.format(
42-
name=self.db_name, error=e))
43-
raise PostgreSQLClientError(
44-
'Error connecting to {name} database: {error}'.format(
45-
name=self.db_name, error=e)) from None
35+
self.logger.info('Connecting to {} database'.format(self.database))
36+
attempt_count = 0
37+
while attempt_count <= retry_count:
38+
try:
39+
try:
40+
self.conn = psycopg.connect(self.conn_info, **kwargs)
41+
break
42+
except (psycopg.OperationalError,
43+
psycopg.errors.ConnectionTimeout):
44+
if attempt_count < retry_count:
45+
self.logger.info('Failed to connect -- retrying')
46+
time.sleep(backoff_factor ** attempt_count)
47+
attempt_count += 1
48+
else:
49+
raise
50+
except Exception as e:
51+
self.logger.error(
52+
'Error connecting to {name} database: {error}'.format(
53+
name=self.database, error=e))
54+
raise PostgreSQLClientError(
55+
'Error connecting to {name} database: {error}'.format(
56+
name=self.database, error=e)) from None
4657

4758
def execute_query(self, query, query_params=None, **kwargs):
4859
"""
@@ -53,7 +64,11 @@ def execute_query(self, query, query_params=None, **kwargs):
5364
query: str
5465
The query to execute
5566
query_params: sequence, optional
56-
The values to be used in a parameterized query
67+
The values to be used in a parameterized query. The values can be
68+
for a single insert query -- e.g. execute_query(
69+
"INSERT INTO x VALUES (%s, %s)", (1, "a"))
70+
or for multiple -- e.g. execute_query(
71+
"INSERT INTO x VALUES (%s, %s)", [(1, "a"), (2, "b")])
5772
kwargs:
5873
All possible arguments can be found here:
5974
https://www.psycopg.org/psycopg3/docs/api/cursors.html#psycopg.Cursor.execute
@@ -65,30 +80,38 @@ def execute_query(self, query, query_params=None, **kwargs):
6580
based on the connection's row_factory if there's something to
6681
return (even if the result set is empty).
6782
"""
68-
self.logger.info('Querying {} database'.format(self.db_name))
83+
self.logger.info('Querying {} database'.format(self.database))
6984
self.logger.debug('Executing query {}'.format(query))
7085
try:
7186
cursor = self.conn.cursor()
72-
cursor.execute(query, query_params, **kwargs)
87+
if query_params is not None and all(
88+
isinstance(param, tuple) or isinstance(param, list)
89+
for param in query_params
90+
):
91+
cursor.executemany(query, query_params, **kwargs)
92+
else:
93+
cursor.execute(query, query_params, **kwargs)
7394
self.conn.commit()
7495
return None if cursor.description is None else cursor.fetchall()
7596
except Exception as e:
7697
self.conn.rollback()
98+
cursor.close()
99+
self.close_connection()
77100
self.logger.error(
78101
('Error executing {name} database query \'{query}\': '
79102
'{error}').format(
80-
name=self.db_name, query=query, error=e))
103+
name=self.database, query=query, error=e))
81104
raise PostgreSQLClientError(
82105
('Error executing {name} database query \'{query}\': '
83106
'{error}').format(
84-
name=self.db_name, query=query, error=e)) from None
107+
name=self.database, query=query, error=e)) from None
85108
finally:
86109
cursor.close()
87110

88111
def close_connection(self):
89112
"""Closes the database connection"""
90113
self.logger.debug('Closing {} database connection'.format(
91-
self.db_name))
114+
self.database))
92115
self.conn.close()
93116

94117

0 commit comments

Comments
 (0)