Skip to content

Commit 2b70e38

Browse files
Merge pull request #11 from NYPL/auto-close-pg-connections
Separate pooling and regular PostgreSQL clients
2 parents a6f7228 + 031c778 commit 2b70e38

19 files changed

Lines changed: 461 additions & 225 deletions

.github/workflows/run-unit-tests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ jobs:
2828
run: |
2929
python -m pip install --upgrade pip
3030
pip install .
31-
pip install '.[tests]'
31+
pip install '.[development]'
3232
3333
- name: Run linter and test suite
3434
run: |

CHANGELOG.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
# Changelog
22

3-
## v0.0.9 - 3/16/23
3+
## v1.0.0 - 3/22/23
44
- Improve Oauth2ApiClient token refresh and method responses
5-
6-
## v0.0.8 - 3/3/23
7-
- Pass in all kwargs from PostgreSQLClient to ConnectionPool so that all ConnectionPool settings
8-
can be set from the wrapper
5+
- Create separate PostgreSQLClient and PostgreSQLPoolClient classes
6+
- Update PostgreSQL and MySQL clients to accept write queries implicitly
7+
- Update RedshiftClient to ensure SSL is being used
8+
- Separate dependencies to slim down package installation
99

1010
## v0.0.7 - 3/1/23
1111
- Added Oauth2ApiClient for oauth2 authenticated calls to our Platform API and Sierra

README.md

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ This package contains common Python utility classes and functions.
88
* Decrypting values with KMS
99
* Encoding and decoding records using a given Avro schema
1010
* Connecting to and querying a MySQL database
11+
* Connecting to and querying a PostgreSQL database
1112
* Connecting to and querying a PostgreSQL database using a connection pool
1213
* Connecting to and querying Redshift
1314
* Making requests to OAuth2-authenticated APIs such as the NYPL Platform API and Sierra
@@ -25,11 +26,16 @@ python3 -m venv testenv
2526
source testenv/bin/activate
2627
pip install --upgrade pip
2728
pip install .
28-
pip install '.[tests]'
29+
pip install '.[development]'
2930
deactivate && source testenv/bin/activate
3031
```
3132

32-
Add any new dependencies required by code in the `nypl_py_utils` directory to the `dependencies` section of `pyproject.toml`. Add dependencies only required by code in the `tests` directory to the `[project.optional-dependencies]` section.
33+
## Managing dependencies
34+
To prevent dependency bloat, this package has no required dependencies. Instead, each class and helper file has its own optional dependency set. For instance, if an app needs to use the KMS client and the obfuscation helper, it should add `nypl-py-utils[kms-client,obfuscation-helper]` to the app's requirements. This way, only the required dependencies are installed.
35+
36+
When a new client or helper file is created, a new optional dependency set should be added to `pyproject.toml`. The `development` dependency set, which includes all the dependencies required by all of the classes and tests, should also be updated.
37+
38+
The optional dependency sets also give the developer the option to manually list out the dependencies of the clients rather than relying upon what the package declares as required, which can be beneficial in certain circumstances. For instance, AWS Lambda functions come with `boto3` and `botocore` pre-installed, so it's not necessary to include these (rather hefty) dependencies in the Lambda deployment package.
3339

3440
### Troubleshooting
3541
If running `main.py` in this virtual environment produces the following error:

pyproject.toml

Lines changed: 45 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
44

55
[project]
66
name = "nypl_py_utils"
7-
version = "0.0.8"
7+
version = "1.0.0"
88
authors = [
99
{ name="Aaron Friedman", email="aaronfriedman@nypl.org" },
1010
]
@@ -16,30 +16,59 @@ classifiers = [
1616
"License :: OSI Approved :: MIT License",
1717
"Operating System :: OS Independent",
1818
]
19-
dependencies = [
19+
dependencies = []
20+
21+
[project.urls]
22+
"Homepage" = "https://github.com/NYPL/python-utils"
23+
"Bug Tracker" = "https://github.com/NYPL/python-utils/issues"
24+
25+
[project.optional-dependencies]
26+
avro-encoder = [
2027
"avro>=1.11.1",
21-
"bcrypt>=4.0.1",
28+
"requests>=2.28.1"
29+
]
30+
kinesis-client = [
2231
"boto3>=1.26.5",
23-
"botocore>=1.29.5",
24-
"mysql-connector-python>=8.0.32",
32+
"botocore>=1.29.5"
33+
]
34+
kms-client = [
35+
"boto3>=1.26.5",
36+
"botocore>=1.29.5"
37+
]
38+
mysql-client = [
39+
"mysql-connector-python>=8.0.32"
40+
]
41+
oauth2-api-client = [
2542
"oauthlib>=3.2.2",
26-
"psycopg[binary,pool]>=3.1.6",
27-
"PyYAML>=6.0",
28-
"redshift-connector>=2.0.909",
29-
"requests>=2.28.1",
3043
"requests_oauthlib>=1.3.1"
3144
]
32-
33-
[project.optional-dependencies]
34-
tests = [
45+
postgresql-client = [
46+
"psycopg[binary]>=3.1.6"
47+
]
48+
postgresql-pool-client = [
49+
"psycopg[binary,pool]>=3.1.6"
50+
]
51+
redshift-client = [
52+
"botocore>=1.29.5",
53+
"redshift-connector>=2.0.909"
54+
]
55+
s3-client = [
56+
"boto3>=1.26.5",
57+
"botocore>=1.29.5"
58+
]
59+
config-helper = [
60+
"nypl_py_utils[kms-client]",
61+
"PyYAML>=6.0"
62+
]
63+
obfuscation-helper = [
64+
"bcrypt>=4.0.1"
65+
]
66+
development = [
67+
"nypl_py_utils[avro-encoder,kinesis-client,kms-client,mysql-client,oauth2-api-client,postgresql-client,postgresql-pool-client,redshift-client,s3-client,config-helper,obfuscation-helper]",
3568
"flake8>=6.0.0",
3669
"freezegun>=1.2.2",
3770
"mock>=4.0.3",
3871
"pytest>=7.2.0",
3972
"pytest-mock>=3.10.0",
4073
"requests-mock>=1.10.0"
4174
]
42-
43-
[project.urls]
44-
"Homepage" = "https://github.com/NYPL/python-utils"
45-
"Bug Tracker" = "https://github.com/NYPL/python-utils/issues"

src/nypl_py_utils/__init__.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +0,0 @@
1-
from .classes.avro_encoder import AvroEncoder, AvroEncoderError # noqa
2-
from .classes.kinesis_client import KinesisClient, KinesisClientError # noqa
3-
from .classes.kms_client import KmsClient, KmsClientError # noqa
4-
from .classes.mysql_client import MySQLClient, MySQLClientError # noqa
5-
from .classes.oauth2_api_client import Oauth2ApiClient, Oauth2ApiClientError # noqa
6-
from .classes.postgresql_client import PostgreSQLClient, PostgreSQLClientError # noqa
7-
from .classes.redshift_client import RedshiftClient, RedshiftClientError # noqa
8-
from .classes.s3_client import S3Client, S3ClientError # noqa

src/nypl_py_utils/classes/mysql_client.py

Lines changed: 29 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -15,16 +15,28 @@ def __init__(self, host, port, database, user, password):
1515
self.user = user
1616
self.password = password
1717

18-
def connect(self):
19-
"""Connects to a MySQL database using the given credentials"""
18+
def connect(self, **kwargs):
19+
"""
20+
Connects to a MySQL database using the given credentials.
21+
22+
Keyword args can be passed into the connection to set certain options.
23+
All possible arguments can be found here:
24+
https://dev.mysql.com/doc/connector-python/en/connector-python-connectargs.html.
25+
26+
Common arguments include:
27+
autocommit: bool
28+
Whether to automatically commit each query rather than running
29+
them as part of a transaction. By default False.
30+
"""
2031
self.logger.info('Connecting to {} database'.format(self.database))
2132
try:
2233
self.conn = mysql.connector.connect(
2334
host=self.host,
2435
port=self.port,
2536
database=self.database,
2637
user=self.user,
27-
password=self.password)
38+
password=self.password,
39+
**kwargs)
2840
except mysql.connector.Error as e:
2941
self.logger.error(
3042
'Error connecting to {name} database: {error}'.format(
@@ -33,37 +45,38 @@ def connect(self):
3345
'Error connecting to {name} database: {error}'.format(
3446
name=self.database, error=e)) from None
3547

36-
def execute_query(self, query, is_write_query=False, query_params=None,
37-
dictionary=False):
48+
def execute_query(self, query, query_params=None, **kwargs):
3849
"""
3950
Executes an arbitrary query against the given database connection.
4051
4152
Parameters
4253
----------
4354
query: str
4455
The query to execute
45-
is_write_query: bool, optional
46-
Whether or not the query is writing to the database, in which case
47-
the transaction needs to be committed and None should be returned
4856
query_params: sequence, optional
4957
The values to be used in a parameterized query
50-
dictionary: bool, optional
51-
Whether the data will be returned as a dictionary. Defaults to
52-
False, which means the data is returned as a list of tuples.
58+
kwargs:
59+
All possible arguments can be found here:
60+
https://dev.mysql.com/doc/connector-python/en/connector-python-api-mysqlconnection-cursor.html.
61+
62+
Common arguments include:
63+
dictionary: bool
64+
Whether the data will be returned as a dictionary. Defaults
65+
to False, meaning the data is returned as a list of tuples.
5366
5467
Returns
5568
-------
5669
None or sequence
57-
None if is_write_query is True. A list of either tuples or
58-
dictionaries (based on the dictionary input) if is_write_query is
59-
False.
70+
None if the cursor has nothing to return. A list of either tuples
71+
or dictionaries (based on the dictionary input) if there's
72+
something to return (even if the result set is empty).
6073
"""
6174
self.logger.info('Querying {} database'.format(self.database))
6275
self.logger.debug('Executing query {}'.format(query))
6376
try:
64-
cursor = self.conn.cursor(dictionary=dictionary)
77+
cursor = self.conn.cursor(**kwargs)
6578
cursor.execute(query, query_params)
66-
if is_write_query:
79+
if cursor.description is None:
6780
self.conn.commit()
6881
return None
6982
else:

src/nypl_py_utils/classes/postgresql_client.py

Lines changed: 48 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1,40 +1,41 @@
11
import psycopg
22

33
from nypl_py_utils.functions.log_helper import create_log
4-
from psycopg.rows import tuple_row
5-
from psycopg_pool import ConnectionPool
64

75

86
class PostgreSQLClient:
9-
"""
10-
Client for managing connections to a PostgreSQL database (such as Sierra)
11-
"""
7+
"""Client for managing individual connections to a PostgreSQL database"""
128

13-
def __init__(self, host, port, db_name, user, password, **kwargs):
9+
def __init__(self, host, port, db_name, user, password):
1410
self.logger = create_log('postgresql_client')
15-
self.db_name = db_name
16-
self.timeout = kwargs.get('timeout', 300)
17-
11+
self.conn = None
1812
self.conn_info = ('postgresql://{user}:{password}@{host}:{port}/'
1913
'{db_name}').format(user=user, password=password,
2014
host=host, port=port,
2115
db_name=db_name)
22-
self.kwargs = kwargs
23-
self.kwargs['min_size'] = kwargs.get('min_size', 0)
24-
self.kwargs['max_size'] = kwargs.get('max_size', 1)
25-
self.pool = ConnectionPool(self.conn_info, open=False, **self.kwargs)
2616

27-
def connect(self):
17+
self.db_name = db_name
18+
19+
def connect(self, **kwargs):
2820
"""
29-
Opens the connection pool and connects to the given PostgreSQL database
30-
min_size number of times.
21+
Connects to a PostgreSQL database using the given credentials.
22+
23+
Keyword args can be passed into the connection to set certain options.
24+
All possible arguments can be found here:
25+
https://www.psycopg.org/psycopg3/docs/api/connections.html#psycopg.Connection.connect.
26+
27+
Common arguments include:
28+
autocommit: bool
29+
Whether to automatically commit each query rather than running
30+
them as part of a transaction. By default False.
31+
row_factory: RowFactory
32+
A psycopg RowFactory that determines how the data will be
33+
returned. Defaults to tuple_row, which returns the rows as a
34+
list of tuples.
3135
"""
3236
self.logger.info('Connecting to {} database'.format(self.db_name))
3337
try:
34-
if self.pool is None:
35-
self.pool = ConnectionPool(
36-
self.conn_info, open=False, **self.kwargs)
37-
self.pool.open(wait=True, timeout=self.timeout)
38+
self.conn = psycopg.connect(self.conn_info, **kwargs)
3839
except psycopg.Error as e:
3940
self.logger.error(
4041
'Error connecting to {name} database: {error}'.format(
@@ -43,59 +44,52 @@ def connect(self):
4344
'Error connecting to {name} database: {error}'.format(
4445
name=self.db_name, error=e)) from None
4546

46-
def execute_query(self, query, is_write_query=False, query_params=None,
47-
row_factory=tuple_row):
47+
def execute_query(self, query, query_params=None, **kwargs):
4848
"""
49-
Requests a connection from the pool and uses it to execute an arbitrary
50-
query. After the query is complete, returns the connection to the pool.
49+
Executes an arbitrary query against the given database connection.
5150
5251
Parameters
5352
----------
5453
query: str
5554
The query to execute
56-
is_write_query: bool, optional
57-
Whether or not the query is writing to the database, in which case
58-
the transaction needs to be committed and None should be returned
5955
query_params: sequence, optional
6056
The values to be used in a parameterized query
61-
row_factory: RowFactory, optional
62-
A psycopg RowFactory that determines how the data will be returned.
63-
Defaults to tuple_row, which returns the rows as a list of tuples.
57+
kwargs:
58+
All possible arguments can be found here:
59+
https://www.psycopg.org/psycopg3/docs/api/cursors.html#psycopg.Cursor.execute
6460
6561
Returns
6662
-------
6763
None or sequence
68-
None if is_write_query is True. Some type of sequence based on
69-
the row_factory input if is_write_query is False.
64+
None if the cursor has nothing to return. Some type of sequence
65+
based on the connection's row_factory if there's something to
66+
return (even if the result set is empty).
7067
"""
7168
self.logger.info('Querying {} database'.format(self.db_name))
7269
self.logger.debug('Executing query {}'.format(query))
73-
with self.pool.connection() as conn:
74-
try:
75-
conn.row_factory = row_factory
76-
cursor = conn.execute(query, query_params)
77-
if is_write_query:
78-
conn.commit()
79-
return None
80-
else:
81-
return cursor.fetchall()
82-
except Exception as e:
83-
conn.rollback()
84-
self.logger.error(
85-
('Error executing {name} database query \'{query}\': '
86-
'{error}').format(
87-
name=self.db_name, query=query, error=e))
88-
raise PostgreSQLClientError(
89-
('Error executing {name} database query \'{query}\': '
90-
'{error}').format(
91-
name=self.db_name, query=query, error=e)) from None
70+
try:
71+
cursor = self.conn.cursor()
72+
cursor.execute(query, query_params, **kwargs)
73+
self.conn.commit()
74+
return None if cursor.description is None else cursor.fetchall()
75+
except Exception as e:
76+
self.conn.rollback()
77+
self.logger.error(
78+
('Error executing {name} database query \'{query}\': '
79+
'{error}').format(
80+
name=self.db_name, query=query, error=e))
81+
raise PostgreSQLClientError(
82+
('Error executing {name} database query \'{query}\': '
83+
'{error}').format(
84+
name=self.db_name, query=query, error=e)) from None
85+
finally:
86+
cursor.close()
9287

9388
def close_connection(self):
94-
"""Closes the connection pool"""
89+
"""Closes the database connection"""
9590
self.logger.debug('Closing {} database connection'.format(
9691
self.db_name))
97-
self.pool.close()
98-
self.pool = None
92+
self.conn.close()
9993

10094

10195
class PostgreSQLClientError(Exception):

0 commit comments

Comments
 (0)