Skip to content

Commit a1809ae

Browse files
Add snowflake client
1 parent c7f2759 commit a1809ae

9 files changed

Lines changed: 315 additions & 70 deletions

File tree

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,9 @@
11
# Changelog
2+
## v1.10.0 3/17/26
3+
- Add Snowflake client
4+
- Update config helper to allow loading config files without PLAINTEXT/ENCRYPTED structure
5+
- Update structured log helper to include name of the logger by default
6+
27
## v1.9.1 3/11/26
38
- Add merge_contextvars to default structlog configuration
49

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ This package contains common Python utility classes and functions.
1212
* Connecting to and querying a MySQL database
1313
* Connecting to and querying a PostgreSQL database
1414
* Connecting to and querying Redshift
15+
* Connecting to and querying Snowflake
1516
* Making requests to the Oauth2 authenticated APIs such as NYPL Platform API and Sierra
1617
* Interacting with vendor APIs such as cloudLibrary
1718

pyproject.toml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
44

55
[project]
66
name = "nypl_py_utils"
7-
version = "1.9.1"
7+
version = "1.10.0"
88
authors = [
99
{ name="Aaron Friedman", email="aaronfriedman@nypl.org" },
1010
]
@@ -70,6 +70,10 @@ secrets-manager-client = [
7070
"boto3>=1.26.5",
7171
"botocore>=1.29.5"
7272
]
73+
snowflake-client = [
74+
"nypl_py_utils[log-helper]",
75+
"snowflake-connector-python>=4.3.0"
76+
]
7377
sftp-client = [
7478
"nypl_py_utils[log-helper]",
7579
"paramiko>=3.4.1"
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
import snowflake.connector as sc
2+
3+
from nypl_py_utils.functions.log_helper import create_log
4+
5+
6+
class SnowflakeClient:
    """Client for managing connections to Snowflake"""

    def __init__(self, account, user, password, warehouse=None):
        self.logger = create_log('snowflake_client')
        self.conn = None  # set by connect(); None until then
        self.account = account
        self.user = user
        self.password = password
        self.warehouse = warehouse

    def connect(self, **kwargs):
        """
        Connects to a Snowflake database using the given credentials. If
        warehouse parameter is None, uses the default warehouse for the user.

        Parameters
        ----------
        kwargs:
            All possible arguments (such as timeouts) can be found here:
            https://docs.snowflake.com/en/developer-guide/python-connector/python-connector-api#connect

        Raises
        ------
        SnowflakeClientError
            If the connection cannot be established.
        """
        self.logger.info('Connecting to Snowflake')
        try:
            self.conn = sc.connect(
                account=self.account,
                user=self.user,
                password=self.password,
                warehouse=self.warehouse,
                **kwargs)
        except Exception as e:
            raise SnowflakeClientError(
                f'Error connecting to Snowflake: {e}') from None

    def execute_query(self, query, **kwargs):
        """
        Executes an arbitrary query against the given database connection.

        Note that:
        1) All results will be fetched by default, so this method is not
           suitable if you do not want to load all rows into memory
        2) AUTOCOMMIT is on by default, so this method is not suitable if
           you want to execute multiple queries in a single transaction
        3) This method can be used for both read and write queries, but
           it's not optimized for writing -- there is no parameter binding
           or executemany support, and the return value for write queries
           can be unpredictable.

        Parameters
        ----------
        kwargs:
            All possible arguments (such as timeouts) can be found here:
            https://docs.snowflake.com/en/developer-guide/python-connector/python-connector-api#execute

        Returns
        -------
        sequence
            A list of tuples

        Raises
        ------
        SnowflakeClientError
            If the query fails. The connection is closed before raising.
        """
        self.logger.info('Querying database')
        cursor = self.conn.cursor()
        try:
            # try/finally guarantees the cursor is closed on both the
            # success and the error path; the previous extra
            # `except Exception: raise` clause was redundant
            try:
                cursor.execute(query, **kwargs)
                return cursor.fetchall()
            finally:
                cursor.close()
        except Exception as e:
            # If there was an error, also close the database connection
            self.close_connection()

            # Truncate very long queries so the error message stays readable
            short_q = str(query)
            if len(short_q) > 2500:
                short_q = short_q[:2497] + "..."
            raise SnowflakeClientError(
                f'Error executing Snowflake query {short_q}: {e}', self.logger
            ) from None

    def close_connection(self):
        """Closes the connection (a no-op if connect() was never called)"""
        self.logger.info('Closing Snowflake connection')
        # Guard against close before (or after a failed) connect, where
        # self.conn is still None
        if self.conn is not None:
            self.conn.close()
90+
91+
92+
class SnowflakeClientError(Exception):
    """Raised when the Snowflake client fails to connect or to run a query.

    If a logger is supplied, the message is logged at ERROR level as a side
    effect of construction, so the failure is recorded even if the exception
    is swallowed upstream.
    """

    def __init__(self, message='', logger=None):
        # Pass the message to Exception so e.args, repr(), and pickling
        # carry it (the bare super().__init__() left args empty)
        super().__init__(message)
        self.message = message
        if logger is not None:
            logger.error(message)

    def __str__(self):
        return self.message

src/nypl_py_utils/functions/config_helper.py

Lines changed: 45 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,13 @@
1010

1111
def load_env_file(run_type, file_string):
1212
"""
13-
This method loads a YAML config file containing environment variables,
14-
decrypts whichever are encrypted, and puts them all into os.environ as
15-
strings. For a YAML variable containing a list of values, the list is
16-
exported into os.environ as a json string and should be loaded as such.
13+
This method reads a YAML config file containing environment variables and
14+
loads them all into os.environ as strings. See _parse_yaml_dict for more.
1715
18-
It requires the YAML file to be split into a 'PLAINTEXT_VARIABLES' section
19-
and an 'ENCRYPTED_VARIABLES' section. See config/sample.yaml for an example
20-
config file.
16+
If the config file is divided into 'PLAINTEXT_VARIABLES' and
17+
'ENCRYPTED_VARIABLES' sections (see config/sample.yaml for an example), the
18+
'ENCRYPTED_VARIABLES' variables will be decrypted first. Otherwise, all
19+
variables will be loaded as is.
2120
2221
Parameters
2322
----------
@@ -36,31 +35,53 @@ def load_env_file(run_type, file_string):
3635
try:
3736
env_dict = yaml.safe_load(env_stream)
3837
except yaml.YAMLError:
39-
logger.error('Invalid YAML file: {}'.format(open_file))
4038
raise ConfigHelperError(
4139
'Invalid YAML file: {}'.format(open_file)) from None
4240
except FileNotFoundError:
43-
logger.error('Could not find config file {}'.format(open_file))
4441
raise ConfigHelperError(
4542
'Could not find config file {}'.format(open_file)) from None
4643

4744
if env_dict:
48-
for key, value in env_dict.get('PLAINTEXT_VARIABLES', {}).items():
49-
if type(value) is list:
50-
os.environ[key] = json.dumps(value)
51-
else:
52-
os.environ[key] = str(value)
53-
54-
kms_client = KmsClient()
55-
for key, value in env_dict.get('ENCRYPTED_VARIABLES', {}).items():
56-
if type(value) is list:
57-
decrypted_list = [kms_client.decrypt(v) for v in value]
58-
os.environ[key] = json.dumps(decrypted_list)
59-
else:
60-
os.environ[key] = kms_client.decrypt(value)
61-
kms_client.close()
45+
if ('PLAINTEXT_VARIABLES' in env_dict
46+
or 'ENCRYPTED_VARIABLES' in env_dict):
47+
_parse_yaml_dict(env_dict.get('PLAINTEXT_VARIABLES', {}))
48+
49+
kms_client = KmsClient()
50+
_parse_yaml_dict(env_dict.get(
51+
'ENCRYPTED_VARIABLES', {}), kms_client)
52+
kms_client.close()
53+
else:
54+
_parse_yaml_dict(env_dict)
55+
56+
57+
def _parse_yaml_dict(yaml_dict, kms_client=None):
58+
"""
59+
Loads YAML dict into os.environ. All values are stored as strings to match
60+
how AWS Lambda environment variables are stored. For list variables, the
61+
list is exported into os.environ as a json string.
62+
63+
If kms_client is not empty, decrypts the variables first.
64+
65+
Does not allow for sub-dictionaries.
66+
"""
67+
for key, value in yaml_dict.items():
68+
if type(value) is dict:
69+
raise ConfigHelperError(
70+
'Found sub-dictionary in YAML config') from None
71+
elif type(value) is list:
72+
val = [kms_client.decrypt(v)
73+
for v in value] if kms_client else value
74+
os.environ[key] = json.dumps(val)
75+
else:
76+
val = kms_client.decrypt(value) if kms_client else value
77+
os.environ[key] = str(val)
6278

6379

6480
class ConfigHelperError(Exception):
    """Raised when a YAML config file cannot be found, parsed, or loaded.

    If a logger is supplied, the message is logged at ERROR level as a side
    effect of construction, so the failure is recorded even if the exception
    is swallowed upstream.
    """

    def __init__(self, message='', logger=None):
        # Pass the message to Exception so e.args, repr(), and pickling
        # carry it (the bare super().__init__() left args empty)
        super().__init__(message)
        self.message = message
        if logger is not None:
            logger.error(message)

    def __str__(self):
        return self.message

src/nypl_py_utils/functions/log_helper.py

Lines changed: 23 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -12,30 +12,37 @@
1212
}
1313

1414

15-
# Configure structlog to be machine-readable first and foremost
16-
# while still making it easy for humans to parse
17-
# End result (without additional bindings) is JSON like this:
18-
# {
19-
# "logger": "module param",
20-
# "message": "this is a test log event",
21-
# "level": "info",
22-
# "timestamp": "2023-11-01 18:50:47"
23-
# }
2415
def get_structlog(module):
    """
    Standard logging without additional bindings looks as follows:
    {
        "level": "info",
        "timestamp": "2026-01-01T12:00:00.613719Z",
        "logger": "module param",
        "message": "this is a test log event"
    }

    Note that: 1) using bind_contextvars will bind variables to *all* loggers
    that have been created, and 2) you cannot use the same module name for a
    structlog and for a standard logger
    """
    logger = logging.getLogger(module)
    # logging.getLogger returns the same logger for the same name, so only
    # attach a handler on first use -- otherwise repeated calls stack
    # handlers and every log line is emitted once per call
    if not logger.handlers:
        logger.addHandler(logging.StreamHandler(sys.stdout))
    logger.setLevel(os.environ.get('LOG_LEVEL', 'INFO').upper())
    logger.propagate = False  # Prevents double logging via the root logger

    return structlog.wrap_logger(
        logger,
        processors=[
            structlog.contextvars.merge_contextvars,
            structlog.processors.add_log_level,
            structlog.processors.TimeStamper(fmt='iso'),
            structlog.stdlib.add_logger_name,
            structlog.processors.EventRenamer('message'),
            structlog.processors.JSONRenderer(),
        ]
    )
38-
3946

4047
def standard_logger(module):
4148
logger = logging.getLogger(module)
@@ -58,7 +65,7 @@ def standard_logger(module):
5865

5966

6067
def create_log(module, json=False):
    """Create a logger for *module*.

    Returns a JSON-rendering structlog logger when *json* is truthy, and a
    plain standard-library logger otherwise.
    """
    # NOTE: the `json` flag shadows the stdlib module name inside this
    # function; the name is kept for backward compatibility with callers.
    return get_structlog(module) if json else standard_logger(module)

0 commit comments

Comments
 (0)