Skip to content

Commit fe99672

Browse files
Merge pull request #56 from NYPL/snowflake-client
Add SnowflakeClient
2 parents 4d7025a + 8408520 commit fe99672

File tree

12 files changed

+376
-76
lines changed

12 files changed

+376
-76
lines changed

.github/workflows/deploy-production.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,10 @@ jobs:
3232
- name: Checkout repo
3333
uses: actions/checkout@v6
3434

35-
- name: Set up Python 3.9
35+
- name: Set up Python 3.13
3636
uses: actions/setup-python@v6
3737
with:
38-
python-version: '3.9'
38+
python-version: '3.13'
3939
cache: 'pip'
4040
cache-dependency-path: 'pyproject.toml'
4141

.github/workflows/deploy-qa.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,10 @@ jobs:
3232
- name: Checkout repo
3333
uses: actions/checkout@v6
3434

35-
- name: Set up Python 3.9
35+
- name: Set up Python 3.13
3636
uses: actions/setup-python@v6
3737
with:
38-
python-version: '3.9'
38+
python-version: '3.13'
3939
cache: 'pip'
4040
cache-dependency-path: 'pyproject.toml'
4141

.github/workflows/run-unit-tests.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,10 @@ jobs:
1717
- name: Checkout repo
1818
uses: actions/checkout@v6
1919

20-
- name: Set up Python 3.9
20+
- name: Set up Python 3.13
2121
uses: actions/setup-python@v6
2222
with:
23-
python-version: '3.9'
23+
python-version: '3.13'
2424
cache: 'pip'
2525
cache-dependency-path: 'pyproject.toml'
2626

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,9 @@
11
# Changelog
2+
## v1.10.0 3/17/26
3+
- Add Snowflake client
4+
- Update config helper to allow loading config files without PLAINTEXT/ENCRYPTED structure
5+
- Update structured log helper to include name of the logger by default
6+
27
## v1.9.1 3/11/26
38
- Add merge_contextvars to default structlog configuration
49

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ This package contains common Python utility classes and functions.
1212
* Connecting to and querying a MySQL database
1313
* Connecting to and querying a PostgreSQL database
1414
* Connecting to and querying Redshift
15+
* Connecting to and querying Snowflake
1516
* Making requests to OAuth2-authenticated APIs such as the NYPL Platform API and Sierra
1617
* Interacting with vendor APIs such as cloudLibrary
1718

pyproject.toml

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
44

55
[project]
66
name = "nypl_py_utils"
7-
version = "1.9.1"
7+
version = "1.10.0"
88
authors = [
99
{ name="Aaron Friedman", email="aaronfriedman@nypl.org" },
1010
]
@@ -74,6 +74,10 @@ sftp-client = [
7474
"nypl_py_utils[log-helper]",
7575
"paramiko>=3.4.1"
7676
]
77+
snowflake-client = [
78+
"nypl_py_utils[log-helper]",
79+
"snowflake-connector-python>=4.3.0"
80+
]
7781
config-helper = [
7882
"nypl_py_utils[kms-client,log-helper]",
7983
"PyYAML>=6.0"
@@ -93,11 +97,11 @@ research-catalog-identifier-helper = [
9397
"requests>=2.28.1"
9498
]
9599
development = [
96-
"nypl_py_utils[avro-client,kinesis-client,kms-client,mysql-client,oauth2-api-client,postgresql-client,redshift-client,s3-client,secrets-manager-client,sftp-client,config-helper,log-helper,obfuscation-helper,patron-data-helper,research-catalog-identifier-helper]",
100+
"nypl_py_utils[avro-client,cloudlibrary-client,kinesis-client,kms-client,mysql-client,oauth2-api-client,postgresql-client,redshift-client,s3-client,secrets-manager-client,sftp-client,snowflake-client,config-helper,log-helper,obfuscation-helper,patron-data-helper,research-catalog-identifier-helper]",
97101
"flake8>=6.0.0",
98102
"freezegun>=1.2.2",
99103
"mock>=4.0.3",
100-
"pytest==8.0",
104+
"pytest>=8.0.0",
101105
"pytest-mock>=3.10.0",
102106
"requests-mock>=1.10.0"
103107
]
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
import snowflake.connector as sc
2+
3+
from nypl_py_utils.functions.log_helper import create_log
4+
5+
6+
class SnowflakeClient:
    """
    Client for managing connections to Snowflake.

    Exactly one of private_key or password must be provided. No connection
    is opened until connect() is called.
    """

    def __init__(self, account, user, private_key=None, password=None):
        """
        Stores the credentials used by connect().

        Parameters
        ----------
        account: str
            The Snowflake account identifier
        user: str
            The Snowflake user name
        private_key: optional
            The private key to authenticate with. Mutually exclusive with
            password.
        password: str, optional
            The password to authenticate with. Mutually exclusive with
            private_key.

        Raises
        ------
        SnowflakeClientError
            If both or neither of private_key and password are set
        """
        self.logger = create_log('snowflake_client')
        if (password is None) == (private_key is None):
            raise SnowflakeClientError(
                'Either password or private key must be set (but not both)',
                self.logger
            ) from None

        self.conn = None
        self.account = account
        self.user = user
        self.private_key = private_key
        self.password = password

    def connect(self, mfa_code=None, **kwargs):
        """
        Connects to Snowflake using the given credentials. If you're connecting
        locally, you should be using the password and mfa_code. If the
        connection is for production code, a private_key should be set up.

        Parameters
        ----------
        mfa_code: str, optional
            The six-digit MFA code. Only necessary for connecting as a human
            user.
        kwargs:
            All possible arguments (such as which warehouse to use or how
            long to wait before timing out) can be found here:
            https://docs.snowflake.com/en/developer-guide/python-connector/python-connector-api#connect

        Raises
        ------
        SnowflakeClientError
            If a password is being used without an MFA code, or if the
            connection attempt fails
        """
        self.logger.info('Connecting to Snowflake')
        if self.private_key is not None:
            credentials = {'private_key': self.private_key}
        else:
            if mfa_code is None:
                raise SnowflakeClientError(
                    'When using a password, an MFA code must also be provided',
                    self.logger
                ) from None

            # The MFA code is appended to the password and Snowflake is told
            # to split it back out via passcode_in_password
            credentials = {
                'password': self.password + mfa_code,
                'passcode_in_password': True,
            }

        try:
            self.conn = sc.connect(
                account=self.account,
                user=self.user,
                **credentials,
                **kwargs)
        except Exception as e:
            raise SnowflakeClientError(
                f'Error connecting to Snowflake: {e}', self.logger
            ) from None

    def execute_query(self, query, **kwargs):
        """
        Executes an arbitrary query against the given connection.

        Note that:
            1) All results will be fetched by default, so this method is not
               suitable if you do not want to load all rows into memory
            2) AUTOCOMMIT is on by default, so this method is not suitable if
               you want to execute multiple queries in a single transaction
            3) This method can be used for both read and write queries, but
               it's not optimized for writing -- there is no parameter binding
               or executemany support, and the return value for write queries
               can be unpredictable.

        Parameters
        ----------
        query: str
            The SQL query to execute
        kwargs:
            All possible arguments (such as timeouts) can be found here:
            https://docs.snowflake.com/en/developer-guide/python-connector/python-connector-api#execute

        Returns
        -------
        sequence
            A list of tuples

        Raises
        ------
        SnowflakeClientError
            If connect() has not been called yet, or if executing the query
            or fetching its results fails. In the latter case the connection
            is also closed.
        """
        if self.conn is None:
            # Fail with a clear message rather than an AttributeError on None
            raise SnowflakeClientError(
                'Cannot query Snowflake before connect() has been called',
                self.logger
            ) from None

        self.logger.info('Querying Snowflake')
        cursor = self.conn.cursor()
        try:
            try:
                cursor.execute(query, **kwargs)
                return cursor.fetchall()
            finally:
                # The cursor is closed whether or not the query succeeded
                cursor.close()
        except Exception as e:
            # If there was an error, also close the connection
            self.close_connection()

            # Truncate long queries so the error message stays readable
            short_q = str(query)
            if len(short_q) > 2500:
                short_q = short_q[:2497] + '...'
            raise SnowflakeClientError(
                f'Error executing Snowflake query {short_q}: {e}', self.logger
            ) from None

    def close_connection(self):
        """Closes the connection. Does nothing if connect() was never called"""
        if self.conn is None:
            return

        self.logger.info('Closing Snowflake connection')
        self.conn.close()
123+
124+
125+
class SnowflakeClientError(Exception):
    """
    Error raised by SnowflakeClient.

    Parameters
    ----------
    message: str, optional
        The error message, also returned by str()
    logger: optional
        If given, the message is logged on it at error level when the
        exception is created
    """

    def __init__(self, message='', logger=None):
        # Without this call, args is whatever was passed positionally: it
        # would include the logger object, or be empty when message is
        # passed by keyword
        super().__init__(message)
        self.message = message
        if logger is not None:
            logger.error(message)

    def __str__(self):
        return self.message

src/nypl_py_utils/functions/config_helper.py

Lines changed: 40 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,13 @@
1010

1111
def load_env_file(run_type, file_string):
1212
"""
13-
This method loads a YAML config file containing environment variables,
14-
decrypts whichever are encrypted, and puts them all into os.environ as
15-
strings. For a YAML variable containing a list of values, the list is
16-
exported into os.environ as a json string and should be loaded as such.
13+
This method reads a YAML config file containing environment variables and
14+
loads them all into os.environ as strings. See _parse_yaml_dict for more.
1715
18-
It requires the YAML file to be split into a 'PLAINTEXT_VARIABLES' section
19-
and an 'ENCRYPTED_VARIABLES' section. See config/sample.yaml for an example
20-
config file.
16+
If the config file is divided into 'PLAINTEXT_VARIABLES' and
17+
'ENCRYPTED_VARIABLES' sections (see config/sample.yaml for an example), the
18+
'ENCRYPTED_VARIABLES' variables will be decrypted first. Otherwise, all
19+
variables will be loaded as is.
2120
2221
Parameters
2322
----------
@@ -36,31 +35,50 @@ def load_env_file(run_type, file_string):
3635
try:
3736
env_dict = yaml.safe_load(env_stream)
3837
except yaml.YAMLError:
39-
logger.error('Invalid YAML file: {}'.format(open_file))
4038
raise ConfigHelperError(
4139
'Invalid YAML file: {}'.format(open_file)) from None
4240
except FileNotFoundError:
43-
logger.error('Could not find config file {}'.format(open_file))
4441
raise ConfigHelperError(
4542
'Could not find config file {}'.format(open_file)) from None
4643

4744
if env_dict:
48-
for key, value in env_dict.get('PLAINTEXT_VARIABLES', {}).items():
49-
if type(value) is list:
50-
os.environ[key] = json.dumps(value)
51-
else:
52-
os.environ[key] = str(value)
45+
if ('PLAINTEXT_VARIABLES' in env_dict
46+
or 'ENCRYPTED_VARIABLES' in env_dict):
47+
_parse_yaml_dict(env_dict.get('PLAINTEXT_VARIABLES', {}))
5348

54-
kms_client = KmsClient()
55-
for key, value in env_dict.get('ENCRYPTED_VARIABLES', {}).items():
56-
if type(value) is list:
57-
decrypted_list = [kms_client.decrypt(v) for v in value]
58-
os.environ[key] = json.dumps(decrypted_list)
59-
else:
60-
os.environ[key] = kms_client.decrypt(value)
61-
kms_client.close()
49+
kms_client = KmsClient()
50+
_parse_yaml_dict(env_dict.get(
51+
'ENCRYPTED_VARIABLES', {}), kms_client)
52+
kms_client.close()
53+
else:
54+
_parse_yaml_dict(env_dict)
55+
56+
57+
def _parse_yaml_dict(yaml_dict, kms_client=None):
    """
    Loads YAML dict into os.environ. All values are stored as strings to match
    how AWS Lambda environment variables are stored. For list variables, the
    list is exported into os.environ as a json string.

    If kms_client is not None, decrypts the variables first (each element
    individually for list variables).

    Does not allow for sub-dictionaries.

    Parameters
    ----------
    yaml_dict: dict
        Mapping of environment variable names to their values
    kms_client: optional
        Object with a decrypt method, used to decrypt every value

    Raises
    ------
    ConfigHelperError
        If any value is itself a dictionary
    """
    should_decrypt = kms_client is not None
    for key, value in yaml_dict.items():
        # isinstance (rather than an exact type() match) also catches dict and
        # list subclasses, which would otherwise be stored via str()
        if isinstance(value, dict):
            raise ConfigHelperError(
                'Found sub-dictionary in YAML config') from None
        elif isinstance(value, list):
            if should_decrypt:
                value = [kms_client.decrypt(v) for v in value]
            os.environ[key] = json.dumps(value)
        else:
            if should_decrypt:
                value = kms_client.decrypt(value)
            os.environ[key] = str(value)
6278

6379

6480
class ConfigHelperError(Exception):
    """Error raised by the config helper; logs its message when one is given"""

    def __init__(self, message=None):
        self.message = message
        if message is None:
            # Nothing to log for a message-less error
            return
        logger.error(message)

src/nypl_py_utils/functions/log_helper.py

Lines changed: 24 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -12,30 +12,38 @@
1212
}
1313

1414

15-
# Configure structlog to be machine-readable first and foremost
16-
# while still making it easy for humans to parse
17-
# End result (without additional bindings) is JSON like this:
18-
# {
19-
# "logger": "module param",
20-
# "message": "this is a test log event",
21-
# "level": "info",
22-
# "timestamp": "2023-11-01 18:50:47"
23-
# }
2415
def get_structlog(module):
    """
    Returns a structlog logger that writes JSON to stdout through a standard
    library logger named after the given module.

    Standard logging without additional bindings looks as follows:
    {
        "level": "info",
        "timestamp": "2026-01-01T12:00:00.613719Z",
        "logger": "module param",
        "message": "this is a test log event"
    }

    Note that: 1) you should *NOT* use the same module name for a structlog
    and for a standard logger, and 2) using bind_contextvars will bind
    variables to *all* loggers. To bind a context variable on one logger
    without binding it to others, use `logger = logger.bind(contextvar=0)`.

    The log level is read from the LOG_LEVEL environment variable and
    defaults to INFO.
    """
    logger = logging.getLogger(module)
    # getLogger returns the same object for a given name, so drop any
    # handlers left by an earlier call; otherwise each call would add another
    # StreamHandler and every event would be emitted multiple times
    for handler in list(logger.handlers):
        logger.removeHandler(handler)
    logger.addHandler(logging.StreamHandler(sys.stdout))
    logger.setLevel(os.environ.get('LOG_LEVEL', 'INFO').upper())
    logger.propagate = False  # Prevents double logging

    return structlog.wrap_logger(
        logger,
        processors=[
            structlog.contextvars.merge_contextvars,
            structlog.processors.add_log_level,
            structlog.processors.TimeStamper(fmt='iso'),
            structlog.stdlib.add_logger_name,
            structlog.processors.EventRenamer('message'),
            structlog.processors.JSONRenderer(),
        ]
    )
38-
3947

4048
def standard_logger(module):
4149
logger = logging.getLogger(module)
@@ -58,7 +66,7 @@ def standard_logger(module):
5866

5967

6068
def create_log(module, json=False):
    """
    Returns a logger for the given module name: the structlog logger from
    get_structlog when json is truthy, otherwise the one from standard_logger.
    """
    return get_structlog(module) if json else standard_logger(module)

0 commit comments

Comments
 (0)