Skip to content

Commit f6acc22

Browse files
arulajmani authored and Mukul Murthy committed
Place wheels in content hashed folders when uploading to dbfs instead of renaming them (#251)
Change the way we compute the hashed path for wheels. Wheels can't be renamed, so instead of being renamed to their content hash like all other file types, they are placed, under their original file name, in a folder named after their content hash.
1 parent 9c3ccc3 commit f6acc22

2 files changed

Lines changed: 22 additions & 5 deletions

File tree

databricks_cli/pipelines/api.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,15 @@ def _get_hashed_path(path):
119119

120120
file_hash = hash_buffer.hexdigest()
121121
# splitext includes the period in the extension
122-
path = '{}/{}{}'.format(base_pipelines_dir, file_hash, os.path.splitext(path)[1])
122+
extension = os.path.splitext(path)[1][1:]
123+
if extension == 'whl':
124+
# Wheel file names need to follow the format described in the PEP, so we
125+
# keep the name as-is and place the wheel in a folder named after its content hash.
126+
# basename in Python returns the extension as well
127+
wheel_name = os.path.basename(path)
128+
path = '{}/{}/{}'.format(base_pipelines_dir, file_hash, wheel_name)
129+
else:
130+
path = '{}/{}.{}'.format(base_pipelines_dir, file_hash, extension)
123131
return path
124132

125133
@staticmethod

tests/pipelines/test_api.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ def file_exists_stub(_, dbfs_path):
5656
exist_mapping = {
5757
'dbfs:/pipelines/code/40bd001563085fc35165329ea1ff5c5ecbdbbeef.jar': True, # sha1 of 123
5858
'dbfs:/pipelines/code/51eac6b471a284d3341d8c0c63d0f1a286262a18.jar': False, # sha1 of 456
59+
'dbfs:/pipelines/code/51eac6b471a284d3341d8c0c63d0f1a286262a18/wheel-name-conv.whl':
60+
False # sha1 456
5961
}
6062
return exist_mapping[dbfs_path.absolute_path]
6163

@@ -82,6 +84,7 @@ def test_deploy(put_file_mock, get_credentials_mock, dbfs_path_validate, pipelin
8284
jar2 = tmpdir.join('jar2.jar').strpath
8385
jar3 = tmpdir.join('jar3.jar').strpath
8486
jar4 = tmpdir.join('jar4.jar').strpath
87+
wheel1 = tmpdir.join('wheel-name-conv.whl').strpath
8588
jar3_relpath = os.path.relpath(jar3, os.getcwd())
8689
jar4_file_prefix = 'file:{}'.format(jar4)
8790
with open(jar1, 'w') as f:
@@ -92,11 +95,14 @@ def test_deploy(put_file_mock, get_credentials_mock, dbfs_path_validate, pipelin
9295
f.write('456')
9396
with open(jar4, 'w') as f:
9497
f.write('456')
98+
with open(wheel1, 'w') as f:
99+
f.write('456')
95100
libraries = [{'jar': 'dbfs:/pipelines/code/file.jar'},
96101
{'jar': jar1},
97102
{'jar': jar2},
98103
{'jar': jar3_relpath},
99-
{'jar': jar4_file_prefix}]
104+
{'jar': jar4_file_prefix},
105+
{'whl': wheel1}]
100106
spec = copy.deepcopy(SPEC)
101107
spec['libraries'] = libraries
102108
expected_spec = copy.deepcopy(SPEC)
@@ -105,18 +111,21 @@ def test_deploy(put_file_mock, get_credentials_mock, dbfs_path_validate, pipelin
105111
{'jar': 'dbfs:/pipelines/code/40bd001563085fc35165329ea1ff5c5ecbdbbeef.jar'},
106112
{'jar': 'dbfs:/pipelines/code/51eac6b471a284d3341d8c0c63d0f1a286262a18.jar'},
107113
{'jar': 'dbfs:/pipelines/code/51eac6b471a284d3341d8c0c63d0f1a286262a18.jar'},
108-
{'jar': 'dbfs:/pipelines/code/51eac6b471a284d3341d8c0c63d0f1a286262a18.jar'}
114+
{'jar': 'dbfs:/pipelines/code/51eac6b471a284d3341d8c0c63d0f1a286262a18.jar'},
115+
{'whl':
116+
'dbfs:/pipelines/code/51eac6b471a284d3341d8c0c63d0f1a286262a18/wheel-name-conv.whl'}
109117
]
110118
expected_spec['credentials'] = CREDENTIALS
111119

112120
pipelines_api.deploy(spec)
113-
assert dbfs_path_validate.call_count == 4
114-
assert put_file_mock.call_count == 3
121+
assert dbfs_path_validate.call_count == 5
122+
assert put_file_mock.call_count == 4
115123
assert put_file_mock.call_args_list[0][0][0] == jar2
116124
assert put_file_mock.call_args_list[0][0][1].absolute_path ==\
117125
'dbfs:/pipelines/code/51eac6b471a284d3341d8c0c63d0f1a286262a18.jar'
118126
assert put_file_mock.call_args_list[1][0][0] == jar3_relpath
119127
assert put_file_mock.call_args_list[2][0][0] == jar4
128+
assert put_file_mock.call_args_list[3][0][0] == wheel1
120129
deploy_mock.assert_called_with('PUT', '/pipelines/{}'.format(PIPELINE_ID),
121130
data=expected_spec, headers=None)
122131

0 commit comments

Comments
 (0)