|
| 1 | +# Databricks CLI |
| 2 | +# Copyright 2017 Databricks, Inc. |
| 3 | +# |
| 4 | +# Licensed under the Apache License, Version 2.0 (the "License"), except |
| 5 | +# that the use of services to which certain application programming |
| 6 | +# interfaces (each, an "API") connect requires that the user first obtain |
| 7 | +# a license for the use of the APIs from Databricks, Inc. ("Databricks"), |
| 8 | +# by creating an account at www.databricks.com and agreeing to either (a) |
| 9 | +# the Community Edition Terms of Service, (b) the Databricks Terms of |
| 10 | +# Service, or (c) another written agreement between Licensee and Databricks |
| 11 | +# for the use of the APIs. |
| 12 | +# |
| 13 | +# You may not use this file except in compliance with the License. |
| 14 | +# You may obtain a copy of the License at |
| 15 | +# |
| 16 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 17 | +# |
| 18 | +# Unless required by applicable law or agreed to in writing, software |
| 19 | +# distributed under the License is distributed on an "AS IS" BASIS, |
| 20 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 21 | +# See the License for the specific language governing permissions and |
| 22 | +# limitations under the License. |
| 23 | + |
| 24 | + |
| 25 | +from hashlib import sha1 |
| 26 | +import os |
| 27 | + |
| 28 | +from six.moves import urllib |
| 29 | + |
| 30 | +from databricks_cli.sdk import DeltaPipelinesService |
| 31 | +from databricks_cli.dbfs.api import DbfsApi |
| 32 | +from databricks_cli.dbfs.dbfs_path import DbfsPath |
| 33 | + |
| 34 | +# These imports are specific to the credentials part |
| 35 | +from databricks_cli.configure.config import get_profile_from_context |
| 36 | +from databricks_cli.configure.provider import get_config, ProfileConfigProvider |
| 37 | +from databricks_cli.utils import InvalidConfigurationError |
| 38 | + |
# Chunk size (64 KiB) used when streaming local library files through SHA-1
# in PipelinesApi._get_hashed_path.
BUFFER_SIZE = 1024 * 64
# DBFS directory under which local pipeline libraries are uploaded,
# content-addressed by their SHA-1 hash.
base_pipelines_dir = 'dbfs:/pipelines/code'
| 41 | + |
| 42 | + |
class PipelinesApi(object):
    """Client for the Delta Pipelines API.

    Wraps the raw DeltaPipelinesService and handles uploading locally-referenced
    jar libraries to DBFS before a pipeline spec is deployed.
    """

    def __init__(self, api_client):
        self.client = DeltaPipelinesService(api_client)
        self.dbfs_client = DbfsApi(api_client)

    def deploy(self, spec, headers=None):
        """
        Create or update the pipeline described by ``spec``.

        Local jar libraries referenced in ``spec['libraries']`` are uploaded to
        DBFS (content-addressed by SHA-1) and their entries rewritten to point
        at the uploaded copies before the spec is PUT to the server.
        :param spec: dict pipeline specification; must contain an 'id' key.
        :param headers: optional HTTP headers forwarded with the request.
        """
        lib_objects = LibraryObject.from_json(spec.get('libraries', []))
        local_lib_objects, external_lib_objects = \
            self._identify_local_libraries(lib_objects)

        spec['libraries'] = LibraryObject.to_json(
            external_lib_objects + self._upload_local_libraries(local_lib_objects))
        # Credentials in the request body are a temporary API requirement; see
        # _get_credentials_for_request.
        spec['credentials'] = self._get_credentials_for_request()
        self.client.client.perform_query('PUT',
                                         '/pipelines/{}'.format(spec['id']),
                                         data=spec,
                                         headers=headers)

    def delete(self, pipeline_id, headers=None):
        """Delete the pipeline with the given id."""
        self.client.delete(pipeline_id, self._get_credentials_for_request(), headers)

    @staticmethod
    def _identify_local_libraries(lib_objects):
        """
        Partitions the given set of libraries into local and those already present in
        dbfs/s3 etc. Local libraries are (currently) jar files with a file scheme or
        no scheme at all. All other libraries should be present in a supported
        external source.
        :param lib_objects: List[LibraryObject]
        :return: Tuple[List[LibraryObject], List[LibraryObject]] (local, external)
        :raises RuntimeError: if a file: URI has a malformed authority/path component.
        """
        local_lib_objects, external_lib_objects = [], []
        for lib_object in lib_objects:
            parsed_uri = urllib.parse.urlparse(lib_object.path)
            if lib_object.lib_type == 'jar' and parsed_uri.scheme == '':
                local_lib_objects.append(lib_object)
            elif lib_object.lib_type == 'jar' and parsed_uri.scheme.lower() == 'file':
                # A well-formed file URI has exactly one slash (file:/path) or three
                # (file:///path). file://path puts 'path' into the netloc, and
                # file:////path leaves a leading '//' on the path — reject both.
                if parsed_uri.path.startswith('//') or parsed_uri.netloc != '':
                    raise RuntimeError('invalid file uri scheme, '
                                       'did you mean to use file:/ or file:///')
                local_lib_objects.append(LibraryObject(lib_object.lib_type, parsed_uri.path))
            else:
                external_lib_objects.append(lib_object)
        return local_lib_objects, external_lib_objects

    def _upload_local_libraries(self, local_lib_objects):
        """
        Upload local libraries to their content-addressed DBFS locations,
        skipping files whose hash-named copy already exists remotely.
        :param local_lib_objects: List[LibraryObject] with local filesystem paths
        :return: List[LibraryObject] with the corresponding dbfs:/ paths (strings)
        """
        remote_lib_objects = [LibraryObject(llo.lib_type, self._get_hashed_path(llo.path))
                              for llo in local_lib_objects]

        # DbfsApi expects DbfsPath objects rather than raw strings.
        transformed_remote_lib_objects = [LibraryObject(rlo.lib_type, DbfsPath(rlo.path))
                                          for rlo in remote_lib_objects]
        upload_files = [llo_tuple for llo_tuple in
                        zip(local_lib_objects, transformed_remote_lib_objects)
                        if not self.dbfs_client.file_exists(llo_tuple[1].path)]

        for llo, rlo in upload_files:
            self.dbfs_client.put_file(llo.path, rlo.path, False)

        return remote_lib_objects

    @staticmethod
    def _get_hashed_path(path):
        """
        Finds the corresponding dbfs file path for the file located at the supplied
        path by calculating its hash using SHA1.
        :param path: Local file path
        :return: Remote path (base_pipelines_dir + '/' + file_hash + file_extension)
        """
        hash_buffer = sha1()
        # Stream the file in BUFFER_SIZE chunks so large jars don't load into memory.
        with open(path, 'rb') as f:
            while True:
                data = f.read(BUFFER_SIZE)
                if not data:
                    break
                hash_buffer.update(data)

        file_hash = hash_buffer.hexdigest()
        # splitext includes the period in the extension
        return '{}/{}{}'.format(base_pipelines_dir, file_hash, os.path.splitext(path)[1])

    @staticmethod
    def _get_credentials_for_request():
        """
        Only required while the deploy/delete APIs require credentials in the body
        as well as the header. Once the API requirement is relaxed, we can remove
        this function.
        :return: dict with either a 'token' key or 'user'/'password' keys
        :raises InvalidConfigurationError: if no valid configuration is resolved
        """
        profile = get_profile_from_context()
        if profile:
            # Bug fix: ProfileConfigProvider must be instantiated with the profile
            # name; the previous unbound call passed the profile string as `self`.
            config = ProfileConfigProvider(profile).get_config()
        else:
            config = get_config()
        if not config or not config.is_valid:
            raise InvalidConfigurationError.for_profile(profile)

        if config.is_valid_with_token:
            return {'token': config.token}
        return {'user': config.username, 'password': config.password}
| 142 | + |
| 143 | + |
class LibraryObject(object):
    """A (library type, path) pair describing a single pipeline library."""

    def __init__(self, lib_type, lib_path):
        # lib_type is e.g. 'jar'; lib_path is a local path, file:/ URI or dbfs:/ URI.
        self.path = lib_path
        self.lib_type = lib_type

    @classmethod
    def from_json(cls, libraries):
        """
        Deserialize library dictionaries into LibraryObjects.
        (Previous docstring said "Serialize" — the terms were swapped with to_json.)
        :param libraries: List[Dictionary{String, String}]
        :return: List[LibraryObject]
        """
        return [cls(lib_type, path)
                for library in libraries
                for lib_type, path in library.items()]

    @classmethod
    def to_json(cls, lib_objects):
        """
        Serialize LibraryObjects into library dictionaries.
        :param lib_objects: List[LibraryObject]
        :return: List[Dictionary{String, String}]
        """
        return [{lib_object.lib_type: lib_object.path} for lib_object in lib_objects]

    def __eq__(self, other):
        if not isinstance(other, LibraryObject):
            return NotImplemented
        return self.path == other.path and self.lib_type == other.lib_type

    def __ne__(self, other):
        # Python 2 (supported here via six) does not derive != from __eq__.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        # Defining __eq__ disables inherited hashing in Python 3; restore it so
        # LibraryObjects can live in sets and be used as dict keys.
        return hash((self.lib_type, self.path))

    def __repr__(self):
        return 'LibraryObject({!r}, {!r})'.format(self.lib_type, self.path)
0 commit comments