Skip to content

Commit fddead5

Browse files
arulajmani authored and Mukul Murthy committed
Add DeltaPipelines Deploy/Delete commands to the CLI
Add the pipelines deploy and pipelines delete commands to deploy and delete Delta Pipelines.
1 parent 9450202 commit fddead5

9 files changed

Lines changed: 725 additions & 0 deletions

File tree

databricks_cli/cli.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
from databricks_cli.stack.cli import stack_group
3838
from databricks_cli.groups.cli import groups_group
3939
from databricks_cli.instance_pools.cli import instance_pools_group
40+
from databricks_cli.pipelines.cli import pipelines_group
4041

4142

4243
@click.group(context_settings=CONTEXT_SETTINGS)
@@ -59,6 +60,7 @@ def cli():
5960
cli.add_command(stack_group, name='stack')
6061
cli.add_command(groups_group, name='groups')
6162
cli.add_command(instance_pools_group, name="instance-pools")
63+
cli.add_command(pipelines_group, name='pipelines')
6264

6365
if __name__ == "__main__":
6466
cli()

databricks_cli/click_types.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,11 @@ class SecretPrincipalClickType(ParamType):
9090
help = 'The name of the principal.'
9191

9292

93+
class PipelineSpecClickType(ParamType):
    # Click parameter type for the path to a Delta Pipelines deployment spec
    # file; `help` is reused by the pipelines CLI commands for their --spec
    # option text.
    name = 'SPEC'
    help = 'The path to the pipelines deployment spec file'
96+
97+
9398
class OneOfOption(Option):
9499
def __init__(self, *args, **kwargs):
95100
self.one_of = kwargs.pop('one_of')

databricks_cli/pipelines/__init__.py

Whitespace-only changes.

databricks_cli/pipelines/api.py

Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,177 @@
1+
# Databricks CLI
2+
# Copyright 2017 Databricks, Inc.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License"), except
5+
# that the use of services to which certain application programming
6+
# interfaces (each, an "API") connect requires that the user first obtain
7+
# a license for the use of the APIs from Databricks, Inc. ("Databricks"),
8+
# by creating an account at www.databricks.com and agreeing to either (a)
9+
# the Community Edition Terms of Service, (b) the Databricks Terms of
10+
# Service, or (c) another written agreement between Licensee and Databricks
11+
# for the use of the APIs.
12+
#
13+
# You may not use this file except in compliance with the License.
14+
# You may obtain a copy of the License at
15+
#
16+
# http://www.apache.org/licenses/LICENSE-2.0
17+
#
18+
# Unless required by applicable law or agreed to in writing, software
19+
# distributed under the License is distributed on an "AS IS" BASIS,
20+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21+
# See the License for the specific language governing permissions and
22+
# limitations under the License.
23+
24+
25+
from hashlib import sha1
26+
import os
27+
28+
from six.moves import urllib
29+
30+
from databricks_cli.sdk import DeltaPipelinesService
31+
from databricks_cli.dbfs.api import DbfsApi
32+
from databricks_cli.dbfs.dbfs_path import DbfsPath
33+
34+
# These imports are specific to the credentials part
35+
from databricks_cli.configure.config import get_profile_from_context
36+
from databricks_cli.configure.provider import get_config, ProfileConfigProvider
37+
from databricks_cli.utils import InvalidConfigurationError
38+
39+
BUFFER_SIZE = 1024 * 64
40+
base_pipelines_dir = 'dbfs:/pipelines/code'
41+
42+
43+
class PipelinesApi(object):
    """High-level client for deploying and deleting Delta Pipelines.

    Wraps DeltaPipelinesService for the REST calls and DbfsApi for uploading
    local libraries referenced by a pipeline deployment spec.
    """

    def __init__(self, api_client):
        self.client = DeltaPipelinesService(api_client)
        self.dbfs_client = DbfsApi(api_client)

    def deploy(self, spec, headers=None):
        """Deploy (create or update) the pipeline described by ``spec``.

        Local jar libraries listed in the spec are uploaded to DBFS and the
        spec's 'libraries' entries are rewritten to point at the uploaded
        copies before the spec is sent to the server.

        :param spec: dict parsed from the deployment spec file; must contain 'id'
        :param headers: optional extra HTTP headers
        """
        lib_objects = LibraryObject.from_json(spec.get('libraries', []))
        local_lib_objects, external_lib_objects = \
            self._identify_local_libraries(lib_objects)

        spec['libraries'] = LibraryObject.to_json(external_lib_objects +
                                                  self._upload_local_libraries(local_lib_objects))
        spec['credentials'] = self._get_credentials_for_request()
        # NOTE(review): issues the PUT directly through the underlying API
        # client (rather than DeltaPipelinesService.deploy), presumably so the
        # full spec dict is forwarded as the request body unchanged — confirm.
        self.client.client.perform_query('PUT',
                                         '/pipelines/{}'.format(spec['id']),
                                         data=spec,
                                         headers=headers)

    def delete(self, pipeline_id, headers=None):
        # Credentials are sent in the request body as well as the header; see
        # _get_credentials_for_request for why.
        self.client.delete(pipeline_id, self._get_credentials_for_request(), headers)

    @staticmethod
    def _identify_local_libraries(lib_objects):
        """
        Partitions the given set of libraries into local and those already present in dbfs/s3 etc.
        Local libraries are (currently) jar files with a file scheme or no scheme at all.
        All other libraries should be present in a supported external source.
        :param lib_objects: List[LibraryObject]
        :return: List[List[LibraryObject], List[LibraryObject]] ([Local, External])
        """
        local_lib_objects, external_lib_objects = [], []
        for lib_object in lib_objects:
            parsed_uri = urllib.parse.urlparse(lib_object.path)
            if lib_object.lib_type == 'jar' and parsed_uri.scheme == '':
                local_lib_objects.append(lib_object)
            elif lib_object.lib_type == 'jar' and parsed_uri.scheme.lower() == 'file':
                # A file URI must have exactly 1 or 3 leading slashes
                # (file:/path or file:///path); file://host/path carries a
                # netloc, which is rejected here.
                if parsed_uri.path.startswith('//') or parsed_uri.netloc != '':
                    raise RuntimeError('invalid file uri scheme, '
                                       'did you mean to use file:/ or file:///')
                local_lib_objects.append(LibraryObject(lib_object.lib_type, parsed_uri.path))
            else:
                external_lib_objects.append(lib_object)
        return local_lib_objects, external_lib_objects

    def _upload_local_libraries(self, local_lib_objects):
        """Upload local libraries to DBFS, skipping files already present.

        The remote location is content-addressed (see _get_hashed_path), so a
        file whose hash already exists in DBFS need not be re-uploaded.
        :param local_lib_objects: List[LibraryObject] with local filesystem paths
        :return: List[LibraryObject] with dbfs:/ string paths
        """
        remote_lib_objects = [LibraryObject(llo.lib_type, self._get_hashed_path(llo.path))
                              for llo in local_lib_objects]

        # DbfsApi wants DbfsPath objects; keep the string-path versions around
        # because they are what gets returned (and serialized into the spec).
        transformed_remote_lib_objects = [LibraryObject(rlo.lib_type, DbfsPath(rlo.path))
                                          for rlo in remote_lib_objects]
        upload_files = [llo_tuple for llo_tuple in
                        zip(local_lib_objects, transformed_remote_lib_objects)
                        if not self.dbfs_client.file_exists(llo_tuple[1].path)]

        for llo, rlo in upload_files:
            # overwrite=False: the remote name is derived from the content
            # hash, so an existing file is already the right content.
            self.dbfs_client.put_file(llo.path, rlo.path, False)

        return remote_lib_objects

    @staticmethod
    def _get_hashed_path(path):
        """
        Finds the corresponding dbfs file path for the file located at the supplied path by
        calculating its hash using SHA1.
        :param path: Local File Path
        :return: Remote Path (pipeline_base_dir + file_hash (dot) file_extension)
        """
        hash_buffer = sha1()
        # Hash in fixed-size chunks so large jars don't get read into memory
        # all at once.
        with open(path, 'rb') as f:
            while True:
                data = f.read(BUFFER_SIZE)
                if not data:
                    break
                hash_buffer.update(data)

        file_hash = hash_buffer.hexdigest()
        # splitext includes the period in the extension
        path = '{}/{}{}'.format(base_pipelines_dir, file_hash, os.path.splitext(path)[1])
        return path

    @staticmethod
    def _get_credentials_for_request():
        """
        Only required while the deploy/delete APIs require credentials in the body as well
        as the header. Once the API requirement is relaxed, we can remove this function.
        :return: dict with either a 'token' key or 'user'/'password' keys
        :raises InvalidConfigurationError: if no valid config is found
        """
        profile = get_profile_from_context()
        if profile:
            # NOTE(review): get_config is invoked on the class with the profile
            # as argument rather than on a ProfileConfigProvider(profile)
            # instance — confirm this matches the provider's API.
            config = ProfileConfigProvider.get_config(profile)
        else:
            config = get_config()
        if not config or not config.is_valid:
            raise InvalidConfigurationError.for_profile(profile)

        # Token auth takes precedence over username/password.
        if config.is_valid_with_token:
            return {'token': config.token}
        else:
            return {'user': config.username, 'password': config.password}
142+
143+
144+
class LibraryObject(object):
    """A single pipeline library: a (lib_type, path) pair, e.g. ('jar', 'dbfs:/x.jar')."""

    def __init__(self, lib_type, lib_path):
        self.path = lib_path
        self.lib_type = lib_type

    @classmethod
    def from_json(cls, libraries):
        """
        Deserialize library dicts into LibraryObjects.

        Fixed: the original docstring said "Serialize" here (and "Deserialize"
        on to_json) — the two descriptions were swapped.
        :param libraries: List[Dictionary{String, String}]
        :return: List[LibraryObject], one per (type, path) entry
        """
        return [cls(lib_type, path)
                for library in libraries
                for lib_type, path in library.items()]

    @classmethod
    def to_json(cls, lib_objects):
        """
        Serialize LibraryObjects back into library dicts.
        :param lib_objects: List[LibraryObject]
        :return: List[Dictionary{String, String}], one single-entry dict each
        """
        return [{lib_object.lib_type: lib_object.path} for lib_object in lib_objects]

    def __eq__(self, other):
        # NotImplemented (not False) lets Python try the reflected comparison.
        if not isinstance(other, LibraryObject):
            return NotImplemented
        return self.path == other.path and self.lib_type == other.lib_type

    def __repr__(self):
        # Debug aid; does not affect serialization.
        return 'LibraryObject({!r}, {!r})'.format(self.lib_type, self.path)

databricks_cli/pipelines/cli.py

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
# Databricks CLI
2+
# Copyright 2017 Databricks, Inc.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License"), except
5+
# that the use of services to which certain application programming
6+
# interfaces (each, an "API") connect requires that the user first obtain
7+
# a license for the use of the APIs from Databricks, Inc. ("Databricks"),
8+
# by creating an account at www.databricks.com and agreeing to either (a)
9+
# the Community Edition Terms of Service, (b) the Databricks Terms of
10+
# Service, or (c) another written agreement between Licensee and Databricks
11+
# for the use of the APIs.
12+
#
13+
# You may not use this file except in compliance with the License.
14+
# You may obtain a copy of the License at
15+
#
16+
# http://www.apache.org/licenses/LICENSE-2.0
17+
#
18+
# Unless required by applicable law or agreed to in writing, software
19+
# distributed under the License is distributed on an "AS IS" BASIS,
20+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21+
# See the License for the specific language governing permissions and
22+
# limitations under the License.
23+
24+
from json import loads as json_loads
25+
import os
26+
27+
import click
28+
29+
from databricks_cli.click_types import PipelineSpecClickType
30+
from databricks_cli.utils import eat_exceptions, CONTEXT_SETTINGS
31+
from databricks_cli.version import print_version_callback, version
32+
from databricks_cli.pipelines.api import PipelinesApi
33+
from databricks_cli.configure.config import provide_api_client, profile_option, debug_option
34+
35+
36+
@click.command(context_settings=CONTEXT_SETTINGS,
               short_help='Deploys a delta pipeline according to the pipeline specification')
@click.argument('spec_arg', default=None, required=False)
@click.option('--spec', default=None, help=PipelineSpecClickType.help)
@debug_option
@profile_option
@eat_exceptions
@provide_api_client
def deploy_cli(api_client, spec_arg, spec):
    """
    Deploys a delta pipeline according to the pipeline specification.

    * The pipeline spec is a deployment specification that explains how to run a
    Delta Pipeline on Databricks.

    * The CLI simply forwards the spec to Databricks.

    * All the local libraries referenced in the spec are uploaded to DBFS.
    """
    # Exactly one of the positional argument and the --spec option must be set;
    # both truthy or both empty is an error.
    if bool(spec_arg) == bool(spec):
        raise RuntimeError('The spec should be provided either by an option or argument')
    spec_path = spec_arg if spec_arg else spec
    PipelinesApi(api_client).deploy(_read_spec(spec_path))
57+
58+
59+
@click.command(context_settings=CONTEXT_SETTINGS,
               short_help='Stops a delta pipeline and cleans '
                          'up Databricks resources associated with it')
@click.argument('spec_arg', default=None, required=False)
@click.option('--spec', default=None, help=PipelineSpecClickType.help)
@click.option('--pipeline-id', default=None,
              help='id associated with the pipeline to be stopped')
@debug_option
@profile_option
@eat_exceptions
@provide_api_client
def delete_cli(api_client, spec_arg, spec, pipeline_id):
    """
    Stops a delta pipeline and cleans up Databricks resources associated with it
    """
    # Exactly one source for the pipeline id is allowed: the positional spec
    # argument, the --spec option, or the --pipeline-id option.
    sources_supplied = bool(spec_arg) + bool(spec) + bool(pipeline_id)
    if sources_supplied != 1:
        raise RuntimeError('Either spec should be provided as an argument '
                           'or option, or the pipeline-id should be provided')
    if spec_arg or spec:
        # Pull the pipeline id out of the spec file.
        pipeline_id = _read_spec(spec_arg if spec_arg else spec)["id"]
    PipelinesApi(api_client).delete(pipeline_id)
82+
83+
84+
def _read_spec(src):
85+
"""
86+
Reads the spec at src as a JSON if no file extension is provided, or if in the extension format
87+
if the format is supported.
88+
"""
89+
extension = os.path.splitext(src)[1]
90+
if extension.lower() == '.json':
91+
with open(src, 'r') as f:
92+
json = f.read()
93+
return json_loads(json)
94+
else:
95+
raise RuntimeError('The provided file extension for the spec is not supported')
96+
97+
98+
@click.group(context_settings=CONTEXT_SETTINGS,
             short_help='Utility to interact with the Databricks Delta Pipelines.')
@click.option('--version', '-v', is_flag=True, callback=print_version_callback,
              expose_value=False, is_eager=True, help=version)
@debug_option
@profile_option
def pipelines_group():
    """
    Utility to interact with the Databricks pipelines.
    """
    # Group body is intentionally empty; subcommands are registered below.
    pass


# Register the subcommands: `databricks pipelines deploy` and
# `databricks pipelines delete`.
pipelines_group.add_command(deploy_cli, name='deploy')
pipelines_group.add_command(delete_cli, name='delete')

databricks_cli/sdk/service.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -794,3 +794,46 @@ def get_instance_pool(self, instance_pool_id=None, headers=None):
794794
def list_instance_pools(self, headers=None):
795795
_data = {}
796796
return self.client.perform_query('GET', '/instance-pools/list', data=_data, headers=headers)
797+
798+
799+
class DeltaPipelinesService(object):
    """Low-level client for the Delta Pipelines REST API endpoints."""

    def __init__(self, client):
        self.client = client

    def deploy(self, pipeline_id=None, id=None, name=None, storage=None, filters=None,
               clusters=None, libraries=None, transformations=None, credentials=None,
               headers=None):
        """Create or update a pipeline via PUT /pipelines/{pipeline_id}.

        Only arguments that are not None are included in the request body.
        Fixed: the isinstance validation for `filters` and `credentials` is
        nested inside the corresponding `is not None` guard — as rendered it
        sat outside the guard, so omitting the argument raised TypeError.

        :raises TypeError: if filters or credentials is supplied but not a dict
        """
        _data = {}
        if pipeline_id is not None:
            _data['pipeline_id'] = pipeline_id
        if id is not None:
            _data['id'] = id
        if name is not None:
            _data['name'] = name
        if storage is not None:
            _data['storage'] = storage
        if filters is not None:
            _data['filters'] = filters
            if not isinstance(filters, dict):
                raise TypeError('Expected databricks.Filters() or dict for field filters')
        if clusters is not None:
            _data['clusters'] = clusters
        if libraries is not None:
            _data['libraries'] = libraries
        if transformations is not None:
            _data['transformations'] = transformations
        if credentials is not None:
            _data['credentials'] = credentials
            if not isinstance(credentials, dict):
                raise TypeError('Expected databricks.Credentials() or dict for field credentials')
        return self.client.perform_query('PUT', '/pipelines/{}'.format(pipeline_id),
                                         data=_data, headers=headers)

    def delete(self, pipeline_id=None, credentials=None, headers=None):
        """Delete a pipeline via DELETE /pipelines/{pipeline_id}.

        :raises TypeError: if credentials is supplied but not a dict
        """
        _data = {}
        if pipeline_id is not None:
            _data['pipeline_id'] = pipeline_id
        if credentials is not None:
            _data['credentials'] = credentials
            if not isinstance(credentials, dict):
                raise TypeError('Expected databricks.Credentials() or dict for field credentials')
        return self.client.perform_query('DELETE', '/pipelines/{}'.format(pipeline_id),
                                         data=_data, headers=headers)

tests/pipelines/__init__.py

Whitespace-only changes.

0 commit comments

Comments (0)