Initial commit

ardcore · ardcore · commit 5464f1762cf4 · 2020-11-20T20:57:08.000+01:00
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,90 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*,cover
+.hypothesis/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# IPython Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# dotenv
+.env
+
+# virtualenv
+.venv/
+venv/
+ENV/
+
+# Spyder project settings
+.spyderproject
+
+# Rope project settings
+.ropeproject
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1 @@
+# todo: license
diff --git a/README.md b/README.md
@@ -0,0 +1,93 @@
+# Molecule One Batch Scoring API Wrapper
+
+## Usage:
+
+### Installation:
+
+```
+pip install git+https://github.com/molecule-one/m1wrapper-python
+```
+NOTE: make sure to install package to the intended python environment.
+
+### Initialization:
+```py
+from m1wrapper import MoleculeOneWrapper
+m1wrapper = MoleculeOneWrapper(token)
+```
+- *token*: API token you'll need to authorize in our system. You can
+  generate yours at https://app.molecule.one/dashboard/user/api-tokens
+- *baseUrl* (optional): URI of the batch scoring service. Defaults to Molecule One's public
+  server, but you will need to provide custom value if you're using a dedicated solution.
+
+### Running batch scoring request:
+
+```py
+search = m1wrapper.run_batch_search(
+    targets=['cc', 'O=C(Nc1cc(Nc2nc(-c3cnccc3)ccn2)c(cc1)C)c3ccc(cc3)CN3CCN(CC3)C'],
+    parameters={'exploratory_search': False, 'detail_level': 'score'}
+)
+```
+- *targets*: list of target compounds in SMILES format
+- *parameters* (optional): additional configuration for your batch
+  scoring request. See [Batch Scoring API](https://github.com/molecule-one/api/blob/master/batch-scoring.md) for more information.
+
+
+### Getting existing scoring request by id:
+```py
+search = m1wrapper.get_batch_search(id)
+```
+
+### Checking if your scoring request processing is finished:
+```py
+search.is_finished()
+```
+
+### Checking full search status:
+```py
+status = search.get_status()
+```
+In response, you’ll get information about your batch scoring processing progress, i.e.:
+`{"queued":92,"running":4,"finished":104,"error":0}`
+
+### Getting partial results:
+Results are made available as soon as they are processed. This method
+provides a way to start working with some of your results without waiting until all targets are processed.
+This usually means implementing some kind of polling/scheduling on your side.
+```py
+results = search.get_partial_results(precision=5, only=["targetSmiles", "result"])
+```
+- *precision* (optional): format the floating point scores returned by the system (certainty, result, price) to given number of significant digits.
+- *only* (optional): fetch only a subset of values. Defaults to
+  all values.
+
+Returns JSON object of the following shape:
+```json
+    [
+      {
+        "targetSmiles": "Cc1ccc(cc1Nc2nccc(n2)c3cccnc3)NC(=O)c4ccc(cc4)CN5CCN(CC5)C",
+        "status": "ok",
+        "result": "7.53",
+        "certainty": "0.581",
+        "price": "5230",
+        "reactionCount": 5,
+        "timedOut": false
+      },
+    ...
+    ]
+```
+See [Batch Scoring API](https://github.com/molecule-one/api/blob/master/batch-scoring.md) for a full explanation of returned fields.
+
+### Getting complete results:
+```py
+results = search.get_results(precision=5, only=["targetSmiles", "result"])
+```
+If you don't want to implement scheduling on your own, this method
+provides a simple way to wait until all targets are processed (sending periodic checks using
+`search.is_finished()`) and return only when all results are available. It's a
+blocking operation.
+Parameters and returned JSON are the same as with `get_partial_results()`.
+
+### Deleting your data:
+```py
+m1wrapper.delete_batch_search(search.search_id)
+```
diff --git a/examples/__init__.py b/examples/__init__.py
diff --git a/examples/example.py b/examples/example.py
@@ -0,0 +1,30 @@
+from m1wrapper import MoleculeOneWrapper
+
+if __name__ == '__main__':
+    # get your token at https://app.molecule.one/dashboard/user/api-tokens
+    token = 'f4614b1d96124d09ab14fbe6537c9007_4ea55651a3904037b9fe4c4a72d2b85d'
+
+    m1wrapper = MoleculeOneWrapper(token)
+
+    search = m1wrapper.run_batch_search(
+        targets=['cc', 'O=C(Nc1cc(Nc2nc(-c3cnccc3)ccn2)c(cc1)C)c3ccc(cc3)CN3CCN(CC3)C'],
+        parameters={'exploratory_search': False, 'detail_level': 'score'}
+    )
+    print('created search:', search.search_id)
+
+    search = m1wrapper.get_batch_search(search.search_id)
+    print('got search:', search.search_id)
+
+    status = search.get_status()
+    print('status:', status)
+
+    is_finished = search.is_finished()
+    print('is finished:', is_finished)
+
+    partial_results = search.get_partial_results()
+    print("partial results:", partial_results)
+
+    results = search.get_results(precision=4, only=['targetSmiles', 'price', 'result'])
+    print('results:', results)
+
+    m1wrapper.delete_batch_search(search.search_id)
diff --git a/m1wrapper/__init__.py b/m1wrapper/__init__.py
@@ -0,0 +1 @@
+from .m1wrapper import MoleculeOneWrapper
diff --git a/m1wrapper/config.py b/m1wrapper/config.py
@@ -0,0 +1,6 @@
+# Version of this wrapper; sent in the User-Agent request header.
+wrapper_version = 0.1
+# Token scheme version; forms the 'ApiToken-<version>' Authorization prefix.
+api_token_version = 'v1'
+# Default base URL used by MoleculeOneWrapper when none is passed in.
+api_base_url = 'https://app.molecule.one/api/v1/'
+# Path (relative to the base URL) for creating, checking and deleting searches.
+api_search_endpoint = 'batch-search'
+# Path (relative to the base URL) for fetching search results.
+api_results_endpoint = 'batch-search-result'
+# Seconds BatchSearch.get_results() sleeps between is_finished() checks.
+status_check_delay_s = 15
diff --git a/m1wrapper/m1wrapper.py b/m1wrapper/m1wrapper.py
@@ -0,0 +1,54 @@
+from typing import List, Dict
+
+from .search import BatchSearch
+from .config import api_token_version, wrapper_version, api_base_url
+
+
+class MoleculeOneWrapper:
+    """
+    Wrapper for MoleculeOne Batch Scoring REST API
+    """
+
+    def __init__(
+        self,
+        api_token: str,
+        api_base_url: str = api_base_url
+    ):
+        self.api_token = api_token
+        self.api_base_url = f'{api_base_url}/' # ensure base_url ends with '/'
+        self.request_headers = self.__prepare_request_headers()
+
+    def __prepare_request_headers(self) -> dict:
+        return {
+            'Content-Type': 'application/json',
+            'User-Agent': f'api-wrapper-python/{wrapper_version}',
+            'Authorization': f'ApiToken-{api_token_version} {self.api_token}'
+        }
+
+    def run_batch_search(
+            self,
+            targets: List[str],
+            parameters: Dict = None
+    ) -> BatchSearch:
+        return BatchSearch(
+                self.api_base_url,
+                self.request_headers,
+                targets=targets,
+                parameters=parameters
+            )
+
+    def get_batch_search(self, search_id: str) -> BatchSearch:
+        return BatchSearch.from_id(
+                self.api_base_url,
+                self.request_headers,
+                search_id
+        )
+
+    def delete_batch_search(self, search_id: str):
+        search = BatchSearch.from_id(
+                self.api_base_url,
+                self.request_headers,
+                search_id
+        )
+        return search.delete()
+
diff --git a/m1wrapper/search.py b/m1wrapper/search.py
@@ -0,0 +1,112 @@
+import requests
+import json
+import time
+from typing import List
+from urllib.parse import urljoin
+
+from .config import (
+    api_search_endpoint,
+    api_results_endpoint,
+    status_check_delay_s
+)
+
+
+def format_error_message(error):
+    """
+    Turn a decoded JSON error body into a one-line message.
+
+    :param error: decoded response body; expected to be a dict that may
+        carry a "message" and an "errors" entry.
+    :return: "<message>: <repr(errors)>" when both are present and truthy,
+        "<message>" when only the message is, otherwise "unknown error".
+    """
+    # The body comes from the server, so its shape is not guaranteed:
+    # a missing key or a non-dict payload must not raise here.
+    if not isinstance(error, dict):
+        return "unknown error"
+    message = error.get("message")
+    details = error.get("errors")
+    if message and details:
+        return f'{message}: {repr(details)}'
+    if message:
+        return f'{message}'
+    return "unknown error"
+
+
+def maybe_handle_error(response):
+    if response.status_code >= 400 and response.status_code <= 500:
+        error = format_error_message(response.json())
+        raise requests.exceptions.HTTPError(error)
+    else:
+        response.raise_for_status()
+
+
+class BatchSearch:
+    def __init__(
+        self,
+        base_url,
+        headers,
+        search_id=None,
+        targets=None,
+        parameters=None,
+    ):
+        self.search_id = search_id
+        self.base_url = base_url
+        self.headers = headers
+        if self.search_id is None:
+            new_search = self.__run(targets=targets, parameters=parameters)
+            self.search_id = new_search['id']
+
+    def __prepare_payload(self, targets, parameters) -> dict:
+        return {
+            'targets': targets,
+            'params': parameters or {},
+        }
+
+    def __run(self, targets, parameters):
+        payload = self.__prepare_payload(targets, parameters)
+        url = urljoin(self.base_url, api_search_endpoint),
+        response = requests.post(
+            urljoin(self.base_url, api_search_endpoint),
+            data=json.dumps(payload),
+            headers=self.headers,
+        )
+        maybe_handle_error(response)
+        return response.json()
+
+    @classmethod
+    def from_id(cls, base_url, headers, search_id):
+        return cls(base_url, headers, search_id)
+
+    def get_status(self):
+        response = requests.get(
+            urljoin(self.base_url, f'{api_search_endpoint}/{self.search_id}'),
+            headers=self.headers,
+        )
+        maybe_handle_error(response)
+        return response.json()
+
+    def is_finished(self):
+        status = self.get_status()
+        return status['queued'] == 0 and status['running'] == 0
+
+    def get_results(
+            self,
+            precision: int = None,
+            only: List[str] = None
+    ):
+        while self.is_finished() is False:
+            time.sleep(status_check_delay_s)
+
+        return self.get_partial_results(precision, only)
+
+    def get_partial_results(
+        self,
+        precision: int = None,
+        only: List[str] = None
+    ):
+        response = requests.get(
+            urljoin(self.base_url, f'{api_results_endpoint}/{self.search_id}'),
+            headers=self.headers,
+            params={
+                'precision': precision,
+                'only': only
+            }
+        )
+        maybe_handle_error(response)
+        return response.json()
+
+    def delete(self):
+        response = requests.delete(
+            urljoin(self.base_url, f'{api_search_endpoint}/{self.search_id}'),
+            headers=self.headers,
+        )
+        maybe_handle_error(response)
+        return True
diff --git a/requirements.txt b/requirements.txt
diff --git a/setup.py b/setup.py

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+from .m1wrapper import MoleculeOneWrapper`