Skip to content

Commit 00410d0

Browse files
committed
first commit
0 parents  commit 00410d0

9 files changed

Lines changed: 802 additions & 0 deletions

File tree

.gitignore

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# Logs
2+
logs
3+
*.log
4+
npm-debug.log*
5+
yarn-debug.log*
6+
yarn-error.log*
7+
pnpm-debug.log*
8+
lerna-debug.log*
9+
10+
node_modules
11+
.DS_Store
12+
dist
13+
dist-ssr
14+
coverage
15+
*.local
16+
17+
/cypress/videos/
18+
/cypress/screenshots/
19+
20+
# Editor directories and files
21+
.vscode/*
22+
!.vscode/extensions.json
23+
.idea
24+
*.suo
25+
*.ntvs*
26+
*.njsproj
27+
*.sln
28+
*.sw?
29+
30+
__pycache__

LICENSE.md

Lines changed: 503 additions & 0 deletions
Large diffs are not rendered by default.

Makefile

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# None of these targets produce a file named after themselves.
.PHONY: install test build clean local-install

# Install the project and its dependencies into the Poetry environment.
install:
	poetry install

# Run the test suite inside the Poetry environment.
test:
	poetry run pytest

# Build the distributions into ./dist.
build:
	poetry build

# Remove build artifacts.
clean:
	rm -rf dist

# Install the locally built source distribution with pip.
# The archive name follows the package name declared in pyproject.toml ("datashield").
local-install:
	pip install ./dist/datashield-*.tar.gz

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# DataSHIELD Interface Python
2+
3+
This DataSHIELD Client Interface is a Python port of the original DataSHIELD Client Interface written in R ([DSI](https://github.com/datashield/DSI)). The provided interface can be implemented for accessing a data repository supporting the DataSHIELD infrastructure: controlled R commands executed on the server side guarantee that only non-disclosive information is returned to the client side.

datashield/__init__.py

Whitespace-only changes.

datashield/interface.py

Lines changed: 224 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,224 @@
1+
"""
2+
Classes to be implemented for each data repository type.
3+
"""
4+
5+
import importlib
from typing import Any, Optional
6+
7+
class DSResult:
    """
    This virtual class describes the result and state of execution of
    a DataSHIELD request (aggregation or assignment).

    Every method raises ``NotImplementedError`` here and is expected to be
    overridden for each data repository type.
    """

    def is_completed(self) -> bool:
        """
        Get whether the result from a previous assignment or aggregation operation was
        completed, either with a successful status or a failed one. This call must not
        wait for the completion: an immediate response is expected. Once the result is
        identified as being completed, the raw result of the operation can be fetched directly.

        :return: Whether the operation is completed
        :raises NotImplementedError: When not overridden by the implementing class
        """
        raise NotImplementedError("DSResult function not available")

    def fetch(self) -> Any:
        """
        Wait for the result to be available and fetch the result from a previous assignment or
        aggregation operation that may have been run asynchronously, in which case it is a
        one-shot call. When the assignment or aggregation operation was not asynchronous,
        the result is wrapped in the object and can be fetched multiple times.

        :return: The raw result of the operation
        :raises NotImplementedError: When not overridden by the implementing class
        """
        raise NotImplementedError("DSResult function not available")
29+
30+
31+
class DSConnection:
    """
    Connection class to a DataSHIELD server.

    Every method raises ``NotImplementedError`` here and is expected to be
    overridden for each data repository type.
    """

    def list_tables(self) -> list:
        """
        List available table names from the data repository.
        """
        raise NotImplementedError("DSConnection function not available")

    def has_table(self, name: str) -> bool:
        """
        Check whether a table with provided name exists in the data repository.

        :param name: The name of the table to check
        """
        raise NotImplementedError("DSConnection function not available")

    def list_resources(self) -> list:
        """
        List available resource names from the data repository.
        """
        raise NotImplementedError("DSConnection function not available")

    def has_resource(self, name: str) -> bool:
        """
        Check whether a resource with provided name exists in the data repository.

        :param name: The name of the resource to check
        """
        raise NotImplementedError("DSConnection function not available")

    def assign_table(self, symbol: str, table: str, variables: Optional[list] = None,
                     missings: bool = False, identifiers: Optional[str] = None,
                     id_name: Optional[str] = None, asynchronous: bool = True) -> None:
        """
        Assign a data table from the data repository to a symbol in the DataSHIELD R session.

        :param symbol: The name of the destination symbol
        :param table: The name of the table to assign
        :param variables: Presumably the variables of the table to assign, all of them when
            not provided (TODO confirm against the DSI ``dsAssignTable`` contract)
        :param missings: Presumably whether missing values are pushed to the R session
            (TODO confirm against the DSI ``dsAssignTable`` contract)
        :param identifiers: Presumably the name of the identifiers mapping to apply
            (TODO confirm against the DSI ``dsAssignTable`` contract)
        :param id_name: Presumably the name of the column holding the entity identifiers
            (TODO confirm against the DSI ``dsAssignTable`` contract)
        :param asynchronous: Whether the operation is asynchronous (if supported by the DataSHIELD server)
        """
        raise NotImplementedError("DSConnection function not available")

    def assign_resource(self, symbol: str, resource: str, asynchronous: bool = True) -> None:
        """
        Assign a resource from the data repository to a symbol in the DataSHIELD R session.

        :param symbol: The name of the destination symbol
        :param resource: The name of the resource to assign
        :param asynchronous: Whether the operation is asynchronous (if supported by the DataSHIELD server)
        """
        raise NotImplementedError("DSConnection function not available")

    def assign_expr(self, symbol: str, expr: str, asynchronous: bool = True) -> None:
        """
        Assign the result of the evaluation of an expression to a symbol in the DataSHIELD R session.

        :param symbol: The name of the destination symbol
        :param expr: The R expression to evaluate and which result will be assigned
        :param asynchronous: Whether the operation is asynchronous (if supported by the DataSHIELD server)
        """
        raise NotImplementedError("DSConnection function not available")

    def aggregate(self, expr: str, asynchronous: bool = True) -> "DSResult":
        """
        Aggregate some data from the DataSHIELD R session using a valid R expression. The
        aggregation expression must satisfy the data repository's DataSHIELD configuration.

        :param expr: The R expression to evaluate and which result will be returned
        :param asynchronous: Whether the operation is asynchronous (if supported by the DataSHIELD server)
        """
        raise NotImplementedError("DSConnection function not available")

    def list_symbols(self) -> list:
        """
        After assignments have been performed, some symbols live in the DataSHIELD R session on the server side.
        """
        raise NotImplementedError("DSConnection function not available")

    def rm_symbol(self, name: str) -> None:
        """
        After symbol removal, the data identified by the symbol will not be accessible in the DataSHIELD R session on the server side.

        :param name: The name of symbol to remove
        """
        raise NotImplementedError("DSConnection function not available")

    def list_profiles(self) -> list:
        """
        List available DataSHIELD profile names in the data repository.
        """
        raise NotImplementedError("DSConnection function not available")

    # The parameter name shadows the ``type`` builtin; it is kept because it is
    # part of the public signature.
    def list_methods(self, type: str = "aggregate") -> list:
        """
        Get the list of DataSHIELD methods that have been configured on the remote data repository.

        :param type: The type of method, either "aggregate" (default) or "assign"
        """
        raise NotImplementedError("DSConnection function not available")

    def list_packages(self) -> list:
        """
        Get the list of DataSHIELD packages with their version, that have been configured on the remote data repository.
        """
        raise NotImplementedError("DSConnection function not available")

    def list_workspaces(self) -> list:
        """
        Get the list of DataSHIELD workspaces, that have been saved on the remote data repository.
        """
        raise NotImplementedError("DSConnection function not available")

    # NOTE(review): annotated as returning a list although no return value is
    # documented for this operation; confirm the intended return type.
    def save_workspace(self, name: str) -> list:
        """
        Save the DataSHIELD R session in a workspace on the remote data repository.

        :param name: The name of the workspace
        """
        raise NotImplementedError("DSConnection function not available")

    # NOTE(review): annotated as returning a list although no return value is
    # documented for this operation; confirm the intended return type.
    def restore_workspace(self, name: str) -> list:
        """
        Restore a saved DataSHIELD R session from the remote data repository. When restoring a workspace,
        any existing symbol or file with same name will be overridden.

        :param name: The name of the workspace
        """
        raise NotImplementedError("DSConnection function not available")

    # NOTE(review): annotated as returning a list although no return value is
    # documented for this operation; confirm the intended return type.
    def rm_workspace(self, name: str) -> list:
        """
        Remove a DataSHIELD workspace from the remote data repository. Ignored if no
        such workspace exists.

        :param name: The name of the workspace
        """
        raise NotImplementedError("DSConnection function not available")

    def is_async(self) -> bool:
        """
        When a DSResult object is returned on aggregation or assignment operation,
        the raw result can be accessed asynchronously, allowing parallelization of DataSHIELD calls
        over multiple servers. The returned named list of logicals will specify if asynchronicity is supported for:
        aggregation operation ('aggregate'), table assignment operation ('assign_table'),
        resource assignment operation ('assign_resource') and expression assignment operation ('assign_expr').

        NOTE(review): the description above mentions a named list of logicals while the
        signature is annotated ``bool``; confirm which one is the intended contract.
        """
        raise NotImplementedError("DSConnection function not available")

    def keep_alive(self) -> bool:
        """
        As the DataSHIELD sessions are working in parallel, this function helps at keeping
        idle connections alive while others are working. Any communication failure must
        be silently processed.
        """
        raise NotImplementedError("DSConnection function not available")

    def disconnect(self) -> None:
        """
        This closes the connection, discards all pending work, and frees resources (e.g., memory, sockets).
        """
        raise NotImplementedError("DSConnection function not available")
195+
196+
197+
class DSDriver:
    """
    Driver class for instantiating a connection object by driver name.
    """

    @classmethod
    def new_connection(cls, name: str, args: dict, restore: Optional[str] = None) -> "DSConnection":
        """
        Creates a new connection. To be overridden by each driver implementation.

        :param name: The DataSHIELD server name
        :param args: The connection arguments, as a dictionary
        :param restore: The workspace name to be restored
        :raises NotImplementedError: When not overridden by the driver implementation
        """
        raise NotImplementedError("DSDriver function not available")

    @classmethod
    def load_class(cls, name: str) -> Any:
        """
        Load a class from its fully qualified name (dot separated).

        :param name: The driver class name, such as ``package.module.ClassName``
        :return: The class of the driver on which the ``new_connection()`` function will be called
        :raises ValueError: When the name has no module part or no class part
        :raises ImportError: When the module cannot be imported
        :raises AttributeError: When the module has no attribute with the class name
        """
        # Everything before the last dot is the module path, the remainder is the class.
        module_name, _, class_name = name.rpartition(".")
        if not module_name or not class_name:
            # Fail early with an explicit message instead of letting importlib
            # complain about an empty module name.
            raise ValueError(
                f"Expected a fully qualified class name such as 'package.module.ClassName', got {name!r}"
            )
        return getattr(importlib.import_module(module_name), class_name)

poetry.lock

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
[tool.poetry]
2+
name = "datashield"
3+
version = "0.1.0"
4+
description = "DataSHIELD Client Interface in Python."
5+
authors = ["Yannick Marcon <yannick.marcon@obiba.org>"]
6+
maintainers = ["Yannick Marcon <yannick.marcon@obiba.org>"]
7+
license = "LGPL"
8+
readme = "README.md"
9+
homepage = "https://www.datashield.org"
10+
repository = "https://github.com/datashield/datashield-python"
11+
documentation = "https://github.com/datashield/datashield-python"
12+
13+
[tool.poetry.urls]
14+
"Bug Tracker" = "https://github.com/datashield/datashield-python/issues"
15+
16+
[tool.poetry.dependencies]
17+
python = "^3.7"
18+
19+
[build-system]
20+
requires = ["poetry-core"]
21+
build-backend = "poetry.core.masonry.api"

tests/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)