Skip to content

Commit 8304acd

Browse files
authored
Merge pull request #12 from BackofenLab/add_setup
Add setup for workflows and datasets
2 parents b68ebcf + f142158 commit 8304acd

4 files changed

Lines changed: 118 additions & 1 deletion

File tree

Dockerfile

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,27 @@ ENV ENABLE_TTS_INSTALL True
1010
# Enable Conda dependency resolution
1111
ENV GALAXY_CONFIG_CONDA_AUTO_INSTALL=True \
1212
GALAXY_CONFIG_CONDA_AUTO_INIT=True \
13-
GALAXY_CONFIG_USE_CACHED_DEPENDENCY_MANAGER=True
13+
GALAXY_CONFIG_USE_CACHED_DEPENDENCY_MANAGER=True
1414

1515
# Install tools
1616
ADD graphclust.yml $GALAXY_ROOT/tools.yaml
1717
RUN install-tools $GALAXY_ROOT/tools.yaml && \
1818
/tool_deps/_conda/bin/conda clean --tarballs
1919

2020
ADD tour_graphclust_wf.yaml $GALAXY_ROOT/config/plugins/tours/graphclust.wf.yaml
21+
22+
# Data libraries
23+
ADD setup_data_libraries.py $GALAXY_ROOT/setup_data_libraries.py
24+
ADD library_data.yaml $GALAXY_ROOT/library_data.yaml
25+
26+
# Hacky script to import workflows into Galaxy after installation. I would argue this step is redundant.
27+
ADD import_workflows.py $GALAXY_ROOT/import_workflows.py
28+
ADD GraphClust_one.ga $GALAXY_ROOT/GraphClust_one.ga
29+
ADD GraphClust_two.ga $GALAXY_ROOT/GraphClust_two.ga
30+
31+
# Download training data, populate the data library, and import the GraphClust workflows
32+
RUN startup_lite && \
33+
sleep 30 && \
34+
. $GALAXY_VIRTUAL_ENV/bin/activate && \
35+
python $GALAXY_ROOT/setup_data_libraries.py -i $GALAXY_ROOT/library_data.yaml && \
36+
python $GALAXY_ROOT/import_workflows.py

import_workflows.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
#!/usr/bin/env python
2+
3+
import os
4+
from bioblend import galaxy
5+
# Import the GraphClust workflows into the local Galaxy instance.
#
# Credentials of the Galaxy admin account; the defaults below are used when
# the environment variables are not set.
admin_email = os.environ.get('GALAXY_DEFAULT_ADMIN_USER', 'admin@galaxy.org')
admin_pass = os.environ.get('GALAXY_DEFAULT_ADMIN_PASSWORD', 'admin')
url = "http://localhost:8080"
gi = galaxy.GalaxyInstance(url=url, email=admin_email, password=admin_pass)

# The Dockerfile copies the workflow files to $GALAXY_ROOT, so look there
# first and fall back to /galaxy-central when the variable is not set.
galaxy_root = os.environ.get('GALAXY_ROOT', '/galaxy-central')

wf = galaxy.workflows.WorkflowClient(gi)
# Import order: GraphClust_two first, then GraphClust_one.
for workflow_file in ('GraphClust_two.ga', 'GraphClust_one.ga'):
    wf.import_workflow_from_local_path(os.path.join(galaxy_root, workflow_file))

library_data.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
libraries:
2+
- name: "Test data"
3+
files:
4+
- https://raw.githubusercontent.com/eteriSokhoyan/test-data/master/cliques-high-representatives.fa
5+
- https://raw.githubusercontent.com/eteriSokhoyan/test-data/master/cliques-low-representatives.fa

setup_data_libraries.py

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
#!/usr/bin/env python
2+
3+
import os
4+
import time
5+
import yaml
6+
import argparse
7+
import subprocess
8+
import logging as log
9+
from bioblend import galaxy
10+
from subprocess import CalledProcessError
11+
12+
13+
def main( data ):
    """
    Load files into a Galaxy data library.

    ``data`` is a YAML document (a string or an open file object) with a
    top-level ``libraries`` list. Each entry provides a ``name``, which
    becomes a folder inside the 'Training Data' library, and ``files``, a
    list of URLs that are uploaded into that folder.

    Nothing is created when the document lists no libraries. After the
    uploads are submitted the function polls Galaxy until every job has
    reached a finished state; jobs that did not end in state 'ok' are
    logged as errors instead of being waited on forever.
    """
    log.info("Importing data libraries.")

    galaxy_url = "http://localhost:8080"
    # The environment variables are set by the parent container
    admin_email = os.environ.get('GALAXY_DEFAULT_ADMIN_USER', 'admin@galaxy.org')
    admin_pass = os.environ.get('GALAXY_DEFAULT_ADMIN_PASSWORD', 'admin')

    # Establish connection to galaxy instance
    gi = galaxy.GalaxyInstance(url=galaxy_url, email=admin_email, password=admin_pass)
    jc = galaxy.jobs.JobsClient(gi)

    # safe_load builds plain Python objects only and, unlike yaml.load, does
    # not need an explicit Loader argument. An empty document yields None.
    libraries = yaml.safe_load(data) or {}

    # Map folder name -> list of URLs to upload into it.
    folders = dict()
    for entry in libraries.get('libraries') or []:
        folders[entry['name']] = entry.get('files') or []

    if not folders:
        return

    log.info("Create 'Training Data' library.")
    library = gi.libraries.create_library('Training Data', 'Data pulled from online archives.')
    lib_id = library['id']

    for fname, urls in folders.items():
        log.info("Creating folder: %s", fname)
        folder = gi.libraries.create_folder(lib_id, fname)
        # create_folder's result is indexed as a list; the first item is used.
        folder_id = folder[0]['id']
        for file_url in urls:
            gi.libraries.upload_file_from_url(
                lib_id,
                file_url,
                folder_id=folder_id,
            )

    # Poll until no job is pending any more. 'error' and 'deleted' count as
    # finished too, otherwise a single failed upload would block forever.
    finished_states = ('ok', 'error', 'deleted')
    while True:
        jobs = jc.get_jobs()
        if all(job['state'] in finished_states for job in jobs):
            break
        time.sleep(3)

    for job in jobs:
        if job['state'] != 'ok':
            log.error("Job %s ended in state '%s'.", job.get('id'), job['state'])

    # Extra grace period after the jobs report completion.
    # NOTE(review): presumably gives Galaxy time to finalize the datasets - confirm.
    time.sleep(20)
    log.info("Finished importing test data.")
66+
67+
68+
if __name__ == '__main__':
    # Command line entry point: parse the options and hand the YAML file
    # describing the libraries to main().
    parser = argparse.ArgumentParser(
        description='Populate the Galaxy data library with test data.'
    )
    parser.add_argument("-v", "--verbose", help="Increase output verbosity.",
                        action="store_true")
    # The YAML description is mandatory; without it main() has nothing to
    # read, so let argparse report the missing option with a usage message.
    parser.add_argument('-i', '--infile', type=argparse.FileType('r'),
                        required=True,
                        help="YAML file listing the libraries and the URLs to upload.")

    #TODO: Add options to override the admin_user and admin_password + specify
    #      files to upload via command line interface.

    args = parser.parse_args()
    if args.verbose:
        log.basicConfig(level=log.DEBUG)

    # argparse opened the file for us; make sure it is closed again.
    with args.infile as infile:
        main( infile )
84+

0 commit comments

Comments
 (0)