Skip to content

Commit f2d39c9

Browse files
author
Will Trimble
committed
Merge WT branch, update to Argparse
2 parents f559d32 + 8aab53a commit f2d39c9

8 files changed

Lines changed: 249 additions & 298 deletions

File tree

LICENSE

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
BSD 2-Clause License
2+
3+
Copyright (c) 2018, University of Chicago and Argonne National Laboratory
4+
All rights reserved.
5+
6+
Redistribution and use in source and binary forms, with or without
7+
modification, are permitted provided that the following conditions are met:
8+
9+
* Redistributions of source code must retain the above copyright notice, this
10+
list of conditions and the following disclaimer.
11+
12+
* Redistributions in binary form must reproduce the above copyright notice,
13+
this list of conditions and the following disclaimer in the documentation
14+
and/or other materials provided with the distribution.
15+
16+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
20+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

mglib/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,4 @@
99
API_URL = "https://api.mg-rast.org/"
1010
SHOCK_URL = "https://shock.mg-rast.org/"
1111
AUTH_LIST = "Jared Bischof, Travis Harrison, Folker Meyer, Tobias Paczian, Andreas Wilke"
12-
SEARCH_FIELDS = ["function", "organism", "md5", "name", "metadata", "biome", "feature", "material", "country", "location", "longitude", "latitude", "created", "env_package_type", "project_id", "project_name", "PI_firstname", "PI_lastname", "sequence_type", "seq_method", "collection_date"]
12+
SEARCH_FIELDS = ["all", "name", "investigation_type", "biome", "feature", "material", "continent", "country", "location", "longitude", "latitude", "created_on", "env_package", "project_id", "project_name", "PI_firstname", "PI_lastname", "sequence_type", "seq_meth", "collection_date", "bp_count_raw", "sequence_count_raw", "average_length_raw", "taxonomy", "function"]

mglib/mglib.py

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import json
99
import string
1010
import random
11+
import hashlib
1112
import subprocess
1213
try:
1314
from StringIO import StringIO
@@ -100,20 +101,24 @@ def obj_from_url(url, auth=None, data=None, debug=False, method=None):
100101
return obj
101102

102103
# print to file results of MG-RAST or Shock API
def file_from_url(url, handle, auth=None, data=None, debug=False, sha1=False):
    """Stream the body of an API call to *handle* in 8 KiB chunks.

    handle -- writable text file object (chunks are decoded as utf-8)
    sha1   -- when True, fold every chunk into a SHA-1 checksum
    Returns the hex SHA-1 digest (of nothing when sha1 is False).
    """
    stream = body_from_url(url, 'text/plain', auth=auth, data=data, debug=debug)
    digest = hashlib.sha1()
    chunk = stream.read(8192)
    while chunk:
        if sha1:
            digest.update(chunk)
        handle.write(chunk.decode('utf8'))
        chunk = stream.read(8192)
    return digest.hexdigest()
110115

111116
# print to stdout results of MG-RAST API
112117
def stdout_from_url(url, auth=None, data=None, debug=False):
113118
file_from_url(url, sys.stdout, auth=auth, data=data, debug=debug)
114119

115120
# return python struct from JSON output of asynchronous MG-RAST API
116-
def async_rest_api(url, auth=None, data=None, debug=False, delay=15):
121+
def async_rest_api(url, auth=None, data=None, debug=False, delay=60):
117122
try:
118123
parameters = parse_qs(url.split("?")[1])
119124
assert "asynchronous" in parameters, "Must specify asynchronous=1 for asynchronous call!"
@@ -123,10 +128,9 @@ def async_rest_api(url, auth=None, data=None, debug=False, delay=15):
123128
# If "status" is not present, or if "status" is somehow not "submitted"
124129
# assume this is not an asynchronous call and it's done.
125130
if ('status' in submit) and (submit['status'] == 'done') and ('url' in submit):
126-
return obj_from_url(submit["url"], debug=debug)
131+
return submit['data']
127132
if not (('status' in submit) and (submit['status'] == 'submitted') and ('url' in submit)):
128-
return(submit)
129-
# sys.stderr.write("ERROR: return data invalid format\n:%s"%json.dumps(submit))
133+
return submit
130134
result = obj_from_url(submit['url'], debug=debug)
131135
try:
132136
while result['status'] != 'done':
@@ -250,15 +254,36 @@ def metadata_from_biom(biom, term):
250254
vals.append(value)
251255
return vals
252256

257+
# turn profile format BIOM into matrix format, use only abundances
def profile_to_matrix(p):
    """Convert an MG-RAST profile-format BIOM object into an abundance matrix BIOM.

    p -- BIOM dict.  If the first column id is not 'abundance' the object is
         not a profile export and is returned unchanged.

    The abundance column is relabeled with the object's own id, the matrix
    element metadata is set, sparse data is densified, and any extra stat
    columns (e-value, identity, ...) are trimmed away so only abundances
    remain.  Mutates *p* in place and returns it.
    """
    if p['columns'][0]['id'] != 'abundance':
        # not a profile
        return p
    # extra columns beyond abundance must be dropped after any densify
    trim = len(p['columns']) > 1
    p['columns'][0]['id'] = p['id']
    p['matrix_element_type'] = 'int'
    p['matrix_element_value'] = 'abundance'
    p['date'] = time.strftime("%Y-%m-%d %H:%M:%S")
    if p['matrix_type'] == 'sparse':
        p['data'] = sparse_to_dense(p['data'], p['shape'][0], p['shape'][1])
    if trim:
        p['columns'] = p['columns'][:1]
        for i in range(len(p['rows'])):
            p['data'][i] = p['data'][i][:1]
    return p
274+
253275
# merge two BIOM objects
254276
def merge_biom(b1, b2):
255277
"""input: 2 biom objects of same 'type', 'matrix_element_type', and 'matrix_element_value'
256278
return: merged biom object, duplicate columns skipped, duplicate rows added"""
257-
# hack for using in loop when one oif 2 is empty
279+
# hack for using in loop when one of 2 is empty
258280
if b1 and (not b2):
259281
return b1
260282
if b2 and (not b1):
261283
return b2
284+
# transform profile BIOM from UI export into matrix BIOM
285+
b1 = profile_to_matrix(b1)
286+
b2 = profile_to_matrix(b2)
262287
# validate
263288
if not (b1 and b2 and (b1['type'] == b2['type']) and (b1['matrix_element_type'] == b2['matrix_element_type']) and (b1['matrix_element_value'] == b2['matrix_element_value'])):
264289
sys.stderr.write("The inputed biom objects are not compatable for merging\n")
Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,173 @@
1+
#!/usr/bin/env python
2+
3+
import sys
4+
import os
5+
import json
6+
import yaml
7+
import shutil
8+
import hashlib
9+
from optparse import OptionParser
10+
from prettytable import PrettyTable
11+
from mglib import VERSION, get_auth_token, AUTH_LIST, API_URL, obj_from_url, file_from_url, random_str
12+
13+
VERSION = 'alpha'
14+
15+
prehelp = """
16+
NAME
17+
mg-export-research-object
18+
19+
VERSION
20+
%s
21+
22+
SYNOPSIS
23+
mg-export-research-object [ --help, --user <user>, --passwd <password>, --token <oAuth token>, --metagenome <metagenome id>, --dir <directory name> --list <list manifest>]
24+
25+
DESCRIPTION
26+
Retrieve metagenome research object.
27+
Note: This is an alpha version and currently does not produce a full Research Object.
28+
"""
29+
30+
posthelp = """
31+
Output
32+
List available files in manifest.
33+
OR
34+
Download research object from manifest.
35+
36+
EXAMPLES
37+
mg-export-research-object --metagenome mgm4441680.3 --list
38+
39+
SEE ALSO
40+
-
41+
42+
AUTHORS
43+
%s
44+
"""
45+
46+
def my_unicode_repr(self, data):
    """YAML representer hook: emit a unicode value as a plain utf-8 string node."""
    encoded = data.encode('utf-8')
    return self.represent_str(encoded)
48+
49+
def edit_input(text, mg):
    """Rewrite a CWL job-input YAML template with a metagenome's pipeline parameters.

    text -- YAML text of the job-input template
    mg   -- metagenome object from the MG-RAST API; must provide 'job_id',
            'id', and 'pipeline_parameters' (assumed schema -- TODO confirm
            against the API response)
    Returns the updated YAML document as a string.
    """
    # safe_load: the template only needs plain scalars/maps, and safe_load
    # refuses arbitrary-object tags (yaml.load on external text is unsafe)
    info = yaml.safe_load(text)
    param = mg['pipeline_parameters']
    info['jobid'] = int(mg['job_id'])
    info['sequences']['path'] = "../data/"+mg['id']+".050.upload."+param['file_type']
    # each key is optional in the template; only overwrite what is present
    if 'filterLn' in info:
        info['filterLn'] = param['filter_ln'] == "yes"
    if 'filterAmbig' in info:
        info['filterAmbig'] = param['filter_ambig'] == "yes"
    if 'deviation' in info:
        info['deviation'] = float(param['filter_ln_mult'])
    if 'maxAmbig' in info:
        info['maxAmbig'] = int(param['max_ambig'])
    if 'derepPrefix' in info:
        # prefix length only applies when dereplication is enabled
        if param['dereplicate'] == 'yes':
            info['derepPrefix'] = int(param['prefix_length'])
        else:
            info['derepPrefix'] = 0
    if 'minQual' in info:
        info['minQual'] = int(param['min_qual'])
    if 'maxLqb' in info:
        info['maxLqb'] = int(param['max_lqb'])

    try:
        # Python 2 only: dump unicode values as plain utf-8 strings
        yaml.representer.Representer.add_representer(unicode, my_unicode_repr)
    except NameError:
        # Python 3: str is already unicode, no custom representer needed
        pass
    return yaml.dump(info, allow_unicode=True, default_flow_style=False)
74+
75+
def main(args):
    """Export an MG-RAST metagenome research object.

    Lists the files in the research-object manifest (--list) or downloads
    the manifest, the CWL pipeline files, and all data aggregates into
    --dir, writing BagIt-style manifest-sha1.txt / tagmanifest-sha1.txt
    checksum files alongside them.

    Returns 0 on success, 1 on bad arguments.
    """
    import subprocess  # local import: only needed for the git clone below
    OptionParser.format_description = lambda self, formatter: self.description
    OptionParser.format_epilog = lambda self, formatter: self.epilog
    parser = OptionParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_option("", "--url", dest="url", default=API_URL, help="MG-RAST API url")
    parser.add_option("", "--user", dest="user", default=None, help="OAuth username")
    parser.add_option("", "--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_option("", "--token", dest="token", default=None, help="OAuth token")
    parser.add_option("", "--metagenome", dest="metagenome", default=None, help="metagenome ID")
    parser.add_option("", "--dir", dest="dir", default=".", help="directory to export to")
    parser.add_option("", "--list", dest="list", action="store_true", default=False, help="list files in manifest")

    # get inputs
    (opts, args) = parser.parse_args()
    if not opts.metagenome:
        sys.stderr.write("ERROR: a metagenome id is required\n")
        return 1
    if not os.path.isdir(opts.dir):
        sys.stderr.write("ERROR: dir '%s' does not exist\n"%opts.dir)
        return 1

    # get auth
    token = get_auth_token(opts)

    # get mg info
    mg = obj_from_url(opts.url+'/metagenome/'+opts.metagenome, auth=token)

    # get manifest
    data = obj_from_url(opts.url+'/researchobject/manifest/'+opts.metagenome, auth=token)

    # just list the manifest contents and exit
    if opts.list:
        pt = PrettyTable(["File Name", "Folder", "Media Type"])
        for info in data["aggregates"]:
            pt.add_row([info["bundledAs"]["filename"], info["bundledAs"]["folder"], info["mediatype"]])
        pt.align = "l"
        print(pt)
        return 0

    # get cwl files from the pipeline repo; list-form call stays correct
    # even when --dir contains spaces or shell metacharacters (os.system
    # with a concatenated command string did not)
    temp_name = random_str(10)
    pipeline_dir = os.path.join(opts.dir, temp_name)
    subprocess.call(["git", "clone", "https://github.com/MG-RAST/pipeline.git", pipeline_dir])

    # download manifest; sha1s collects [hexdigest, relative path] pairs
    sha1s = []
    base = data["@context"][0]["@base"].strip('/')
    manifest_dir = os.path.join(opts.dir, base)
    if not os.path.isdir(manifest_dir):
        os.mkdir(manifest_dir)
    data_str = json.dumps(data)
    with open(os.path.join(manifest_dir, data["manifest"]), 'w') as mh:
        mh.write(data_str)
    # .encode: hashlib requires bytes under Python 3
    sha1s.append([ hashlib.sha1(data_str.encode('utf-8')).hexdigest(), os.path.join(base, data["manifest"]) ])

    # download aggregates
    for info in data["aggregates"]:
        sys.stdout.write("Downloading %s ... "%(info["bundledAs"]["filename"]))
        folder = info["bundledAs"]["folder"].strip('/')
        folder_dir = os.path.join(opts.dir, folder)
        if not os.path.isdir(folder_dir):
            os.mkdir(folder_dir)
        if "githubusercontent" in info["uri"]:
            # CWL files come from the cloned pipeline repo, not the API
            pos = info["uri"].find("CWL")
            src = os.path.join(pipeline_dir, info["uri"][pos:])
            dst = os.path.join(folder_dir, info["bundledAs"]["filename"])
            with open(src, 'r') as sh:
                text = sh.read().replace('../Inputs/', '').replace('../Tools/', '').replace('../Workflows/', '')
            if dst.endswith('job.yaml'):
                text = edit_input(text, mg)
            with open(dst, 'w') as dh:
                dh.write(text)
            sha1s.append([ hashlib.sha1(text.encode('utf-8')).hexdigest(), os.path.join(folder, info["bundledAs"]["filename"]) ])
        else:
            with open(os.path.join(folder_dir, info["bundledAs"]["filename"]), 'w') as fh:
                s1 = file_from_url(info["uri"], fh, auth=token, sha1=True)
            sha1s.append([ s1, os.path.join(folder, info["bundledAs"]["filename"]) ])
        sys.stdout.write("Done\n")

    # output sha1: data/ files go to the payload manifest, the rest to the
    # tag manifest (BagIt convention)
    sha1s.sort(key=lambda x: x[1])
    with open(os.path.join(opts.dir, "manifest-sha1.txt"), 'w') as mansha1, \
         open(os.path.join(opts.dir, "tagmanifest-sha1.txt"), 'w') as tagsha1:
        for s1 in sha1s:
            if s1[1].startswith('data'):
                mansha1.write("%s\t%s\n"%(s1[0], s1[1]))
            else:
                tagsha1.write("%s\t%s\n"%(s1[0], s1[1]))

    # cleanup the temporary pipeline clone
    shutil.rmtree(pipeline_dir)

    return 0
170+
171+
172+
if __name__ == "__main__":
    # propagate main()'s status code so shell callers see failures
    sys.exit(main(sys.argv))

0 commit comments

Comments
 (0)