Skip to content

Commit 91d0833

Browse files
committed
research object
1 parent 1318bdc commit 91d0833

1 file changed

Lines changed: 173 additions & 0 deletions

File tree

Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,173 @@
1+
#!/usr/bin/env python
2+
3+
import sys
4+
import os
5+
import json
6+
import yaml
7+
import shutil
8+
import hashlib
9+
from optparse import OptionParser
10+
from prettytable import PrettyTable
11+
from mglib import VERSION, get_auth_token, AUTH_LIST, API_URL, obj_from_url, file_from_url, random_str
12+
13+
# Script-local version label; this rebinds the VERSION name imported from
# mglib, and it is the value substituted into the help text by main().
VERSION = "alpha"
14+
15+
# Help header handed to OptionParser as its description; main() fills the
# single %s placeholder with VERSION.
prehelp = """
NAME
    mg-export-research-object

VERSION
    %s

SYNOPSIS
    mg-export-research-object [ --help, --user <user>, --passwd <password>, --token <oAuth token>, --metagenome <metagenome id>, --dir <directory name>, --list ]

DESCRIPTION
    Retrieve metagenome research object.
    Note: This is an alpha version and currently does not produce a full Research Object.
"""
29+
30+
# Help footer handed to OptionParser as its epilog; main() fills the single
# %s placeholder with AUTH_LIST.
posthelp = """
Output
    List available files in manifest.
    OR
    Download research object from manifest.

EXAMPLES
    mg-export-research-object --metagenome mgm4441680.3 --list

SEE ALSO
    -

AUTHORS
    %s
"""
45+
46+
def my_unicode_repr(self, data):
    """Represent ``data`` by handing its UTF-8 encoding to ``self.represent_str``.

    Written with a ``self`` parameter so it can be registered as a YAML
    representer callback (see its use in edit_input).
    """
    utf8_value = data.encode('utf-8')
    return self.represent_str(utf8_value)
48+
49+
def edit_input(text, mg):
    """Fill a job-input YAML document with one metagenome's pipeline settings.

    Args:
        text: YAML source of the job input. It must contain a ``sequences``
            mapping; every other recognised key is only updated when it is
            already present in the document.
        mg: metagenome record providing ``id``, ``job_id`` and a
            ``pipeline_parameters`` mapping.

    Returns:
        The updated document serialised as block-style YAML.

    Raises:
        KeyError: if ``mg`` lacks a value needed for a key that is present
            in the document.
    """
    # safe_load rather than yaml.load: the document only needs plain YAML
    # types, while yaml.load() without an explicit Loader can construct
    # arbitrary Python objects and is rejected outright by PyYAML >= 6.
    info = yaml.safe_load(text)
    param = mg['pipeline_parameters']
    info['jobid'] = int(mg['job_id'])
    info['sequences']['path'] = "../data/" + mg['id'] + ".050.upload." + param['file_type']
    if 'filterLn' in info:
        info['filterLn'] = param['filter_ln'] == "yes"
    if 'filterAmbig' in info:
        info['filterAmbig'] = param['filter_ambig'] == "yes"
    if 'deviation' in info:
        info['deviation'] = float(param['filter_ln_mult'])
    if 'maxAmbig' in info:
        info['maxAmbig'] = int(param['max_ambig'])
    if 'derepPrefix' in info:
        # 0 is written whenever dereplication is not switched on
        if param['dereplicate'] == 'yes':
            info['derepPrefix'] = int(param['prefix_length'])
        else:
            info['derepPrefix'] = 0
    if 'minQual' in info:
        info['minQual'] = int(param['min_qual'])
    if 'maxLqb' in info:
        info['maxLqb'] = int(param['max_lqb'])

    if sys.version_info[0] < 3:
        # Python 2 only: route ``unicode`` values through my_unicode_repr.
        # Python 3 has no ``unicode`` type, so the name must not be evaluated
        # there and no custom representer is registered.
        yaml.representer.Representer.add_representer(unicode, my_unicode_repr)
    return yaml.dump(info, allow_unicode=True, default_flow_style=False)
74+
75+
def _write_text(path, text):
    """Write ``text`` to ``path`` and return the SHA-1 hex digest of the bytes written."""
    raw = text if isinstance(text, bytes) else text.encode('utf-8')
    # binary mode: no newline translation, so the digest describes exactly
    # the bytes that end up on disk
    with open(path, 'wb') as handle:
        handle.write(raw)
    return hashlib.sha1(raw).hexdigest()


def _clone_pipeline(target_dir):
    """Clone the MG-RAST pipeline repository into ``target_dir``; return True on success."""
    import subprocess
    # argument list (no shell), so a --dir value containing spaces or shell
    # metacharacters reaches git unchanged
    cmd = ["git", "clone", "https://github.com/MG-RAST/pipeline.git", target_dir]
    try:
        return subprocess.call(cmd) == 0
    except OSError:
        # the git executable could not be started
        return False


def _list_manifest(data):
    """Print a table with the name, folder and media type of every aggregate."""
    pt = PrettyTable(["File Name", "Folder", "Media Type"])
    for info in data["aggregates"]:
        bundled = info["bundledAs"]
        pt.add_row([bundled["filename"], bundled["folder"], info["mediatype"]])
    pt.align = "l"
    print(pt)


def _export_files(out_dir, pipeline_dir, data, mg, token):
    """Write the manifest and all aggregates below ``out_dir``.

    Returns a list of ``[sha1 hex digest, path relative to out_dir]`` pairs,
    one per file written.
    """
    sha1s = []

    # download manifest
    base = data["@context"][0]["@base"].strip('/')
    manifest_dir = os.path.join(out_dir, base)
    if not os.path.isdir(manifest_dir):
        os.makedirs(manifest_dir)
    digest = _write_text(os.path.join(manifest_dir, data["manifest"]), json.dumps(data))
    sha1s.append([digest, os.path.join(base, data["manifest"])])

    # download aggregates
    for info in data["aggregates"]:
        filename = info["bundledAs"]["filename"]
        sys.stdout.write("Downloading %s ... " % filename)
        # no newline yet, so flush to make the progress message visible now
        sys.stdout.flush()
        folder = info["bundledAs"]["folder"].strip('/')
        folder_dir = os.path.join(out_dir, folder)
        if not os.path.isdir(folder_dir):
            os.makedirs(folder_dir)
        dst = os.path.join(folder_dir, filename)
        if "githubusercontent" in info["uri"]:
            # these files are taken from the local clone: the part of the
            # URI from "CWL" onward is used as the path inside the clone
            pos = info["uri"].find("CWL")
            src = os.path.join(pipeline_dir, info["uri"][pos:])
            with open(src, 'r') as handle:
                text = handle.read()
            # drop the relative directory prefixes from references
            text = text.replace('../Inputs/', '').replace('../Tools/', '').replace('../Workflows/', '')
            if dst.endswith('job.yaml'):
                text = edit_input(text, mg)
            digest = _write_text(dst, text)
        else:
            with open(dst, 'w') as fh:
                digest = file_from_url(info["uri"], fh, auth=token, sha1=True)
        sha1s.append([digest, os.path.join(folder, filename)])
        sys.stdout.write("Done\n")
    return sha1s


def _write_sha1_files(out_dir, sha1s):
    """Write the checksum lists: paths starting with 'data' go to
    manifest-sha1.txt, all others to tagmanifest-sha1.txt."""
    with open(os.path.join(out_dir, "manifest-sha1.txt"), 'w') as mansha1, \
            open(os.path.join(out_dir, "tagmanifest-sha1.txt"), 'w') as tagsha1:
        for digest, rel_path in sorted(sha1s, key=lambda x: x[1]):
            target = mansha1 if rel_path.startswith('data') else tagsha1
            target.write("%s\t%s\n" % (digest, rel_path))


def main(args):
    """Export a metagenome research object, or list the files in its manifest.

    Options are read from ``sys.argv`` by optparse; the ``args`` parameter is
    only rebound to the parser's leftover positional arguments.

    Returns:
        0 on success; 1 when no metagenome id was given, the export
        directory does not exist, or the pipeline repository could not be
        cloned.
    """
    # print description and epilog verbatim instead of letting optparse re-wrap them
    OptionParser.format_description = lambda self, formatter: self.description
    OptionParser.format_epilog = lambda self, formatter: self.epilog
    parser = OptionParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_option("", "--url", dest="url", default=API_URL, help="MG-RAST API url")
    parser.add_option("", "--user", dest="user", default=None, help="OAuth username")
    parser.add_option("", "--passwd", dest="passwd", default=None, help="OAuth password")
    parser.add_option("", "--token", dest="token", default=None, help="OAuth token")
    parser.add_option("", "--metagenome", dest="metagenome", default=None, help="metagenome ID")
    parser.add_option("", "--dir", dest="dir", default=".", help="directory to export to")
    parser.add_option("", "--list", dest="list", action="store_true", default=False, help="list files in manifest")

    # get inputs
    (opts, args) = parser.parse_args()
    if not opts.metagenome:
        sys.stderr.write("ERROR: a metagenome id is required\n")
        return 1
    if not os.path.isdir(opts.dir):
        sys.stderr.write("ERROR: dir '%s' does not exist\n"%opts.dir)
        return 1

    # get auth
    token = get_auth_token(opts)

    # get mg info
    url = opts.url+'/metagenome/'+opts.metagenome
    mg = obj_from_url(url, auth=token)

    # get manifest
    url = opts.url+'/researchobject/manifest/'+opts.metagenome
    data = obj_from_url(url, auth=token)

    # just list
    if opts.list:
        _list_manifest(data)
        return 0

    # get cwl files into a randomly named temporary clone inside the export dir
    pipeline_dir = os.path.join(opts.dir, random_str(10))
    try:
        if not _clone_pipeline(pipeline_dir):
            sys.stderr.write("ERROR: unable to clone the MG-RAST pipeline repository\n")
            return 1
        sha1s = _export_files(opts.dir, pipeline_dir, data, mg, token)
        # output sha1
        _write_sha1_files(opts.dir, sha1s)
    finally:
        # cleanup: remove the temporary clone even when the export failed
        shutil.rmtree(pipeline_dir, ignore_errors=True)

    return 0
170+
171+
172+
if __name__ == "__main__":
    # run the tool and hand main()'s return value to the shell as exit status
    exit_status = main(sys.argv)
    sys.exit(exit_status)

0 commit comments

Comments
 (0)