Skip to content

Commit 2ad2bd8

Browse files
authored
Merge pull request #200 from wltrimbl/master
added excel-exporter
2 parents f14e3a1 + 2d9325e commit 2ad2bd8

4 files changed

Lines changed: 219 additions & 6 deletions

File tree

README.md

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,6 @@ Repository of scripts and libraries for using the MG-RAST API and MG-RAST data
1919

2020
- Python libs
2121
- prettytable - <https://pypi.python.org/pypi/PrettyTable>
22-
- poster - <https://pypi.python.org/pypi/poster>
23-
For python3, you will need a python3 port of poster: <https://github.com/mrd1no/poster-0.8.1-for-Python-3.4>
2422
- requests - <http://docs.python-requests.org/en/latest>
2523
- requests_toolbelt - <https://github.com/sigmavirus24/requests-toolbelt>
2624
- scipy - <http://www.scipy.org>

scripts/mg-export-metadata.py

Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
#!/usr/bin/env python
2+
3+
from __future__ import print_function
4+
5+
import sys
6+
from argparse import ArgumentParser
7+
import xlsxwriter
8+
9+
from mglib import VERSION, get_auth_token, AUTH_LIST, obj_from_url
10+
11+
# Text shown before the option list in --help output; the single %s is
# filled with VERSION when the parser is built.
prehelp = """
NAME
    mg-export-metadata.py

VERSION
    %s

SYNOPSIS
    mg-export-metadata.py [ --help ] --project <project id>

DESCRIPTION
    Export the metadata of a project to an Excel (xlsx) workbook.
"""

# Text shown after the option list in --help output; the single %s is
# filled with AUTH_LIST when the parser is built.
posthelp = """
Output
    excel file named "mgpXXXX-export.xlsx"

EXAMPLES
    mg-export-metadata.py --project mgp128

SEE ALSO
    -

AUTHORS
    %s
"""
38+
39+
def get_project_keys(meta):
    """Return the project-level metadata keys, ``project_name`` first.

    Args:
        meta: metadata export whose ``meta["data"]`` maps key names to
            their entries.

    Returns:
        list: ``"project_name"`` (always present in the result, even when
        it is missing from ``meta["data"]``) followed by every other key
        in sorted order.
    """
    # Sorted so the column order of the generated sheet is the same on
    # every run; plain set iteration order is arbitrary.
    others = sorted(key for key in meta["data"] if key != "project_name")
    return ["project_name"] + others
45+
46+
def get_sample_keys(meta):
    """Return the union of all sample metadata keys, ``sample_name`` first.

    Args:
        meta: metadata export; each element of ``meta["samples"]`` has a
            ``"data"`` mapping keyed by metadata key name.

    Returns:
        list: ``"sample_name"`` followed by every other key seen in any
        sample, in sorted order.
    """
    names = set()
    for sample in meta["samples"]:
        names.update(sample["data"])
    names.discard("sample_name")
    # Sorted so the column order of the generated sheet is reproducible.
    return ["sample_name"] + sorted(names)
53+
54+
def get_library_keys(meta):
    """Return the union of all library metadata keys, ``sample_name`` first.

    Args:
        meta: metadata export; each element of ``meta["samples"]`` has a
            ``"libraries"`` list whose items carry a ``"data"`` mapping.

    Returns:
        list: ``"sample_name"`` followed by every other key seen in any
        library of any sample, in sorted order.
    """
    names = set()
    for sample in meta["samples"]:
        for lib in sample["libraries"]:
            names.update(lib["data"])
    # discard() rather than remove(): a bundle whose libraries carry no
    # "sample_name" key must not abort the export with a KeyError.
    names.discard("sample_name")
    # Sorted so the column order of the generated sheet is reproducible.
    return ["sample_name"] + sorted(names)
62+
def get_eps(meta):
    """Return the distinct environmental-package types used by the samples.

    Samples without an ``"envPackage"`` entry are ignored.

    Args:
        meta: metadata export with a ``"samples"`` list.

    Returns:
        list: the distinct ``sample["envPackage"]["type"]`` values in
        sorted order, so worksheets are created in a reproducible order.
    """
    found = {
        sample["envPackage"]["type"]
        for sample in meta["samples"]
        if "envPackage" in sample
    }
    return sorted(found)
68+
69+
def get_ep_keys(meta, eps):
    """Collect, per environmental package, the metadata keys its samples use.

    Args:
        meta: metadata export with a ``"samples"`` list.
        eps: iterable of environmental-package type names to report on.

    Returns:
        dict: maps each name in ``eps`` to a list starting with
        ``"sample_name"`` followed by the other keys found in
        ``sample["envPackage"]["data"]`` for samples of that type, sorted.
    """
    collected = {ep: set() for ep in eps}
    for sample in meta["samples"]:
        # Samples without an environmental package contribute nothing.
        if "envPackage" not in sample:
            continue
        package = sample["envPackage"]
        # Only the package types the caller asked about are tracked.
        if package["type"] in collected:
            collected[package["type"]].update(package["data"])

    ordered = {}
    for ep in eps:
        # discard(): a package with no "sample_name" key is not an error.
        collected[ep].discard("sample_name")
        ordered[ep] = ["sample_name"] + sorted(collected[ep])
    return ordered
80+
81+
def write_worksheet_value(worksheet, r, c, v, f):
    """Write one metadata value into a worksheet cell according to its type.

    Args:
        worksheet: object providing ``write_string(row, col, text)`` and
            ``write_number(row, col, number)``.
        r: zero-based row index.
        c: zero-based column index.
        v: the value to write.
        f: metadata type name. ``float``, ``coordinate`` and ``int`` are
            written as numbers; ``text``, ``ontology``, ``select``,
            ``timezone``, ``date`` and ``time`` as strings. Any other
            name prints a warning and is written as a string.

    An empty value is always written as a string. A value declared numeric
    that cannot be converted to a finite float is written as a string too,
    so one malformed cell does not abort the whole export.
    """
    import math

    numeric_formats = ("float", "coordinate", "int")
    text_formats = ("text", "ontology", "select", "timezone", "date", "time")

    if v != "" and f in numeric_formats:
        try:
            number = float(v)
        except (TypeError, ValueError):
            number = None
        # NaN and infinities are not valid numeric cell values, so they
        # take the text path as well.
        if number is not None and not (math.isnan(number) or math.isinf(number)):
            worksheet.write_number(r, c, number)
            return
    elif v != "" and f not in text_formats:
        print("warning, fallback for format ", f)

    worksheet.write_string(r, c, v)
    return
95+
96+
def _write_table(sheet, keys, records, first_row=2):
    """Fill ``sheet`` with one column per key and one data row per record.

    Row 0 holds the key names and row 1 the key definitions; data rows
    start at ``first_row``. Each record maps key names to entries with
    ``"value"``, ``"definition"`` and ``"type"`` fields.
    """
    for col, key in enumerate(keys):
        sheet.write_string(0, col, key)
    for row, record in enumerate(records, first_row):
        for col, key in enumerate(keys):
            try:
                entry = record[key]
                value = entry["value"]
                definition = entry["definition"]
                fmt = entry["type"]
            except KeyError:
                # Key not recorded for this record: leave the cell empty
                # and, importantly, do not overwrite a definition header
                # that another record already supplied.
                continue
            sheet.write_string(1, col, definition)
            write_worksheet_value(sheet, row, col, value, fmt)


def main(args):
    """Export the metadata of one project to ``<project>-export.xlsx``.

    The workbook gets a README sheet, a project sheet, a sample sheet, a
    library sheet and one sheet per environmental package in use.

    Args:
        args: accepted for the ``main(sys.argv)`` call; the options
            themselves are read by argparse from ``sys.argv``.

    Returns:
        int: 1 when ``--project`` is missing or does not start with
        ``mgp``, 0 after the workbook has been written.

    Raises:
        SystemExit: when the first sample has no libraries.
    """
    # Show the description and epilog verbatim in --help output.
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp % VERSION, epilog=posthelp % AUTH_LIST)
    parser.add_argument("--project", dest="project", default=None, help="project ID")

    # get inputs
    opts = parser.parse_args()
    if not opts.project or opts.project[0:3] != "mgp":
        sys.stderr.write("ERROR: a project id is required\n")
        return 1
    project = opts.project

    # get auth
    token = get_auth_token(opts)

    # export metadata
    outfile = project + "-export.xlsx"
    # NOTE(review): the request, including the auth token, goes over plain
    # http -- confirm whether the API can be reached via https instead.
    metadata = obj_from_url("http://api.mg-rast.org/metadata/export/{project}?verbosity=full".format(project=project), auth=token)
    samples = metadata["samples"]

    workbook = xlsxwriter.Workbook(outfile)
    print("Creating", outfile)

    # README sheet: currently only the numbers 0..9 in the first column.
    readme = workbook.add_worksheet("README")
    for i in range(10):
        readme.write_number(i, 0, i)

    # Project sheet: key / definition / value stacked in rows 0..2.
    project_sheet = workbook.add_worksheet("project")
    project_data = metadata["data"]
    for col, key in enumerate(get_project_keys(metadata)):
        project_sheet.write_string(0, col, key)
        if key not in project_data:
            # "project_name" is always requested, even when not recorded.
            continue
        project_sheet.write_string(1, col, project_data[key]["definition"])
        project_sheet.write_string(2, col, project_data[key]["value"])

    # Sample sheet: one row per sample.
    sample_sheet = workbook.add_worksheet("sample")
    _write_table(sample_sheet, get_sample_keys(metadata), [s["data"] for s in samples])

    # The library sheet is named after the first library's investigation type.
    try:
        librarytype = samples[0]["libraries"][0]["data"]["investigation_type"]["value"]
    except IndexError:
        sys.exit("This metadata bundle does not have any libraries")

    # Library sheet: only the first library of each sample is exported. A
    # sample without libraries yields an empty row, so row i still belongs
    # to sample i, as it does on the sample sheet.
    library_sheet = workbook.add_worksheet("library " + librarytype)
    library_records = [s["libraries"][0]["data"] if s["libraries"] else {} for s in samples]
    _write_table(library_sheet, get_library_keys(metadata), library_records)

    # One sheet per environmental package, with the samples of that type.
    eps = get_eps(metadata)
    print("eps", " ".join(eps))
    ep_sheets = {}
    for ep in eps:
        ep_sheets[ep] = workbook.add_worksheet("ep " + ep)
    epkeys = get_ep_keys(metadata, eps)
    for ep in eps:
        ep_records = [
            s["envPackage"]["data"]
            for s in samples
            if "envPackage" in s and s["envPackage"]["type"] == ep
        ]
        _write_table(ep_sheets[ep], epkeys[ep], ep_records)

    workbook.close()
    return 0
207+
208+
# When executed as a script, run main() and use its return value as the
# process exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv))

tests/test_dependencies.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,6 @@ def test_dependencies():
55
except ImportError:
66
print("numpy not found. ")
77

8-
try:
9-
import poster
10-
except ImportError:
11-
print("poster not found. ")
128
try:
139
import requests
1410
except ImportError:

tests/test_mgrtools.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,16 @@ def test_mg_m5nrtoolspl():
223223
assert stat == 0
224224
assert b"alcohol dehydrogenase" in out
225225

226+
def test_query_matrix():
    """Run an mg-query.py matrix request through runme and expect status 0."""
    command = '''mg-query.py 'http://api.mg-rast.org/matrix/organism?group_level=phylum&source=SEED&hit_type=single&result_type=abundance&evalue=1&identity=60&length=15&taxid=0&id=mgm4510219.3' > matrix.biom'''
    result = runme(command)
    status = result[0]
    assert status == 0
230+
def test_mg_biom_view():
    """Feed a BIOM file to mg-biom-view.py and check status and one output row."""
    command = '''mg-biom-view.py < mgm4514486.3.refseq.biom.json'''
    expected_row = b'Bacteria;Proteobacteria;Gammaproteobacteria;Vibrionales;Vibrionaceae;Vibrio;Vibrio cholerae;Vibrio cholerae BX 330286 8'
    status, output, _errors = runme(command)
    assert status == 0
    assert expected_row in output
235+
226236
@pytest.mark.known_failing
def test_known_failing():
    """Always fails; carries the ``known_failing`` marker.

    NOTE(review): presumably deselected by marker in ordinary runs --
    confirm against the project's pytest configuration.
    """
    assert False  # This should not normally run

0 commit comments

Comments
 (0)