Skip to content

Commit 6961f8e

Browse files
author
Will Trimble
committed
Added excel-exporting script
1 parent baa5275 commit 6961f8e

1 file changed

Lines changed: 206 additions & 0 deletions

File tree

scripts/mg-export-metadata.py

Lines changed: 206 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,206 @@
1+
#!/usr/bin/env python
2+
3+
from __future__ import print_function
4+
5+
import sys
6+
from argparse import ArgumentParser
7+
import xlsxwriter
8+
9+
from mglib import VERSION, get_auth_token, AUTH_LIST, obj_from_url
10+
11+
# Help text printed before the argparse option list.
# The single %s placeholder is filled with mglib.VERSION in main().
prehelp = """
NAME
    mg-export-metadata.py

VERSION
    %s

SYNOPSIS
    mg-export-metadata.py [ --help ] --project <project id>

DESCRIPTION
    Export the metadata of a project (project, sample, library and
    environmental package records) to an Excel workbook.
"""

# Help text printed after the argparse option list.
# The single %s placeholder is filled with mglib.AUTH_LIST in main().
posthelp = """
Output
    excel file named "mgpXXXX-export.xlsx"

EXAMPLES
    mg-export-metadata.py --project mgp128

SEE ALSO
    -

AUTHORS
    %s
"""
38+
39+
def get_project_keys(meta):
    """Return the project-level metadata keys, ``project_name`` first.

    Args:
        meta: exported metadata dict; only ``meta["data"]`` (a mapping of
            key -> field) is read.

    Returns:
        A list starting with ``"project_name"`` followed by every other key
        of ``meta["data"]`` in sorted order.  Sorting keeps the column
        order identical from one export to the next (set iteration order is
        not stable across runs).  ``"project_name"`` is always the first
        entry, even when the project data does not contain it.
    """
    keys = set(meta["data"])
    # discard() instead of remove(): a project without a name must not crash.
    keys.discard("project_name")
    return ["project_name"] + sorted(keys)
45+
46+
def get_sample_keys(meta):
    """Return the union of metadata keys over all samples, ``sample_name`` first.

    Args:
        meta: exported metadata dict; reads ``sample["data"]`` for every
            entry of ``meta["samples"]``.

    Returns:
        A list starting with ``"sample_name"`` followed by every other key
        seen in any sample, in sorted order so the column layout is
        reproducible.  With no samples the result is ``["sample_name"]``.
    """
    keys = set()
    for sample in meta["samples"]:
        keys.update(sample["data"])
    # discard() instead of remove(): no samples / unnamed samples must not crash.
    keys.discard("sample_name")
    return ["sample_name"] + sorted(keys)
53+
54+
def get_library_keys(meta):
    """Return the union of metadata keys over all libraries, ``sample_name`` first.

    Args:
        meta: exported metadata dict; reads ``lib["data"]`` for every
            library of every entry of ``meta["samples"]``.

    Returns:
        A list starting with ``"sample_name"`` followed by every other key
        seen in any library, in sorted order so the column layout is
        reproducible.  With no libraries the result is ``["sample_name"]``.
    """
    keys = set()
    for sample in meta["samples"]:
        for lib in sample["libraries"]:
            keys.update(lib["data"])
    # discard() instead of remove(): a project without libraries must not crash.
    keys.discard("sample_name")
    return ["sample_name"] + sorted(keys)
62+
def get_eps(meta):
    """Return the distinct environmental-package types used by the samples.

    Args:
        meta: exported metadata dict; reads ``sample["envPackage"]["type"]``
            for every entry of ``meta["samples"]`` that has a package.

    Returns:
        Sorted list of package type names (sorted so the worksheet order is
        reproducible).  Samples with a missing or empty ``envPackage`` are
        ignored.
    """
    eps = set()
    for sample in meta["samples"]:
        package = sample.get("envPackage")
        if package:
            eps.add(package["type"])
    return sorted(eps)
68+
69+
def get_ep_keys(meta, eps):
    """Return, per environmental package type, the union of its metadata keys.

    Args:
        meta: exported metadata dict; reads ``sample["envPackage"]`` (its
            ``"type"`` and ``"data"``) for every entry of ``meta["samples"]``.
        eps: iterable of package type names to collect keys for.

    Returns:
        Dict mapping each type in *eps* to a list that starts with
        ``"sample_name"`` followed by the other keys seen for that type, in
        sorted order.  Samples without an ``envPackage``, or whose package
        type is not listed in *eps*, are skipped.
    """
    collected = {ep: set() for ep in eps}
    for sample in meta["samples"]:
        package = sample.get("envPackage")
        if not package:
            # Not every sample carries an environmental package.
            continue
        keys = collected.get(package["type"])
        if keys is None:
            # Package type the caller did not ask for.
            continue
        keys.update(package["data"])

    result = {}
    for ep, keys in collected.items():
        # discard() instead of remove(): tolerate packages without a name key.
        keys.discard("sample_name")
        result[ep] = ["sample_name"] + sorted(keys)
    return result
80+
81+
def write_worksheet_value(worksheet, r, c, v, f):
    """Write one metadata value into a worksheet cell according to its type.

    Args:
        worksheet: object providing ``write_string(row, col, text)`` and
            ``write_number(row, col, number)``.
        r: zero-based row index.
        c: zero-based column index.
        v: the value to write; ``None`` is treated as the empty string.
        f: metadata type name.  ``"float"``, ``"coordinate"`` and ``"int"``
            are written as numbers; ``"text"``, ``"ontology"``,
            ``"select"``, ``"timezone"``, ``"date"`` and ``"time"`` are
            written as strings; any other type is written as a string after
            printing a warning.

    Empty values are always written as an empty string.  A value declared
    numeric that cannot be converted to a finite float is written as text
    (with a warning) instead of aborting the export.
    """
    numeric_formats = ("float", "coordinate", "int")
    string_formats = ("text", "ontology", "select", "timezone", "date", "time")
    text_types = (str, type(u""))  # also covers ``unicode`` on Python 2

    if v is None:
        v = ""

    if f in numeric_formats and v != "":
        try:
            number = float(v)
        except (TypeError, ValueError):
            number = None
        # NaN (number != number) and +/-inf cannot be stored as a plain
        # numeric cell, so they take the text path as well.
        if number is not None and number == number and abs(number) != float("inf"):
            worksheet.write_number(r, c, number)
            return
        print("warning, non-numeric value for format ", f)
    elif f not in string_formats and v != "":
        print("warning, falllback for format ", f)

    # write_string needs text; stringify anything else (e.g. JSON numbers).
    worksheet.write_string(r, c, v if isinstance(v, text_types) else str(v))
    return
95+
96+
def _write_field(sheet, row, col, key, field):
    """Write a field's header (rows 0-1) and its typed value at (row, col)."""
    sheet.write_string(0, col, key)
    sheet.write_string(1, col, field["definition"])
    write_worksheet_value(sheet, row, col, field["value"], field["type"])


def _write_records(sheet, keys, records):
    """Write one row per record (a key -> field dict), starting at row 2.

    Every key owns a fixed column (its position in *keys*), so a record that
    lacks a key leaves that cell empty instead of shifting later values under
    the wrong header or overwriting an existing header definition.
    """
    for row, data in enumerate(records, 2):
        for col, key in enumerate(keys):
            if key in data:
                _write_field(sheet, row, col, key, data[key])


def main(args):
    """Export the metadata of one MG-RAST project to ``<project>-export.xlsx``.

    The workbook gets a ``README`` sheet, a ``project`` sheet, a ``sample``
    sheet (one row per sample), a ``library <type>`` sheet (one row per
    library) and one ``ep <type>`` sheet per environmental package type.
    On the data sheets row 0 holds the key names, row 1 their definitions
    and the values start at row 2.

    Args:
        args: the command line (``sys.argv``).  NOTE(review): it is not
            consulted -- ``parse_args()`` reads ``sys.argv`` on its own.

    Returns:
        1 when no valid ``--project mgp...`` id was given, 0 on success.
    """
    ArgumentParser.format_description = lambda self, formatter: self.description
    ArgumentParser.format_epilog = lambda self, formatter: self.epilog
    parser = ArgumentParser(usage='', description=prehelp%VERSION, epilog=posthelp%AUTH_LIST)
    parser.add_argument("--project", dest="project", default=None, help="project ID")

    # get inputs
    opts = parser.parse_args()
    if not opts.project or not opts.project.startswith("mgp"):
        sys.stderr.write("ERROR: a project id is required\n")
        return 1
    project = opts.project

    # get auth
    # NOTE(review): the token is obtained but not passed to the request
    # below -- confirm whether obj_from_url should receive it.
    token = get_auth_token(opts)

    # export metadata
    outfile = project + "-export.xlsx"
    metadata = obj_from_url("http://api.mg-rast.org/metadata/export/{project}?verbosity=full".format(project=project))
    samples = metadata["samples"]

    workbook = xlsxwriter.Workbook(outfile)
    print("Creating", outfile)

    # README sheet: currently just the numbers 0-9 in the first column.
    readme_sheet = workbook.add_worksheet("README")
    for i in range(10):
        readme_sheet.write_number(i, 0, i)

    # Project sheet: a single value row; values are written as plain text.
    project_sheet = workbook.add_worksheet("project")
    project_data = metadata["data"]
    for col, key in enumerate(get_project_keys(metadata)):
        field = project_data.get(key)
        if field is None:
            continue
        project_sheet.write_string(0, col, key)
        project_sheet.write_string(1, col, field["definition"])
        project_sheet.write_string(2, col, field["value"])

    # Sample sheet: one row per sample.
    sample_sheet = workbook.add_worksheet("sample")
    if samples:
        _write_records(sample_sheet, get_sample_keys(metadata),
                       [sample["data"] for sample in samples])

    # Library sheet: one row per library.  The sheet is named after the
    # investigation type of the first library, when there is one.
    libraries = [lib for sample in samples for lib in sample["libraries"]]
    library_type = ""
    if libraries:
        library_type = libraries[0]["data"].get("investigation_type", {}).get("value", "")
    library_sheet = workbook.add_worksheet(
        "library " + library_type if library_type else "library")
    if libraries:
        _write_records(library_sheet, get_library_keys(metadata),
                       [lib["data"] for lib in libraries])

    # Environmental package sheets: one sheet per package type.  Only the
    # samples that actually carry a package are handed to the key helpers.
    ep_samples = [sample for sample in samples if sample.get("envPackage")]
    ep_meta = {"samples": ep_samples}
    eps = get_eps(ep_meta)
    print("eps", " ".join(eps))
    epkeys = get_ep_keys(ep_meta, eps)
    for ep in eps:
        ep_sheet = workbook.add_worksheet("ep " + ep)
        records = [sample["envPackage"]["data"] for sample in ep_samples
                   if sample["envPackage"]["type"] == ep]
        _write_records(ep_sheet, epkeys[ep], records)

    workbook.close()
    return 0
204+
205+
if __name__ == "__main__":
    # Run the exporter on the raw command line and use main()'s result as
    # the process exit status.
    exit_status = main(sys.argv)
    sys.exit(exit_status)

0 commit comments

Comments
 (0)