Skip to content

Commit ed61176

Browse files
authored
Merge pull request #206 from wltrimbl/master
Client maintenance
2 parents 0d68d63 + e52e7c7 commit ed61176

11 files changed

Lines changed: 118 additions & 69 deletions

examples/python/abundance_matrix.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -55,23 +55,23 @@
5555
else:
5656
print("#"+ URI, file=sys.stdout)
5757

58-
# unpack and display the data table
59-
cols = [x["id"] for x in jsonstructure["columns"]]
60-
rows = [x["id"] for x in jsonstructure["rows"]]
61-
matrixtype = jsonstructure["type"]
62-
58+
matrixtype = jsonstructure["data"]["matrix_type"]
6359
if matrixtype == "sparse":
6460
data = sparse_to_dense(jsonstructure["data"], len(rows), len(cols))
6561
else:
6662
data = jsonstructure["data"]
63+
# unpack and display the data table
64+
cols = [x["id"] for x in data["columns"]]
65+
rows = [x["id"] for x in data["rows"]]
66+
6767

6868
if DEBUG:
6969
print(jsonstructure)
7070
print("COLS", cols)
7171
print("ROWS", rows)
7272
print("TYPE", matrixtype)
7373
print("DATA", data)
74-
h = data
74+
h = data["data"]
7575

7676
sys.stdout.write("Taxon\t")
7777
for j in range(0, len(cols)):

examples/python/annotation_table.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
# rows = [x["id"] for x in jsondata["rows"]]
4848

4949
data = jsondata # ["data"]
50+
data = jsondata["data"]
5051

5152
if DEBUG:
5253
print(jsonstructure)

examples/python/list_all_mg.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from __future__ import print_function
44
from __future__ import unicode_literals
55
import sys
6+
import time
67

78
from mglib import get_auth_token, obj_from_url, API_URL, urlencode
89

@@ -18,7 +19,7 @@ def printlist(js):
1819
project_id = item["project_id"]
1920
project_name = item["project_name"]
2021
except KeyError:
21-
sys.stderr.write(repr(item))
22+
sys.stderr.write(repr(item) + "\n")
2223
sys.stdout.write(("\t".join([item["metagenome_id"],
2324
# str(len(item.keys())),
2425
repr(public), item["created_on"],
@@ -32,7 +33,7 @@ def printlist(js):
3233
limit = 1000 # initial call
3334

3435
# construct API call
35-
36+
# public = 0 means "don't show public metagenomes"
3637
parameters = {"limit": limit, "order":"created_on", "direction": "asc", "public": "1"}
3738
API_URL= "https://api.mg-rast.org/"
3839

@@ -45,12 +46,17 @@ def printlist(js):
4546
total_count = int(jsonstructure["total_count"])
4647
sys.stderr.write("Total number of records: {:d}\n".format(total_count))
4748

48-
for i in range(0, int(total_count / limit) +1):
49-
sys.stderr.write("Page {:d}\t".format(i))
49+
for i in range(0, int(total_count / limit) +2):
50+
# sys.stderr.write("Page {:d}\t".format(i))
51+
sys.stderr.write("Page {:d}\t{}\n".format(i, base_url))
5052
jsonstructure = obj_from_url(base_url, auth=key)
5153
printlist(jsonstructure)
52-
try:
54+
time.sleep(1)
55+
if "next" in jsonstructure.keys():
5356
next_url = jsonstructure["next"]
5457
base_url = next_url
55-
except KeyError:
58+
continue
59+
else:
60+
sys.stderr.write("No next, page {} url {} \n".format(i, base_url))
61+
sys.stderr.write(repr(jsonstructure))
5662
break

mglib/mglib.py

Lines changed: 46 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -72,13 +72,24 @@ def body_from_url(url, accept, auth=None, data=None, debug=False, method=None):
7272

7373
# return python struct from JSON output of MG-RAST or Shock API
7474
def obj_from_url(url, auth=None, data=None, debug=False, method=None):
75-
result = body_from_url(url, 'application/json', auth=auth, data=data, debug=debug, method=method)
76-
if result.headers["content-type"] == "application/x-download":
77-
return(result.read())
75+
try:
76+
result = body_from_url(url, 'application/json', auth=auth, data=data, debug=debug, method=method)
77+
read = result.read()
78+
except: # try one more time ConnectionResetError is incompatible with python2
79+
result = body_from_url(url, 'application/json', auth=auth, data=data, debug=debug, method=method)
80+
read = result.read()
81+
if result.headers["content-type"] == "application/x-download" or result.headers["content-type"] == "application/octet-stream":
82+
return(read) # Watch out!
83+
if result.headers["content-type"][0:9] == "text/html": # json decoder won't work
84+
return(read) # Watch out!
85+
if result.headers["content-type"] == "application/json": # If header is set, this should work
86+
data = read.decode("utf8")
87+
obj = json.loads(data)
7888
else:
79-
obj = json.loads(result.read().decode("utf8"))
89+
data = read.decode("utf8")
90+
obj = json.loads(data)
8091
if obj is None:
81-
sys.stderr.write("ERROR: return structure not valid json format\n")
92+
sys.stderr.write("ERROR: return structure not valid json format\n" + repr(data))
8293
sys.exit(1)
8394
if len(list(obj.keys())) == 0:
8495
if debug:
@@ -121,33 +132,44 @@ def async_rest_api(url, auth=None, data=None, debug=False, delay=60):
121132
except:
122133
parameters = {"asynchronous": 1}
123134
submit = obj_from_url(url, auth=auth, data=data, debug=debug)
124-
# If "status" is nor present, or if "status" is somehow not "submitted"
135+
# If "status" is nor present, or if "status" is somehow not "submitted"
125136
# assume this is not an asynchronous call and it's done.
126-
if ('status' in submit) and (submit['status'] != 'submitted') and ('data' in submit):
127-
return submit['data']
137+
if type(submit) == bytes: # can't decode
138+
try:
139+
return decode("utf-8", submit)
140+
except:
141+
return submit
142+
if ('status' in submit) and (submit['status'] != 'submitted') and (submit['status'] != "processing") and ('data' in submit):
143+
return submit
144+
if not ('url' in submit.keys()):
145+
return submit
128146
# if not (('status' in submit) and (submit['status'] == 'submitted') and ('url' in submit)):
129147
# return submit # No status, no url and no submitted
130148
result = obj_from_url(submit['url'], auth=auth, debug=debug)
131-
try:
132-
while result['status'] == 'submitted':
149+
if type(result) is bytes:
150+
return(result)
151+
if 'status' in result.keys():
152+
while result['status'] == 'submitted' or result['status'] == "processing":
133153
if debug:
134154
print("waiting %d seconds ..."%delay)
135155
time.sleep(delay)
136156
result = obj_from_url(submit['url'], auth=auth, debug=debug)
137-
except KeyError:
157+
if 'url' in result.keys() or 'next' in result.keys(): # does not need to wait
158+
return(result)
159+
try:
138160
print("Error in response to "+url, file=sys.stderr)
139-
print("Does not contain 'status' field, likely API syntax error", file=sys.stderr)
161+
print("Does not contain 'status' or 'next' field, likely API syntax error", file=sys.stderr)
140162
print(json.dumps(result), file=sys.stderr)
141163
sys.exit(1)
142164
except TypeError: # result isn't json, return it anyway
143165
return(result.decode("utf8"))
144-
try:
166+
try:
145167
if 'ERROR' in result['data']:
146168
sys.stderr.write("ERROR: %s\n" %result['data']['ERROR'])
147169
print(json.dumps(result), file=sys.stderr)
148170
sys.exit(1)
149171
except KeyError: # result doesn't have "data"
150-
return result
172+
return result
151173
return result['data']
152174

153175
# POST file to MG-RAST or Shock
@@ -214,14 +236,15 @@ def sparse_to_dense(sMatrix, rmax, cmax):
214236
# transform BIOM format to tabbed table
215237
# returns max value of matrix
216238
def biom_to_tab(biom, hdl, rows=None, use_id=True, col_name=False):
239+
assert 'matrix_type' in biom.keys(), repr(biom)
217240
if biom['matrix_type'] == 'sparse':
218241
matrix = sparse_to_dense(biom['data'], biom['shape'][0], biom['shape'][1])
219242
else:
220243
matrix = biom['data']
221244
if col_name:
222-
hdl.write( "\t%s\n" %"\t".join([c['name'] for c in biom['columns']]) )
245+
hdl.write("\t%s\n" %"\t".join([c['name'] for c in biom['columns']]))
223246
else:
224-
hdl.write( "\t%s\n" %"\t".join([c['id'] for c in biom['columns']]) )
247+
hdl.write("\t%s\n" %"\t".join([c['id'] for c in biom['columns']]))
225248
rowmax = []
226249
for i, row in enumerate(matrix):
227250
name = biom['rows'][i]['id']
@@ -231,7 +254,7 @@ def biom_to_tab(biom, hdl, rows=None, use_id=True, col_name=False):
231254
continue
232255
try:
233256
rowmax.append(max(row))
234-
hdl.write( "%s\t%s\n" %(name, "\t".join(map(str, row))) )
257+
hdl.write("%s\t%s\n" %(name, "\t".join(map(str, row))))
235258
except:
236259
try:
237260
hdl.close()
@@ -262,6 +285,7 @@ def profile_to_matrix(p):
262285
p['matrix_element_type'] = 'int'
263286
p['matrix_element_value'] = 'abundance'
264287
p['date'] = time.strftime("%Y-%m-%d %H:%M:%S")
288+
assert 'matrix_type' in p.keys(), repr(p)
265289
if p['matrix_type'] == 'sparse':
266290
p['data'] = sparse_to_dense(p['data'], p['shape'][0], p['shape'][1])
267291
if trim:
@@ -301,6 +325,7 @@ def merge_biom(b1, b2):
301325
"id": b1['id']+'_'+b2['id'],
302326
"type": b1['type'] }
303327
# make sure we are dense
328+
assert 'matrix_type' in b2.keys(), repr(b2)
304329
if b2['matrix_type'] == 'sparse':
305330
b2['data'] = sparse_to_dense(b2['data'], b2['shape'][0], b2['shape'][1])
306331
# get lists of ids
@@ -352,14 +377,15 @@ def biom_to_matrix(biom, col_name=False, sig_stats=False):
352377
except KeyError:
353378
rows = [r['id'] for r in biom['rows']]
354379
# rows = [";".join(r['metadata']['hierarchy']) for r in biom['rows']]
380+
assert "matrix_type" in biom.keys(), repr(biom)
355381
if biom['matrix_type'] == 'sparse':
356382
data = sparse_to_dense(biom['data'], len(rows), len(cols))
357383
else:
358384
data = biom['data']
359385
if sig_stats and ('significance' in biom['rows'][0]['metadata']) and (len(biom['rows'][0]['metadata']['significance']) > 0):
360-
cols.extend( [s[0] for s in biom['rows'][0]['metadata']['significance']] )
386+
cols.extend([s[0] for s in biom['rows'][0]['metadata']['significance']] )
361387
for i, r in enumerate(biom['rows']):
362-
data[i].extend( [s[1] for s in r['metadata']['significance']] )
388+
data[i].extend([s[1] for s in r['metadata']['significance']] )
363389
return rows, cols, data
364390

365391
# transform tabbed table to matrix in json format
@@ -382,7 +408,7 @@ def sub_matrix(matrix, ncols):
382408
return matrix
383409
sub = list()
384410
for row in matrix:
385-
sub.append( row[:ncols] )
411+
sub.append(row[:ncols] )
386412
return sub
387413

388414
# return KBase id for MG-RAST id

scripts/mg-abundant-functions.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,8 +94,10 @@ def main(args):
9494
data = obj_from_url(url)
9595
level = 'level4' if opts.level == 'function' else opts.level
9696
sub_ann = set(map(lambda x: x[level], data['data']))
97-
97+
biomorig = biom
98+
biom = biomorig["data"]
9899
# sort data
100+
assert "matrix_type" in biom.keys(), repr(biom)
99101
if biom["matrix_type"] == "sparse":
100102
for d in sorted(biom['data'], key=itemgetter(2), reverse=True):
101103
name = biom['rows'][d[0]]['id'] # if opts.source != 'Subsystems' else biom['rows'][d[0]]['metadata']['ontology'][-1]

scripts/mg-abundant-taxa.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,8 @@ def main(args):
9292
url = opts.url+'/m5nr/taxonomy?'+urlencode(params, True)
9393
data = obj_from_url(url)
9494
sub_ann = set(map(lambda x: x[opts.level], data['data']))
95+
biomorig = biom
96+
biom = biomorig["data"]
9597
if biom['matrix_type'] == "dense":
9698
data = biom['data']
9799
else:

scripts/mg-compare-functions.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ def main(args):
166166
if opts.format == 'biom':
167167
out_hdl.write(json.dumps(biom)+"\n")
168168
else:
169-
biom_to_tab(biom, out_hdl, rows=sub_ann)
169+
biom_to_tab(biom["data"], out_hdl, rows=sub_ann)
170170

171171
out_hdl.close()
172172
return 0

scripts/mg-download.py

Lines changed: 42 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,13 @@
3838

3939
# download a file
4040
def file_download(auth, info, dirpath="."):
41-
fhandle = open(os.path.join(dirpath, info['file_name']), 'w')
4241
sys.stdout.write("Downloading %s for %s ... "%(info['file_name'], info['id']))
43-
file_from_url(info['url'], fhandle, auth=auth)
44-
fhandle.close()
42+
if "url" in info.keys(): # all is well
43+
fhandle = open(os.path.join(dirpath, info['file_name']), 'w')
44+
file_from_url(info['url'], fhandle, auth=auth)
45+
fhandle.close()
46+
else: # Don't open empty file if download doesn't have url
47+
sys.stderr.write("WARNING Download info does not contain url. Possibly datasets pre- human screening?\n" + repr(info)+"\n")
4548
sys.stdout.write("Done\n")
4649

4750
def main(args):
@@ -54,42 +57,47 @@ def main(args):
5457
parser.add_argument("--token", dest="token", default=None, help="OAuth token")
5558
parser.add_argument("--project", dest="project", default=None, help="project ID")
5659
parser.add_argument("--metagenome", dest="metagenome", default=None, help="metagenome ID")
57-
parser.add_argument("--file", dest="file", default=None, help="file ID for given project or metagenome")
60+
parser.add_argument("--file", dest="file", default="299.1", help="file ID for given project or metagenome")
5861
parser.add_argument("--dir", dest="dir", default=".", help="directory to do downloads")
5962
parser.add_argument("--list", dest="list", action="store_true", default=False, help="list files and their info for given ID")
60-
63+
6164
# get inputs
6265
opts = parser.parse_args()
63-
if not (opts.project or opts.metagenome):
66+
PROJECT = opts.project
67+
DOWNDIR = opts.dir
68+
METAGENOME = opts.metagenome
69+
LIST = opts.list
70+
FILE = opts.file
71+
URL = opts.url
72+
73+
if not (PROJECT or METAGENOME):
6474
sys.stderr.write("ERROR: a project or metagenome id is required\n")
6575
return 1
66-
if not os.path.isdir(opts.dir):
67-
sys.stderr.write("ERROR: dir '%s' does not exist\n"%opts.dir)
76+
if not os.path.isdir(DOWNDIR):
77+
sys.stderr.write("ERROR: dir '%s' does not exist\n"%DOWNDIR)
6878
return 1
69-
downdir = opts.dir
70-
79+
7180
# get auth
7281
token = get_auth_token(opts)
73-
82+
7483
# get metagenome list
7584
mgs = []
76-
if opts.project:
77-
url = opts.url+'/project/'+opts.project+'?verbosity=full'
85+
if PROJECT:
86+
url = URL +'/project/'+PROJECT+'?verbosity=full'
7887
data = obj_from_url(url, auth=token)
7988
for mg in data['metagenomes']:
8089
mgs.append(mg["metagenome_id"])
81-
elif opts.metagenome:
82-
mgs.append(opts.metagenome)
83-
90+
elif METAGENOME:
91+
mgs.append(METAGENOME)
8492
# get file lists
8593
all_files = {}
8694
for mg in mgs:
87-
url = opts.url+'/download/'+mg
95+
url = URL + '/download/' + mg
8896
data = obj_from_url(url, auth=token)
8997
all_files[mg] = data['data']
90-
98+
9199
# just list
92-
if opts.list:
100+
if LIST:
93101
pt = PrettyTable(["Metagenome", "File Name", "File ID", "Checksum", "Byte Size"])
94102
for mg, files in all_files.items():
95103
for f in files:
@@ -99,27 +107,31 @@ def main(args):
99107
pt.align['Byte Size'] = "r"
100108
print(pt)
101109
return 0
102-
110+
103111
# download all in dirs by ID
104-
if opts.project:
105-
downdir = os.path.join(downdir, opts.project)
106-
if not os.path.isdir(downdir):
107-
os.mkdir(downdir)
112+
if PROJECT:
113+
DOWNDIR = os.path.join(DOWNDIR, PROJECT)
114+
if not os.path.isdir(DOWNDIR):
115+
os.mkdir(DOWNDIR)
108116
for mg, files in all_files.items():
109-
mgdir = os.path.join(downdir, mg)
117+
mgdir = os.path.join(DOWNDIR, mg)
110118
if not os.path.isdir(mgdir):
111119
os.mkdir(mgdir)
120+
filecount = 0
112121
for f in files:
113-
if opts.file:
114-
if f['file_id'] == opts.file:
122+
if FILE:
123+
if f['file_id'] == FILE:
124+
filecount += 1
115125
file_download(token, f, dirpath=mgdir)
116-
elif f['file_name'] == opts.file:
126+
elif f['file_name'] == FILE:
127+
filecount += 1
117128
file_download(token, f, dirpath=mgdir)
118129
else:
119130
file_download(token, f, dirpath=mgdir)
120-
131+
filecount += 1
132+
if filecount == 0:
133+
sys.exit("Didn't find file number " + FILE)
121134
return 0
122135

123-
124136
if __name__ == "__main__":
125137
sys.exit(main(sys.argv))

0 commit comments

Comments
 (0)