forked from dyang108/diningdata
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtestingxpath.py
More file actions
27 lines (25 loc) · 821 Bytes
/
testingxpath.py
File metadata and controls
27 lines (25 loc) · 821 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import os
import urllib
import ast
import re
from lxml import html
from pymongo import MongoClient
# Base URL of the label page; the zero-padded recipe number is appended to it.
LABEL_URL = "http://menus.tufts.edu/foodpro/label.asp?locationNum=09&RecNumAndPort="


def read_recipe_numbers(path):
    """Return the recipe numbers listed in the text file at *path*.

    Only the first whitespace-separated token of each line is used, and it
    is converted with ``int``.  Blank (or whitespace-only) lines are skipped.

    Raises:
        IOError/OSError: if the file cannot be opened.
        ValueError: if the first token of a non-blank line is not an integer.
    """
    numbers = []
    with open(os.path.expanduser(path)) as recipe_file:
        for line in recipe_file:
            fields = line.split()
            if fields:  # an empty split result means a blank line
                numbers.append(int(fields[0]))
    return numbers


def fetch_label_page(recipe_number):
    """Download and return the raw label page for *recipe_number*.

    The number is left-padded with zeros to six characters before being
    appended to ``LABEL_URL``.  Uses ``urllib.urlopen`` (the Python 2 API
    this script was written against).
    """
    index = str(recipe_number).zfill(6)
    page = urllib.urlopen(LABEL_URL + index)
    try:
        return page.read()
    finally:
        # Release the connection even when read() fails.
        page.close()


if __name__ == "__main__":
    for recipe_number in read_recipe_numbers("validrecipes.txt"):
        tree = html.fromstring(fetch_label_page(recipe_number))
        # xpath() returns a *list* of matching elements, not a single
        # element, so text_content() has to be called on each item.
        bodies = tree.xpath("/html/body")
        # Single-argument print(...) is equivalent to the print statement
        # on Python 2 and also parses on Python 3.
        print(bodies)
        for body in bodies:
            print(body.text_content())