-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathbase.py
More file actions
85 lines (83 loc) · 2.07 KB
/
base.py
File metadata and controls
85 lines (83 loc) · 2.07 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
from Tkinter import *
from bs4 import BeautifulSoup
import HTMLParser
import urllib
import os
import re
from urlparse import urljoin
# Main window and its input widgets. The label captions are supplied at
# construction time; widgets are created in the order l1, l2, e1, e2, l.
root = Tk()
l1 = Label(root, text="url")
l2 = Label(root, text="path")
e1 = Entry(root)   # read by callback() as the start URL
e2 = Entry(root)   # read by callback() as the output directory
l = Label(root)    # caption-less label, packed between e2 and the button
def callback():
    """Crawl the page named in the ``e1`` entry and dump its assets.

    Reads the start URL from ``e1`` and the output directory from ``e2``,
    runs ``script()`` and ``css()`` on the start page, then on every
    distinct link of that page whose absolute URL begins with the start URL.

    Failures while processing an individual page are reported on stdout and
    do not stop the remaining pages from being processed.
    """
    url_input = e1.get()
    path = e2.get()
    print(url_input)
    print(path)

    response = urllib.urlopen(url_input)
    try:
        html = response.read()
    finally:
        response.close()
    soup = BeautifulSoup(html)

    # Start page. Both helpers take (url, path); calling them with the URL
    # alone raised TypeError, so the start page was never processed.
    try:
        script(url_input, path)
        css(url_input, path)
    except Exception as exc:
        # Best-effort: report and carry on, but let KeyboardInterrupt and
        # SystemExit through (a bare ``except:`` swallowed those too).
        print('none: %r' % (exc,))

    final_urls = set()
    for tag in soup('a'):
        href = tag.get('href')
        if href is None:
            # Anchor without a target: nothing to follow.
            continue
        url = urljoin(url_input, href)
        # Plain prefix test. Using the URL as a regular expression made
        # '.', '?', '+' etc. in it behave as metacharacters.
        if url.startswith(url_input):
            final_urls.add(url)
    # The start page has already been handled above.
    final_urls.discard(url_input)

    # Sorted so that the processing order is reproducible between runs.
    for url in sorted(final_urls):
        print(url)
        try:
            script(url, path)
            css(url, path)
            print("\n")
        except Exception as exc:
            print('none: %r' % (exc,))
def script(url, path):
    """Write the ``src`` of every ``<script>`` tag found at *url* to a file.

    The output file is ``<path>/<url without scheme>/SCRIPT.txt``; the
    directory is created when missing and an existing file is overwritten.
    One ``src`` value is written per line. ``<script>`` tags without a
    ``src`` attribute are skipped and "no source" is printed for each.

    Args:
        url: Address of the page to download.
        path: Base directory under which the per-URL folder is created.
    """
    # Local import so this function does not rely on a file-level import
    # that the module does not have.
    import io

    # Strip the scheme ("http://", "https://", ...) so that the rest of the
    # URL can be used as a relative directory name.
    folder = url.split('://', 1)[-1].lstrip('/')
    sub_folder = os.path.join(path, folder)
    if not os.path.exists(sub_folder):
        os.makedirs(sub_folder)
    complete_name = os.path.join(sub_folder, "SCRIPT.txt")

    response = urllib.urlopen(url)
    try:
        html = response.read()
    finally:
        response.close()
    soup = BeautifulSoup(html)

    # Mode 'w' already truncates; the context manager guarantees the file
    # is flushed and closed. UTF-8 keeps non-ASCII URLs from failing.
    with io.open(complete_name, 'w', encoding='utf-8') as fo:
        for tag in soup.find_all('script'):
            src = tag.get('src')
            if src is None:
                # Inline script: there is no external file to record.
                print("no source")
                continue
            fo.write(src + u"\n")
def css(url, path):
    """Write the ``href`` of every ``<link>`` tag found at *url* to a file.

    The output file is ``<path>/<url without scheme>/CSS.txt``; the
    directory is created when missing and an existing file is overwritten.
    One ``href`` value is written per line. ``<link>`` tags without an
    ``href`` attribute are skipped and "no source" is printed for each.

    NOTE: every ``<link>`` element is recorded, whatever its ``rel`` value,
    so the file may also list non-stylesheet links.

    Args:
        url: Address of the page to download.
        path: Base directory under which the per-URL folder is created.
    """
    # Local import so this function does not rely on a file-level import
    # that the module does not have.
    import io

    # Strip the scheme ("http://", "https://", ...) so that the rest of the
    # URL can be used as a relative directory name.
    folder = url.split('://', 1)[-1].lstrip('/')
    sub_folder = os.path.join(path, folder)
    if not os.path.exists(sub_folder):
        os.makedirs(sub_folder)
    complete_name = os.path.join(sub_folder, "CSS.txt")

    # Download before touching the output file, so that a failed request
    # does not leave an empty CSS.txt behind.
    response = urllib.urlopen(url)
    try:
        html = response.read()
    finally:
        response.close()
    soup = BeautifulSoup(html)

    # Mode 'w' already truncates; the context manager guarantees the file
    # is flushed and closed. UTF-8 keeps non-ASCII URLs from failing.
    with io.open(complete_name, 'w', encoding='utf-8') as fo:
        for tag in soup.find_all('link'):
            href = tag.get('href')
            if href is None:
                print("no source")
                continue
            fo.write(href + u"\n")
# Button that starts the crawl when pressed.
# NOTE(review): the caption "extrack" is presumably a typo for "extract".
b = Button(root, command=callback, text="extrack")

# Stack the widgets in the window: url label/entry, path label/entry,
# the caption-less label, then the button.
l1.pack()
e1.pack()
l2.pack()
e2.pack()
l.pack()
b.pack()

# Enter the Tk event loop.
b.mainloop()