-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathurl.py
More file actions
34 lines (25 loc) · 655 Bytes
/
url.py
File metadata and controls
34 lines (25 loc) · 655 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import json
import requests
from bs4 import BeautifulSoup
import re
from os.path import join
from urllib.parse import urljoin
import argparse
def build_parser():
    """Return the command-line parser for the link extractor.

    The single option ``--a`` carries the URL of the page to scan. It is
    required: without it there is nothing to fetch, and argparse reports a
    clean usage error instead of a traceback from ``requests``.
    """
    parser = argparse.ArgumentParser(description='arguments')
    parser.add_argument(
        "--a",
        required=True,
        help="URL of the page to scan, "
             "e.g. https://www.geeksforgeeks.org/data-structures/",
    )
    return parser


def fetch_html(url: str, timeout: float = 10.0) -> bytes:
    """Download *url* and return the raw response body.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up; without a
            timeout ``requests`` may block forever on an unresponsive host.

    Raises:
        requests.RequestException: On connection problems, timeouts, invalid
            URLs, or a 4xx/5xx HTTP status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on error statuses rather than scraping an error page.
    response.raise_for_status()
    return response.content


def extract_links(html, base_url: str) -> list:
    """Return the absolute target of every ``<a href=...>`` found in *html*.

    Relative hrefs are resolved against *base_url*. Document order is kept
    and duplicates are not removed.
    """
    soup = BeautifulSoup(html, 'html5lib')
    return [
        urljoin(base_url, anchor['href'])
        for anchor in soup.find_all('a', href=True)
    ]


def build_report(url: str, links) -> str:
    """Serialize the scan result as JSON: ``{"site": url, "links": [...]}``."""
    return json.dumps({"site": url, "links": list(links)})


def main(argv=None) -> None:
    """Fetch the page named by ``--a`` and print its links as JSON on stdout.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Raises:
        SystemExit: With an error message when the page cannot be fetched,
            or from argparse when the arguments are invalid.
    """
    args = build_parser().parse_args(argv)
    url = args.a
    try:
        html = fetch_html(url)
    except requests.RequestException as exc:
        # A string passed to SystemExit is printed to stderr, exit status 1.
        raise SystemExit("error: could not fetch {}: {}".format(url, exc))
    print(build_report(url, extract_links(html, url)))


# Only run when executed as a script so the helpers can be imported safely.
if __name__ == "__main__":
    main()