-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcreateJson.py
More file actions
97 lines (64 loc) · 2.09 KB
/
createJson.py
File metadata and controls
97 lines (64 loc) · 2.09 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import json
import sqlite3
import sys
# Export the most-linked crawled pages and the links between them as a
# JavaScript object literal (./scripts/graph.js) for the graph viewer.

DB_PATH = 'spider_database.sqlite'
OUTPUT_PATH = "./scripts/graph.js"

# Each result row is (inbound_links, old_rank, new_rank, id, url).
NODE_QUERY = """
SELECT COUNT (from_page_id) AS inbound_links, old_rank, new_rank, id, url
FROM Pages JOIN Links ON Pages.id = Links.to_page_id
WHERE html IS NOT NULL AND ERROR IS NULL
GROUP BY id ORDER BY id, inbound_links
"""
LINK_QUERY = '''SELECT from_page_id, to_page_id FROM Links'''


def parse_node_count(text):
    """Return the requested node count, or None when *text* is unusable.

    A usable value is an integer of at least 2: with fewer nodes the rank
    range collapses and the ranks cannot be normalised.
    """
    try:
        count = int(text)
    except ValueError:
        return None
    return count if count >= 2 else None


def scale_rank(rank, min_rank, max_rank):
    """Map *rank* from [min_rank, max_rank] onto the viewer's size scale.

    The 0.01 offset keeps the lowest-ranked node from scaling to zero.
    Callers must ensure ``max_rank != min_rank``.
    """
    return 19 * ((rank - min_rank) / (max_rank - min_rank) + 0.01)


def render_graph_js(nodes, links, min_rank, max_rank):
    """Build the text of graph.js.

    nodes: rows shaped (inbound_links, old_rank, new_rank, id, url).
    links: iterable of (from_page_id, to_page_id) pairs; pairs that touch a
        page not present in *nodes* are dropped.
    min_rank, max_rank: bounds of new_rank over *nodes*, used for scaling.

    Returns the full ``const spiderJson = {...}`` source as a string.
    """
    index_of = {}  # page id -> position in the emitted "nodes" array
    node_entries = []
    for position, row in enumerate(nodes):
        inbound_links, _old_rank, new_rank, page_id, url = row
        scaled = scale_rank(new_rank, min_rank, max_rank)
        # json.dumps quotes and escapes the URL so that quotes, backslashes
        # or control characters cannot break the generated JavaScript.
        node_entries.append(
            '{{"weight" : {}, "rank" : {}, "id": {}, "url" : {}}}'.format(
                inbound_links, scaled, page_id, json.dumps(url)))
        index_of[page_id] = position

    # Links refer to nodes by array position, not by database id.
    link_entries = [
        '{{"source" : {}, "target" : {}}}'.format(
            index_of[source_id], index_of[target_id])
        for source_id, target_id in links
        if source_id in index_of and target_id in index_of
    ]

    return (
        'const spiderJson = { \n "nodes" : [\n '
        + ",\n".join(node_entries)
        + '],\n"links" : [\n'
        + ",\n".join(link_entries)
        + '\n]\n}'
    )


def main():
    """Prompt for a node count, read the database and write graph.js.

    Returns the process exit status: 0 on success, 1 when the input is
    invalid or the page ranks are not usable.
    """
    node_count = parse_node_count(
        input("Enter the number of nodes to visualize in the graph > "))
    if node_count is None:
        print("ATLEAST TWO NODES REQUIRED")
        return 1

    conn = sqlite3.connect(DB_PATH)
    try:
        cur = conn.cursor()
        cur.execute(NODE_QUERY)
        graph_nodes = cur.fetchmany(node_count)

        new_ranks = [row[2] for row in graph_nodes]
        # Identical (or missing) ranks mean the rank computation has not
        # produced a usable spread yet; scaling would divide by zero.
        if not new_ranks or max(new_ranks) == min(new_ranks):
            print("ERROR! Please run pagerank.py to calculate page ranks")
            return 1

        cur.execute(LINK_QUERY)
        graph_js = render_graph_js(
            graph_nodes, cur, min(new_ranks), max(new_ranks))
        cur.close()
    finally:
        conn.close()

    # Written only after everything was rendered, so a failure above never
    # leaves a truncated graph.js behind.
    with open(OUTPUT_PATH, "w", encoding="utf-8") as out_file:
        out_file.write(graph_js)

    print("Successfully wrote data to graph.js")
    print("Open view.html to view the pagerank graph")
    return 0


if __name__ == "__main__":
    sys.exit(main())