Skip to content

Commit c0e24ac

Browse files
Fix PageRank normalization and dangling node handling
1 parent e3b01ec commit c0e24ac

2 files changed

Lines changed: 63 additions & 15 deletions

File tree

graphs/page_rank.py

Lines changed: 31 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -31,22 +31,37 @@ def __repr__(self):
3131
return f"<node={self.name} inbound={self.inbound} outbound={self.outbound}>"
3232

3333

34-
def page_rank(nodes, limit=None, d=0.85, tol=1e-8, max_iter=100):
    """Compute PageRank scores for ``nodes`` by power iteration.

    Every node starts with a score of ``1 / n``. On each iteration a node
    receives ``(1 - d) / n`` plus ``d`` times the sum of (a) the score of each
    inbound neighbour divided by that neighbour's out-degree and (b) an equal
    ``1 / n`` share of the total score held by dangling nodes (nodes with no
    outbound links). For a graph whose inbound/outbound lists are mutually
    consistent, the scores therefore keep summing to 1.

    Args:
        nodes: Sequence of node objects exposing ``name``, ``inbound`` (an
            iterable of names of nodes linking to this node) and ``outbound``
            (a sized collection of names this node links to).
        limit: Legacy iteration cap. When not ``None`` it overrides
            ``max_iter``.
        d: Damping factor.
        tol: Convergence threshold on the summed absolute change of all
            scores between two consecutive iterations.
        max_iter: Maximum number of iterations to run.

    Returns:
        A dict mapping node name to score. An empty ``nodes`` yields ``{}``.

    Raises:
        KeyError: If a node lists an inbound name that is not the name of any
            node in ``nodes``.
        ZeroDivisionError: If a node lists an inbound name whose node has an
            empty ``outbound`` collection (an inconsistent graph).
    """
    if not nodes:
        return {}

    # Keep the old ``limit`` keyword working as an alias for ``max_iter``.
    if limit is not None:
        max_iter = limit

    n = len(nodes)
    ranks = {node.name: 1.0 / n for node in nodes}
    outbounds = {node.name: len(node.outbound) for node in nodes}

    # The set of dangling nodes never changes, so find it once instead of
    # rescanning every node on every iteration.
    dangling_names = [name for name, degree in outbounds.items() if degree == 0]

    for _ in range(max_iter):
        # Score held by dangling nodes is spread evenly over all nodes so
        # that it does not leak out of the system.
        dangling_share = sum(ranks[name] for name in dangling_names) / n

        new_ranks = {}
        for node in nodes:
            inbound_rank = sum(
                ranks[inbound_node] / outbounds[inbound_node]
                for inbound_node in node.inbound
            )
            new_ranks[node.name] = (1 - d) / n + d * (inbound_rank + dangling_share)

        # Stop early once the scores have (numerically) stopped moving.
        delta = sum(abs(new_ranks[name] - ranks[name]) for name in ranks)
        if delta < tol:
            return new_ranks
        ranks = new_ranks

    return ranks
5065

5166

5267
def main():
@@ -64,7 +79,8 @@ def main():
6479
for node in nodes:
6580
print(node)
6681

67-
page_rank(nodes)
82+
print("======= Page Rank =======")
83+
print(page_rank(nodes))
6884

6985

7086
if __name__ == "__main__":

graphs/tests/test_page_rank.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
import math
2+
3+
from graphs.page_rank import Node, page_rank
4+
5+
6+
def add_edge(nodes, source, destination):
    """Record a directed edge ``nodes[source] -> nodes[destination]``.

    The edge is registered on both endpoints: the source's name is added to
    the destination's inbound links and the destination's name is added to
    the source's outbound links.

    Args:
        nodes: Indexable collection of node objects providing ``name``,
            ``add_inbound`` and ``add_outbound``.
        source: Index in ``nodes`` of the node the edge starts from.
        destination: Index in ``nodes`` of the node the edge points to.
    """
    nodes[destination].add_inbound(nodes[source].name)
    nodes[source].add_outbound(nodes[destination].name)
9+
10+
11+
def test_page_rank_scores_are_normalized():
    """Scores on a graph without dangling nodes must sum to 1."""
    nodes = [Node("A"), Node("B"), Node("C")]
    # A -> B, A -> C, B -> C, C -> A: every node has at least one outbound link.
    add_edge(nodes, 0, 1)
    add_edge(nodes, 0, 2)
    add_edge(nodes, 1, 2)
    add_edge(nodes, 2, 0)

    ranks = page_rank(nodes, max_iter=100)

    assert math.isclose(sum(ranks.values()), 1.0, abs_tol=1e-8)
21+
22+
23+
def test_page_rank_handles_dangling_nodes():
    """A dangling node's score is redistributed instead of being lost."""
    nodes = [Node("A"), Node("B"), Node("C")]
    # Chain A -> B -> C; C has no outbound links and is therefore dangling.
    add_edge(nodes, 0, 1)
    add_edge(nodes, 1, 2)

    ranks = page_rank(nodes, max_iter=100)

    assert math.isclose(sum(ranks.values()), 1.0, abs_tol=1e-8)
    # Closed-form stationary value for d = 0.85: 2.5725 / 5.4225.
    assert math.isclose(ranks["C"], 0.474412, rel_tol=1e-5)
    assert ranks["C"] > ranks["B"] > ranks["A"]

0 commit comments

Comments
 (0)