Python_repository/Grapgh Theory/PageRank.py at master · YoTro/Python_repository · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import numpy as np
import heapq
from scipy.sparse import csc_matrix


def pageRank(G, s=.85, maxerr=.0001):
    """
    Computes the pagerank for each of the n states
    Parameters
    $$R = ( d M + \frac { 1 - d } { n } E ) R = A R$$
    ----------
    G: matrix representing state transitions
       Gij is a binary value representing a transition from state i to j.
    s: probability of following a transition. 1-s probability of teleporting
       to another state.
    maxerr: if the sum of pageranks between iterations is bellow this we will
            have converged.
    """
    n = G.shape[0]

    # compressed sparse G-matrix according to column
    A = csc_matrix(G, dtype=np.float)
    #Retrun one dimensional array that the count of every rows' nonzero numbers
    rsums = np.array(A.sum(1))[:, 0]
    #return the indexs of nonzero which include (row,col)
    ri, ci = A.nonzero()
    #probability l(pij)
    A.data /= rsums[ri]
    # bool array of sink states
    sink = rsums == 0
    #initialize r
    ro, r = np.zeros(n), np.ones(n)
    # Compute pagerank r with Power iteration until converge
    while np.sum(np.abs(r - ro)) > maxerr:
        #initialize ro
        ro = r.copy()
        # calculate each pagerank at a time
        for i in range(n):
            # inlinks of state i
            Ai = np.array(A[:, i].todense())[:, 0]
            # account for sink states
            Di = sink / float(n)
            # account for teleportation to state i
            Ei = np.ones(n) / float(n)

            r[i] = ro.dot(Ai * s + Di * s + Ei * (1 - s))
    # return normalized pagerank
    return r / float(sum(r))

if __name__ == '__main__':
    #adjacency matrix
    G = np.array([[1,0,1,0,0,0,0],
                  [0,1,0,0,0,0,0],
                  [0,0,1,1,0,0,0],
                  [0,0,0,0,1,0,0],
                  [1,0,0,0,0,0,1],
                  [0,0,0,0,0,1,0],
                  [0,0,0,1,0,0,1]])
    print(pageRank(G,s=.85))