Wikipedia article on PageRank

Original PageRank paper: Page, Lawrence and Brin, Sergey and Motwani, Rajeev and Winograd, Terry (1999) The PageRank Citation Ranking: Bringing Order to the Web. Technical Report. Stanford InfoLab.

In [43]:
from pagerank import read_graph


In [44]:
counts

Out[44]:
[[0, 1, 0, 0, 0],
[0, 0, 2, 2, 1],
[0, 0, 0, 1, 0],
[1, 0, 0, 0, 0],
[1, 0, 1, 0, 0]]
In [45]:
out_degree

Out[45]:
[1, 5, 1, 1, 2]

The pseudocode for the rest of the code is:

transition_matrix = <compute transition matrix>
times_visited = <list of size N>
for i in range(num_steps):
page = <take a step with the random surfer>
times_visited[page] += 1

for c in times_visited:
print c / num_steps

## Computing the transition matrix¶

In [46]:
def compute_transition(counts, out_degree):
n = len(out_degree)

# Create an empty n x n "matrix" (using lists of lists)
transition_matrix = []
for i in range(n):
transition_matrix.append([0]*n)

for i in range(n):
for j in range(n):
transition_matrix[i][j] = 0.0 # Instead of 0.0 we need to compute a probability

return transition_matrix

In [47]:
compute_transition(counts, out_degree)

Out[47]:
[[0.0, 0.0, 0.0, 0.0, 0.0],
[0.0, 0.0, 0.0, 0.0, 0.0],
[0.0, 0.0, 0.0, 0.0, 0.0],
[0.0, 0.0, 0.0, 0.0, 0.0],
[0.0, 0.0, 0.0, 0.0, 0.0]]
In [48]:
n = 5
i = 1
j = 2

# 90% of the times, the user goes to page "j" with a probability
# proportional to the number of links from "i" to "j".
# 40% of the outbound links in page 1 go to page 2
p_followlink = 0.9 * counts[i][j]/out_degree[i]   # i.e., 0.9 * 2/5 = 0.36

# 10% of the times, we just randomly pick any page
p_any = 0.1 * (1/n)

# The total probability is the sum of the two

In [49]:
p

Out[49]:
0.38
In [50]:
def compute_transition(counts, out_degree):
n = len(out_degree)

# Create an empty n x n "matrix" (using lists of lists)
transition_matrix = []
for i in range(n):
transition_matrix.append([0]*n)

for i in range(n):
for j in range(n):
transition_matrix[i][j] = (0.9 * counts[i][j]/out_degree[i]) + (0.1 * 1/n)

return transition_matrix

In [51]:
compute_transition(counts, out_degree)

Out[51]:
[[0.02, 0.92, 0.02, 0.02, 0.02],
[0.02, 0.02, 0.38, 0.38, 0.19999999999999998],
[0.02, 0.02, 0.02, 0.92, 0.02],
[0.92, 0.02, 0.02, 0.02, 0.02],
[0.47000000000000003, 0.02, 0.47000000000000003, 0.02, 0.02]]

## Simulating the random surfer¶

In [52]:
num_steps = 1000

transition_matrix = compute_transition(counts, out_degree)
n = len(transition_matrix)
times_visited = [0]*n

page = 0
for t in range(num_steps):
page = 0 # Instead of 0, we need to figure out the result of a single step by the random surfer
times_visited[page] += 1

In [53]:
print(times_visited)

[1000, 0, 0, 0, 0]

In [54]:
transition_matrix[4]

Out[54]:
[0.47000000000000003, 0.02, 0.47000000000000003, 0.02, 0.02]
In [55]:
probabilities = transition_matrix[4]

# Let's hardcode 0.7. This should choose page 2
r = 0.7
psum = 0.0

for j in range(n):
psum += probabilities[j]
if psum >= r:
print(j)
break

2

In [56]:
import random

probabilities = transition_matrix[4]

r = random.uniform(0.0, 1.0)
psum = 0.0

for j in range(n):
psum += probabilities[j]
if psum >= r:
print(j)
break

0

In [57]:
def make_one_move(transition_matrix, page):
n = len(transition_matrix)
r = random.uniform(0.0, 1.0)
psum = 0.0

for j in range(n):
psum += transition_matrix[page][j]
if psum >= r:
return j

# If we get to the end of the loop without returning, we return
# the last value.
return n

In [58]:
probs = []
for i in range(1000):
probs.append(make_one_move(transition_matrix, 4))

In [59]:
for i in range(n):
print(i, probs.count(i))

0 473
1 16
2 471
3 18
4 22

In [60]:
num_steps = 1000

transition_matrix = compute_transition(times_visited, out_degree)
n = len(transition_matrix)
times_visited = [0]*n

page = 0
for t in range(num_steps):
page = make_one_move(transition_matrix, page)
times_visited[page] += 1

In [61]:
for i, count in enumerate(times_visited):
print(i, count)

0 274
1 266
2 162
3 238
4 60

In [73]:
def random_surfer(transition_matrix, num_steps):
n = len(transition_matrix)
page = 0
times_visited = [0]*n

for t in range(num_steps):
page = make_one_move(transition_matrix, page)
times_visited[page] += 1

for i, count in enumerate(times_visited):
v = count / num_steps
print(i, v)

In [74]:
random_surfer(transition_matrix, 1000)

0 0.268
1 0.264
2 0.161
3 0.236
4 0.071



## All together¶

In [76]:
from pagerank import read_graph

transition_matrix = compute_transition(counts, out_degree)
random_surfer(transition_matrix, 1000)

0 0.279
1 0.263
2 0.144
3 0.257
4 0.057


In [77]:
counts, out_degree = read_graph("medium.txt")
transition_matrix = compute_transition(counts, out_degree)
random_surfer(transition_matrix, 1000)

0 0.001
1 0.017
2 0.008
3 0.001
4 0.005
5 0.021
6 0.065
7 0.014
8 0.01
9 0.052
10 0.004
11 0.037
12 0.003
13 0.051
14 0.011
15 0.024
16 0.03
17 0.004
18 0.016
19 0.021
20 0.004
21 0.027
22 0.064
23 0.046
24 0.011
25 0.005
26 0.005
27 0.007
28 0.03
29 0.0
30 0.051
31 0.014
32 0.018
33 0.014
34 0.019
35 0.03
36 0.003
37 0.047
38 0.018
39 0.021
40 0.013
41 0.014
42 0.015
43 0.022
44 0.015
45 0.007
46 0.029
47 0.03
48 0.01
49 0.016