Original PageRank paper: Page, Lawrence and Brin, Sergey and Motwani, Rajeev and Winograd, Terry (1999) The PageRank Citation Ranking: Bringing Order to the Web. Technical Report. Stanford InfoLab.
from pagerank import read_graph
counts, out_degree = read_graph("tiny.txt")
counts
out_degree
The pseudocode for the rest of the code is:
transition_matrix = <compute transition matrix>
times_visited = <list of size N>
for i in range(num_steps):
page = <take a step with the random surfer>
times_visited[page] += 1
for c in times_visited:
print c / num_steps
def compute_transition(counts, out_degree):
n = len(out_degree)
# Create an empty n x n "matrix" (using lists of lists)
transition_matrix = []
for i in range(n):
transition_matrix.append([0]*n)
for i in range(n):
for j in range(n):
transition_matrix[i][j] = 0.0 # Instead of 0.0 we need to compute a probability
return transition_matrix
compute_transition(counts, out_degree)
n = 5
i = 1
j = 2
# 90% of the times, the user goes to page "j" with a probability
# proportional to the number of links from "i" to "j".
# 40% of the outbound links in page 1 go to page 2
p_followlink = 0.9 * counts[i][j]/out_degree[i] # i.e., 0.9 * 2/5 = 0.36
# 10% of the times, we just randomly pick any page
p_any = 0.1 * (1/n)
# The total probability is the sum of the two
p = p_followlink + p_any
p
def compute_transition(counts, out_degree):
n = len(out_degree)
# Create an empty n x n "matrix" (using lists of lists)
transition_matrix = []
for i in range(n):
transition_matrix.append([0]*n)
for i in range(n):
for j in range(n):
transition_matrix[i][j] = (0.9 * counts[i][j]/out_degree[i]) + (0.1 * 1/n)
return transition_matrix
compute_transition(counts, out_degree)
num_steps = 1000
transition_matrix = compute_transition(counts, out_degree)
n = len(transition_matrix)
times_visited = [0]*n
page = 0
for t in range(num_steps):
page = 0 # Instead of 0, we need to figure out the result of a single step by the random surfer
times_visited[page] += 1
print(times_visited)
transition_matrix[4]
probabilities = transition_matrix[4]
# Let's hardcode 0.7. This should choose page 2
r = 0.7
psum = 0.0
for j in range(n):
psum += probabilities[j]
if psum >= r:
print(j)
break
import random
probabilities = transition_matrix[4]
r = random.uniform(0.0, 1.0)
psum = 0.0
for j in range(n):
psum += probabilities[j]
if psum >= r:
print(j)
break
def make_one_move(transition_matrix, page):
n = len(transition_matrix)
r = random.uniform(0.0, 1.0)
psum = 0.0
for j in range(n):
psum += transition_matrix[page][j]
if psum >= r:
return j
# If we get to the end of the loop without returning, we return
# the last value.
return n
probs = []
for i in range(1000):
probs.append(make_one_move(transition_matrix, 4))
for i in range(n):
print(i, probs.count(i))
num_steps = 1000
transition_matrix = compute_transition(times_visited, out_degree)
n = len(transition_matrix)
times_visited = [0]*n
page = 0
for t in range(num_steps):
page = make_one_move(transition_matrix, page)
times_visited[page] += 1
for i, count in enumerate(times_visited):
print(i, count)
def random_surfer(transition_matrix, num_steps):
n = len(transition_matrix)
page = 0
times_visited = [0]*n
for t in range(num_steps):
page = make_one_move(transition_matrix, page)
times_visited[page] += 1
for i, count in enumerate(times_visited):
v = count / num_steps
print(i, v)
random_surfer(transition_matrix, 1000)
from pagerank import read_graph
counts, out_degree = read_graph("tiny.txt")
transition_matrix = compute_transition(counts, out_degree)
random_surfer(transition_matrix, 1000)
counts, out_degree = read_graph("medium.txt")
transition_matrix = compute_transition(counts, out_degree)
random_surfer(transition_matrix, 1000)