2
我參考PageRank - Wikipedia並用下面的公式代數計算PageRank,但我得到nx.pagerank_numpy
的不同結果。不正確的PageRank計算結果
例如(維基百科圖片),
我,
# 'A', 'B', 'C', 'D', 'E', 'F'
[[ 0.028]
[ 0.324]
[ 0.289]
[ 0.033]
[ 0.068]
[ 0.033]]
爲何結果不同?
這裏是源代碼。
import networkx as nx
import numpy as np
# Step 1: Build up a graph
G = build_graph_wikipedia_pagerank_example()
# Step 2: PageRank calculation
# Part 1: \mathbf {1} is the column vector of length N containing only ones.
N = len(G.nodes()) # N = 11
column_vector = np.ones((N, 1), dtype=np.int)
#print(column_vector)
# Part 2: Matrix M
# Adjacency matrix A
nodelist = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K'] # sorted(G.nodes())
A = nx.to_numpy_matrix(G, nodelist)
# K is the diagonal matrix with the outdegrees in the diagonal.
list_outdegree = map(operator.itemgetter(1), sorted(G.out_degree().items()))
K = np.diag(list_outdegree)
K_inv = np.linalg.pinv(K)
# Matrix M
M = (K_inv * A).transpose()
# Part 3: PageRank calculation
d = 0.85
I = np.identity(N)
R = np.linalg.pinv(I - d*M) * (1-d)/N * column_vector
爲了構建圖,我用,
def build_graph_wikipedia_pagerank_example():
"""
Build a graph for https://en.wikipedia.org/wiki/File:PageRanks-Example.svg
"""
G = nx.DiGraph()
# A
# B -->
G.add_path(['B', 'C'])
# C -->
G.add_path(['C', 'B'])
# D -->
G.add_path(['D', 'A'])
G.add_path(['D', 'B'])
# E -->
G.add_path(['E', 'B'])
G.add_path(['E', 'D'])
G.add_path(['E', 'F'])
# F -->
G.add_path(['F', 'B'])
G.add_path(['F', 'E'])
# G -->
G.add_path(['G', 'B'])
G.add_path(['G', 'E'])
# H -->
G.add_path(['H', 'B'])
G.add_path(['H', 'E'])
# I -->
G.add_path(['I', 'B'])
G.add_path(['I', 'E'])
# J -->
G.add_path(['J', 'E'])
# J -->
G.add_path(['K', 'E'])
return G