這聽起來像一個「加權集團」的問題:找到例如 r =網絡中5個人的最大兼容性/ C(5,2)對權重的最大總和。
Google「加權集團」算法 - 「集團滲透」→ 3k次點擊。
而是因爲它是可以理解的,可控的
我將與賈斯汀剝離的方法 去(走N2最好的對,從他們的最好N3三倍...... 調整N2 N3 ......輕鬆地權衡的結果的運行時間/質量。 )
增加了18可以在下面的執行中被切斷。
@Jose,看看nbest []序列對你有什麼作用會很有趣。
#!/usr/bin/env python
""" cliq.py: grow high-weight 2 3 4 5-cliques, taking nbest at each stage
weight ab = dist[a,b] -- a symmetric numpy array, diag << 0
weight abc, abcd ... = sum weight all pairs
C[2] = [ (dist[j,k], (j,k)) ... ] nbest[2] pairs
C[3] = [ (cliqwt(j,k,l), (j,k,l)) ... ] nbest[3] triples
...
run time ~ N * (N + nbest[2] + nbest[3] ...)
keywords: weighted-clique heuristic python
"""
# cf "graph clustering algorithm"
from __future__ import division
import numpy as np
__version__ = "denis 18may 2010"
me = __file__.split('/') [-1]
def cliqdistances(cliq, dist):
return sorted([dist[j,k] for j in cliq for k in cliq if j < k], reverse=True)
def maxarray2(a, n):
""" -> max n [ (a[j,k], (j,k)) ...] j <= k, a symmetric """
jkflat = np.argsort(a, axis=None)[:-2*n:-1]
jks = [np.unravel_index(jk, a.shape) for jk in jkflat]
return [(a[j,k], (j,k)) for j,k in jks if j <= k] [:n]
def _str(iter, fmt="%.2g"):
return " ".join(fmt % x for x in iter)
#...............................................................................
def maxweightcliques(dist, nbest, r, verbose=10):
def cliqwt(cliq, p):
return sum(dist[c,p] for c in cliq) # << 0 if p in c
def growcliqs(cliqs, nbest):
""" [(cliqweight, n-cliq) ...] -> nbest [(cliqweight, n+1 cliq) ...] """
# heapq the nbest ? here just gen all N * |cliqs|, sort
all = []
dups = set()
for w, c in cliqs:
for p in xrange(N):
# fast gen [sorted c+p ...] with small sorted c ?
cp = c + [p]
cp.sort()
tup = tuple(cp)
if tup in dups: continue
dups.add(tup)
all.append((w + cliqwt(c, p), cp))
all.sort(reverse=True)
if verbose:
print "growcliqs: %s" % _str(w for w,c in all[:verbose]) ,
print " best: %s" % _str(cliqdistances(all[0][1], dist)[:10])
return all[:nbest]
np.fill_diagonal(dist, -1e10) # so cliqwt(c, p in c) << 0
C = (r+1) * [(0, None)] # [(cliqweight, cliq-tuple) ...]
# C[1] = [(0, (p,)) for p in xrange(N)]
C[2] = [(w, list(pair)) for w, pair in maxarray2(dist, nbest[2])]
for j in range(3, r+1):
C[j] = growcliqs(C[j-1], nbest[j])
return C
#...............................................................................
if __name__ == "__main__":
import sys
N = 100
r = 5 # max clique size
nbest = 10
verbose = 0
seed = 1
exec "\n".join(sys.argv[1:]) # N= ...
np.random.seed(seed)
nbest = [0, 0, N//2] + (r - 2) * [nbest] # ?
print "%s N=%d r=%d nbest=%s" % (me, N, r, nbest)
# random graphs w cluster parameters ?
dist = np.random.exponential(1, (N,N))
dist = (dist + dist.T)/2
for j in range(0, N, r):
dist[j:j+r, j:j+r] += 2 # see if we get r in a row
# dist = np.ones((N,N))
cliqs = maxweightcliques(dist, nbest, r, verbose)[-1] # [ (wt, cliq) ... ]
print "Clique weight, clique, distances within clique"
print 50 * "-"
for w,c in cliqs:
print "%5.3g %s %s" % (
w, _str(c, fmt="%d"), _str(cliqdistances(c, dist)[:10]))
這一直是加速的主要來源!謝謝! – Jose 2010-05-14 12:22:11