我構建了一個與Kruskal的MST算法一起使用的不相交集數據結構。我需要加載並結合一個包含200k個互連節點的圖,我認爲我的數據結構實現是一個瓶頸。Python OOP不相交集性能
您有關於如何提高性能的建議嗎?我認爲我的查找方法可能有問題。
class partition(object):
def __init__(self, element=None):
self.size = 0
if element == None:
self.contents = set()
self.representative = None
else:
self.contents = {element}
self.representative = element
self.size = 1
def find(self, element):
return element in self.contents
def add(self, partition):
self.contents = self.contents.union(partition)
self.size = len(self.contents)
def show(self):
return self.contents
def __repr__(self):
return str(self.contents)
class disjoint_set(object):
def __init__(self):
self.partitions_count = 0
self.forest = {}
def make_set(self, element):
if self.find(element) == False:
new_partition = partition(element)
self.forest[new_partition.representative] = new_partition
self.partitions_count += 1
def union(self, x, y):
if x != y:
if self.forest[x].size < self.forest[y].size:
self.forest[y].add(self.forest[x].show())
self.delete(x)
else:
self.forest[x].add(self.forest[y].show())
self.delete(y)
def find(self, element):
for partition in self.forest.keys():
if self.forest[partition].find(element):
return self.forest[partition].representative
return False
def delete(self, partition):
del self.forest[partition]
self.partitions_count -= 1
if __name__ == '__main__':
t = disjoint_set()
t.make_set(1)
t.make_set(2)
t.make_set(3)
print("Create 3 singleton partitions:")
print(t.partitions_count)
print(t.forest)
print("Union two into a single partition:")
t.union(1,2)
print(t.forest)
print(t.partitions_count)
編輯:
閱讀註釋,做更多的研究,我意識到我原來的算法如何設計不良的是後。我從頭開始並把它放在一起。我把所有分區放到一個單獨的哈希表中,並在find()中使用路徑壓縮。這看起來如何,我是否應該解決一些明顯的問題?
class disjoint_set(object):
def __init__(self):
self.partitions_count = 0
self.size = {}
self.parent = {}
def make_set(self, element):
if self.find(element) == False:
self.parent[element] = element
self.size[element] = 1
self.partitions_count += 1
def union(self, x, y):
xParent = self.find(x)
yParent = self.find(y)
if xParent != yParent:
if self.size[xParent] < self.size[yParent]:
self.parent[xParent] = yParent
self.size[yParent] += self.size[xParent]
self.partitions_count -= 1
else:
self.parent[yParent] = xParent
self.size[xParent] += self.size[yParent]
self.partitions_count -= 1
def find(self, element):
if element in self.parent:
if element == self.parent[element]:
return element
root = self.parent[element]
while self.parent[root] != root:
root = self.find(self.parent[root])
self.parent[element] = root
return root
return False
if __name__ == '__main__':
t = disjoint_set()
t.make_set(1)
t.make_set(2)
t.make_set(3)
t.make_set(4)
t.make_set(5)
print("Create 5 singleton partitions")
print(t.partitions_count)
print("Union two singletons into a single partition")
t.union(1,2)
print("Union three singletones into a single partition")
t.union(3,4)
t.union(5,4)
print("Union a single partition")
t.union(2,4)
print("Parent List: %s" % t.parent)
print("Partition Count: %s" % t.partitions_count)
print("Parent of element 2: %s" % t.find(2))
print("Parent List: %s" % t.parent)
你可以添加if __name__ =='__main__'部分來顯示使用情況嗎? – Bey
對不起!現在添加。 –
[使用真正的不相交集森林數據結構。](https://en.wikipedia.org/wiki/Disjoint-set_data_structure#Disjoint-set_forests)您剛挑出了幾個名字,這聽起來有點像不相交集然後編寫一個非常樸素的算法,與真正的不相交的集合森林沒有任何關係。 – user2357112