這不完全是你所要求的,但我認為這已經足夠了:
import itertools as iter
# Input records are (actor, movie) pairs. `sc` is the SparkContext provided
# by the surrounding PySpark session / notebook.
movies = sc.parallelize([
    ("P", "SW4"), ("P", "SW5"), ("P", "SW6"),
    ("A", "SW4"), ("A", "SW5"),
    ("B", "SW5"), ("B", "SW6"),
    ("W", "SW4"),
    ("X", "SW1"), ("X", "SW7"), ("X", "SW2"), ("X", "SW3"),
    ("Y", "SW1"), ("Y", "SW7"), ("Y", "SW2"), ("Y", "SW3"),
])


def swap_tuple(pair):
    """Return the 2-tuple *pair* with its elements swapped: (k, v) -> (v, k)."""
    # Explicit unpacking replaces the Python-2-only `lambda (k, v): ...` form.
    k, v = pair
    return (v, k)


def _actor_pairs_for_movie(movie_and_actors):
    """Return [(movie, (actor_a, actor_b)), ...] for every 2-combination of actors.

    *movie_and_actors* is a (movie, actors) tuple as produced by
    groupByKey().mapValues(list) below.
    """
    movie, actors = movie_and_actors
    # `iter` is the itertools alias imported at the top of this file
    # (it shadows the builtin of the same name).
    return [(movie, duo) for duo in iter.combinations(actors, 2)]


# Swap to (movie, actor) BEFORE grouping so that the key is the movie;
# grouping the raw records would key on the actor instead and would pair up
# movies rather than actors.
movies = movies.map(swap_tuple).groupByKey().mapValues(list)
all_pairs = movies.flatMap(_actor_pairs_for_movie)
# Single-argument print(...) works on both Python 2 and Python 3.
print(all_pairs.collect())
"""
>> [('SW1', ('X', 'Y')),
('SW3', ('X', 'Y')),
('SW5', ('P', 'A')),
('SW5', ('P', 'B')),
('SW5', ('A', 'B')),
('SW7', ('X', 'Y')),
('SW2', ('X', 'Y')),
('SW4', ('P', 'A')),
('SW4', ('P', 'W')),
('SW4', ('A', 'W')),
('SW6', ('P', 'B'))]
"""
這裡是使用 .ipynb 筆記本運行的版本。
正是我需要的,謝謝! – nikos