2017-08-12 68 views
2

是否有 pandas 函數可以爲 Series 創建莖葉圖?

是否存在與 R 的 stem() 等效的 pandas 函數,用於爲一組數字創建莖葉圖?

我可以很容易地寫一些代碼,但想知道我是否錯過了精細的手冊中的東西。

輸出示例:

16 | 070355555588 
18 | 000022233333335577777777888822335777888 
20 | 00002223378800035778 
22 | 0002335578023578 
24 | 00228 
26 | 23 
28 | 080 
30 | 7 

回答

5
import pandas as pd 
import stemgraphic 

# Sample data: a flat list of integers used as input for the
# stem-and-leaf plot below.
x = [ 
    12, 127, 28, 42, 39, 113, 42, 18, 44, 118, 44, 37, 113, 124, 37, 48, 127, 36, 29, 31, 
    125, 139, 131, 115, 105, 132, 104, 123, 35, 113, 122, 42, 117, 119, 58, 109, 23, 105, 
    63, 27, 44, 105, 99, 41, 128, 121, 116, 125, 32, 61, 37, 127, 29, 113, 121, 58, 114, 
    126, 53, 114, 96, 25, 109, 7, 31, 141, 46, 13, 27, 43, 117, 116, 27, 7, 68, 40, 31, 115, 
    124, 42, 128, 52, 71, 118, 117, 38, 27, 106, 33, 117, 116, 111, 40, 119, 47, 105, 57, 
    122, 109, 124, 115, 43, 120, 43, 27, 27, 18, 28, 48, 125, 107, 114, 34, 133, 45, 120, 
    30, 127, 31, 116, 146 
] 
# Wrap the list in a pandas Series before handing it to stemgraphic.
y = pd.Series(x) 

# stem_graphic() is unpacked into two values, bound here as `fig` and `ax`.
# NOTE(review): a commenter reports seeing no plot when running this as a
# script; presumably the returned figure must be shown or saved explicitly
# outside a notebook -- confirm against the stemgraphic documentation.
fig, ax = stemgraphic.stem_graphic(y) 

enter image description here

+0

我運行這段代碼(在PyCharm和IPython中),但是我看不到這個圖。 –

+1

`ImportError: No module named 'stemgraphic'` ... – PatrickT

4

據我所知沒有這樣的函數,但自己實現並不太難:

def stem_leaf_table(values, scale=10, bin_width=2):
    """Build a stem-and-leaf table for a collection of numbers.

    Each value is multiplied by ``scale`` and rounded to one decimal place.
    The leaf is the last character of that rounded number's string form
    (its single decimal digit), and the stem is the rounded number floored
    to a multiple of ``bin_width``.

    Values are sorted *numerically* and stems are grouped as integers.
    Sorting the string forms instead would order "100.4" before "9.5" and
    stem "100" before stem "8" whenever scaled values differ in digit count.

    Args:
        values: Iterable of numbers.
        scale: Multiplier applied to every value before splitting it into
            stem and leaf.
        bin_width: Width of one stem row, in scaled units.

    Returns:
        A pandas DataFrame with one row per stem in ascending order and two
        columns: ``'index'`` (the stem as a string) and ``0`` (the
        concatenated leaves as a string).
    """
    scaled = sorted(round(value * scale, 1) for value in values)

    # `scaled` is ascending, so stems are first seen in ascending order and
    # the dict's insertion order is already the row order we want.
    leaves_by_stem = {}
    for number in scaled:
        stem = int(number // bin_width * bin_width)
        leaves_by_stem.setdefault(stem, []).append(str(number)[-1])

    return pd.DataFrame({
        'index': [str(stem) for stem in leaves_by_stem],
        0: ["".join(leaves) for leaves in leaves_by_stem.values()],
    })


# Sample data: two equally long numeric columns.
eruptions = [3.6, 1.8, 3.333, 2.283, 4.533, 2.883, 4.7, 3.6, 1.95, 4.35, 1.833, 3.917, 4.2, 1.75, 4.7, 2.167, 1.75, 4.8, 1.6, 4.25, 1.8, 1.75, 3.45, 3.067, 4.533, 3.6, 1.967, 4.083, 3.85, 4.433, 4.3, 4.467, 3.367, 4.033, 3.833, 2.017, 1.867, 4.833, 1.833, 4.783, 4.35, 1.883, 4.567, 1.75, 4.533, 3.317, 3.833, 2.1, 4.633, 2.0, 4.8, 4.716, 1.833, 4.833, 1.733, 4.883, 3.717, 1.667, 4.567, 4.317, 2.233, 4.5, 1.75, 4.8, 1.817, 4.4, 4.167, 4.7, 2.067, 4.7, 4.033, 1.967, 4.5, 4.0, 1.983, 5.067, 2.017, 4.567, 3.883, 3.6, 4.133, 4.333, 4.1, 2.633, 4.067, 4.933, 3.95, 4.517, 2.167, 4.0, 2.2, 4.333, 1.867, 4.817, 1.833, 4.3, 4.667, 3.75, 1.867, 4.9, 2.483, 4.367, 2.1, 4.5, 4.05, 1.867, 4.7, 1.783, 4.85, 3.683, 4.733, 2.3, 4.9, 4.417, 1.7, 4.633, 2.317, 4.6, 1.817, 4.417, 2.617, 4.067, 4.25, 1.967, 4.6, 3.767, 1.917, 4.5, 2.267, 4.65, 1.867, 4.167, 2.8, 4.333, 1.833, 4.383, 1.883, 4.933, 2.033, 3.733, 4.233, 2.233, 4.533, 4.817, 4.333, 1.983, 4.633, 2.017, 5.1, 1.8, 5.033, 4.0, 2.4, 4.6, 3.567, 4.0, 4.5, 4.083, 1.8, 3.967, 2.2, 4.15, 2.0, 3.833, 3.5, 4.583, 2.367, 5.0, 1.933, 4.617, 1.917, 2.083, 4.583, 3.333, 4.167, 4.333, 4.5, 2.417, 4.0, 4.167, 1.883, 4.583, 4.25, 3.767, 2.033, 4.433, 4.083, 1.833, 4.417, 2.183, 4.8, 1.833, 4.8, 4.1, 3.966, 4.233, 3.5, 4.366, 2.25, 4.667, 2.1, 4.35, 4.133, 1.867, 4.6, 1.783, 4.367, 3.85, 1.933, 4.5, 2.383, 4.7, 1.867, 3.833, 3.417, 4.233, 2.4, 4.8, 2.0, 4.15, 1.867, 4.267, 1.75, 4.483, 4.0, 4.117, 4.083, 4.267, 3.917, 4.55, 4.083, 2.417, 4.183, 2.217, 4.45, 1.883, 1.85, 4.283, 3.95, 2.333, 4.15, 2.35, 4.933, 2.9, 4.583, 3.833, 2.083, 4.367, 2.133, 4.35, 2.2, 4.45, 3.567, 4.5, 4.15, 3.817, 3.917, 4.45, 2.0, 4.283, 4.767, 4.533, 1.85, 4.25, 1.983, 2.25, 4.75, 4.117, 2.15, 4.417, 1.817, 4.467]
waiting = [79, 54, 74, 62, 85, 55, 88, 85, 51, 85, 54, 84, 78, 47, 83, 52, 62, 84, 52, 79, 51, 47, 78, 69, 74, 83, 55, 76, 78, 79, 73, 77, 66, 80, 74, 52, 48, 80, 59, 90, 80, 58, 84, 58, 73, 83, 64, 53, 82, 59, 75, 90, 54, 80, 54, 83, 71, 64, 77, 81, 59, 84, 48, 82, 60, 92, 78, 78, 65, 73, 82, 56, 79, 71, 62, 76, 60, 78, 76, 83, 75, 82, 70, 65, 73, 88, 76, 80, 48, 86, 60, 90, 50, 78, 63, 72, 84, 75, 51, 82, 62, 88, 49, 83, 81, 47, 84, 52, 86, 81, 75, 59, 89, 79, 59, 81, 50, 85, 59, 87, 53, 69, 77, 56, 88, 81, 45, 82, 55, 90, 45, 83, 56, 89, 46, 82, 51, 86, 53, 79, 81, 60, 82, 77, 76, 59, 80, 49, 96, 53, 77, 77, 65, 81, 71, 70, 81, 93, 53, 89, 45, 86, 58, 78, 66, 76, 63, 88, 52, 93, 49, 57, 77, 68, 81, 81, 73, 50, 85, 74, 55, 77, 83, 83, 51, 78, 84, 46, 83, 55, 81, 57, 76, 84, 77, 81, 87, 77, 51, 78, 60, 82, 91, 53, 78, 46, 77, 84, 49, 83, 71, 80, 49, 75, 64, 76, 53, 94, 55, 76, 50, 82, 54, 75, 78, 79, 78, 78, 70, 79, 70, 54, 86, 50, 90, 54, 54, 77, 79, 64, 75, 47, 86, 63, 85, 82, 57, 82, 67, 74, 54, 83, 73, 73, 88, 80, 71, 83, 56, 79, 78, 84, 58, 83, 43, 60, 75, 81, 46, 90, 46, 74]
df = pd.DataFrame({'eruptions': eruptions, 'waiting': waiting})

# One row per stem: column 'index' holds the stem, column 0 the leaves.
s = stem_leaf_table(df['eruptions'])
print("\n".join(s['index'] + ' | ' + s[0]))
# Output
# 16 | 070355555588
# 18 | 000022233333335577777777888822335777888
# 20 | 00002223378800035778
# 22 | 0002335578023578
# 24 | 00228
# 26 | 23
# 28 | 080
# 30 | 7
# 32 | 2337
# 34 | 250077
# 36 | 0000823577
# 38 | 2333335582225577
# 40 | 0000003357788888002233555577778
# 42 | 03335555778800233333555577778
# 44 | 02222335557780000000023333357778888
# 46 | 0000233357700000023578
# 48 | 00000022335800333
# 50 | 0370
+0

謝謝你的代碼,不過我已經寫過類似的東西了。作爲 Python 代碼,你的答案寫得非常好。但我會把採納標記留給能提供可安裝軟件包的回答。 – verisimilidude

+0

@verisimilidude,那麼你應該在問題中發佈你的代碼。 – PatrickT