2017-05-22 100 views
1

我正在建立一個包含兩個散點圖子圖的圖形。我想讓每個子圖使用相同的配色方案和標記定義,但似乎無法使其正常運作。請原諒這個最小可運行範例有點長,我已經儘可能地精簡了。(標題:具有相同顏色和標記的散點圖子圖)

import pandas as pd 
import matplotlib.pyplot as plt 
import matplotlib.ticker as tkr 
from scipy.stats import probplot 

# Raw data: eddy areas used as the y values (the y-axis label below gives the
# unit as square metres). 
area_old = [7603.4897489697905, 2941.7094279413577, 8153.896678990219, 7289.99097646249, 8620.196237363853, 11619.546945954673, 8458.80648310436, 7161.530990460888, 28486.298572761007, 4928.4856128268875, 4219.122621992603, 31687.155529782176] 

combined = [7603.4897489697905, 2941.7094279413577, 8153.896678990219, 7289.99097646249, 8620.196237363853, 11619.546945954673, 8458.80648310436, 7161.530990460888, 28486.298572761007, 4928.4856128268875, 4219.122621992603, 31687.155529782176, 3059.4357099599456, 3348.0415691055823, 4839.023360449559, 4398.877634354169, 29269.67455441528, 11058.400909555028, 18266.34679952683, 16641.3446048029, 24983.586163502885, 5811.868753338233] 

# Bar-type code of each value, in the same order as the lists above; these
# codes are the keys of the `colors` and `markers` dicts defined further down. 
lt_bt = ['r','s','s','r','r','u','r','s','r','r','s','r'] 
combined_bt =['r','s','s','r','r','u','r','s','r','r','s','r','u','u','r','s','r','s','r','r','r','u'] 

# Probability-plot data against a normal distribution; plot=None means nothing
# is drawn here. Element [0][0] of each result is used below as x values. 
a = probplot(area_old,dist='norm', plot=None) 
b= probplot(combined,dist='norm', plot=None) 

# Colour and marker assigned to each bar-type code. 
colors = {'r':'red','s':'blue', 'u':'green'} 
markers = {'r':'*','s':'x', 'u':'o'} 

# Combine values and bar types, sort by value, then attach the quantiles.
# sort_values() reorders the rows but keeps their original index labels. 
old_df = pd.DataFrame(area_old, columns=['Long Term Sites: N=12']) 
old_df['Bar_Type'] = lt_bt 
old_df = old_df.sort_values(by='Long Term Sites: N=12') 
old_df['quart']=a[0][0] 

# Series of colour names for subplot 'ax', carrying old_df's (shuffled) index. 
ax_color = old_df.loc[:,'Bar_Type'].apply(lambda x: colors[x]) 

# Same construction for the combined data set. 
combined_df = pd.DataFrame(combined, columns=['ALL SITES N=22']) 
combined_df['Bar_Type'] = combined_bt 
combined_df = combined_df.sort_values(by='ALL SITES N=22') 
combined_df['quart']=b[0][0] 

# Series of colour names for subplot 'ax1', carrying combined_df's index. 
ax1_color = combined_df.loc[:,'Bar_Type'].apply(lambda x: colors[x]) 

# Proxy artists used only as legend handles (linestyle=' ' draws no line). 
undif = plt.Line2D([0,0],[0,1], color='green',marker='o',linestyle=' ') 
reatt = plt.Line2D([0,0],[0,1], color='red',marker='*',linestyle=' ') 
sep = plt.Line2D([0,0],[0,1], color='blue',marker='x',linestyle=' ') 


# Two side-by-side subplots sharing one y-axis. 
fig,(ax,ax1) = plt.subplots(ncols=2,sharey=True) 

# Plot each data point separately so every point gets its own marker and colour.
# NOTE(review): iterrows() yields each row's index *label*, and sort_values()
# above left those labels out of positional order, so the positional lookup
# ax_color.iloc[i] returns the colour of a different row than `thing`. The
# marker is looked up from thing['Bar_Type'] and therefore does match the row. 
for i, thing in old_df.iterrows(): 
    ax.scatter(thing['quart'],thing['Long Term Sites: N=12'],c=ax_color.iloc[i],marker=markers[thing['Bar_Type']],zorder=10,s=50) 
del i, thing 

# NOTE(review): same label-versus-position mismatch as in the loop above. 
for i , thing in combined_df.iterrows(): 
    ax1.scatter(thing['quart'],thing['ALL SITES N=22'],c=ax1_color.iloc[i],marker=markers[thing['Bar_Type']],zorder=10,s=50) 
del i, thing 

# Titles, axis labels and the y range. 
ax.set_title('LONG TERM SITES N=12') 
ax1.set_title('ALL SITES N=22') 
ax1.set_ylabel('') 
ax.set_ylabel('TOTAL EDDY AREA, IN METERS SQUARED') 
ax.set_ylim(0,35000) 
# Render y tick values as integers with thousands separators. 
ax.get_yaxis().set_major_formatter(tkr.FuncFormatter(lambda x, p: format(int(x), ','))) 

# Legend built from the proxy handles; loc=2 is the upper-left corner. 
legend = ax.legend([reatt,sep,undif],["Reattachment","Separation", "Undifferentiated"],loc=2,title='Bar Type',fontsize='x-small') 

# Make the legend title the same size as the legend entries. 
plt.setp(legend.get_title(),fontsize='x-small') 

ax.set_xlabel('QUANTILES') 
ax1.set_xlabel('QUANTILES') 
plt.tight_layout() 

基本思想是我繪製散點圖,逐點分配合適的顏色和標記。我使用pandas整數索引.iloc分配顏色,並通過指定markers字典的鍵來指定標記。

我知道有些地方不正確,因爲 old_df 和 combined_df 中的第一個點(即 old_df.loc[1,:] 和 combined_df.loc[1,:])的顏色和標記應該分別是 'blue' 和 'x'。

我在做什麼錯?

回答

1

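不知道爲什麼,但在 ax.scatter 中使用 .iloc 會導致不可預知的行爲。我所要做的只是刪除 .iloc 方法,改用字典映射(即把 c=ax_color.iloc[i] 改成 c=colors[thing['Bar_Type']]),一切就正常了!(補充說明:經過 sort_values 之後,iterrows() 傳回的 i 是原始的索引標籤,而 .iloc 是按位置取值,兩者並不對應,所以取到的是別的資料列的顏色;改用 ax_color.loc[i] 同樣可以修正。)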

所期望的結果的一個工作例子:

import pandas as pd 
import matplotlib.pyplot as plt 
import matplotlib.ticker as tkr 
from scipy.stats import probplot 

# Raw data: eddy areas used as the y values (the y-axis label below gives the
# unit as square metres).
area_old = [7603.4897489697905, 2941.7094279413577, 8153.896678990219, 7289.99097646249, 8620.196237363853, 11619.546945954673, 8458.80648310436, 7161.530990460888, 28486.298572761007, 4928.4856128268875, 4219.122621992603, 31687.155529782176]

combined = [7603.4897489697905, 2941.7094279413577, 8153.896678990219, 7289.99097646249, 8620.196237363853, 11619.546945954673, 8458.80648310436, 7161.530990460888, 28486.298572761007, 4928.4856128268875, 4219.122621992603, 31687.155529782176, 3059.4357099599456, 3348.0415691055823, 4839.023360449559, 4398.877634354169, 29269.67455441528, 11058.400909555028, 18266.34679952683, 16641.3446048029, 24983.586163502885, 5811.868753338233]

# Bar-type code of each value, in the same order as the lists above.
lt_bt = ['r', 's', 's', 'r', 'r', 'u', 'r', 's', 'r', 'r', 's', 'r']
combined_bt = ['r', 's', 's', 'r', 'r', 'u', 'r', 's', 'r', 'r', 's', 'r', 'u', 'u', 'r', 's', 'r', 's', 'r', 'r', 'r', 'u']

# Probability-plot data against a normal distribution; plot=None means nothing
# is drawn here. Element [0][0] of each result is used below as x values.
a = probplot(area_old, dist='norm', plot=None)
b = probplot(combined, dist='norm', plot=None)

# Single source of truth for the colour and marker of every bar type: both
# subplots and the legend handles read from these two dicts.
colors = {'r': 'red', 's': 'blue', 'u': 'green'}
markers = {'r': '*', 's': 'x', 'u': 'o'}

# Combine values and bar types, sort by value, then attach the quantiles.
# The quantile array is assigned positionally, so it must be added after the
# sort (assumes probplot returns its quantiles in ascending order -- see the
# scipy documentation).
old_df = pd.DataFrame(area_old, columns=['Long Term Sites: N=12'])
old_df['Bar_Type'] = lt_bt
old_df = old_df.sort_values(by='Long Term Sites: N=12')
old_df['quart'] = a[0][0]

# Same construction for the combined data set.
combined_df = pd.DataFrame(combined, columns=['ALL SITES N=22'])
combined_df['Bar_Type'] = combined_bt
combined_df = combined_df.sort_values(by='ALL SITES N=22')
combined_df['quart'] = b[0][0]

# Proxy artists used only as legend handles (linestyle=' ' draws no line).
# They take their colour and marker from the dicts above so the legend cannot
# drift away from what is actually plotted.
undif = plt.Line2D([0, 0], [0, 1], color=colors['u'], marker=markers['u'], linestyle=' ')
reatt = plt.Line2D([0, 0], [0, 1], color=colors['r'], marker=markers['r'], linestyle=' ')
sep = plt.Line2D([0, 0], [0, 1], color=colors['s'], marker=markers['s'], linestyle=' ')


def _scatter_by_bar_type(axis, frame, value_column):
    """Draw *frame* on *axis* with one scatter call per bar type.

    Rows are grouped on the 'Bar_Type' column. Each group is drawn with the
    colour and marker registered for its code in ``colors`` and ``markers``,
    taking x from the 'quart' column and y from *value_column*.
    """
    for bar_type, rows in frame.groupby('Bar_Type'):
        axis.scatter(rows['quart'], rows[value_column], c=colors[bar_type],
                     marker=markers[bar_type], zorder=10, s=50)


# Two side-by-side subplots sharing one y-axis.
fig, (ax, ax1) = plt.subplots(ncols=2, sharey=True)

# Colour and marker are both looked up from the row's own bar type, so they
# always agree with each other regardless of the DataFrame's index order.
_scatter_by_bar_type(ax, old_df, 'Long Term Sites: N=12')
_scatter_by_bar_type(ax1, combined_df, 'ALL SITES N=22')

# Titles, axis labels and the y range.
ax.set_title('LONG TERM SITES N=12')
ax1.set_title('ALL SITES N=22')
ax1.set_ylabel('')
ax.set_ylabel('TOTAL EDDY AREA, IN METERS SQUARED')
ax.set_ylim(0, 35000)
# Render y tick values as integers with thousands separators.
ax.get_yaxis().set_major_formatter(tkr.FuncFormatter(lambda x, p: format(int(x), ',')))

# Legend built from the proxy handles; loc=2 is the upper-left corner.
legend = ax.legend([reatt, sep, undif], ["Reattachment", "Separation", "Undifferentiated"], loc=2, title='Bar Type', fontsize='x-small')

# Make the legend title the same size as the legend entries.
plt.setp(legend.get_title(), fontsize='x-small')

ax.set_xlabel('QUANTILES')
ax1.set_xlabel('QUANTILES')
plt.tight_layout()