我有一個數據幀,並試圖進行以下操作:「無法將float Nan轉換爲int」但不是Nan?
data['SD_rates']=np.array([int((data['actual value'][i]-data['means'][i])/data['std'][i]) for i in range (len(data['means']))])
它與下面的消息打破: 「無法轉換浮動楠爲int」
這是一個錯誤我理解,但用data.isnull()測試了df,沒有涉及的列包括NaN(我通過發送data.to_csv手動控制它)。
我甚至用fillna(-1,inplace = True)填充數據['std'],但仍然中斷。我不明白爲什麼,因爲沒有除0(我也控制這個列中沒有零,所以沒有原始的0和Null/Nan填充-1),實際值和手段是fillna(0 )的缺失值,無論如何,減法不能產生一個nan(數據範圍[0-10])。
什麼可能是錯的? (正如我所說的,觸發操作之前的數據是正確的...)。由於
下面的代碼片段:
我的一個假設是,在某些方面,GROUPBY可能產生NaN的,在計算我的手段時,我無法擺脫掉(但我認爲,這是被熊貓自動忽略......)並且沒有填充0或-1(我故意選擇-1作爲標準偏差以避免被0除)。
def stats_setting(data):
print('Stats settings')
print(data.columns)
print(data.dtypes)
#sys.exit()
data['marks']=np.log1p(data['marks'].astype(float))
data['students']=np.log1p(data['students'].astype(float))#Rossman9 think this has to be tested
#were filled with fillna before)
#First Part: by studentType and Assortment
types_DoM_select=['Type','Type2','Category']
#First Block:types_DoM students grouped by categories
#wonder if can do a groupby of groupb
print("types_DoM_marks_means")
types_DoM_marks_means = data.groupby(types_DoM_select)['marks'].mean()
types_DoM_marks_means.name = 'types_DoM_marks_means'
types_DoM_marks_means = types_DoM_marks_means.reset_index()
data = pd.merge(data, types_DoM_marks_means, on = types_DoM_select, how='left')
print("types_DoM_students_means")
types_DoM_students_means = data.groupby(types_DoM_select)['students'].mean() #.students won't work. Why?
types_DoM_students_means.name = 'types_DoM_students_means'
types_DoM_students_means=types_DoM_students_means.reset_index()
data = pd.merge(data, types_DoM_students_means, on = types_DoM_select, how='left')
print("types_DoM_marks_medians")
types_DoM_marks_medians = data.groupby(types_DoM_select)['marks'].median()
types_DoM_marks_medians.name = 'types_DoM_marks_medians'
types_DoM_marks_medians = types_DoM_marks_medians.reset_index()
data = pd.merge(data, types_DoM_marks_medians, on = types_DoM_select, how='left')
print("types_DoM_students_medians")
types_DoM_students_medians = data.groupby(types_DoM_select)['students'].median() #.students won't work. Why?
types_DoM_students_medians.name = 'types_DoM_students_medians'
types_DoM_students_medians=types_DoM_students_medians.reset_index()
data = pd.merge(data, types_DoM_students_medians, on = types_DoM_select, how='left')
print("types_DoM_marks_std")
types_DoM_marks_std = data.groupby(types_DoM_select)['marks'].std()
types_DoM_marks_std.name = 'types_DoM_marks_std'
types_DoM_marks_std = types_DoM_marks_std.reset_index()
data = pd.merge(data, types_DoM_marks_std, on = types_DoM_select, how='left')
print("types_DoM_students_std")
types_DoM_students_std = data.groupby(types_DoM_select)['students'].std()
types_DoM_students_std.name = 'types_DoM_students_std'
types_DoM_students_std = types_DoM_students_std.reset_index()
data = pd.merge(data, types_DoM_students_std, on = types_DoM_select, how='left')
data['types_DoM_marks_means'].fillna(-1, inplace=True)
data['types_DoM_students_means'].fillna(-1, inplace=True)
data['types_DoM_marks_medians'].fillna(-1, inplace=True)
data['types_DoM_students_medians'].fillna(-1, inplace=True)
data['types_DoM_marks_std'].fillna(-1, inplace=True)
data['types_DoM_students_std'].fillna(-1, inplace=True)
#Second Part: by specific student
student_DoM_select=['Type','Type2','Category']
#First Block:student_DoM
#wonder if can do a groupby of groupb
print("student_DoM_marks_means")
student_DoM_marks_means = data.groupby(student_DoM_select)['marks'].mean()
student_DoM_marks_means.name = 'student_DoM_marks_means'
student_DoM_marks_means = student_DoM_marks_means.reset_index()
data = pd.merge(data, student_DoM_marks_means, on = student_DoM_select, how='left')
print("student_DoM_students_means")
student_DoM_students_means = data.groupby(student_DoM_select)['students'].mean() #.students won't work. Why?
student_DoM_students_means.name = 'student_DoM_students_means'
student_DoM_students_means=student_DoM_students_means.reset_index()
data = pd.merge(data, student_DoM_students_means, on = student_DoM_select, how='left')
print("student_DoM_marks_medians")
student_DoM_marks_medians = data.groupby(student_DoM_select)['marks'].median()
student_DoM_marks_medians.name = 'student_DoM_marks_medians'
student_DoM_marks_medians = student_DoM_marks_medians.reset_index()
data = pd.merge(data, student_DoM_marks_medians, on = student_DoM_select, how='left')
print("student_DoM_students_medians")
student_DoM_students_medians = data.groupby(student_DoM_select)['students'].median() #.students won't work. Why?
student_DoM_students_medians.name = 'student_DoM_students_medians'
student_DoM_students_medians=student_DoM_students_medians.reset_index()
data = pd.merge(data, student_DoM_students_medians, on = student_DoM_select, how='left')
# May I use data['marks','students','marksMean','studentsMean','marksMedian','studentsMedian']=data['marks','students','marksMean','studentsMean','marksMedian','studentsMedian'].astype(int) to spare memory?
print("student_DoM_marks_std")
student_DoM_marks_std = data.groupby(student_DoM_select)['marks'].std()
student_DoM_marks_std.name = 'student_DoM_marks_std'
student_DoM_marks_std = student_DoM_marks_std.reset_index()
data = pd.merge(data, student_DoM_marks_std, on = student_DoM_select, how='left')
print("student_DoM_students_std")
student_DoM_students_std = data.groupby(student_DoM_select)['students'].std()
student_DoM_students_std.name = 'student_DoM_students_std'
student_DoM_students_std = student_DoM_students_std.reset_index()
data = pd.merge(data, student_DoM_students_std, on = student_DoM_select, how='left')
data['student_DoM_marks_means'].fillna(0, inplace=True)
data['student_DoM_students_means'].fillna(0, inplace=True)
data['student_DoM_marks_medians'].fillna(0, inplace=True)
data['student_DoM_students_medians'].fillna(0, inplace=True)
data['student_DoM_marks_std'].fillna(0, inplace=True)
data['student_DoM_students_std'].fillna(0, inplace=True)
#Third Part: Exceptional students
#I think int is better here as it helps defining categories but can't use it.#
#print(data.isnull().sum())
#print(data['types_DoM_marks_std'][data['types_DoM_marks_std']==0].sum())
#data.to_csv('ex')
#print(data.columns)
#Original version:#int raises the "can't convert Nan float to int. While there were no Nan as I verified in the data just before sending it to the
data['Except_student_IP2_DoM_marks_means']=np.array([int((data['student_IP2_DoM_marks_means'][i]-data['types_IP2_DoM_marks_means'][i])/data['types_IP2_DoM_students_std'][i]) for i in range (len(data['year']))])
data['Except_student_IP2_DoM_marks_medians']=np.array([int((data['student_IP2_DoM_marks_medians'][i]-data['types_IP2_DoM_marks_means'][i])/data['types_IP2_DoM_students_std'][i]) for i in range (len(data['year']))])
#Second version: raises no error but final data (returned) is filled with these stupid NaN
data['Except_student_P2M_DoM_marks_means']=np.array([np.round((data['student_DoM_marks_means'][i]-data['types_DoM_marks_means'][i])/data['types_DoM_marks_std'][i],0) for i in range (len(data['year']))])
data['Except_student_P2M_DoM_marks_medians']=np.array([np.round((data['student_DoM_marks_medians'][i]-data['types_DoM_marks_medians'][i])/data['types_DoM_marks_std'][i],0) for i in range (len(data['year']))])
#End
return data
你可以附加你的數據框的一部分嗎? –
你的支票有問題。錯誤消息不會從無到有。 –
嘗試使用循環代替列表理解,並打印每一步以查看錯誤出現的位置。 – Mel