2015-11-27 62 views
1

我想在 Python 中實現邏輯迴歸算法,但我對 Python 還不熟悉。(標題:Python:Logistic 迴歸 - 如何把我的數據輸入到我的算法)

我跟着一篇教程寫出了我的算法:

import math

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from numpy import *

#matplotlib inline
sns.set(style='ticks', palette='Set2')

def logistic_func(theta, X):
    """Apply the logistic (sigmoid) function to the linear scores ``X.dot(theta)``.

    Args:
        theta: Coefficient vector passed to ``X.dot``.
        X: Object exposing ``.dot`` (e.g. a NumPy array of feature rows).

    Returns:
        ``1 / (1 + e**(-X.dot(theta)))``, evaluated element-wise.
    """
    linear_score = X.dot(theta)
    denominator = 1 + math.e ** (-linear_score)
    return 1.0 / denominator
def log_gradient(theta, X, Y):
    """Return the prediction residuals projected onto the feature columns.

    Computes ``(logistic_func(theta, X) - squeeze(Y)).T.dot(X)``, which gives
    one value per column of ``X``.

    Args:
        theta: Current coefficient vector.
        X: Feature matrix.
        Y: Target labels; singleton dimensions are squeezed away before use.

    Returns:
        The residual vector dotted with ``X``.
    """
    residual = logistic_func(theta, X) - np.squeeze(Y)
    return residual.T.dot(X)

def cost_func(theta, X, Y):
    """Return the mean binary cross-entropy of the model on ``(X, Y)``.

    For predicted probabilities ``h = logistic_func(theta, X)`` the per-sample
    loss is ``-y * log(h) - (1 - y) * log(1 - h)``.

    Args:
        theta: Coefficient vector.
        X: Feature matrix.
        Y: Target labels (0 or 1); singleton dimensions are squeezed away.

    Returns:
        The average of the per-sample losses.
    """
    log_func_v = logistic_func(theta, X)
    Y = np.squeeze(Y)
    positive_term = Y * np.log(log_func_v)
    # Only the two cross-entropy terms belong in the loss; an additional
    # "(1.5 - Y) * log(1.5 - h)" term previously skewed the cost.
    negative_term = (1 - Y) * np.log(1 - log_func_v)
    return np.mean(-positive_term - negative_term)

def grad_desc(theta_values, X, Y, lr=.001, converge_change=.001):
    """Fit the coefficients by batch gradient descent.

    The features are standardised column-wise (zero mean, unit standard
    deviation) before fitting. Iteration stops as soon as one step lowers the
    cost by no more than ``converge_change``.

    Args:
        theta_values: Initial coefficient vector.
        X: Feature matrix.
        Y: Target labels.
        lr: Learning rate applied to each gradient step.
        converge_change: Minimum per-step cost decrease required to continue.

    Returns:
        A tuple ``(theta_values, cost_history)`` where ``cost_history`` is an
        array of ``[iteration, cost]`` rows, starting with iteration 0.
    """
    # normalize
    X = (X - np.mean(X, axis=0)) / np.std(X, axis=0)

    # Record the starting cost as iteration 0.
    cost = cost_func(theta_values, X, Y)
    cost_iter = [[0, cost]]
    change_cost = 1
    i = 1
    while change_cost > converge_change:
        old_cost = cost
        theta_values = theta_values - (lr * log_gradient(theta_values, X, Y))
        # The cost must be measured against the labels Y (not the features X).
        cost = cost_func(theta_values, X, Y)
        cost_iter.append([i, cost])
        change_cost = old_cost - cost
        i += 1
    return theta_values, np.array(cost_iter)

def pred_values(theta, X, hard=True):
    """Predict class membership for the rows of ``X``.

    Args:
        theta: Fitted coefficient vector.
        X: Feature matrix; it is standardised column-wise using its own mean
            and standard deviation before prediction.
        hard: When true, return 0/1 labels using a 0.5 probability threshold;
            otherwise return the raw probabilities.

    Returns:
        An array of 0/1 labels if ``hard`` is true, else the probabilities.
    """
    # normalize
    # NOTE(review): this uses the statistics of the X passed in, not those of
    # the training data - confirm that is intended.
    X = (X - np.mean(X, axis=0)) / np.std(X, axis=0)
    pred_prob = logistic_func(theta, X)
    if hard:
        return np.where(pred_prob >= .5, 1, 0)
    return pred_prob

這個算法應該要預測 3 個類別。我可以這樣讀取數據:

# Load the dataset; 'filepath' is a placeholder for the real CSV location.
data = pd.read_csv('filepath')

# Encode the class names as integer codes. Every name needs its own code:
# mapping "type2" twice would leave the "type3" rows unencoded.
TYPE_CODES = {"type1": 0, "type2": 1, "type3": 2}
for type_name, type_code in TYPE_CODES.items():
    data.loc[data["type"] == type_name, "type"] = type_code

# Extract each feature column as a plain list.
att1 = data["attribute1"].tolist()
att2 = data["attribute2"].tolist()
att3 = data["attribute3"].tolist()
att4 = data["attribute4"].tolist()
# Named "labels" rather than "type" so the builtin type() is not shadowed.
labels = data["type"].tolist()

# Rows of combinedClassArray are attributes; transpose so rows are samples.
combinedClassArray = np.array([att1, att2, att3, att4])

X = combinedClassArray.T
y = labels

# Number of samples in each class (codes 0, 1 and 2 respectively).
type1 = data.loc[data["type"] == 0, "type"].count()
type2 = data.loc[data["type"] == 1, "type"].count()
type3 = data.loc[data["type"] == 2, "type"].count()

totalCount = type1 + type2 + type3
p = type1 + type2

我不確定的是,如何把我的數據輸入到這個算法中。我離正確的做法還很遠嗎?

回答

0

你需要一個 main 函數:

def main():
    """Script entry point: call the algorithm with your data.

    This is a placeholder; it currently does nothing and returns ``None``.
    """
    # your code here would be the calls to the algorithm with the parameters (your data)


if __name__ == "__main__":
    main()
相關問題