0
我參考網上提供的教程,用 Keras 編寫了這個 RNN 文本分類系統。它可以正常運行,並顯示輸出。但是,有人可以檢查一下我的代碼,並告訴我我的實現是否正確嗎?(標題:使用 Keras 的 RNN 文本分類)
# LSTM with dropout for sequence classification
import numpy
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import LSTM
from keras.preprocessing import sequence,text
from keras.layers.embeddings import Embedding
import pandas as pd
# Train a small LSTM classifier that labels SMS messages as ham (0) or spam (1).
# Pipeline: load TSV -> binarize labels -> tokenize -> pad -> Embedding -> LSTM -> sigmoid.
# NOTE(review): this script targets the Keras 1.x API (nb_words, dropout_W,
# dropout_U, nb_epoch, Embedding(dropout=...)); those keywords were renamed or
# removed in Keras 2 -- confirm the installed version before upgrading.

# Fix the NumPy random seed for reproducibility.
numpy.random.seed(7)

# Fetch the SMS spam dataset (tab-separated, no header row).
url = 'https://raw.githubusercontent.com/justmarkham/pydata-dc-2016-tutorial/master/sms.tsv'
sms = pd.read_table(url, header=None, names=['label', 'message'])

# Binarize the labels: ham -> 0, spam -> 1.
sms['label_num'] = sms.label.map({'ham': 0, 'spam': 1})
X = sms.message
# Hand Keras a plain NumPy array rather than a pandas Series.
y = sms.label_num.values
print(X.shape)
print(y.shape)

###################################
# Single source of truth for the vocabulary size: the tokenizer cap and the
# embedding input dimension must agree, so both read this constant.
max_features = 200
tk = text.Tokenizer(nb_words=max_features, lower=True)
tk.fit_on_texts(X)
x = tk.texts_to_sequences(X)
# Number of distinct words seen in the corpus (not limited by max_features).
print(len(tk.word_counts))
###################################

# Every message is padded/truncated to this many tokens.
max_len = 80
# Single-argument print() keeps the output identical on Python 2 and 3.
print('max_len  %d' % max_len)
print('Pad sequences (samples x time)')
x = sequence.pad_sequences(x, maxlen=max_len)

print('Build model...')
model = Sequential()
model.add(Embedding(max_features, 128, input_length=max_len, dropout=0.2))
model.add(LSTM(128, dropout_W=0.2, dropout_U=0.2))
# One sigmoid unit: the output is the predicted probability of "spam".
model.add(Dense(1))
model.add(Activation('sigmoid'))

# Accuracy is requested at compile time; the old `show_accuracy` fit()
# argument is deprecated and no longer reports anything.
model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
model.fit(x, y=y, batch_size=500, nb_epoch=1, verbose=1, validation_split=0.2, shuffle=True)
嘗試https://codereview.stackexchange.com/ – jcubic
我投票以離題爲由關閉這個問題,因爲它屬於 codereview.stackexchange.com – jcubic