我現在有一個utilities.py
文件具有本機的學習功能python-rq隊列中的python scikit函數運行得更快嗎?
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
import models
import random
words = [w.strip() for w in open('words.txt') if w == w.lower()]
def scramble(s):
return "".join(random.sample(s, len(s)))
@models.db_session
def check_pronounceability(word):
scrambled = [scramble(w) for w in words]
X = words+scrambled
y = ['word']*len(words) + ['unpronounceable']*len(scrambled)
X_train, X_test, y_train, y_test = train_test_split(X, y)
text_clf = Pipeline([
('vect', CountVectorizer(analyzer='char', ngram_range=(1, 3))),
('clf', MultinomialNB())
])
text_clf = text_clf.fit(X_train, y_train)
stuff = text_clf.predict_proba([word])
pronounceability = round(100*stuff[0][1], 2)
models.Word(word=word, pronounceability=pronounceability)
models.commit()
return pronounceability
然後我在我的app.py
from flask import Flask, render_template, jsonify, request
from rq import Queue
from rq.job import Job
from worker import conn
from flask_cors import CORS
from utilities import check_pronounceability
app = Flask(__name__)
q = Queue(connection=conn)
import models
@app.route('/api/word', methods=['POST', 'GET'])
@models.db_session
def check():
if request.method == "POST":
word = request.form['word']
if not word:
return render_template('index.html')
db_word = models.Word.get(word=word)
if not db_word:
job = q.enqueue_call(check_pronounceability, args=(word,))
return jsonify(job=job.id)
調用讀python-rq preformance notes它規定
的模式你之後可以用來提高這些吞吐量性能 類型的工作可以導入t他在fork之前需要模塊。
然後我所做的worker.py
文件看起來像這樣
import os
import redis
from rq import Worker, Queue, Connection
listen = ['default']
redis_url = os.getenv('REDISTOGO_URL', 'redis://localhost:6379')
conn = redis.from_url(redis_url)
import utilities
if __name__ == '__main__':
with Connection(conn):
worker = Worker(list(map(Queue, listen)))
worker.work()
我已經是這仍然運行速度慢的問題,是不是我做錯了什麼?當我檢查一個單詞時,我可以通過將所有內容存儲在內存中來使其更快運行。據xpost I did in the python-rq看來我正確地將其導入
謝謝,這固定了它。我將看看使用LogisticRegressions分類器來代替 – nadermx