2013-06-26 29 views
2

我認爲gevent應該比線程使用更少的內存,但實際上它使用的內存比線程更多。爲什麼gevent使用的內存比線程還多?

這是我的代碼。gevent 版本:

#import gevent.monkey 
#import gevent.httplib as ghttplib 
import httplib as ghttplib 
import httpsqs 
#gevent.monkey.patch_all() 
#from urlparse import urlparse 
#from gevent.pool import Pool 
#import gevent 
#import MySQLdb 
import urllib2 
#from MySQLdb.cursors import SSCursor 
#import gevent_profiler 
import requests 
import time 
from threading import Thread 
#import Queue 
import os 
import memory 
import sys 
#gevent_profiler.print_percentages(True) 
#gevent_profiler.time_blocking(True) 
#gevent_profiler.set_stats_output('my-stats.txt') 

# Browser identity sent with every request.  The literal is split with
# implicit string concatenation rather than a backslash inside the quotes:
# a backslash continuation would embed the next line's leading indentation
# in the header value.
user_agent = ('Mozilla/5.0 (Windows NT 6.1; rv:10.0) '
              'Gecko/20100101 Firefox/10.0')
# Default request headers built from the User-Agent above.
headers = {'User-Agent': user_agent}


# Batch sizes: the driver loop creates this many worker objects per round.
scale = [1,5,10,20,50,100,200,300]

# Results file; the driver loop appends one tab-separated row per batch size.
data = open("thread.txt",'w')
#db=MySQLdb.connect(host='125.221.225.12',user='root',passwd='young001',charset='utf8',db='delicious',use_unicode=True)
#cur = db.cursor()
# Print this process's PID (presumably so it can be handed to an external
# memory probe -- confirm).
print os.getpid()

def get(url):
    """Fetch *url* with the module-level ``headers`` and a 10-second timeout.

    Returns the object produced by ``requests.get`` unchanged.
    """
    return requests.get(url, timeout=10, headers=headers)

# Create the download directory on first run; leave it alone if it exists.
if not os.path.exists("./urls_httpsqs"):
    os.makedirs("./urls_httpsqs")


class URLThread(Thread):
    """Worker thread that reads URLs from a queue client and fetches them.

    The thread keeps looping until ``stop()`` is called or the queue hands
    back something ``httpsqs.isOK`` rejects.  The most recent response is
    kept on ``self.response``.
    """

    def __init__(self, queue, queue_name, timeout=10, allow_redirects=True):
        """Store the queue handle and request options; performs no I/O.

        Args:
            queue: client object whose ``get(queue_name)`` returns the next
                URL as a string.
            queue_name: name passed to ``queue.get`` on every iteration.
            timeout: timeout forwarded to ``requests.get``.
            allow_redirects: forwarded to ``requests.get``.
        """
        super(URLThread, self).__init__()
        self.timeout = timeout
        self.runflag = True  # cleared by stop() to end the loop
        self.allow_redirects = allow_redirects
        self.response = None  # last response fetched by run()
        self.headers = {'User-Agent': user_agent}
        self.queue_name = queue_name
        self.queue = queue

    def save_disk(self, res, pid):
        """Write ``res.content`` to ``./urls_httpsqs/<pid>``.

        The file is opened in binary mode because ``content`` is the raw
        response body, and through ``with`` so the handle is closed even
        when the write fails.  The target directory must already exist.
        """
        with open("./urls_httpsqs/%s" % pid, "wb") as datafile:
            datafile.write(res.content)

    def run(self):
        """Fetch queued URLs until stopped or the queue item is rejected."""
        while self.runflag:
            url = self.queue.get(self.queue_name).strip()
            # Presumably isOK() is false once the queue is drained or the
            # server answered with an error marker -- confirm in httpsqs.
            if not httpsqs.isOK(url):
                return
            try:
                self.response = requests.get(
                    url,
                    timeout=self.timeout,
                    headers=self.headers,
                    allow_redirects=self.allow_redirects,
                )
            except Exception as what:
                # Best effort on purpose: one failing URL is reported and
                # skipped so the worker keeps consuming the queue.
                print(what)

    def stop(self):
        """Ask the loop in run() to exit after its current iteration."""
        self.runflag = False

# Shared queue client and the queue name handed to every worker.
queue = httpsqs.Httpsqs("125.221.225.12")
queue_name = "coroutine"

now = time.time()

# For each batch size: build that many worker objects, record the memory
# figures while they are still referenced, then drop them.
for num in scale:
    threads = [URLThread(queue, queue_name) for _ in range(num)]
    # The workers are never start()ed, so each row reflects the cost of the
    # Thread objects only, not of running threads.
    for t in threads:
        t.stop()

    # Sample once per row, before the workers are released, so the printed
    # value and both recorded columns describe the same moment.
    resident_bytes = memory.resident()
    virtual_bytes = memory.memory()
    print(resident_bytes)
    data.write(str(resident_bytes / 1000000))
    data.write("\t")
    data.write(str(resident_bytes / 1000000 + virtual_bytes / 1000000))
    data.write("\n")
    data.flush()
    threads = []

# Close the results file once every row has been written.
data.close()

end = time.time()
# Single-argument print(...) emits the same text under Python 2 and 3.
print("virtual memory is %s" % memory.memory())
print("resident memory is %s" % memory.resident())
print("stack memory is %s" % memory.stacksize())
print("begin is %s" % now)
print("end is %s" % end)
print("it costs %s" % (end - now))

線程(threading)版本:

import gevent.httplib as ghttplib 
import time 
import httplib as ghttplib 
import httpsqs 
#gevent.monkey.patch_all() 
#from urlparse import urlparse 
#from gevent.pool import Pool 
#import gevent 
import MySQLdb 
import urllib2 
#from MySQLdb.cursors import SSCursor 
#import gevent_profiler 
import requests 
from threading import Thread 
import multiprocessing 
#import Queue 
import os 
import memory 

#gevent_profiler.print_percentages(True) 
#gevent_profiler.time_blocking(True) 
#gevent_profiler.set_stats_output('my-stats.txt') 

# Browser identity sent with requests.  Implicit string concatenation is
# used instead of a backslash inside the quotes, which would embed the next
# line's leading indentation in the header value.
user_agent = ('Mozilla/5.0 (Windows NT 6.1; rv:10.0) '
              'Gecko/20100101 Firefox/10.0')
# Default request headers built from the User-Agent above.
headers = {'User-Agent': user_agent}

# Batch sizes: the driver loop spawns this many processes per round.
scale = [1,5,10,20,50,100,200,300]
# Results file; the driver loop appends one tab-separated row per batch size.
data = open("process.txt",'w')

#db=MySQLdb.connect(host='125.221.225.12',user='root',passwd='young001',charset='utf8',db='delicious',use_unicode=True)
#cur = db.cursor()
# NOTE(review): only referenced by a commented-out print in run(); nothing
# visible here ever updates it.
total_mem = 0

def get(url):
    """Fetch *url* with the module-level ``headers`` and a 10-second timeout.

    Returns the object produced by ``requests.get`` unchanged.
    """
    return requests.get(url, timeout=10, headers=headers)

# Create the download directory on first run; leave it alone if it exists.
if not os.path.exists("./urls_httpsqs"):
    os.makedirs("./urls_httpsqs")

def save_disk(res, pid):
    """Write ``res.content`` to ``./urls_httpsqs/<pid>``.

    The file is opened in binary mode because ``content`` is the raw
    response body, and through ``with`` so the handle is closed even when
    the write fails.  The target directory must already exist.
    """
    with open("./urls_httpsqs/%s" % pid, "wb") as datafile:
        datafile.write(res.content)

def run(queue, queue_name):
    """Worker loop: fetch URLs from *queue* until an item is rejected.

    Args:
        queue: client object whose ``get(queue_name)`` returns the next URL
            as a string.
        queue_name: name passed to ``queue.get`` on every iteration.

    Returns when ``httpsqs.isOK`` rejects the value read from the queue.
    """
    while True:
        url = queue.get(queue_name).strip()
        # Presumably isOK() is false once the queue is drained or the server
        # answered with an error marker -- confirm in httpsqs.
        if not httpsqs.isOK(url):
            return
        try:
            # Bound the wait: without a timeout an unresponsive host would
            # block this worker forever.  10 s matches get() in this file.
            requests.get(url, timeout=10)
        except Exception as what:
            # Best effort on purpose: one failing URL is reported and
            # skipped so the worker keeps consuming the queue.
            print(what)



# Shared queue client and the queue name handed to every worker process.
queue = httpsqs.Httpsqs("125.221.225.12")
queue_name = "coroutine"

now = time.time()
for num in scale:
    # Spawn one batch of worker processes.
    record = []
    for _ in range(num):
        process = multiprocessing.Process(target=run, args=(queue, queue_name))
        process.start()
        record.append(process)
    for process in record:
        process.terminate()
    # Reap every child: terminate() only sends the signal, and an unjoined
    # child stays behind as a zombie until the parent waits for it.
    for process in record:
        process.join()
    record = []
    print("done")

    # Sample once per row so the printed values and both recorded columns
    # are derived from the same reading.
    resident_bytes = memory.resident()
    virtual_bytes = memory.memory()
    print(resident_bytes)
    print(num)
    print(resident_bytes * num)
    data.write(str(resident_bytes * num / 1000000))
    data.write("\t")
    data.write(str(resident_bytes * num / 1000000 + virtual_bytes / 1000000))
    data.write("\n")
    data.flush()
data.close()
end = time.time()

# Single-argument print(...) emits the same text under Python 2 and 3.
print("virtual memory is %s" % memory.memory())
print("resident memory is %s" % memory.resident())
print("stack memory is %s" % memory.stacksize())
print("begin is %s" % now)
print("end is %s" % end)
print("it costs %s" % (end - now))

我用下面的代碼來測量內存開銷:

import os 
import sys 

# Pick the process to inspect.  A numeric first command-line argument is
# used as the PID; when it is missing or not a number (for example because
# this module was imported by a script with its own arguments), fall back
# to the current process instead of crashing at import time.
try:
    sys_pid = int(sys.argv[1])
except (IndexError, ValueError):
    sys_pid = os.getpid()
_proc_status = '/proc/%d/status' % sys_pid

# Multipliers that turn the unit suffix found in the status file into bytes.
_scale = {'kB': 1024.0, 'mB': 1024.0*1024.0,
          'KB': 1024.0, 'MB': 1024.0*1024.0}

def _VmB(VmKey):
    '''Private. Return one field of the status pseudo file, in bytes.

    VmKey is the field label including its colon, e.g. 'VmRSS:'.  The
    matching line is expected to look like 'VmRSS:   9999 kB'.

    Returns 0.0 when the status file cannot be read, when the field is
    absent, or when its line does not have a numeric value followed by a
    unit listed in _scale.
    '''
    global _proc_status, _scale
    # Read the pseudo file; `with` closes it even if read() fails.
    try:
        with open(_proc_status) as status_file:
            status_text = status_file.read()
    except (IOError, OSError):
        return 0.0  # non-Linux, or the process is gone / unreadable
    # Parse line by line so a short line can never borrow tokens from the
    # line that follows it.
    for line in status_text.splitlines():
        start = line.find(VmKey)
        if start < 0:
            continue
        fields = line[start:].split(None, 3)
        if len(fields) < 3:
            return 0.0  # invalid format?
        try:
            # Convert the value to bytes using its unit suffix.
            return float(fields[1]) * _scale[fields[2]]
        except (ValueError, KeyError):
            return 0.0  # non-numeric value or unknown unit
    return 0.0  # field not present


def memory(since=0.0):
    '''Report the 'VmSize:' reading in bytes, minus *since*.

    Pass an earlier reading as *since* to get the change from it.
    '''
    current = _VmB('VmSize:')
    return current - since


def resident(since=0.0):
    '''Report the 'VmRSS:' reading in bytes, minus *since*.

    Pass an earlier reading as *since* to get the change from it.
    '''
    current = _VmB('VmRSS:')
    return current - since


def stacksize(since=0.0):
    '''Report the 'VmStk:' reading in bytes, minus *since*.

    Pass an earlier reading as *since* to get the change from it.
    '''
    current = _VmB('VmStk:')
    return current - since

# Report the three readings for the inspected process.  These statements sit
# at module level, so they run on import as well as when executed directly.
print "virtual memory is", memory()
print "resident memory is", resident()
print "stack memory is", stacksize()

這段代碼來自 Python Cookbook。

輸出如下。線程:

8.310784 23.42912 
8.347648 23.445504 
8.35584 23.457792 
8.368128 23.47008 
8.41728 23.519232 
8.503296 23.601152 
8.671232 24.117248 
8.843264 24.293376 

GEVENT

9.019392 24.829952 
9.048064 24.846336 
9.056256 24.854528 
9.07264 25.14944 
9.1136 25.1904 
9.19552 25.27232 
9.330688 25.407488 
9.46176 25.92768 

我哪裏做錯了?

回答

0

Gevent必然會使用更多的內存,因爲它維護自己的輕量級線程(greenlet),這會導致一些開銷。

如果你的應用程序是CPU密集型(CPU-bound)的,那麼gevent可能對你沒什麼用處。

但是,如果您的應用程序是I/O密集型(I/O-bound)的,那麼gevent非常合適,因爲在一臺4-8 GB內存的機器上就可以達到1000級別的併發量。

另外,正如我的朋友曾經說過的,內存很貴,但並不昂貴:-) 乾杯!

相關問題