2014-10-08 91 views
0

我終於弄清楚如何選取 pandas DataFrame 的一部分,但現在不知道該如何使用選出來的數據。我希望能夠把同一分鐘內的所有條目加總起來。(標題:在 Python 中使用 pandas)

# Walk the day one minute at a time, from 15:30 through 22:00 on dateToday.
for minute in rrule.rrule(rrule.MINUTELY, dtstart=pd.datetime.strptime(dateToday+"T15:30","%Y-%m-%dT%H:%M"), until=pd.datetime.strptime(dateToday+"T22:00","%Y-%m-%dT%H:%M")): 
      # Rows whose 'time' lies in the half-open window [minute, minute + 60 s).
      temp = pageData[(pageData['time']>=minute)&(pageData['time']<minute+timedelta(seconds=60))] 
      print(temp) 
+0

請提供示例輸入和預期輸出,並附上完整、可運行的示例代碼。 – 2014-10-08 03:24:43

回答

0

想通了。我需要做的是遍歷篩選出來的子集(不管它們該怎麼稱呼)中的每一行。

#Refrences 
from time import * 
from dateutil import rrule 
from datetime import timedelta 
import urllib.request as web 
import pandas as pd 
import os 

# Base URL of the netfonds.no trade-dump CSV endpoint; pullToday appends
# "<stock>.<exchange>" to it as the value of the `paper` query parameter.
forToday = 'http://netfonds.no/quotes/tradedump.php?csv_format=csv&paper=' 
# Today's date in local time, formatted YYYY-MM-DD; used for the output file
# name and for building the minute timestamps.
dateToday = strftime("%Y-%m-%d", localtime()) 
# Uncomment to pin the run to a fixed date instead of today:
#dateToday = "2014-10-07" 

def _summarizeWindow(trades, start, seconds):
    """Aggregate the trades whose time lies in [start, start + seconds).

    Args:
        trades: DataFrame with 'time', 'price' and 'quantity' columns, where
            'time' already holds datetime values.
        start: datetime marking the inclusive start of the window.
        seconds: length of the window in seconds (end is exclusive).

    Returns:
        A list [start, volume, low, high, average], where `average` is the
        volume-weighted mean price rounded to 4 decimals. When the window
        has no traded volume, volume is 0 and low/high/average are NaN, so
        the caller still gets one row per window instead of a
        ZeroDivisionError.
    """
    end = start + timedelta(seconds=seconds)
    window = trades[(trades['time'] >= start) & (trades['time'] < end)]
    volume = window['quantity'].sum()
    if volume == 0:
        nan = float('nan')
        return [start, 0, nan, nan, nan]
    # Weight every trade price by its quantity before averaging.
    average = (window['quantity'] * window['price']).sum() / volume
    # min()/max() avoid the magic sentinels (123456 / 0) a manual scan needs,
    # which would give wrong answers for prices outside that range.
    return [start, volume, window['price'].min(), window['price'].max(), round(average, 4)]


def pullToday(exchange,stock):
    """Download today's trades for one paper and store per-minute summaries.

    Fetches the CSV trade dump for "<stock>.<exchange>" from `forToday`,
    aggregates it into one row per minute from 15:30 through 21:58, plus a
    final row starting at 21:59 that covers 180 seconds, prints the
    resulting table and writes it (';'-separated, after a '#Format' header
    line) to data/<exchange>/<stock>/<dateToday>.txt.

    Args:
        exchange: exchange code, e.g. 'O'.
        stock: ticker symbol, e.g. 'AAPL'.

    Returns:
        None.
    """
    # Function-scope import: the module only imports `timedelta` from
    # datetime, and the `pd.datetime` alias was removed in pandas 2.0.
    from datetime import datetime

    fileName = 'data/' + exchange + '/' + stock + '/' + dateToday + '.txt'
    try:
        # exist_ok=True removes the check-then-create race of isdir()+makedirs().
        os.makedirs(os.path.dirname(fileName), exist_ok=True)
    except OSError:
        print("Something went very wrong. Review the dir creation section")

    # Close the HTTP response as soon as the CSV has been parsed.
    with web.urlopen(forToday + stock + '.' + exchange) as pageBuffer:
        pageData = pd.read_csv(pageBuffer, usecols=['time', 'price', 'quantity'])
    # Convert the whole column at once; element-wise chained assignment
    # (pageData['time'][i] = ...) may write to a temporary copy.
    pageData['time'] = pd.to_datetime(pageData['time'], format='%Y%m%dT%H%M%S')

    stampFormat = "%Y-%m-%dT%H:%M"
    firstMinute = datetime.strptime(dateToday + "T15:30", stampFormat)
    lastRegularMinute = datetime.strptime(dateToday + "T21:58", stampFormat)
    closingMinute = datetime.strptime(dateToday + "T21:59", stampFormat)

    hold = [
        _summarizeWindow(pageData, minute, 60)
        for minute in rrule.rrule(rrule.MINUTELY, dtstart=firstMinute, until=lastRegularMinute)
    ]
    # The last bucket is wider (3 minutes) so trades reported shortly after
    # 22:00 are still included.
    hold.append(_summarizeWindow(pageData, closingMinute, 180))

    compiledData = pd.DataFrame(hold, columns=['TimeStamp', 'Volume', 'Low', 'High', 'Average'])
    print(compiledData)

    # Header text is kept unchanged for existing readers of these files; note
    # that the column it labels 'Median' holds the volume-weighted average.
    with open(fileName, 'w') as dataFile:
        dataFile.write('#Format: Timestamp;Volume;Low;High;Median\n')
    # Write the compiled per-minute rows: they are what the header describes
    # (previously the raw trade dump was written under this header).
    compiledData.to_csv(fileName, index=False, sep=';', mode='a', header=False)

def getList(fileName):
    """Read `<fileName>.txt` and return its entries split on '.'.

    Each usable line becomes a list of its '.'-separated parts, e.g. the
    line 'O.AAPL' becomes ['O', 'AAPL'].

    Args:
        fileName: path of the list file without the '.txt' extension.

    Returns:
        A list of lists of strings, one inner list per usable line. Lines
        containing '#' anywhere are treated as comments and skipped; blank
        lines (including the one after a trailing newline) are skipped too,
        so no [''] entries are produced.
    """
    stockList = []
    # `with` guarantees the file handle is closed even if reading fails.
    with open(fileName + '.txt', 'r') as listFile:
        for rawLine in listFile:
            entry = rawLine.strip()
            if not entry or '#' in entry:
                continue
            stockList.append(entry.split('.'))
    return stockList

# Wall-clock start, used for the run-time report printed at the end.
start_time = time()

# Entries read from stocks.txt; currently loaded but not iterated.
stockList = getList('stocks')
# Batch mode (disabled): call pullToday(eachEntry[0], eachEntry[1]) for
# every eachEntry in stockList instead of the single hard-coded paper below.

pullToday('O','AAPL')

# Elapsed run time, rounded to whole seconds.
delay = str(round(time() - start_time))
print(f'Finished in {delay}')
+0

你不需要那個循環(循環幾乎從來都不是必要的,也不是好主意)。例如,你可以直接寫 `volume = temp['quantity'].sum()`。 – joris 2014-10-08 12:33:39

+0

我還需要將每筆交易的價格乘以它的成交量。我只知道用循環來做這件事。 – Samuel 2014-10-08 19:49:27

+0

直接在循環外面寫 `temp['quantity'] * temp['price']` 不就可以了嗎? – joris 2014-10-08 20:06:41