我先運行下面的代碼,在文件系統中準備好測試用的文件
設置
import pandas as pd
import numpy as np
def setup_test_files(indir='in'):
    """Write three small random CSV files into *indir* for testing.

    Each file ``fn_<i>.csv`` (i = 0, 1, 2) holds three month-end rows
    starting at 2016-03-31, indexed by ``DateTime``, with random values
    rounded to two decimals in the ten data columns.  The path of every
    file is printed after it has been written.

    Args:
        indir: Directory that receives the files; created if it does not
            exist yet.
    """
    # Imported locally because ``os`` is not imported at this point of
    # the file.
    import os

    colnames = [
        "WindSpeed", "Capacity",
        "p0.025", "p0.05", "p0.1", "p0.5",
        "p0.9", "p0.95", "p0.975", "suffix"
    ]
    # An explicit MonthEnd offset yields the same dates as the 'M' alias
    # without relying on that alias, which newer pandas deprecates.
    tidx = pd.date_range(
        '2016-03-31', periods=3, freq=pd.offsets.MonthEnd(), name='DateTime'
    )
    # Honour the requested directory instead of a hard-coded 'in/'.
    os.makedirs(indir, exist_ok=True)
    for i in range(3):
        filename = os.path.join(indir, 'fn_{}.csv'.format(i))
        pd.DataFrame(
            np.random.rand(len(tidx), len(colnames)),
            tidx, colnames
        ).round(2).to_csv(filename)
        print(filename)
# Generate the sample CSV files using the default 'in' directory.
setup_test_files()
這會在 in 目錄下創建 3 個文件,文件名分別爲 ['fn_0.csv', 'fn_1.csv', 'fn_2.csv']。
這些文件的內容看起來是這樣的:
# Dump the first generated file to stdout to show what it contains.
with open('in/fn_0.csv', 'r') as handle:
    print(handle.read())
DateTime,WindSpeed,Capacity,p0.025,p0.05,p0.1,p0.5,p0.9,p0.95,p0.975,suffix
2016-03-31,0.03,0.76,0.62,0.21,0.76,0.36,0.44,0.61,0.23,0.04
2016-04-30,0.39,0.12,0.31,0.99,0.86,0.35,0.15,0.61,0.55,0.03
2016-05-31,0.72,1.0,0.71,0.86,0.41,0.79,0.22,0.76,0.92,0.79
我將定義一個解析函數,再另外定義一個專門負責串聯的函數。爲什麼?因爲我認爲這樣更容易。
import pandas as pd
import glob
import os
def read_csv(fn):
    """Load one forecast CSV and give its columns the standard names.

    Args:
        fn: Path of the CSV file to read (decoded as latin-1).

    Returns:
        A DataFrame with a default integer index whose columns are
        renamed, by position, to ``DateTime``, ``WindSpeed``,
        ``Capacity``, the seven ``p*`` columns and ``suffix``.
    """
    frame = pd.read_csv(fn, encoding='latin-1')
    # Positional rename: whatever header the file carries is replaced.
    frame.columns = [
        "DateTime",
        "WindSpeed",
        "Capacity",
        "p0.025",
        "p0.05",
        "p0.1",
        "p0.5",
        "p0.9",
        "p0.95",
        "p0.975",
        "suffix",
    ]
    return frame
def concatenate(indir='in', outfile='out/Forecast.csv'):
    """Join every CSV in *indir* side by side and write them to *outfile*.

    Each file is parsed with ``read_csv`` and the frames are concatenated
    column-wise.  The file name without its extension becomes the top
    level of the resulting two-level column header.  The row index is
    not written to the output.

    Args:
        indir: Directory searched (non-recursively) for ``*.csv`` files.
        outfile: Path of the CSV to write, resolved against the current
            working directory.  Its parent directory is created if needed.

    Raises:
        ValueError: If *indir* contains no ``*.csv`` files.
    """
    # Build full paths rather than chdir-ing into indir: the process-wide
    # working directory is never touched, so nothing has to be restored
    # and no bare ``except`` is needed that would hide real errors.
    # Sorting makes the column order independent of glob's arbitrary order.
    file_list = sorted(glob.glob(os.path.join(indir, '*.csv')))
    if not file_list:
        raise ValueError('no CSV files found in {!r}'.format(indir))
    df_names = [
        os.path.splitext(os.path.basename(fn))[0] for fn in file_list
    ]
    concat_df = pd.concat(
        [read_csv(fn) for fn in file_list],
        axis=1, keys=df_names)
    # A missing output directory used to fail silently; create it instead.
    outdir = os.path.dirname(outfile)
    if outdir:
        os.makedirs(outdir, exist_ok=True)
    concat_df.to_csv(outfile, index=False)
然後運行串聯函數:
# Use the defaults: read from 'in' and write to 'out/Forecast.csv'.
concatenate()
您可以這樣讀取結果:
# header=[0, 1] reads the first two rows as a two-level column header.
print(pd.read_csv('out/Forecast.csv', header=[0, 1]))
fn_0 \
DateTime WindSpeed Capacity p0.025 p0.05 p0.1 p0.5 p0.9 p0.95 p0.975
0 2016-03-31 0.03 0.76 0.62 0.21 0.76 0.36 0.44 0.61 0.23
1 2016-04-30 0.39 0.12 0.31 0.99 0.86 0.35 0.15 0.61 0.55
2 2016-05-31 0.72 1.00 0.71 0.86 0.41 0.79 0.22 0.76 0.92
... fn_2
... WindSpeed Capacity p0.025 p0.05 p0.1 p0.5 p0.9 p0.95 p0.975 suffix
0 ... 0.80 0.79 0.38 0.94 0.91 0.18 0.27 0.14 0.39 0.91
1 ... 0.60 0.97 0.04 0.69 0.04 0.65 0.94 0.81 0.37 0.22
2 ... 0.78 0.53 0.83 0.93 0.92 0.12 0.15 0.65 0.06 0.11
[3 rows x 33 columns]
注:
您沒有把 DateTime 設爲索引,而我認爲這可能正是您想要的。如果是這樣,請把 read_csv 和 concatenate 這兩個函數改成下面這樣:
import pandas as pd
import glob
import os
def read_csv(fn):
    """Load one forecast CSV with its first column as a datetime index.

    Args:
        fn: Path of the CSV file to read (decoded as latin-1).

    Returns:
        A DataFrame indexed by the parsed dates of the first column
        (index named ``DateTime``) whose remaining columns are renamed,
        by position, to ``WindSpeed``, ``Capacity``, the seven ``p*``
        columns and ``suffix``.
    """
    # The first column is both the index and the only column parsed as dates.
    frame = pd.read_csv(
        fn,
        encoding='latin-1',
        index_col=0,
        parse_dates=[0],
    )
    frame.columns = [
        "WindSpeed",
        "Capacity",
        "p0.025",
        "p0.05",
        "p0.1",
        "p0.5",
        "p0.9",
        "p0.95",
        "p0.975",
        "suffix",
    ]
    frame.index.name = "DateTime"
    return frame
def concatenate(indir='in', outfile='out/Forecast.csv'):
    """Join every CSV in *indir* side by side and write them to *outfile*.

    Each file is parsed with ``read_csv`` and the frames are concatenated
    column-wise, so rows are matched up by their index values.  The file
    name without its extension becomes the top level of the resulting
    two-level column header.  The index is written as the first column.

    Args:
        indir: Directory searched (non-recursively) for ``*.csv`` files.
        outfile: Path of the CSV to write, resolved against the current
            working directory.  Its parent directory is created if needed.

    Raises:
        ValueError: If *indir* contains no ``*.csv`` files.
    """
    # Build full paths rather than chdir-ing into indir: the process-wide
    # working directory is never touched, so nothing has to be restored
    # and no bare ``except`` is needed that would hide real errors.
    # Sorting makes the column order independent of glob's arbitrary order.
    file_list = sorted(glob.glob(os.path.join(indir, '*.csv')))
    if not file_list:
        raise ValueError('no CSV files found in {!r}'.format(indir))
    df_names = [
        os.path.splitext(os.path.basename(fn))[0] for fn in file_list
    ]
    concat_df = pd.concat(
        [read_csv(fn) for fn in file_list],
        axis=1, keys=df_names)
    # A missing output directory used to fail silently; create it instead.
    outdir = os.path.dirname(outfile)
    if outdir:
        os.makedirs(outdir, exist_ok=True)
    concat_df.to_csv(outfile)
做了這個改動之後,最終結果如下。注意各個文件的數據會按日期對齊:
fn_0 \
WindSpeed Capacity p0.025 p0.05 p0.1 p0.5 p0.9 p0.95 p0.975
DateTime
2016-03-31 0.03 0.76 0.62 0.21 0.76 0.36 0.44 0.61 0.23
2016-04-30 0.39 0.12 0.31 0.99 0.86 0.35 0.15 0.61 0.55
2016-05-31 0.72 1.00 0.71 0.86 0.41 0.79 0.22 0.76 0.92
... fn_2 \
suffix ... WindSpeed Capacity p0.025 p0.05 p0.1 p0.5 p0.9
DateTime ...
2016-03-31 0.04 ... 0.80 0.79 0.38 0.94 0.91 0.18 0.27
2016-04-30 0.03 ... 0.60 0.97 0.04 0.69 0.04 0.65 0.94
2016-05-31 0.79 ... 0.78 0.53 0.83 0.93 0.92 0.12 0.15
p0.95 p0.975 suffix
DateTime
2016-03-31 0.14 0.39 0.91
2016-04-30 0.81 0.37 0.22
2016-05-31 0.65 0.06 0.11
[3 rows x 30 columns]
你必須循環遍歷這些文件並把它們連接起來。如果某個 CSV 文件有缺失的行,你就必須中止,或者自行定義缺失數據的表示方式。 –
只是順便提一下:你有調用這個函數嗎?如果沒有,這段代碼什麼都不會做。 – Parfait