我對這個問題很感興趣(最近也遇到過類似的問題),所以動手做了一些嘗試。你可以試試下面這種做法:
import io
import datetime
from csv import DictReader
# In-memory stand-ins for two CSV files. Each starts with a
# "timestamp,data" header row followed by one record per line.
file0 = io.StringIO(
    'timestamp,data\n'
    '2015-06-01 10:00, data00\n'
    '2015-06-01 11:00, data01\n'
    '2015-06-01 12:00, data02\n'
    '2015-06-01 12:30, data03\n'
    '2015-06-01 13:00, data04\n'
)
file1 = io.StringIO(
    'timestamp,data\n'
    '2015-06-01 09:00, data10\n'
    '2015-06-01 10:30, data11\n'
    '2015-06-01 11:00, data12\n'
    '2015-06-01 12:30, data13\n'
)
class Data(object):
    """One record: a parsed ``timestamp`` plus its ``data`` payload string.

    Instances are ordered by ``timestamp``. Comparing an instance against
    ``None`` with ``<`` or ``>`` is always ``False``.
    """

    def __init__(self):
        # Both fields start unset; new_from_dict() fills them in.
        self.timestamp = None
        self.data = None

    @classmethod
    def new_from_dict(cls, dct=None):
        """Build an instance from a row mapping.

        Args:
            dct: Mapping with a ``'timestamp'`` entry in
                ``'%Y-%m-%d %H:%M'`` format and a ``'data'`` entry, or
                ``None``.

        Returns:
            A new instance of ``cls``, or ``None`` when ``dct`` is ``None``.

        Raises:
            KeyError: If ``'data'`` or ``'timestamp'`` is missing.
            ValueError: If the timestamp text does not match the format.
        """
        if dct is None:
            return None
        ret = cls()
        ret.data = dct['data'].strip()
        # Strip the timestamp like the data field: strptime rejects
        # leading or trailing whitespace around the value.
        ret.timestamp = datetime.datetime.strptime(
            dct['timestamp'].strip(), '%Y-%m-%d %H:%M')
        return ret

    def __lt__(self, other):
        """Return True when this record's timestamp is earlier than other's."""
        if other is None:
            return False
        return self.timestamp < other.timestamp

    def __gt__(self, other):
        """Return True when this record's timestamp is later than other's."""
        if other is None:
            return False
        return self.timestamp > other.timestamp

    def __str__(self):
        """Return ``ClassName(timestamp=..., data=...)``."""
        return ('{0.__class__.__name__}'
                '(timestamp={0.timestamp}, data={0.data})').format(self)
def next_or_none(reader):
    """Return the next row of ``reader`` as a ``Data``, or ``None``.

    ``None`` is returned once ``reader`` has no more rows.
    """
    row = next(reader, None)
    if row is None:
        return None
    return Data.new_from_dict(row)
def yield_in_order(reader0, reader1):
    """Merge two row readers by timestamp, yielding aligned pairs.

    Assumes each reader yields its rows in ascending timestamp order
    (the merge never looks back) -- TODO confirm for real inputs.

    Args:
        reader0: Iterator of rows accepted by ``next_or_none``.
        reader1: Iterator of rows accepted by ``next_or_none``.

    Yields:
        ``(data0, data1)`` tuples. A record whose timestamp appears in
        only one reader is paired with ``None`` on the other side;
        records that are neither earlier nor later than each other are
        yielded together.
    """
    data0 = next_or_none(reader0)
    data1 = next_or_none(reader1)

    # Explicit identity checks: run until both readers are exhausted.
    while data0 is not None or data1 is not None:
        if data0 is None:
            # reader0 is exhausted: drain reader1.
            yield None, data1
            data1 = next_or_none(reader1)
        elif data1 is None:
            # reader1 is exhausted: drain reader0.
            yield data0, None
            data0 = next_or_none(reader0)
        elif data0 < data1:
            yield data0, None
            data0 = next_or_none(reader0)
        elif data0 > data1:
            yield None, data1
            data1 = next_or_none(reader1)
        else:
            # Neither earlier nor later: emit both and advance both.
            yield data0, data1
            data0 = next_or_none(reader0)
            data1 = next_or_none(reader1)
# Demo driver: wrap both sample files in DictReaders and print the merged
# rows side by side, one column per file.
csv0 = DictReader(file0)
csv1 = DictReader(file1)

FMT = '{!s:50s} | {!s:50s}'
print(FMT.format('file0', 'file1'))
print('-' * 101)
for left, right in yield_in_order(csv0, csv1):
    print(FMT.format(left, right))
這個做法目前只適用於 2 個檔案。
請添加一些示例數據 – synner
我建議你看看 pandas(http://pandas.pydata.org/)。它有現成的工具,可以完成你提到的這類合併。 – Ivan
@Ivan 我在搜尋這個問題時就注意到了 pandas 函式庫。它對我正在做的許多操作看起來都非常有用。我目前相當依賴 NumPy,等有時間時我會去研究一下 pandas。 – RandomBits