0
我有一個Python腳本,它使用os.walk
和win32com.client
從我的C:/驅動器上的文件夾及其子文件夾中提取Outlook電子郵件文件(.msg)中的信息。它似乎工作,但是當我嘗試對返回的數據幀做任何事情時(例如emailData.head()
Python崩潰)。由於權限錯誤,我也無法將數據框寫入.csv。使用Python提取Outlook電子郵件數據時出錯
我想知道如果我的代碼沒有正確關閉outlook /每條消息,那是什麼導致了問題?任何幫助,將不勝感激。
import os
import win32com.client
import pandas as pd
# initialize Outlook client
outlook = win32com.client.Dispatch("Outlook.Application").GetNamespace("MAPI")
# set input directory (where the emails are) and output directory (where you
# would like the email data saved)
inputDir = 'C:/Users/.../myFolderPath'
outputDir = 'C:/Users/.../myOutputPath'
def emailDataCollection(inputDir,outputDir):
""" This function loops through an input directory to find
all '.msg' email files in all folders and subfolders in the
directory, extracting information from the email into lists,
then converting the lists to a Pandas dataframe before exporting
to a '.csv' file in the output directory
"""
# Initialize lists
msg_Path = []
msg_SenderName = []
msg_SenderEmailAddress = []
msg_SentOn = []
msg_To = []
msg_CC = []
msg_BCC = []
msg_Subject = []
msg_Body = []
msg_AttachmentCount = []
# Loop through the directory
for root, dirnames, filenames in os.walk(inputDir):
for filename in filenames:
if filename.endswith('.msg'): # check to see if the file is an email
filepath = os.path.join(root,filename) # save the full filepath
# Extract email data into lists
msg = outlook.OpenSharedItem(filepath)
msg_Path.append(filepath)
msg_SenderName.append(msg.SenderName)
msg_SenderEmailAddress.append(msg.SenderEmailAddress)
msg_SentOn.append(msg.SentOn)
msg_To.append(msg.To)
msg_CC.append(msg.CC)
msg_BCC.append(msg.BCC)
msg_Subject.append(msg.Subject)
msg_Body.append(msg.Body)
msg_AttachmentCount.append(msg.Attachments.Count)
del msg
# Convert lists to Pandas dataframe
emailData = pd.DataFrame({'Path' : msg_Path,
'SenderName' : msg_SenderName,
'SenderEmailAddress' : msg_SenderEmailAddress,
'SentOn' : msg_SentOn,
'To' : msg_To,
'CC' : msg_CC,
'BCC' : msg_BCC,
'Subject' : msg_Subject,
'Body' : msg_Body,
'AttachmentCount' : msg_AttachmentCount
}, columns=['Path','SenderName','SenderEmailAddress','SentOn','To','CC',
'BCC','Subject','Body','AttachmentCount'])
return(emailData)
# Call the function
emailData = emailDataCollection(inputDir,outputDir)
# Causes Python to crash
emailData.head()
# Fails due to permission error
emailData.to_csv(outputDir,header=True,index=False)