任何人都可以幫助我在每行的開始/結尾刪除這些雙引號嗎?雙引號在寫入/ .CSV時每行結尾處
我有一個很大的csv(800k行),並且想要創建插入語句來將數據導入到SQL DB中。我知道代碼是十分可怕的,但我從來沒有使用Python的前...任何幫助是極大的讚賞...
#Script file to read from .csv containing raw location data (zip code database)
#SQL insert statements are written to another CSV
#Duplicate zip codes are removed
import csv
Blockquote
csvfile = open('c:\Canada\canada_zip.csv', 'rb')
dialect = csv.Sniffer().sniff(csvfile.readline())
csvfile.seek(0)
reader = csv.reader(csvfile, dialect)
reader.next()
ofile = open('c:\Canada\canada_inserts.csv', 'wb')
writer = csv.writer(ofile, dialect)
#DROP/CREATE TABLE
createTableCmd = '''DROP TABLE PopulatedPlacesCanada \n\
CREATE TABLE PopulatedPlacesCanada \n\
( \n\
ID INT primary key identity not null, \n\
Zip VARCHAR(10), \n\
City nVARCHAR(100), \n\
County nvarchar(100), \n\
StateCode varchar(3), \n\
StateName nvarchar(100), \n\
Country nvarchar(30), \n\
Latitude float, \n\
Longitude float, \n\
PopulationCount int, \n\
Timezone int, \n\
Dst bit \n\
)'''
writer.writerow([createTableCmd])
table = 'PopulatedPlacesCanada'
db_fields = 'Zip, City, County, StateCode, StateName, Country, Latitude, Longitude, PopulationCount, Timezone, Dst'
zip_codes = set()
count = 0
for row in reader:
if row[0] not in zip_codes: #only add row if zip code is unique
count = count + 1
zipCode = row[0] #not every row in the csv is needed so handpick them using row[n]
city = row[1].replace("\'", "").strip()
county = ""
state_abr = row[2]
state = row[3].replace("\'", "").strip()
country = 'Canada'
lat = row[8]
lon = row[9]
pop = row[11]
timezone = row[6]
dst = row[7]
if dst == 'Y':
dst= '1'
if dst == 'N':
dst = '0'
query = "INSERT INTO {0}({1}) VALUES ('{2}', '{3}', '{4}', '{5}', '{6}', '{7}', {8}, {9}, {10}, {11}, {12})".format(table, db_fields, zipCode, city, county, state_abr, state, country, lat, lon, pop, timezone, dst)
writer.writerow([query])
zip_codes.add(row[0])
if count == 100: #Go statement to make sql batch size manageable
writer.writerow(['GO'])
雙引號是什麼? –