這裏我們有一個嵌套內容的大型json文件。我們希望將其轉換爲csv文件,以便它可以將其用於數據建模,但是我覺得代碼缺少某些東西,我無法辨別。我對python很陌生,需要幫助。在Python中將json文件轉換爲csv不會返回任何內容?
以下是文件中的內容的樣子:
[{
"address": " -, Gulbarga-585102",
"college": "College (Architecture)",
"courses": [
{
"brief_details": "",
"college_name": "School of ArchitecturePoojya Doddappa Appa College of Engineering",
"course_branch": "B.Arch",
"course_duration": " 5-year",
"course_nature": " Full-Time",
"course_title": "",
"course_type": " B.Arch",
"no_of_seats": " 60",
"qualifications": "",
"selection_process": ""
}
],
"email": " [email protected]",
"fax": "08472-255685",
"name": "School Of Architecturepoojya Doddappa Appa College Of Engineering",
"phone": "08472-224262 Extn. 435, 220742",
"recognition": " V.t.u. Belgaum",
"website": ""
}]
而下面是我的代碼
from bs4 import BeautifulSoup
from os import listdir
import os
from os.path import isfile, join
import fnmatch
import shelve
import json
import csv
def write_csv(read_file_path):
data = json.loads(open(read_file_path).read())
file_colleges = csv.writer(open(r"/home/maitreyee/SchoolCollege.com/collegesdb/colleges.csv", "w", newline=""))
list_colleges_headers = ['name', 'recognition','address','phone','fax','email','website']
file_colleges.writerow(list_colleges_headers)
list_courses.list_colleges_headers = ['course_title', 'course_type','course_duration','course_nature','qualifications','brief_details','selection_process', 'course_branch', 'no_of_seats']
for d in data:
file_colleges.writerow(
[d['name'], d['college'], d['recognition'], d['address'], d['phone'], d['fax'], d['website']])
file_course.writerow(list_courses_headers)
for course in d['courses']:
file_course.writerow(
[
(course['course_title'] if course['course_title'] is not None or course['course_title'] != '' else 'NA'),
(course['course_type'] if course['course_type'] is not None or course['course_type'] != '' else 'NA'),
(course['course_duration'] if course['course_duration'] is not None or course['course_duration'] != '' else 'NA'),
(course['course_nature'] if course['course_nature'] is not None or course['course_nature'] != '' else 'NA'),
(course['qualifications'] if course['qualifications'] is not None or course['qualifications'] != '' else 'NA'),
(course['brief_details'] if course['brief_details'] is not None or course['brief_details'] != '' else 'NA'),
(course['selection_process'] if course['selection_process'] is not None or course['selection_process'] != '' else 'NA'),
(course['course_branch'] if course['course_branch'] is not None or course['course_branch'] != '' else 'NA'),
(course['no_of_seats'] if course['no_of_seats'] is not None or course['no_of_seats'] != '' else 'NA')])
pass
#def write_file(file, colleges):
# db = shelve.open(file)
# for college in colleges:
# db[college.name] = college
# db.close()
read_file_path = r'/home/maitreyee/Downloads/SchoolCollege.com/collegesdb/collegesdb1.json'
#colleges = read_colleges(r"/home/maitreyee/Downloads/SchoolCollege.com1/collegedb1.json")
#new_write_file(r'/home/maitreyee/Downloads/SchoolCollege.com1/')
而且代碼返回一個空文件
下面是代碼@ 7stud.have剛剛修改了文件位置。
import json
import csv
def write_csv(jsonfile, outfile):
with open(jsonfile) as f:
data = json.loads(f.read())
college_dict = data[0]
college_keys = list(college_dict.keys())
college_keys.remove('courses')
college_keys.remove('college')
courses_dict = data[0]['courses'][0]
courses_keys = list(courses_dict.keys())
courses_keys.remove('brief_details')
with open(outfile, 'w', newline='') as f:
csv_writer = csv.writer(f)
headers = college_keys + courses_keys
csv_writer.writerow(headers)
row = (
[
college_dict[key] if college_dict[key] else 'NA'
for key in college_keys
]
+
[
courses_dict[key] if courses_dict[key] else 'NA'
for key in courses_keys
]
)
csv_writer.writerow(row)
jsonfile = '/home/maitreyee/Downloads/SchoolCollege.com/collegesdb/collegesdb1.json'
outfile = '/home/maitreyee/Downloads/SchoolCollege.com/collegesdb/collegesout.csv'
write_csv(jsonfile, outfile)
及以下的錯誤
[email protected]:~/Downloads/SchoolCollege.com$ python json2csv4.py
Traceback (most recent call last):
File "json2csv4.py", line 41, in <module>
write_csv(jsonfile, outfile)
File "json2csv4.py", line 15, in write_csv
courses_dict = data[0]['courses'][0]
IndexError: list index out of range
這是愚蠢的我。對不起,對於這 –