2015-11-15 39 views
1

這裏我們有一個包含嵌套內容的大型json文件。我們希望將其轉換爲csv文件,以便用於數據建模,但是我覺得代碼中缺少了某些東西,而我無法找出是什麼。我對python還很陌生,需要幫助。問題:在Python中將json文件轉換爲csv時,爲什麼沒有任何輸出?

以下是文件中的內容的樣子:

[{ 
"address": " -, Gulbarga-585102", 
"college": "College (Architecture)", 
"courses": [ 
{ 
    "brief_details": "", 
    "college_name": "School of ArchitecturePoojya Doddappa Appa College of Engineering", 
    "course_branch": "B.Arch", 
    "course_duration": " 5-year", 
    "course_nature": " Full-Time", 
    "course_title": "", 
    "course_type": " B.Arch", 
    "no_of_seats": " 60", 
    "qualifications": "", 
    "selection_process": "" 
} 
], 
"email": " [email protected]", 
"fax": "08472-255685", 
"name": "School Of Architecturepoojya Doddappa Appa College Of Engineering", 
"phone": "08472-224262 Extn. 435, 220742", 
"recognition": " V.t.u. Belgaum", 
"website": "" 
}] 

而下面是我的代碼

from bs4 import BeautifulSoup 
from os import listdir 
import os 
from os.path import isfile, join 
import fnmatch 
import shelve 
import json 
import csv 

def write_csv(read_file_path): 
    """Attempt to export the colleges JSON at *read_file_path* to CSV.

    The JSON is expected to be a list of college dicts, each with a nested
    ``courses`` list. The college header row is written to a hard-coded
    ``colleges.csv`` path.

    NOTE(review): as written this function cannot run to completion; see the
    inline notes on ``list_courses`` and ``file_course`` below.
    """
    data = json.loads(open(read_file_path).read()) 
    # NOTE(review): neither the input file above nor this output file is ever
    # closed explicitly.
    file_colleges = csv.writer(open(r"/home/maitreyee/SchoolCollege.com/collegesdb/colleges.csv", "w", newline="")) 
    list_colleges_headers = ['name', 'recognition','address','phone','fax','email','website'] 
    file_colleges.writerow(list_colleges_headers) 
    # NOTE(review): `list_courses` is not defined anywhere in this snippet, so
    # this line raises NameError. Presumably `list_courses_headers = [...]` was
    # intended, since that name is used in the loop below.
    list_courses.list_colleges_headers = ['course_title', 'course_type','course_duration','course_nature','qualifications','brief_details','selection_process', 'course_branch', 'no_of_seats'] 

    for d in data: 
     # NOTE(review): the values do not line up with list_colleges_headers:
     # 'college' is written but has no header, and 'email' is omitted.
     file_colleges.writerow(
      [d['name'], d['college'], d['recognition'], d['address'], d['phone'], d['fax'], d['website']]) 
     # NOTE(review): `file_course` is never created in this snippet; the
     # course header would also be re-written once per college.
     file_course.writerow(list_courses_headers) 
     for course in d['courses']: 
      file_course.writerow(
       [ 
       # NOTE(review): `x is not None or x != ''` is true for every value
       # (None != '' and '' is not None), so 'NA' is never substituted.
       (course['course_title'] if course['course_title'] is not None or course['course_title'] != '' else 'NA'), 
       (course['course_type'] if course['course_type'] is not None or course['course_type'] != '' else 'NA'), 
       (course['course_duration'] if course['course_duration'] is not None or course['course_duration'] != '' else 'NA'), 
       (course['course_nature'] if course['course_nature'] is not None or course['course_nature'] != '' else 'NA'), 
       (course['qualifications'] if course['qualifications'] is not None or course['qualifications'] != '' else 'NA'), 
       (course['brief_details'] if course['brief_details'] is not None or course['brief_details'] != '' else 'NA'), 
       (course['selection_process'] if course['selection_process'] is not None or course['selection_process'] != '' else 'NA'), 
       (course['course_branch'] if course['course_branch'] is not None or course['course_branch'] != '' else 'NA'), 
       (course['no_of_seats'] if course['no_of_seats'] is not None or course['no_of_seats'] != '' else 'NA')]) 
     pass 

# Disabled shelve-based writer, kept commented out.
#def write_file(file, colleges): 
# db = shelve.open(file) 
# for college in colleges: 
#  db[college.name] = college 
# db.close() 
# NOTE(review): only the input path is defined here; no call to write_csv()
# appears in this snippet, so the function body never runs.
read_file_path = r'/home/maitreyee/Downloads/SchoolCollege.com/collegesdb/collegesdb1.json' 
#colleges = read_colleges(r"/home/maitreyee/Downloads/SchoolCollege.com1/collegedb1.json") 
#new_write_file(r'/home/maitreyee/Downloads/SchoolCollege.com1/') 

而且代碼返回一個空文件

@7stud,下面是代碼,我只是修改了文件位置。

import json 
import csv 

def write_csv(jsonfile, outfile):  # write the first college and its first course as a single CSV row

    with open(jsonfile) as f: 
     data = json.loads(f.read()) 

    college_dict = data[0]  # only the first record of the JSON list is used

    college_keys = list(college_dict.keys()) 
    college_keys.remove('courses')  # nested list; its fields are flattened separately below
    college_keys.remove('college') 

    courses_dict = data[0]['courses'][0]  # raises IndexError when the first college's 'courses' list is empty
    courses_keys = list(courses_dict.keys()) 
    courses_keys.remove('brief_details') 

    with open(outfile, 'w', newline='') as f: 
     csv_writer = csv.writer(f) 
     headers = college_keys + courses_keys  # college columns first, then course columns
     csv_writer.writerow(headers) 

     row = (
      [ 
       college_dict[key] if college_dict[key] else 'NA'  # falsy values (e.g. '') become 'NA'
       for key in college_keys 
      ] 
      + 
      [ 
       courses_dict[key] if courses_dict[key] else 'NA' 
       for key in courses_keys 
      ] 
     ) 

     csv_writer.writerow(row) 

jsonfile = '/home/maitreyee/Downloads/SchoolCollege.com/collegesdb/collegesdb1.json' 
outfile = '/home/maitreyee/Downloads/SchoolCollege.com/collegesdb/collegesout.csv' 

write_csv(jsonfile, outfile)  # module-level call: the conversion runs when the script is executed

及以下的錯誤

[email protected]:~/Downloads/SchoolCollege.com$ python json2csv4.py 
Traceback (most recent call last): 
    File "json2csv4.py", line 41, in <module> 
    write_csv(jsonfile, outfile) 
    File "json2csv4.py", line 15, in write_csv 
    courses_dict = data[0]['courses'][0] 
IndexError: list index out of range 
+0

是我犯傻了,對此我很抱歉。 –

回答

2
  1. 你是否打算在程序中調用你的write_csv()函數?

  2. 如果你調用write_csv(),你將得到錯誤:

NameError: name 'list_courses' is not defined

如果你只是這樣做:

import json 
import csv 

def write_csv(read_file_path):
    """Write the college header row to ``out.txt`` in the working directory.

    This is a minimal demonstration: the JSON at *read_file_path* is parsed,
    but no data rows are written.

    Args:
        read_file_path: Path of the JSON file to read.

    Raises:
        OSError: If the input cannot be opened or ``out.txt`` cannot be
            written.
        json.JSONDecodeError: If the input is not valid JSON.
    """
    # Context managers close both files deterministically; the original left
    # the handles open and relied on garbage collection to flush the output.
    with open(read_file_path) as json_file:
        # Parsed only so that an unreadable or malformed input still fails
        # before the output is created; the data itself is not used here.
        json.load(json_file)

    list_colleges_headers = [
        'name', 'recognition', 'address', 'phone', 'fax', 'email', 'website',
    ]
    with open('out.txt', "w", newline="") as csv_file:
        csv.writer(csv_file).writerow(list_colleges_headers)


# Input path for the demo; the output name 'out.txt' is fixed inside write_csv().
infile = "json.txt" 
write_csv(infile) 

你會看到該文件包含輸出:

$ cat out.txt 
name,recognition,address,phone,fax,email,website 

編輯:

如果CSV文件中的列順序並不重要:

import json 
import csv 

def write_csv(jsonfile, outfile):
    """Flatten a JSON list of colleges into a CSV with one row per course.

    The column set is taken from the first college (minus the nested
    ``courses`` list and the ``college`` field) followed by the keys of the
    first course found in any college (minus ``brief_details``). Every
    college is written; a college with several courses yields several rows,
    and a college with no courses yields one row whose course columns are
    ``'NA'``. Missing or empty values are written as ``'NA'``.

    Args:
        jsonfile: Path of the JSON file holding a list of college dicts.
        outfile: Path of the CSV file to create (overwritten if it exists).

    Raises:
        OSError: If either file cannot be opened.
        json.JSONDecodeError: If *jsonfile* is not valid JSON.
    """
    with open(jsonfile) as f:
        data = json.load(f)

    # Columns come from the first college; the nested list and the 'college'
    # field are not written as college columns.
    college_keys = (
        [key for key in data[0] if key not in ('courses', 'college')]
        if data else []
    )

    # Use the first course found anywhere, so an empty 'courses' list on the
    # first college no longer raises IndexError.
    first_course = next(
        (
            course
            for college in data
            for course in college.get('courses') or []
        ),
        {},
    )
    courses_keys = [key for key in first_course if key != 'brief_details']

    with open(outfile, 'w', newline='') as f:
        if not data:
            # No records: leave an empty file rather than a blank header line.
            return

        csv_writer = csv.writer(f)
        csv_writer.writerow(college_keys + courses_keys)

        for college in data:
            college_cells = [college.get(key) or 'NA' for key in college_keys]
            # `[{}]` gives a course-less college one row of 'NA' course cells.
            for course in college.get('courses') or [{}]:
                csv_writer.writerow(
                    college_cells
                    + [course.get(key) or 'NA' for key in courses_keys]
                )

# Relative paths: both files are resolved against the current working directory.
jsonfile = 'data.json' 
outfile = 'out.csv' 

# Module-level call: the conversion runs when the script is executed.
write_csv(jsonfile, outfile) 
+0

我還需要把上面courses中的嵌套值放在單獨的列中。是的,你說得對,調用write_csv之後我確實得到了錯誤,所以我正在做一些修改,以便把課程的值也包含進來。我會把修改後的代碼放在這裏。如果您對如何包含嵌套列表有任何建議,請告訴我。 –

+0

@MaitreyeeTewari,請參閱我的答案的底部。 – 7stud

+0

現在我收到錯誤'列表索引超出範圍'? –

相關問題