2017-06-27 88 views
0

我試圖解析使用 musicbrainzngs 模塊從 MusicBrainz 收到的、類似 JSON 的大型 Python 字典中的數據。下面是這種字典經過美化格式輸出後的一個示例。(標題:使用長索引檢查字典/列表項是否存在)

{ 'artist-credit': [ { 'artist': { 'id': '0039c7ae-e1a7-4a7d-9b49-0cbc716821a6', 
            'name': 'Death Cab for Cutie', 
            'sort-name': 'Death Cab for Cutie'}}], 
    'artist-credit-phrase': 'Death Cab for Cutie', 
    'asin': 'B0000D1FDI', 
    'barcode': '655173103227', 
    'country': 'US', 
    'cover-art-archive': { 'artwork': 'true', 
         'back': 'false', 
         'count': '1', 
         'front': 'true'}, 
    'date': '2003-10-07', 
    'id': 'e602a3ae-fe8f-4abd-8638-f055517bacb2', 
    'label-info-count': 1, 
    'label-info-list': [ { 'catalog-number': 'bark32', 
         'label': { 'id': 'a4f904e0-f048-4c13-88ec-f9f31f3e6109', 
            'name': 'Barsuk Records', 
            'sort-name': 'Barsuk Records'}}], 
    'medium-count': 1, 
    'medium-list': [ { 'disc-count': 4, 
        'disc-list': [ { 'id': '5G2zzIza.oA1Y3XpMHxuohbzazQ-', 
             'offset-count': 11, 
             'offset-list': [ 150, 
                 18780, 
                 34698, 
                 51295, 
                 70286, 
                 80376, 
                 100160, 
                 135944, 
                 152723, 
                 166122, 
                 191235], 
             'sectors': '207432'}, 
            { 'id': '8XGkh_GqZPv6rL8W1c6_t9fQKhw-', 
             'offset-count': 11, 
             'offset-list': [ 150, 
                 18933, 
                 35005, 
                 51755, 
                 70899, 
                 81142, 
                 101079, 
                 137017, 
                 153949, 
                 167501, 
                 192767], 
             'sectors': '208967'}, 
            { 'id': 'AptsPDTKO.nMoE_GRmqGZSWjT7g-', 
             'offset-count': 11, 
             'offset-list': [ 150, 
                 18635, 
                 34405, 
                 50855, 
                 69697, 
                 79638, 
                 99273, 
                 134905, 
                 151535, 
                 164791, 
                 189759], 
             'sectors': '205963'}, 
            { 'id': 'scc32yarsl41ysxMw43_1Pk8n3M-', 
             'offset-count': 11, 
             'offset-list': [ 150, 
                 18628, 
                 34394, 
                 50839, 
                 69678, 
                 79616, 
                 99248, 
                 134880, 
                 151507, 
                 164754, 
                 189715], 
             'sectors': '205910'}], 
        'format': 'CD', 
        'position': '1', 
        'track-count': 11, 
        'track-list': [ { 'id': 'd65135d9-d917-3c04-9a3e-1a9f3f75dbdf', 
             'length': '246400', 
             'number': '1', 
             'position': '1', 
             'recording': { 'id': '2aefb5c8-f137-4289-b9f9-e78d23695468', 
                 'length': '246400', 
                 'title': 'The New ' 
                   'Year'}, 
             'track_or_recording_length': '246400'}, 
            { 'id': 'bdff5634-4743-3957-9a2c-285af885fd56', 
             'length': '210240', 
             'number': '2', 
             'position': '2', 
             'recording': { 'id': '60d3363b-7cc4-4675-a85b-692683054ff2', 
                 'length': '210213', 
                 'title': 'Lightness'}, 
             'track_or_recording_length': '210240'}, 
            { 'id': '41aec2a4-4ecb-30c0-9052-099a504c1623', 
             'length': '219280', 
             'number': '3', 
             'position': '3', 
             'recording': { 'id': '6c077d47-09ae-4059-b025-d48f48710f92', 
                 'length': '219000', 
                 'title': 'Title ' 
                   'and ' 
                   'Registration'}, 
             'track_or_recording_length': '219280'}, 
            { 'id': 'b28ef069-7bca-3a43-a3a2-dcd123652d2e', 
             'length': '251200', 
             'number': '4', 
             'position': '4', 
             'recording': { 'id': '143b3d2e-82d6-4fbc-afd4-5ac8b8e2ffe6', 
                 'length': '251187', 
                 'title': 'Expo ’86'}, 
             'track_or_recording_length': '251200'}, 
            { 'id': 'c329e654-8866-3a78-939d-3cb8368f1de6', 
             'length': '132520', 
             'number': '5', 
             'position': '5', 
             'recording': { 'id': '59403f9d-722a-48c3-aedb-6c1bb6102668', 
                 'length': '132520', 
                 'title': 'The ' 
                   'Sound ' 
                   'of ' 
                   'Settling'}, 
             'track_or_recording_length': '132520'}, 
            { 'id': '469bbe0a-8ac9-38f9-8998-17ab0bdd8cb4', 
             'length': '261773', 
             'number': '6', 
             'position': '6', 
             'recording': { 'id': 'a594b2c7-5e4f-4e81-9cda-91bab05da25e', 
                 'length': '261773', 
                 'title': 'Tiny ' 
                   'Vessels'}, 
             'track_or_recording_length': '261773'}, 
            { 'id': '9ac716d8-e9a2-3b7f-9588-ef5b91b00925', 
             'length': '475120', 
             'number': '7', 
             'position': '7', 
             'recording': { 'id': '9472186f-ec6d-48d1-9a47-4bc6e922cffe', 
                 'length': '475093', 
                 'title': 'Transatlanticism'}, 
             'track_or_recording_length': '475120'}, 
            { 'id': 'a37ec190-9dde-3c23-8d2d-a561afe56a3a', 
             'length': '221706', 
             'number': '8', 
             'position': '8', 
             'recording': { 'id': '6dfcacb8-f767-43af-9645-9c9dd39eeb44', 
                 'length': '221706', 
                 'title': 'Passenger ' 
                   'Seat'}, 
             'track_or_recording_length': '221706'}, 
            { 'id': '46eecfc9-e7a3-3d73-acb1-02a5b13d3831', 
             'length': '176640', 
             'number': '9', 
             'position': '9', 
             'recording': { 'id': 'e2a42ced-3f08-4012-ad7c-8c215da8a2a1', 
                 'length': '176640', 
                 'title': 'Death ' 
                   'of an ' 
                   'Interior ' 
                   'Decorator'}, 
             'track_or_recording_length': '176640'}, 
            { 'id': 'b3e21529-72fb-30bd-b4b8-22e7c7c9a411', 
             'length': '332826', 
             'number': '10', 
             'position': '10', 
             'recording': { 'id': '70cf7264-1053-4afc-ace8-81cd24cc6391', 
                 'length': '332826', 
                 'title': 'We ' 
                   'Looked ' 
                   'Like ' 
                   'Giants'}, 
             'track_or_recording_length': '332826'}, 
            { 'id': '8b3e26c5-978b-35f2-8b2b-7e2574ae37bf', 
             'length': '215294', 
             'number': '11', 
             'position': '11', 
             'recording': { 'id': 'a9f6ea4c-06a9-46a4-9a67-667617b0fe6a', 
                 'length': '216000', 
                 'title': 'A Lack ' 
                   'of Color'}, 
             'track_or_recording_length': '215294'}]}], 
    'packaging': 'Jewel Case', 
    'quality': 'normal', 
    'release-event-count': 1, 
    'release-event-list': [ { 'area': { 'id': '489ce91b-6658-3307-9877-795b68554c98', 
             'iso-3166-1-code-list': ['US'], 
             'name': 'United States', 
             'sort-name': 'United States'}, 
          'date': '2003-10-07'}], 
    'status': 'Official', 
    'text-representation': {'language': 'eng', 'script': 'Latn'}, 
    'title': 'Transatlanticism'} 

我想從這個字典中解析出特定的幾項信息。例如,我有下面這個字典,它以數據的一般描述作爲鍵,值則是通過一長串列表和字典索引取到的目標數據。對於這個例子,假設 release_dict 就是上面的數據。

# Collect the album tags by indexing straight into release_dict.
# Every lookup here is unguarded: a missing key raises KeyError and an
# out-of-range list index raises IndexError, aborting the whole literal.
# NOTE(review): release_dict and disc_number are not defined in this
# snippet; they are assumed to be set up by the caller.
album_info = { 
    "album_title": release_dict['title'], 
    # "album_artist" and "artist" both read the first artist-credit entry.
    "album_artist": release_dict['artist-credit'][0]['artist']['name'], 
    "artist": release_dict['artist-credit'][0]['artist']['name'], 
    "release_date": release_dict['date'], 
    # disc_number is used directly as an index into 'medium-list'.
    "track_total": release_dict['medium-list'][disc_number]['track-count'], 
    # Only the first label-info entry is consulted.
    "release_label": release_dict['label-info-list'][0]['label']['name'], 
} 

問題是,從 musicbrainzngs 傳回的數據並不總是相同的。例如,通向「release_label」的那串索引可能並不存在。我能想到的處理這個問題的唯一合理方法,是爲每一個標籤單獨寫一個 try 語句,像這樣:

# Start with no tags; a tag is stored only when its source value exists.
album_dict = {}

try:
    # The subscript is evaluated before update() is called, so a missing
    # 'title' key leaves album_dict unchanged.
    album_dict.update(album_title=release_dict['title'])
except KeyError:
    print("Value for 'album_title' not found.")

然後對所有其他標籤做同樣的處理。但是我真的更希望能以某種方式遍歷每個標籤,讓代碼更符合 DRY(不重複)原則,也更安全。

我能想到的唯一解決方案是寫一個函數,讓它讀取索引字符串(即"['artist-credit'][0]['artist']['name']"),逐段解析並檢查每一部分是否存在,但我認爲這對這個問題來說可能有點矯枉過正。

無論如何,我只是想寫一段代碼,儘可能多地從原始元數據中獲取信息。如果您有任何想法,我願意接受所有建議。提前感謝您的幫助。

回答

2

您可以創建一個函數,它接受可變數量的鍵,依次向下查找;只要其中某個鍵不存在,就停止並返回默認值(類似嵌套的 dict.get)。

def rec_get(d, *items, default=None):
    """Follow a path of keys/indices through nested containers.

    Works like a nested ``dict.get``: each element of ``items`` is applied
    in turn as a subscript (``d = d[item]``), so dict keys and list indices
    can be mixed freely.

    Args:
        d: The outermost container to start from.
        *items: Keys and/or indices to apply, outermost first. With no
            items, ``d`` itself is returned.
        default: Keyword-only value returned when the path cannot be
            followed all the way.

    Returns:
        The value found at the end of the path, or ``default`` if any step
        fails because a key is missing, an index is out of range, or an
        intermediate value is not subscriptable with that item.
    """
    try:
        for item in items:
            d = d[item]
    # IndexError covers an empty or too-short list (e.g. index 0 of []);
    # TypeError covers subscripting None or another non-container, or
    # indexing a list with a string.
    except (KeyError, IndexError, TypeError):
        return default
    return d

# Collect the album tags without letting a missing entry abort the literal:
# top-level fields use dict.get (None when absent) and nested fields go
# through rec_get, which returns its default (None here) on a failed lookup.
# NOTE(review): release_dict and disc_number are not defined in this
# snippet; they are assumed to be set up by the caller.
album_info = { 
    "album_title": release_dict.get('title'), 
    # "album_artist" and "artist" both read the first artist-credit entry.
    "album_artist": rec_get(release_dict, 'artist-credit', 0, 'artist', 'name'), 
    "artist": rec_get(release_dict, 'artist-credit', 0, 'artist', 'name'), 
    "release_date": release_dict.get('date'), 
    # disc_number is passed as one step of the lookup path into 'medium-list'.
    "track_total": rec_get(release_dict, 'medium-list', disc_number, 'track-count'), 
    # Only the first label-info entry is consulted.
    "release_label": rec_get(release_dict, 'label-info-list', 0, 'label', 'name'), 
} 
+0

謝謝,這正是我所尋找的。 – tedm1106