2017-03-04 55 views
-1

我有一個字符串是這樣的:獲得字符串中的所有URL在python

[ 
{ 
"profilechecksum":"58cd944da7b8e647abdcdb722d74fc7ai9060852", 
"userloginstatus":"Last Online today", 
"subscription_text":null, 
"subscription_icon":null, 
"age":"22 Years", 
"username":"ZZXS8433", 
"height":"5' 2\" ", 
"occupation":"Not working", 
"caste":"Namasudra\/Namosudra", 
"income":"No Income", 
"mtongue":"Bengali", 
"edu_level_new":"High School", 
"location":"Kolkata", 
"photo":{ 
"label":null, 
"url":"http:\/\/mediacdn.jeevansathi.com\/866\/17\/17337654-1375139585.jpeg", 
"action":null 
}, 
"size":null, 
"album_count":"1", 
"timetext":null, 
"seen":"Y", 
"religion":"Hindu", 
"gender":"F", 
"featured":null, 
"filter_score":"", 
"filter_reason":"", 
"highlighted":0, 
"verification_seal":null, 
"verification_status":null, 
"mstatus":"Never Married", 
"college":null, 
"pg_college":null, 
"company_name":null, 
"gunascore":null, 
"name_of_user":null, 
"profileid":"9060852", 
"buttonDetails":{ 
"buttons":[ 
{ 
"iconid":"001", 
"label":"Send Interest", 
"action":"INITIATE", 
"value":null, 
"params":null 
}, 
{ 
"iconid":"003", 
"label":"Shortlist", 
"action":"SHORTLIST", 
"value":null, 
"params":"&shortlist=false" 
}, 
{ 
"iconid":"005", 
"label":"Photo", 
"action":"ALBUM", 
"value":"1", 
"params":null 
}, 
{ 
"iconid":"007", 
"label":"Contact", 
"action":"CONTACTDETAIL", 
"value":null, 
"params":null 
} 
], 
"button":null, 
"infomsgiconid":null, 
"infomsglabel":null, 
"infobtnlabel":null, 
"infobtnvalue":null, 
"infobtnaction":null 
}, 
"buttonDetailsJSMS":{ 
"buttons":[ 
{ 
"action":"INITIATE", 
"label":"Send Interest", 
"iconid":null, 
"primary":"true", 
"secondary":null, 
"params":"&stype=A", 
"enable":true, 
"id":"INITIATE" 
}, 
{ 
"iconid":null, 
"label":"View Contacts", 
"action":"CONTACT_DETAIL", 
"value":null, 
"params":null, 
"enable":true, 
"primary":"true", 
"secondary":null, 
"id":"CONTACT_DETAIL" 
}, 
{ 
"action":"CHAT", 
"label":"Chat", 
"iconid":null, 
"primary":"true", 
"secondary":null, 
"enable":true, 
"id":"CHAT", 
"params":"ZZXS8433,9060852,http:\/\/mediacdn.jeevansathi.com\/866\/17\/17337654-1375139585.jpeg,N" 
}, 
{ 
"action":"SHORTLIST", 
"iconid":"003", 
"label":"Shortlist", 
"params":"&shortlist=false", 
"primary":"true", 
"secondary":null, 
"enable":"true", 
"id":"SHORTLIST" 
} 
], 
"button":null, 
"infomsgiconid":null, 
"infomsglabel":null, 
"infobtnlabel":null, 
"infobtnvalue":null, 
"infobtnaction":null 
} 
}, 
{ 
"profilechecksum":"3c08e787ae61cbfada3232eb5393fa2fi8295748", 
"userloginstatus":"Last Online today", 
"subscription_text":null, 
"subscription_icon":null, 
"age":"22 Years", 
"username":"ZAUY2793", 
"height":"5' 2\" ", 
"occupation":"Student", 
"caste":"Jhijhotiya", 
"income":"No Income", 
"mtongue":"Hindi-MP", 
"edu_level_new":"B.Com", 
"location":"Indore", 
"photo":{ 
"label":null, 
"url":"http:\/\/mediacdn.jeevansathi.com\/713\/7\/14267803-1374952962.jpeg", 
"action":null 
}, 
"size":null, 
"album_count":"2", 
"timetext":null, 
"seen":"Y", 
"religion":"Hindu", 
"gender":"F", 
"featured":null, 
"filter_score":"", 
"filter_reason":"", 
"highlighted":0, 
"verification_seal":null, 
"verification_status":null, 
"mstatus":"Never Married", 
"college":null, 
"pg_college":null, 
"company_name":null, 
"gunascore":null, 
"name_of_user":null, 
"profileid":"8295748", 
"buttonDetails":{ 
"buttons":[ 
{ 
"iconid":"001", 
"label":"Send Interest", 
"action":"INITIATE", 
"value":null, 
"params":null 
}, 
{ 
"iconid":"003", 
"label":"Shortlist", 
"action":"SHORTLIST", 
"value":null, 
"params":"&shortlist=false" 
}, 
{ 
"iconid":"005", 
"label":"Album", 
"action":"ALBUM", 
"value":"2", 
"params":null 
}, 
{ 
"iconid":"007", 
"label":"Contact", 
"action":"CONTACTDETAIL", 
"value":null, 
"params":null 
} 
], 
"button":null, 
"infomsgiconid":null, 
"infomsglabel":null, 
"infobtnlabel":null, 
"infobtnvalue":null, 
"infobtnaction":null 
}, 
"buttonDetailsJSMS":{ 
"buttons":[ 
{ 
"action":"INITIATE", 
"label":"Send Interest", 
"iconid":null, 
"primary":"true", 
"secondary":null, 
"params":"&stype=A", 
"enable":true, 
"id":"INITIATE" 
}, 
{ 
"iconid":null, 
"label":"View Contacts", 
"action":"CONTACT_DETAIL", 
"value":null, 
"params":null, 
"enable":true, 
"primary":"true", 
"secondary":null, 
"id":"CONTACT_DETAIL" 
}, 
{ 
"action":"CHAT", 
"label":"Chat", 
"iconid":null, 
"primary":"true", 
"secondary":null, 
"enable":true, 
"id":"CHAT", 
"params":"ZAUY2793,8295748,http:\/\/mediacdn.jeevansathi.com\/713\/7\/14267803-1374952962.jpeg,N" 
}, 
{ 
"action":"SHORTLIST", 
"iconid":"003", 
"label":"Shortlist", 
"params":"&shortlist=false", 
"primary":"true", 
"secondary":null, 
"enable":"true", 
"id":"SHORTLIST" 
} 
], 
"button":null, 
"infomsgiconid":null, 
"infomsglabel":null, 
"infobtnlabel":null, 
"infobtnvalue":null, 
"infobtnaction":null 
} 
} 
] 

我想寫一個正則表達式,從中取出 "url":"http:\/\/mediacdn.jeevansathi.com\/866\/17\/17337654-1375139585.jpeg" 裏的網址(即只要網址那一部分)。

我想:

p = re.compile("\"url\":\".*\.jpeg\"") # NOTE: this pattern contains no capture group, so findall() returns each whole match
      print p.findall(line) # Python 2 print statement; outputs the list of full matches, including the "url": prefix

但它返回的結果並不只是URL部分。

+0

添加一個捕獲組 - `re.compile(r'"url":"(.*\.jpeg)"')` –

+0

不工作。 https://regex101.com/r/4MvHDj/1 – learner

+0

您應該在**代碼**中使用 `re.findall`,而不是在 regex101 上測試。 –

回答

1

正則表達式在這裏似乎是錯誤的工具。這是一個JSON字符串;您應該解析它,然後搜索url屬性以jpeg結尾的項目。

# Parse the JSON text into Python objects (here: a list of profile dicts).
data = json.loads(my_string)
# Collect every photo URL ending in 'jpeg'. Iterate over the parsed `data`
# (not `results`, which does not exist yet), and fall back to an empty dict /
# empty string so that a missing or null "photo"/"url" is skipped, not an error.
results = [item['photo']['url'] for item in data if ((item.get('photo') or {}).get('url') or '').endswith('jpeg')]
+0

我想用正則表達式來做這件事,因爲我的字符串不是純JSON,它的開頭和結尾還可能帶有額外的前綴和後綴。 – learner

相關問題