2016-12-01 171 views
1

我正在抓取(scrape)一個網站,並且想使用 Python 正則表達式,從以下 JS 代碼中提取 data 變量的 JSON。(問題標題:使用正則表達式提取兩個字符串之間的字符串)

<script type="text/javascript"> 
P.when('A').register("ImageBlockATF", function(A){ 
    var data = { 
       'colorImages': { 'initial': [{"hiRes":"https://images-na.ssl-images-amazon.com/images/I/81Oo79kGp2L._SL1500_.jpg","thumb":"https://images-na.ssl-images-amazon.com/images/I/41SnVVzKChL._SS40_.jpg","large":"https://images-na.ssl-images-amazon.com/images/I/41SnVVzKChL.jpg","main":{"https://images-na.ssl-images-amazon.com/images/I/81Oo79kGp2L._SY355_.jpg":[355,270],"https://images-na.ssl-images-amazon.com/images/I/81Oo79kGp2L._SY450_.jpg":[450,342],"https://images-na.ssl-images-amazon.com/images/I/81Oo79kGp2L._SY550_.jpg":[550,419],"https://images-na.ssl-images-amazon.com/images/I/81Oo79kGp2L._SY606_.jpg":[606,461],"https://images-na.ssl-images-amazon.com/images/I/81Oo79kGp2L._SY679_.jpg":[679,517]},"variant":"MAIN","lowRes":null},{"hiRes":"https://images-na.ssl-images-amazon.com/images/I/81%2BGc-r4gLL._SL1500_.jpg","thumb":"https://images-na.ssl-images-amazon.com/images/I/416rXB0xcmL._SS40_.jpg","large":"https://images-na.ssl-images-amazon.com/images/I/416rXB0xcmL.jpg","main":{"https://images-na.ssl-images-amazon.com/images/I/81%2BGc-r4gLL._SY355_.jpg":[355,276],"https://images-na.ssl-images-amazon.com/images/I/81%2BGc-r4gLL._SY450_.jpg":[450,349],"https://images-na.ssl-images-amazon.com/images/I/81%2BGc-r4gLL._SX425_.jpg":[547,425],"https://images-na.ssl-images-amazon.com/images/I/81%2BGc-r4gLL._SX466_.jpg":[600,466],"https://images-na.ssl-images-amazon.com/images/I/81%2BGc-r4gLL._SX522_.jpg":[672,522]},"variant":"PT01","lowRes":null},{"hiRes":"https://images-na.ssl-images-amazon.com/images/I/817slrgsGbL._SL1500_.jpg","thumb":"https://images-na.ssl-images-amazon.com/images/I/51gQxeLTYhL._SS40_.jpg","large":"https://images-na.ssl-images-amazon.com/images/I/51gQxeLTYhL.jpg","main":{"https://images-na.ssl-images-amazon.com/images/I/817slrgsGbL._SX355_.jpg":[251,355],"https://images-na.ssl-images-amazon.com/images/I/817slrgsGbL._SX450_.jpg":[318,450],"https://images-na.ssl-images-amazon.com/images/I/817slrgsGbL._SX425_.jpg":[300,425],"https://images-na.ssl-images-a
mazon.com/images/I/817slrgsGbL._SX466_.jpg":[329,466],"https://images-na.ssl-images-amazon.com/images/I/817slrgsGbL._SX522_.jpg":[369,522]},"variant":"PT02","lowRes":null},{"hiRes":"https://images-na.ssl-images-amazon.com/images/I/71r3nXKZBmL._SL1500_.jpg","thumb":"https://images-na.ssl-images-amazon.com/images/I/41d9m8J4MbL._SS40_.jpg","large":"https://images-na.ssl-images-amazon.com/images/I/41d9m8J4MbL.jpg","main":{"https://images-na.ssl-images-amazon.com/images/I/71r3nXKZBmL._SX355_.jpg":[142,355],"https://images-na.ssl-images-amazon.com/images/I/71r3nXKZBmL._SX450_.jpg":[180,450],"https://images-na.ssl-images-amazon.com/images/I/71r3nXKZBmL._SX425_.jpg":[170,425],"https://images-na.ssl-images-amazon.com/images/I/71r3nXKZBmL._SX466_.jpg":[187,466],"https://images-na.ssl-images-amazon.com/images/I/71r3nXKZBmL._SX522_.jpg":[209,522]},"variant":"PT03","lowRes":null},{"hiRes":"https://images-na.ssl-images-amazon.com/images/I/81Uys4ccU4L._SL1500_.jpg","thumb":"https://images-na.ssl-images-amazon.com/images/I/41zh%2BCGamHL._SS40_.jpg","large":"https://images-na.ssl-images-amazon.com/images/I/41zh%2BCGamHL.jpg","main":{"https://images-na.ssl-images-amazon.com/images/I/81Uys4ccU4L._SY355_.jpg":[355,260],"https://images-na.ssl-images-amazon.com/images/I/81Uys4ccU4L._SY450_.jpg":[450,330],"https://images-na.ssl-images-amazon.com/images/I/81Uys4ccU4L._SY550_.jpg":[550,403],"https://images-na.ssl-images-amazon.com/images/I/81Uys4ccU4L._SY606_.jpg":[606,444],"https://images-na.ssl-images-amazon.com/images/I/81Uys4ccU4L._SY679_.jpg":[679,498]},"variant":"PT04","lowRes":null},{"hiRes":"https://images-na.ssl-images-amazon.com/images/I/8179KoLoyGL._SL1500_.jpg","thumb":"https://images-na.ssl-images-amazon.com/images/I/41sMHp-WegL._SS40_.jpg","large":"https://images-na.ssl-images-amazon.com/images/I/41sMHp-WegL.jpg","main":{"https://images-na.ssl-images-amazon.com/images/I/8179KoLoyGL._SY355_.jpg":[355,258],"https://images-na.ssl-images-amazon.com/images/I/8179KoLoyGL._SY450_.jpg
":[450,327],"https://images-na.ssl-images-amazon.com/images/I/8179KoLoyGL._SY550_.jpg":[550,400],"https://images-na.ssl-images-amazon.com/images/I/8179KoLoyGL._SY606_.jpg":[606,441],"https://images-na.ssl-images-amazon.com/images/I/8179KoLoyGL._SY679_.jpg":[679,494]},"variant":"PT05","lowRes":null},{"hiRes":"https://images-na.ssl-images-amazon.com/images/I/71Sw2wrvy6L._SL1364_.jpg","thumb":"https://images-na.ssl-images-amazon.com/images/I/416TFrjOFlL._SS40_.jpg","large":"https://images-na.ssl-images-amazon.com/images/I/416TFrjOFlL.jpg","main":{"https://images-na.ssl-images-amazon.com/images/I/71Sw2wrvy6L._SX355_.jpg":[231,355],"https://images-na.ssl-images-amazon.com/images/I/71Sw2wrvy6L._SX450_.jpg":[293,450],"https://images-na.ssl-images-amazon.com/images/I/71Sw2wrvy6L._SX425_.jpg":[277,425],"https://images-na.ssl-images-amazon.com/images/I/71Sw2wrvy6L._SX466_.jpg":[304,466],"https://images-na.ssl-images-amazon.com/images/I/71Sw2wrvy6L._SX522_.jpg":[340,522]},"variant":"PT06","lowRes":null},{"hiRes":"https://images-na.ssl-images-amazon.com/images/I/61GKBhtPKPL._SL1341_.jpg","thumb":"https://images-na.ssl-images-amazon.com/images/I/41%2BNMI0l9yL._SS40_.jpg","large":"https://images-na.ssl-images-amazon.com/images/I/41%2BNMI0l9yL.jpg","main":{"https://images-na.ssl-images-amazon.com/images/I/61GKBhtPKPL._SX355_.jpg":[190,355],"https://images-na.ssl-images-amazon.com/images/I/61GKBhtPKPL._SX450_.jpg":[240,450],"https://images-na.ssl-images-amazon.com/images/I/61GKBhtPKPL._SX425_.jpg":[227,425],"https://images-na.ssl-images-amazon.com/images/I/61GKBhtPKPL._SX466_.jpg":[249,466],"https://images-na.ssl-images-amazon.com/images/I/61GKBhtPKPL._SX522_.jpg":[279,522]},"variant":"PT07","lowRes":null},{"hiRes":null,"thumb":"https://images-na.ssl-images-amazon.com/images/I/41ziorm06nL._SS40_.jpg","large":"https://images-na.ssl-images-amazon.com/images/I/41ziorm06nL.jpg","main":{"https://images-na.ssl-images-amazon.com/images/I/41ziorm06nL._SY355_.jpg":[355,266],"https://images-
na.ssl-images-amazon.com/images/I/41ziorm06nL._SY450_.jpg":[450,338],"https://images-na.ssl-images-amazon.com/images/I/41ziorm06nL.jpg":[500,375]},"variant":"AW01","lowRes":null},{"hiRes":null,"thumb":"https://images-na.ssl-images-amazon.com/images/I/41lZ6jtPe%2BL._SS40_.jpg","large":"https://images-na.ssl-images-amazon.com/images/I/41lZ6jtPe%2BL.jpg","main":{"https://images-na.ssl-images-amazon.com/images/I/41lZ6jtPe%2BL._SY355_.jpg":[355,266],"https://images-na.ssl-images-amazon.com/images/I/41lZ6jtPe%2BL._SY450_.jpg":[450,338],"https://images-na.ssl-images-amazon.com/images/I/41lZ6jtPe%2BL.jpg":[500,375]},"variant":"AW02","lowRes":null},{"hiRes":null,"thumb":"https://images-na.ssl-images-amazon.com/images/I/51JqQcNGjUL._SS40_.jpg","large":"https://images-na.ssl-images-amazon.com/images/I/51JqQcNGjUL.jpg","main":{"https://images-na.ssl-images-amazon.com/images/I/51JqQcNGjUL._SY355_.jpg":[355,355],"https://images-na.ssl-images-amazon.com/images/I/51JqQcNGjUL._SY450_.jpg":[450,450],"https://images-na.ssl-images-amazon.com/images/I/51JqQcNGjUL._SX425_.jpg":[425,425],"https://images-na.ssl-images-amazon.com/images/I/51JqQcNGjUL._SX466_.jpg":[466,466],"https://images-na.ssl-images-amazon.com/images/I/51JqQcNGjUL.jpg":[500,500]},"variant":"AW03","lowRes":null}]}, 
       'colorToAsin': {'initial': {}}, 
       'holderRatio': 1.0, 
       'holderMaxHeight': 700, 
       'heroImage': {'initial': []}, 
       'weblabs' : {} 
       }; 
    A.trigger('P.AboveTheFold'); // trigger ATF event. 
    return data; 
}); 
</script> 

我一直在嘗試以下正則表達式,但它不起作用。

(var\s+data\s+=).*^[A.trigger('P.AboveTheFold')]$ 

基本上我需要一個正則表達式,用來抓取 var data = 和 A.trigger('P.AboveTheFold') 之間的字符串。

回答

1

如果你確定你的JSON數據不包括任何;,你可以寫:

var data\s*=\s*([^;]*}); 

這不是很健壯,你可能應該使用解析庫。JSON 數據在第一個捕獲組(group 1)內。

查看它here

如果你確定你的數據位於 var data = 和 A.trigger('P.AboveTheFold') 之間,你可以使用:

(?<=var data =).*(?=A.trigger\('P\.AboveTheFold'\)) 

看到它there

JSON 數據就是完整的匹配結果,這要歸功於環視斷言(lookaround,即正向後行斷言 (?<=...) 與正向先行斷言 (?=...))。它也不健壯。例如,data 和 = 之間的空白只要有任何不同,就會使匹配失敗。你還需要 re.DOTALL 標誌,告訴 Python 讓 . 也能匹配換行符。

相關問題