2015-09-01 156 views
2

我想用 WinHttpRequest 從此網頁獲取電話號碼。頁面上有一段 JavaScript 會以隨機順序打亂數字，我看不懂它的算法。（標題：從網頁提取電話號碼）

http://www.doska.ru/msg/work/courses-education/seminari-un-trenini/ailoo.html

下面是我的 AutoHotkey 代碼：

; Download the ad page and show whatever text the static HTML carries inside the "ph_td_2" span.
WebRequest := ComObjCreate("WinHttp.WinHttpRequest.5.1") 
; Third argument false: the request is made synchronously, so Send() blocks until the response arrives.
WebRequest.Open("GET", "http://www.doska.ru/msg/work/courses-education/seminari-un-trenini/ailoo.html", false) 
WebRequest.Send() 
; The response is held in Body while its raw pointer is read below.
Body := WebRequest.ResponseBody 
pArr := ComObjValue(Body) 
; The offsets depend on pointer size (64-bit: 24 and 16, 32-bit: 16 and 12). Judging by the variable
; names they read the byte count and the data pointer of the response array -- TODO confirm against
; the SAFEARRAY structure layout.
cBytes := NumGet(pArr+0, A_PtrSize = 8? 24:16, "uint") 
pText := NumGet(pArr+0, A_PtrSize = 8? 16:12, "ptr") 
; Interpret the data at pText (length cBytes) as UTF-8 text.
var := StrGet(pText, cBytes, "utf-8") 
; The first capture group (the span's contents) is what phone1 holds afterwards.
RegExMatch(var, "id=""ph_td_2"">(.+?)</span>", phone) 
msgbox % phone1 

生成電話號碼的腳本似乎是:

<!-- Inline page script quoted from the ad page: defines two UI strings, then calls print_phone for slot 2. -->
<script type="text/javascript">open_stat_lnk("");change_price(8, 0, "", 0);show_banner(); 
// UI strings (Russian): "Type the text from the picture" and "Show number".
var show_code = "Наберите текст с картинки"; var show_phone = "Показать номер"; var pcc_id=0; 
// Calls print_phone with PH_2, slot 2 and b = 0; then, if PH_c is truthy and pcc_id is still 0,
// sets pcc_id to 2 and evaluates the code held in PH_c.
print_phone(PH_2,2,0);if(PH_c && !pcc_id){pcc_id=2;eval(PH_c);} 
</script> 

函數 print_phone(PH_2,2,0) 的定義我是在這裏找到的： http://i.doska.ru/w_inc/js/main.ru.doska.js?v=251

/**
 * Decodes a phone value and writes it into the page.
 *
 * @param {string} d Encoded phone value; passed to _js_decode().
 * @param {number|string} k Slot suffix: the target is element "ph_td_<k>",
 *     falling back to element "ph_td" when that lookup is falsy.
 * @param {*} b Truthy: render the number as a tel: link; falsy: render plain text.
 * @returns {undefined} Nothing. Returns early, leaving the page untouched, when
 *     no target element is found or d is falsy.
 */
function print_phone(d, k, b) {
    var target = el("ph_td_" + k) || el("ph_td");
    if (!target || !d) {
        return;
    }

    // p2 is not declared in this function, so this writes to a p2 in an outer scope.
    p2 = _js_decode(d);
    var shown = p2;

    if (!b) {
        target.innerHTML = shown;
    } else {
        var prefixCell = el("ptd2_" + k);
        if (prefixCell) {
            // Text before the first "<" of the prefix cell, minus one "(" and one ")".
            var prefix = prefixCell.innerHTML.split("<")[0].replace("(", "").replace(")", "");
            // A string pattern replaces only the first match, so at most three hyphens go.
            var dialable = shown.replace("-", "").replace("-", "").replace("-", "");
            p2 = prefix + "" + dialable;
        }
        // Without a prefix cell p2 still equals the decoded text.
        target.innerHTML = '<a href="tel:' + p2 + '">' + shown + "</a>";
    }

    target.style.visibility = "visible";
}

請幫我解決這個問題。

+3

你的問題是什麼? –

+0

問題是,如何獲得真正的電話號碼,而不執行此JavaScript。用我的代碼,我得到這個電話號碼1967607,但真正的電話號碼是166-09-77 – malcev

+0

如果你總是得到1967607然後只是改變數字的順序。 –

回答

1

頁面調用的是 print_phone(PH_2,2,0);，因此傳入的參數值如下。

d := PH_2, 
k := 2, 
b := 0 

你正在做的:

if (0) { // do some stuff } else { display(f); } 

由於 b == 0，而 0 會被當作 false，所以代碼只是直接顯示 f。

f是_js_decode(d)

它的實現如下（使用瀏覽器的調試器可能會為你的問題提供更多線索）：

/**
 * Decodes an encoded phone value (e.g. the page's PH_2 variable) by calling
 * _ph_dec() with a fixed key and mode 2.
 *
 * @param {string} b Encoded value.
 * @returns {string} Whatever _ph_dec() returns for that value.
 */
function _js_decode(b) {
    var key = "Hb9c0mOswgV4p{zDlf";
    var mode = 2;
    return _ph_dec(b, key, mode);
}

/**
 * Decodes an obfuscated string against a repeating key.
 *
 * The input is base64-decoded with _b64_dec() and then passed through
 * unescape(). Each resulting character is combined with the key character at
 * the same position (the key repeats once it runs out):
 *   - k == 1: data code minus key code
 *   - k == 2: data code minus key code, plus 14 (the code does not say why 14)
 *   - otherwise: data code XOR key code
 *
 * @param {string} g Encoded input.
 * @param {string} r Key (the caller in this file passes "Hb9c0mOswgV4p{zDlf").
 * @param {number} k Mode selector, compared loosely (==).
 * @returns {string} One output character per decoded input character.
 */
function _ph_dec(g, r, k) {
    var text = unescape(_b64_dec(g));
    var keyLength = r.length;
    var result = "";

    for (var i = 0; i < text.length; i++) {
        var dataCode = text.charCodeAt(i);
        // The key character comes from r (not from the data), cycling through the key.
        var keyCode = r.charCodeAt(i % keyLength);
        var code;

        if (k == 1) {
            code = dataCode - keyCode;
        } else if (k == 2) {
            code = dataCode - keyCode + 14;
        } else {
            code = dataCode ^ keyCode;
        }

        result += String.fromCharCode(code);
    }

    return result;
}

+0

謝謝!但我不明白這件事:q始終是負數,因此String.fromCharCode(q)是空白的。 – malcev

0

假設 thenchanter 說得對，即數字順序總是相同的，那麼下面的代碼就能完成這項工作：

; Download the ad page.
mainHtml := HtmlGet("http://www.doska.ru/msg/work/courses-education/seminari-un-trenini/ailoo.html") 
; Capture the text in front of the phone span (used later as phone1) and the span's own text.
RegExMatch(mainHtml, "<td width=100% class=""ads_contacts"" nowrap id=""ptd2_[^""]*"">([^<]+)<span class=""ads_contacts_bold"" id=""ph_td[^""]*"">([^<]+)</span>", phone) 
; Capture the path of the "contacts_js" script that the page writes out; group 1 is used below as contactsJsPartUrl1.
RegExMatch(mainHtml, "var ss_w='показать';document.write\('<scr'\+'ipt id=""contacts_js"" src=""([^\?]+)\?t='\+new Date\(\)\+'""></scr'\+'ipt>' \);", contactsJsPartUrl) 
;contactsJsUrl := "http://www.doska.ru" contactsJsPartUrl1 "?t=" GetJsDate() 
; Build the script URL: site root + captured path + "?t=" + encoded date + "0." + 16 random letters + "&reload=1".
contactsJsUrl := "http://www.doska.ru" contactsJsPartUrl1 "?t=" GetJsDate() "0." GetRandomString(16,"") "&reload=1" 
;http://www.doska.ru/js/2015-08-17/1941/VnQNHE9hRRBVfQgZVWJJQVZ6RUYLLkNaXn0A.js?t=Thu Sep 03 2015 00:16:12 GMT+0200 (W. Europe Standard Time) 

; Download that script; the block comment below appears to be a sample of its contents.
secretVariablesJs := HtmlGet(contactsJsUrl) 
/* 
var PHONE_CNT=-1;var PHONE_CNT2=-1;var PHONE_CNT3=-1;var EMAIL_CNT=-1;var SHOW_CNT=29;var PH_c="";var PH_1=0;var PH_2=0;var PH_3=0; 
pcc_id=0;PH_1=gpzd("JTgwJUEyJTdDX3BsJTdDWnUlOEYlN0UlNUJ4JThFciU5M3hpJTdGZg==","79325724"); 
PH_2 = gpzd("JThFJTlEeCU1RXlvaSU4RXpobSU3RXUlN0MlOEZUJThEJThEeF8lN0MlOTF6ZHhlJTYwZw==","15372732"); 
PH_3 = gpzd("JTdFJUEyelpwbSU3RSU1Q3MlOEQlN0VZcyU4RXMlOTV6ZyU3RGY=","45575927"); 
*/ 
; Show the URL that was requested.
msgbox % contactsJsUrl 
; JScript source text for _js_decode, stored in a variable by a continuation section.
; It calls _ph_dec with a fixed key string and mode 2.
_js_decode = 
(
function _js_decode(b) { 
    return _ph_dec(b, "Hb9c0mOswgV4p{zDlf", 2) 
} 
) 

; JScript source text for _ph_dec, stored in a variable by a continuation section.
; The function base64-decodes and unescapes g, then combines each character with the key
; character at the same position (the key r repeats): k == 1 subtracts, k == 2 subtracts
; and adds 14, any other k XORs. The modulo operator is written with a backtick in front
; of the percent sign so AutoHotkey keeps it as a literal character in the text.
_ph_dec = 
(
function _ph_dec(g, r, k) { 
    g = unescape(_b64_dec(g)); 
    var n = r.length; 
    var d = g.length; 
    var c = ""; 
    var q, p; 
    for (var f = 0; f < d; f++) { 
     q = g.substring(f, f + 1); 
     p = r.substring(f `% n, f `% n + 1); 
     if (k == 1) { 
      q = q.charCodeAt(0) - p.charCodeAt(0) 
     } else { 
      if (k == 2) { 
       q = q.charCodeAt(0) - p.charCodeAt(0) + 14 
      } else { 
       q = q.charCodeAt(0)^p.charCodeAt(0) 
      } 
     } 
     c = c + String.fromCharCode(q) 
    } 
    return c 
} 
) 

_b64_dec = 
(
// Decodes a base64 string into a string holding one character per decoded byte.
//   n: base64 text in the standard alphabet (A-Z, a-z, 0-9, +, /) with "=" padding.
//   returns: the decoded characters; an empty input gives an empty string.
// Written with var and classic string methods only, since this text is handed to a JScript engine.
function _b64_dec(n) {
    // A character's index in this string is its 6-bit value. The digits and "+" must be
    // present so that "=" lands at index 64, which the padding checks below rely on.
    var f = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=";
    var d, c, b, s, r, q, p, u, k = 0,
     g = "";
    // Checking the length before the first pass keeps an empty input from producing three NUL characters.
    while (k < n.length) {
     // Read four base64 characters and pack their 6-bit values into one 24-bit group.
     s = f.indexOf(n.charAt(k++));
     r = f.indexOf(n.charAt(k++));
     q = f.indexOf(n.charAt(k++));
     p = f.indexOf(n.charAt(k++));
     u = s << 18 | r << 12 | q << 6 | p;
     // Split the group into its three bytes.
     d = u >> 16 & 255;
     c = u >> 8 & 255;
     b = u & 255;
     if (q == 64) {
      // "==" padding: only the first byte is real.
      g += String.fromCharCode(d);
     } else if (p == 64) {
      // "=" padding: the first two bytes are real.
      g += String.fromCharCode(d, c);
     } else {
      g += String.fromCharCode(d, c, b);
     }
    }
    return g;
}
) 

gpzd = 
(
// Decodes one value from the downloaded contacts script.
//   data: encoded text, passed through to _ph_dec.
//   key: number (or numeric string) from which the real key is derived.
//   returns: the result of _ph_dec in mode 2.
function gpzd(data, key) {
    // The derived number's decimal text is what _ph_dec uses as its key string.
    var derivedKey = key * 6 - 47289 + 517;
    // Return directly: no undeclared helper variable is written outside this function.
    return _ph_dec(data, String(derivedKey), 2);
}
) 

; Assemble one JScript program: the downloaded variable assignments first, then the decoder
; functions, each part followed by CRLF.
jsCode := ""
for partIndex, jsPart in [secretVariablesJs, _js_decode, gpzd, _ph_dec, _b64_dec]
    jsCode .= jsPart "`r`n"

; Load the program into a JScript engine.
sc := ComObjCreate("ScriptControl")
sc.Language := "JScript"
sc.ExecuteStatement(jsCode)

; Try PH_2 first, then PH_1, then PH_3, stopping at the first result that has no "*" in it
; (presumably "*" marks a value that is still masked).
for slotIndex, slotName in ["PH_2", "PH_1", "PH_3"]
{
    phone2Decrypted := sc.Eval("_js_decode(" slotName ")")
    if !InStr(phone2Decrypted,"*")
        break
}

; Report the outcome: either the failure notice or the prefix plus the decoded number.
if InStr(phone2Decrypted,"*")
    msgbox failed to get the phone number
else
    MsgBox, % phone1 " " phone2Decrypted


; Fetches url with a GET request and returns the response body decoded as UTF-8 text.
;   url - address to download
; The WinHttpRequest object is created once (Static) and reused by later calls.
HtmlGet(url) { 
    Static WebRequest := ComObjCreate("WinHttp.WinHttpRequest.5.1") 
    WebRequest.Open("GET", url) 
    ; Send a desktop Firefox User-Agent string with the request.
    WebRequest.SetRequestHeader("User-Agent","Mozilla/5.0 (Windows NT 6.3; WOW64; rv:40.0) Gecko/20100101 Firefox/40.0") 
    WebRequest.Send() 
    ; The response is held in Body while its raw pointer is read below.
    Body := WebRequest.ResponseBody 
    pArr := ComObjValue(Body) 
    ; The offsets depend on pointer size (64-bit: 24 and 16, 32-bit: 16 and 12). Judging by the
    ; variable names they read the byte count and the data pointer of the response array --
    ; TODO confirm against the SAFEARRAY structure layout.
    cBytes := NumGet(pArr+0, A_PtrSize = 8? 24:16, "uint") 
    pText := NumGet(pArr+0, A_PtrSize = 8? 16:12, "ptr") 
    ; Interpret the data at pText (length cBytes) as UTF-8 text.
    Return StrGet(pText, cBytes, "utf-8") 
} 

; Returns the JScript engine's string form of the current date (''+new Date()+''),
; passed through encodeURI.
GetJsDate() {
    jsEngine := ComObjCreate("ScriptControl")
    jsEngine.Language := "JScript"
    ; The un-encoded variant would be "''+new Date()+''".
    dateExpression := "encodeURI(''+new Date()+'')"
    Return jsEngine.Eval(dateExpression)
}

; Builds a string of randomly chosen characters.
;   length - number of characters to generate
;   chars  - characters to draw from; when empty, the lowercase letters a-z are used
; Returns the generated string.
GetRandomString(length,chars:="") {
    If (chars = "")
        chars := "abcdefghijklmnopqrstuvwxyz"
    ; Measure the alphabet once and reuse it for every draw.
    charsCount := StrLen(chars)
    ; Start from an explicit empty string rather than an unset variable.
    string := ""
    Loop %length% {
        ; Pick a 1-based position inside chars and append that character.
        Random, num, 1, % charsCount
        string .= SubStr(chars,num,1)
    }
    Return string
}
+0

評論區不適合進行長篇討論；這段對話已[轉移到聊天室](http://chat.stackoverflow.com/rooms/88901/discussion-on-answer-by-forivin-extract-phone-number-from-a-web-page)。 –

0

您可以通過在AutoHotkey的使用ImageSearch搜索電話號碼標籤,並通過增加Y軸移動鼠標指針並使用MouseClickDrag