2012-06-04 34 views
0

我試圖抓取這個頁面的內容:https://www.google.com/search?hl=en&biw=1920&bih=956&tbm=shop&q=Xenon+12640&oq=Xenon+12640&aq=f&gs_l=serp.3...3743.3743.0.3905.1.1.0.0.0.0.0.0..0.0.ekh..0.0.Hq3XS7AxFDU&sei=Dr_MT_WOM6nO2AWE25mTCA&gbv=2 (問題:用代碼取得的頁面源內容與瀏覽器中看到的不同)

我遇到的問題是:在瀏覽器中打開該網址時,我可以看到需要抓取的所有內容;但在代碼中抓取同一個鏈接時,卻缺少兩個(重要的)部分,即位於價格和賣家信息下方的評論數量和評級。下面是 C# 內部 Web 客戶端所取得頁面的截圖:http://gyazo.com/908a37c7f70712fba1f82ec90a604d4d.png?1338822369

以下是我用來獲取內容的代碼:

/// <summary>
/// Performs an HTTP GET against <paramref name="inURL"/> and returns the response body as text.
/// </summary>
/// <remarks>
/// Side effects: updates <c>currentUrl</c> (first to the requested URL, then to the final
/// response URI), sets <c>currentAddress</c> to the request address, merges cookies returned
/// by the server into <paramref name="inCookieContainer"/>, and passes the body to
/// <c>setViewState</c>.
/// </remarks>
/// <param name="inURL">Absolute URL to request.</param>
/// <param name="inCookieContainer">Cookie store attached to the request and updated from the response.</param>
/// <param name="GZip">When true, advertises "gzip, deflate" in the Accept-Encoding header.</param>
/// <param name="proxyAddress">Proxy host; the literal "0" means "do not use a proxy".</param>
/// <param name="proxyPort">Proxy port; 0 means "do not use a proxy".</param>
/// <param name="proxyUserName">User name for the proxy credentials.</param>
/// <param name="proxyPassword">Password for the proxy credentials.</param>
/// <returns>
/// The decoded response body on success. On any failure the exception message is returned
/// instead of throwing, so callers cannot distinguish an error from a body by type alone.
/// </returns>
public string navGet(string inURL, CookieContainer inCookieContainer, bool GZip, string proxyAddress, int proxyPort, string proxyUserName, string proxyPassword)
{
    try
    {
        this.currentUrl = inURL;

        HttpWebRequest webRequest = (HttpWebRequest)WebRequest.Create(inURL);
        webRequest.Timeout = this.TimeOutSetting;
        webRequest.CookieContainer = inCookieContainer;

        // "0" / 0 are the sentinels this method treats as "no proxy configured".
        if (proxyAddress != "0" && proxyPort != 0)
        {
            webRequest.Proxy = new WebProxy(proxyAddress, proxyPort);
            // Use login credentials to access proxy
            webRequest.Proxy.Credentials = new NetworkCredential(proxyUserName, proxyPassword);
        }

        Uri destination = webRequest.Address;
        webRequest.KeepAlive = true;
        webRequest.Method = "GET";
        webRequest.Accept = "*/*";
        webRequest.Headers.Add("Accept-Language", "en-us");
        if (GZip)
        {
            webRequest.Headers.Add("Accept-Encoding", "gzip, deflate");
        }
        webRequest.AllowAutoRedirect = true;
        webRequest.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; FunWebProducts; .NET CLR 1.1.4322; .NET CLR 2.0.50727)";

        // NOTE(review): a GET sends no body, so this Content-Type describes nothing;
        // kept only so the request on the wire stays the same as before.
        webRequest.ContentType = "text/xml";

        // If the container already holds a PHPSESSID cookie for this URI, also send it in an
        // explicit Cookie header. A missing container or cookie is simply skipped rather than
        // being detected through a swallowed NullReferenceException.
        Cookie sessionCookie = inCookieContainer == null
            ? null
            : inCookieContainer.GetCookies(destination)["PHPSESSID"];
        if (sessionCookie != null)
        {
            webRequest.Headers.Add("Cookie", "USER_OK=1;PHPSESSID=" + sessionCookie.Value);
        }

        // Dispose the response on every path so a failure while reading the body cannot
        // leave the underlying connection checked out.
        using (HttpWebResponse webResponse = (HttpWebResponse)webRequest.GetResponse())
        {
            if (!webRequest.HaveResponse)
            {
                // Same text the caller previously received via the caught exception.
                return "No response received from host.";
            }

            // First handle cookies
            mergeResponseCookies(webResponse.Cookies, inCookieContainer, destination);

            // Read response
            string responseString;
            using (StreamReader reader = new StreamReader(openDecodedStream(webResponse), resolveEncoding(webResponse)))
            {
                responseString = reader.ReadToEnd();
            }

            this.currentUrl = webResponse.ResponseUri.ToString();
            this.currentAddress = webRequest.Address.ToString();
            setViewState(responseString);
            return responseString;
        }
    }
    catch (Exception ex)
    {
        // Existing contract: failures are reported through the return value.
        return ex.Message;
    }
}

/// <summary>
/// Copies cookies from a response into the container: a cookie whose name already exists for
/// <paramref name="destination"/> has its value overwritten, any other cookie is added.
/// </summary>
private static void mergeResponseCookies(CookieCollection responseCookies, CookieContainer container, Uri destination)
{
    if (container == null)
    {
        return;
    }

    foreach (Cookie received in responseCookies)
    {
        bool updatedExisting = false;
        foreach (Cookie existing in container.GetCookies(destination))
        {
            if (received.Name.Equals(existing.Name))
            {
                existing.Value = received.Value;
                updatedExisting = true;
            }
        }

        if (!updatedExisting)
        {
            container.Add(received);
        }
    }
}

/// <summary>
/// Returns the response stream, wrapped in a decompressor when the Content-Encoding header
/// names gzip or deflate.
/// </summary>
private static Stream openDecodedStream(HttpWebResponse response)
{
    Stream rawStream = response.GetResponseStream();
    string contentEncoding = response.ContentEncoding ?? string.Empty;

    // Ordinal, case-insensitive match: a culture-sensitive ToLower() does not map "GZIP" to
    // "gzip" under every locale (e.g. Turkish dotless i).
    if (contentEncoding.IndexOf("gzip", StringComparison.OrdinalIgnoreCase) >= 0)
    {
        return new GZipStream(rawStream, CompressionMode.Decompress);
    }
    if (contentEncoding.IndexOf("deflate", StringComparison.OrdinalIgnoreCase) >= 0)
    {
        return new DeflateStream(rawStream, CompressionMode.Decompress);
    }
    return rawStream;
}

/// <summary>
/// Chooses the text encoding for the body: the charset reported by the response when it names
/// a known encoding, otherwise the system default encoding that was used previously.
/// </summary>
private static System.Text.Encoding resolveEncoding(HttpWebResponse response)
{
    string charset = response.CharacterSet;
    if (!string.IsNullOrEmpty(charset))
    {
        try
        {
            return System.Text.Encoding.GetEncoding(charset.Trim(' ', '"', '\''));
        }
        catch (ArgumentException)
        {
            // Unknown or malformed charset name: fall back to the default below.
        }
    }
    return System.Text.Encoding.Default;
}

在此先感謝您的幫助。 Laziale

回答

0

看起來發生這種情況,是因爲評論數量和評分是使用 JavaScript(可能是 AJAX 或其他方式)動態生成的。在這種情況下,您需要分析在瀏覽器中加載頁面時產生的額外網絡流量,找出這些數據是從哪裏傳輸的,或者分析 JavaScript 代碼以瞭解它們是如何生成的。