0
我正在編寫一個從 Google 抓取 URL 的程序:當 Google 要求輸入驗證碼時,程序會打開一個表單,讓用戶輸入驗證碼,然後繼續執行。在遇到驗證碼之前,程序一切正常。表單會打開並允許用戶輸入驗證碼,WebBrowser 也能正確加載下一頁,但該會話(session)沒有被帶到 WebRequest,結果程序不斷循環打開 WebBrowser 表單,要求用戶再次輸入驗證碼。我曾嘗試把 WebBrowser 的 cookie 複製到 WebRequest 的 CookieContainer,但無濟於事。(C# WebBrowser 會話傳遞到 WebRequest)
// Copy the cookies exposed by the WebBrowser document into the shared
// CookieContainer (cookieJar) so later requests made with that container
// can present them.
//
// Document.Cookie is treated as a single "name=value; name2=value2" string.
// Entries that contain no '=' (including the single empty entry produced when
// the string is empty) or that have an empty name are skipped: the previous
// Substring(name.Length + 1) call threw ArgumentOutOfRangeException on them.
//
// NOTE(review): presumably Document.Cookie may be null when the page has no
// cookies -- the null-coalescing guard below covers that case; confirm.
// NOTE(review): every cookie is registered for the hard-coded host
// "ipv4.google.com". The search requests elsewhere in this file are sent to
// www.google.com -- confirm that CookieContainer will actually attach cookies
// stored for this host to those requests.
// NOTE(review): Document.Cookie presumably does not expose HttpOnly cookies;
// verify whether the session cookie you need is among the copied ones.
string rawCookies = f2.webForm.Document.Cookie ?? string.Empty;
foreach (string cookie in rawCookies.Split(';'))
{
    // Split only on the first '=' so values that themselves contain '=' stay intact.
    int separatorIndex = cookie.IndexOf('=');
    if (separatorIndex < 0)
    {
        continue; // blank or malformed entry: nothing to copy
    }

    string name = cookie.Substring(0, separatorIndex).Trim();
    if (name.Length == 0)
    {
        continue; // a Cookie cannot be created with an empty name
    }

    string value = cookie.Substring(separatorIndex + 1).Trim();
    string path = "/";
    string domain = "ipv4.google.com";
    cookieJar.Add(new Cookie(name, value, path, domain));
}
以下是完整的代碼。請記住,這只是粗略寫成的草稿,所以請不要太苛刻 :P
// Requests Google search result pages for a fixed query at start offsets
// 0, 10, 20 and 30, extracts the result URLs from the returned HTML and appends
// them to the boxUrl text box. When a request throws a WebException, Form2 is
// shown with the failing response's URI (the captcha page, per the comment
// below) and the browser document's cookies are copied into the shared
// cookie container.
// One cookie container shared by every request issued in the loop.
CookieContainer cookieJar = new CookieContainer();
for (int i = 0; i <= 30; i += 10)
{
string url = "https://www.google.com/search?newwindow=1&q=inurl:test.php" + "&start=" + i;
HttpWebRequest webRequest = (HttpWebRequest)WebRequest.Create(url);
webRequest.CookieContainer = cookieJar;
// Pause one second before sending each request.
Thread.Sleep(1000);
try
{
webRequest.UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.246";
//webRequest.CookieContainer = new CookieContainer();
webRequest.ProtocolVersion = HttpVersion.Version11;
webRequest.Method = "GET";
webRequest.KeepAlive = false;
webRequest.ContentType = "text/html";
webRequest.Timeout = 20000;
//webRequest.UseDefaultCredentials = true;
// NOTE(review): the response, its stream and the StreamReader are never
// closed or disposed in the visible code.
Stream objStream = webRequest.GetResponse().GetResponseStream();
StreamReader streamReader = new StreamReader(objStream);
String sLine = "";
List<string> lLines = new List<string>();
List<string> lUrls = new List<string>();
string[] findhttp;
int endIndex = 0;
Thread.Sleep(1000);
// NOTE(review): second GetResponse() call on the same request object; it is
// only used here to log the status code. Presumably this returns the same
// response rather than re-issuing the request -- confirm against the docs.
HttpWebResponse webResponse = (HttpWebResponse)webRequest.GetResponse();
boxUrl.AppendText("test: " + webResponse.StatusCode + "\n");
// Get Google's web search and store each line in "lUrls" List
// (The lines are actually collected in lLines; the first entry is the empty
// initial value of sLine. The loop ends when ReadLine returns null.)
while (sLine != null)
{
boxDorks.AppendText(sLine);
lLines.Add(sLine);
sLine = streamReader.ReadLine();
}
// Lets loop through and get all the URLs
foreach (string s in lLines)
{
// Find the index of href="http
// (Split removes the separator, so every piece after the first starts with
// the remainder of a URL, without its leading "http".)
findhttp = s.Split(new string[] { "href=\"http" }, StringSplitOptions.None);
// Parse URL
foreach (string find in findhttp)
{
// "> 0" means a line that begins with href="http at index 0 is not processed.
if (s.IndexOf("href=\"http") > 0)
{
endIndex = find.IndexOf("\" onmousedown"); // Find position of quote
if (endIndex > 0 && find.IndexOf("webcache.googleusercontent.com") < 0 &&
find.IndexOf("support.google.com") < 0 &&
find.IndexOf("robots.txt") < 0 &&
find.IndexOf("translate.google.com") < 0) // we don't want these!
{
// Re-attach the "http" prefix that Split consumed as part of the separator.
lUrls.Add("http" + find.Substring(0, endIndex));
}
}
}
}
// Output URLs
foreach (string s in lUrls)
{
boxUrl.AppendText("test: " + s + "\n");
}
}
catch (WebException we)
{
boxUrl.AppendText("exception: " + we);
//using (var sr = new StreamReader(we.Response.GetResponseStream()))
// {
//var html = sr.ReadToEnd();
//}
// Open form to show google captcha
// NOTE(review): we.Response is dereferenced without a null check; presumably
// it is null when no response was received (e.g. a timeout) -- confirm.
Form2 f2 = new Form2(we.Response.ResponseUri.ToString());//workaround to get webform.Navigate to work properly
// ShowDialog is modal, so the cookie copy below runs after the form is closed.
f2.ShowDialog();
// Copy cookies from webbrowser to webrequest cookies
// NOTE(review): an entry without '=' (e.g. an empty cookie string) makes
// Substring(name.Length + 1) throw ArgumentOutOfRangeException.
foreach (string cookie in f2.webForm.Document.Cookie.Split(';'))
{
string name = cookie.Split('=')[0];
string value = cookie.Substring(name.Length + 1);
string path = "/";
// NOTE(review): cookies are registered for the host "ipv4.google.com", while
// the request URL above targets www.google.com -- confirm the container will
// send them with those requests.
string domain = "ipv4.google.com";
//webRequest.CookieContainer.Add(new Cookie(name.Trim(), value.Trim(), path, domain));
cookieJar.Add(new Cookie(name.Trim(), value.Trim(), path, domain));
}
// (The snippet ends here; the closing braces of the catch block and of the
// for loop are not shown.)
先謝謝大家!
我不知道如何在 WinForms 瀏覽器對象和 WebRequest 之間共享會話;不過我過去用過 WatiN,它會爲您啓動 IE 或 Firefox 瀏覽器,並允許您從 C# 訪問 DOM。或者,Selenium 或 Coypu 也許會適合您的情況。 – denvercoder9