2013-01-19 273 views
0

我試圖來用LinkedIn提取一些數據網格,我只是試圖讓這對我自己的學習曲線的工作,但如果我刪除行瀏覽器加載文件

MessageBox.Show("asdfasdfasdf") 

名單「消息「只有1個項目,如果我包含上面的行確實是預期的,我會得到15條消息

有人可以解釋嗎?

public void extract_messages_received(object sender, RoutedEventArgs e) 
{ 
    triggered = false; 
    System.Windows.Forms.WebBrowser browser = new System.Windows.Forms.WebBrowser(); 
    browser.Navigate(new Uri(@"http://www.linkedin.com/inbox/messages/received")); 
    browser.DocumentCompleted += new WebBrowserDocumentCompletedEventHandler(browser_DocumentCompleted); 
} 

private void LoadMessages(string url) 
{ 
    txtOutput.Text = @"http://www.linkedin.com" + url.Substring(6, url.Length - 6); 
    if (!urls.Contains(url)) 
    { 
     urls.Add(url); 
     WebBrowser browser = new WebBrowser(); 
     browser.Navigate(new Uri(txtOutput.Text); 

     loaded_message = false; 
     browser.DocumentCompleted += new WebBrowserDocumentCompletedEventHandler(ReadMessages); 
    } 
} 

private void ReadMessages(object sender, WebBrowserDocumentCompletedEventArgs e) 
{ 
    if (loaded_message == false) 
    {   
     string url = ((WebBrowser)sender).Url.ToString(); 
     int loc1 = url.IndexOf("itemID") + 7; 
     int loc2 = url.IndexOf("&", loc1); 
     IEnumerable<string> name = null; 
     IEnumerable<string> odate = null; 
     IEnumerable<string> photo = null; 
     IEnumerable<string> subject = null; 
     IEnumerable<string> headline = null; 
     string body = ""; 
     string id = url.Substring(loc1, loc2 - loc1); 
     //System.Windows.MessageBox.Show("READ"); 
     foreach (HtmlElement element in ((WebBrowser)sender).Document.GetElementsByTagName("div")) 
     { 
      if (element.GetAttribute("classname").Equals("inbox-item-body")) 
      { 
       body = element.InnerText; 
      } 
      if (element.GetAttribute("classname").Equals("inbox-item-header")) 
      { 
       var doc = new HtmlAgilityPack.HtmlDocument(); 
       doc.LoadHtml(element.InnerHtml); 
       name = from foo in doc.DocumentNode.SelectNodes("//a[@class='fn']") select foo.InnerText; 
       odate = from foo in doc.DocumentNode.SelectNodes("//p[@class='date']") select foo.InnerText; 
       photo = from foo in doc.DocumentNode.SelectNodes("//img[@class='photo']") select foo.Attributes["src"].Value; 
       subject = from foo in doc.DocumentNode.SelectNodes("//h3") select foo.InnerText; 
       headline = from foo in doc.DocumentNode.SelectNodes("//span[@class='headline']") select foo.InnerText; 
      } 
     } 

     // **** 
     MessageBox.Show("asdfasdfasdf"); 
     // **** 

     messages.Add(new Messages() 
     { 
      ID = id, 
      Subject = subject.First().ToString(), 
      Headline = headline.First().ToString(), 
      Sender = name.First().ToString(), 
      Photo = photo.First().ToString(), 
      SendDate = odate.First().ToString(), 
      Body = body 
     }); 

      // dataMessages.ItemsSource = messages; 
    } 
    loaded_message = true; 
} 

void browser_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e) 
{ 
    if (!triggered) 
    { 
     triggered = true; 
     System.Windows.Forms.WebBrowser web = sender as System.Windows.Forms.WebBrowser; 
     foreach (HtmlElement element in web.Document.GetElementsByTagName("ol")) 
     { 
      if (element.GetAttribute("classname").Contains("inbox-list ")) 
      { 
       WebBrowser browser = new WebBrowser(); 
       browser.Navigate("about:blank"); 
       browser.Document.Write(element.InnerHtml); 
       HtmlElementCollection hrefTags = null; 
       hrefTags = browser.Document.GetElementsByTagName("a"); 
       foreach (HtmlElement a in hrefTags) 
       { 
        if (a.OuterHtml.Contains("displayMBox")) 
        { 
         LoadMessages(a.GetAttribute("href")); 
        } 
       } 
      } 
     } 
    }  
} 

回答

0

這是一個計時問題。

當你有消息框在那裏,loaded_message不會被設置爲true直到您關閉後,消息框,讓其他事件的處理,直到消息框,以及與他們沒有設置loaded_message到真正的,直到你關閉第一個消息框。

如果關閉該消息框的速度不夠快,你可能會看到一些數字beteween 1和15

讓我們更簡單的例子:

private void Form1_Load(object sender, EventArgs e) 
    { 

     for (int i = 0; i < 5; i++) 
     { 
      WebBrowser wb = new WebBrowser(); 
      wb.DocumentCompleted += wb_DocumentCompleted; 
      wb.Navigate("http://www.stackoverflow.com"); 
     } 
    } 

    bool shown = false; 
    void wb_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e) 
    { 
     if (!shown) 
     { 
      Console.WriteLine(shown); 
      MessageBox.Show(shown.ToString()); 
      shown = true; 
     } 
    } 

現在,如果你看控制檯,你在顯示第一個消息框之前會看到幾個false。當我關閉消息框時,我會看到4個更多的消息框,因爲這些消息框已經排隊並等待shown設置爲true之前顯示。如果我將消息框註釋掉,那麼在控制檯中只顯示一個消息框和一個false

現在,問題變成了,你爲什麼添加並需要檢查loaded_message布爾變量。

我的猜測是你只想加載每條消息只有一次。如果是這樣的話,跟蹤每一個URL的字典和維護每個URL一個布爾值:

Dictionary<string, bool> loadedUrls = new Dictionary<string, bool>(); 
    private void Form1_Load(object sender, EventArgs e) 
    { 

     for (int i = 0; i < 5; i++) 
     { 
      WebBrowser wb = new WebBrowser(); 
      wb.DocumentCompleted += wb_DocumentCompleted; 
      string url = "http://stackoverflow.com/" + i; 

      loadedUrls.Add(url, false); 
      wb.Navigate(url); 
     } 
    } 

    bool shown = false; 
    void wb_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e) 
    { 

     if (loadedUrls.ContainsKey(e.Url.OriginalString) && loadedUrls[e.Url.OriginalString] == false) 
     { 
      loadedUrls[e.Url.OriginalString] = true; 
      Console.WriteLine(shown); 
      shown = true; 
     } 
    } 

我離開那裏shown證明這種新方法現在工作在文檔完成事件的每通。您的輸出窗口應該有false,然後是4 true

+0

不錯的約翰!答對了 ;-) – user1320651