2012-12-28 53 views
-1

我正在寫一個.net網絡蜘蛛。雖然它在我的一個站點(大約20頁)上效果很好,但它與我管理的另一個站點(大約500頁)發生System.StackOverflowException異常爆炸。有關StackOverflowException的快速(希望)

我正在開發一款win7 64bit i3筆記本電腦,內含8g ram,128g hyperx ssd並且沒有交換文件。

我的問題是....我得到這個異常拋出,因爲我沒有交換文件?

cpu使用率(vs2010調試過程)只有74-75m的ram使用率,只能達到34%左右。

如果是這樣的話,我該如何確保它不會發生?

這是沒有遞歸。

代碼:

Imports System.Reflection 
Imports System.Net 
Imports Superstar.Html.Linq 

Public Class Downloader 
Implements IDisposable 

''' <summary> 
''' Get the returned downloaded string 
''' </summary> 
''' <value></value> 
''' <returns></returns> 
''' <remarks></remarks> 
Public ReadOnly Property ReturnString As String 
    Get 
     Return _StrReturn 
    End Get 
End Property 
Private Property _StrReturn As String 

''' <summary> 
''' Get the returned downloaded byte array 
''' </summary> 
''' <value></value> 
''' <returns></returns> 
''' <remarks></remarks> 
Public ReadOnly Property ReturnBytes As Byte() 
    Get 
     Return _FSReturn 
    End Get 
End Property 
Private Property _FSReturn As Byte() 


Private Property _UserAgent As String = "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2.13) Gecko/20101203 Firefox/3.6.13" 
Private Property DataReceived As Boolean = False 

''' <summary> 
''' Download a string, but do not block the calling thread 
''' </summary> 
''' <param name="_Path"></param> 
''' <remarks></remarks> 
Public Sub DownloadString(ByVal _Path As String, Optional ByVal _Worker As ComponentModel.BackgroundWorker = Nothing) 
    SetAllowUnsafeHeaderParsing20() 
    Using wc As New Net.WebClient() 
     With wc 
      Dim _ct As Long = 0 
      DataReceived = False 
      .Headers.Add("user-agent", _UserAgent) 
      .DownloadStringAsync(New System.Uri(_Path)) 
      AddHandler .DownloadStringCompleted, AddressOf StringDownloaded 
      Do While Not DataReceived 
       If _Worker IsNot Nothing Then 
        _ct += 1 
        ReportProgress(_ct, _Worker) 
       End If 
      Loop 
     End With 
    End Using 
End Sub 

''' <summary> 
''' Download a file, but do not block the calling thread 
''' </summary> 
''' <param name="_Path"></param> 
''' <remarks></remarks> 
Public Sub DownloadFile(ByVal _Path As String, Optional ByVal _Worker As ComponentModel.BackgroundWorker = Nothing) 
    SetAllowUnsafeHeaderParsing20() 
    Using wc As New Net.WebClient() 
     With wc 
      Dim _ct As Long = 0 
      DataReceived = False 
      .Headers.Add("user-agent", _UserAgent) 
      .DownloadDataAsync(New System.Uri(_Path)) 
      AddHandler .DownloadDataCompleted, AddressOf FileStreamDownload 
      Do While Not DataReceived 
       If _Worker IsNot Nothing Then 
        _ct += 1 
        ReportProgress(_ct, _Worker) 
       End If 
      Loop 
     End With 
    End Using 
End Sub 

''' <summary> 
''' Download a parsable HDocument, for using HtmlToLinq 
''' </summary> 
''' <param name="_Path"></param> 
''' <returns></returns> 
''' <remarks></remarks> 
Public Function DownloadHDoc(ByVal _Path As String, Optional ByVal _Worker As ComponentModel.BackgroundWorker = Nothing) As HDocument 
    Try 
     ' 
     ' 
     ' 
     ' 
     ' 
     ' 
     'StackOverFlowException Occurring Here! 
     DownloadString(_Path, _Worker) 
     Return HDocument.Parse(_StrReturn) 
    Catch soex As StackOverflowException 
     'put some logging in here, with the path attempted 
     Return Nothing 
    Catch ex As Exception 
     SetAllowUnsafeHeaderParsing20() 
     Return HDocument.Load(_Path) 
    End Try 
End Function 

#Region "Internals" 

Private Sub SetAllowUnsafeHeaderParsing20() 
    Dim a As New System.Net.Configuration.SettingsSection 
    Dim aNetAssembly As System.Reflection.Assembly = Assembly.GetAssembly(a.GetType) 
    Dim aSettingsType As Type = aNetAssembly.GetType("System.Net.Configuration.SettingsSectionInternal") 
    Dim args As Object() = Nothing 
    Dim anInstance As Object = aSettingsType.InvokeMember("Section", BindingFlags.Static Or BindingFlags.GetProperty Or BindingFlags.NonPublic, Nothing, Nothing, args) 
    Dim aUseUnsafeHeaderParsing As FieldInfo = aSettingsType.GetField("useUnsafeHeaderParsing", BindingFlags.NonPublic Or BindingFlags.Instance) 
    aUseUnsafeHeaderParsing.SetValue(anInstance, True) 
End Sub 

Private Sub FileStreamDownload(ByVal sender As Object, ByVal e As DownloadDataCompletedEventArgs) 
    If e.Cancelled = False AndAlso e.Error Is Nothing Then 
     DataReceived = True 
     _FSReturn = DirectCast(e.Result, Byte()) 
    Else 
     _FSReturn = Nothing 
    End If 
End Sub 

Private Sub StringDownloaded(ByVal sender As Object, ByVal e As DownloadStringCompletedEventArgs) 
    If e.Cancelled = False AndAlso e.Error Is Nothing Then 
     DataReceived = True 
     _StrReturn = DirectCast(e.Result, String) 
    Else 
     _StrReturn = String.Empty 
    End If 
End Sub 

#End Region 

#Region "IDisposable Support" 
Private disposedValue As Boolean ' To detect redundant calls 

' IDisposable 
Protected Overridable Sub Dispose(disposing As Boolean) 
    If Not Me.disposedValue Then 
     If disposing Then 
     End If 
     _StrReturn = String.Empty 
     _FSReturn = Nothing 
    End If 
    Me.disposedValue = True 
End Sub 

Public Sub Dispose() Implements IDisposable.Dispose 
    Dispose(True) 
    GC.SuppressFinalize(Me) 
End Sub 
#End Region 

End Class 

而且調用此當計算器被happenning

Private Function PopulateSEOList(Optional ByVal _Worker As ComponentModel.BackgroundWorker = Nothing) As List(Of Typing.SEO) 
    Dim _L = LinkList, _Ct As Long = 0 
    Dim _NL As New List(Of Typing.SEO) 
    Dim _EL As Typing.SEO.Elements = Nothing 
    Dim _Doc As HDocument = Nothing, _Keywords As String = String.Empty, _Description As String = String.Empty, _Content As HElement = Nothing 
    For i As Long = 0 To _L.Count - 1 
     Try 
      _Ct += 1 
      Using _HDoc As New Downloader 
       With _HDoc 
        _Doc = .DownloadHDoc(_L(i).SiteUrl) 
       End With 
      End Using 
      Tasks.Parallel.Invoke(Sub() 
             'Keywords 
             For Each Item In _Doc.Descendants("meta") 
              If Item.Attribute("name") = "keywords" Then 
               _Keywords = Item.Attribute("content").Value 
               'Exit For 
              End If 
             Next 
            End Sub, 
            Sub() 
             'Description 
             For Each Item In _Doc.Descendants("meta") 
              If Item.Attribute("name") = "description" Then 
               _Description = Item.Attribute("content").Value 
               'Exit For 
              End If 
             Next 
            End Sub, 
            Sub() 
             If _Doc.Descendants("body") IsNot Nothing Then 
              _Content = _Doc.Descendants("body").FirstOrDefault 
             End If 
            End Sub, 
            Sub() 
             _EL = New Typing.SEO.Elements() With { 
             .H1 = If(_Doc.Descendants("h1") IsNot Nothing, (From n In _Doc.Descendants("h1").AsParallel() 
               Select n.Value).ToList(), Nothing), 
             .H2 = If(_Doc.Descendants("h2") IsNot Nothing, (From n In _Doc.Descendants("h2").AsParallel() 
               Select n.Value).ToList(), Nothing), 
             .H3 = If(_Doc.Descendants("h3") IsNot Nothing, (From n In _Doc.Descendants("h3").AsParallel() 
               Select n.Value).ToList(), Nothing), 
             .H4 = If(_Doc.Descendants("h4") IsNot Nothing, (From n In _Doc.Descendants("h4").AsParallel() 
               Select n.Value).ToList(), Nothing), 
             .H5 = If(_Doc.Descendants("h5") IsNot Nothing, (From n In _Doc.Descendants("h5").AsParallel() 
               Select n.Value).ToList(), Nothing), 
             .H6 = If(_Doc.Descendants("h6") IsNot Nothing, (From n In _Doc.Descendants("h6").AsParallel() 
               Select n.Value).ToList(), Nothing), 
             .UL = If(_Doc.Descendants("ul") IsNot Nothing, (From n In _Doc.Descendants("ul").AsParallel() 
               Select n.Value).ToList(), Nothing), 
             .OL = If(_Doc.Descendants("ol") IsNot Nothing, (From n In _Doc.Descendants("ol").AsParallel() 
               Select n.Value).ToList(), Nothing), 
             .STRONG = If(_Doc.Descendants("strong") IsNot Nothing OrElse _Doc.Descendants("b") IsNot Nothing, 
                (From n In _Doc.Descendants("strong").AsParallel() 
                Select n.Value).Union(From n In _Doc.Descendants("b").AsParallel() 
                Select n.Value).ToList(), Nothing), 
             .BLOCKQUOTE = If(_Doc.Descendants("blockquote") IsNot Nothing, (From n In _Doc.Descendants("blockquote").AsParallel() 
               Select n.Value).ToList(), Nothing), 
             .EM = If(_Doc.Descendants("em") IsNot Nothing OrElse _Doc.Descendants("i") IsNot Nothing, 
               (From n In _Doc.Descendants("em").AsParallel() 
               Select n.Value).Union(From n In _Doc.Descendants("i").AsParallel() 
               Select n.Value).ToList(), Nothing), 
             .A = If(_Doc.Descendants("a") IsNot Nothing, (From n In _Doc.Descendants("a").AsParallel() 
              Select New Typing.SEO.Elements.Links() With { 
               .Content = n.Value, 
               .Title = If(n.Attribute("title") IsNot Nothing, 
                  n.Attribute("title").Value, 
                  Nothing), 
               .Target = If(n.Attribute("target") IsNot Nothing, 
                  n.Attribute("target").Value, 
                  Nothing), 
               .Rel = If(n.Attribute("rel") IsNot Nothing, 
                  n.Attribute("rel").Value, 
                  Nothing), 
               .Href = If(n.Attribute("href") IsNot Nothing, 
                  n.Attribute("href").Value, 
                  Nothing) 
              }).ToList(), Nothing), 
             .IMG = If(_Doc.Descendants("img") IsNot Nothing, 
                (From n In _Doc.Descendants("img").AsParallel() 
                Select New Typing.SEO.Elements.Images() With { 
                 .Alt = If(n.Attribute("alt") IsNot Nothing, 
                  n.Attribute("alt").Value, 
                  Nothing), 
                 .Source = If(n.Attribute("src") IsNot Nothing, 
                  n.Attribute("src").Value, 
                  Nothing), 
                 .Title = If(n.Attribute("title") IsNot Nothing, 
                  n.Attribute("title").Value, 
                  Nothing) 
                }).ToList(), 
               Nothing) 
             } 
            End Sub) 
      _NL.Add(New Typing.SEO() With { 
        .Link = _L(i).SiteUrl, 
        .Title = _Doc.Descendants("title").First().Value, 
        .Keywords = _Keywords, 
        .Description = _Description, 
        .Content = _Content, 
        .ContentElements = _EL 
       }) 
      _L.RemoveAt(i) 
      _EL = Nothing : _Doc = Nothing 
      ReportProgress((_Ct/_L.Count) * 100, _Worker) 
     Catch ex As Exception 
      'Put logging in here 
     End Try 
    Next 
    Return _NL 
End Function 
+0

我們能否看到一些示例代碼? –

+0

你可以發佈一些針對蜘蛛算法的僞代碼嗎?也許方法調用蜘蛛鏈? –

+0

-1無堆棧跟蹤。這是分析[一般可以從任何地方] StackOverflowException所需的絕對*最小值*。 – 2012-12-28 04:03:58

回答

1

正如你可能知道的代碼,這個錯誤很可能是由於在代碼中的錯誤導致無限循環在遞歸算法中。雖然你說你不使用遞歸,但你可能無意中發生了遞歸。

找出造成它的最簡單方法是附加調試器,配置Visual Studio以打破異常,並在應用程序中觸發錯誤。

當發生錯誤並且調試程序中斷時,請查看調用堆棧 - 希望您會看到問題出在哪裏。

1

我的猜測可能是軟件問題。 StackOverflowException通常在遞歸算法出現問題時發生(儘管您提到您不使用它)。另一個常見原因是財產執行或平等比較中的錯誤。 例如:

public string Name 
{ 
    set 
    { 
    Name = value; 
    } 
} 
+0

(我不喜歡這*特殊*遞歸錯誤..事實上,VS和ReSharper嚷嚷着我。) – 2012-12-28 04:02:53