我正在寫一個.net網絡蜘蛛。雖然它在我的一個站點(大約20頁)上效果很好,但它與我管理的另一個站點(大約500頁)發生System.StackOverflowException異常爆炸。有關StackOverflowException的快速(希望)
我正在開發一款win7 64bit i3筆記本電腦,內含8g ram,128g hyperx ssd並且沒有交換文件。
我的問題是....我得到這個異常拋出,因爲我沒有交換文件?
cpu使用率(vs2010調試過程)只有74-75m的ram使用率,只能達到34%左右。
如果是這樣的話,我該如何確保它不會發生?
這是沒有遞歸。
代碼:
Imports System.Reflection
Imports System.Net
Imports Superstar.Html.Linq
Public Class Downloader
Implements IDisposable
''' <summary>
''' Get the returned downloaded string
''' </summary>
''' <value></value>
''' <returns></returns>
''' <remarks></remarks>
Public ReadOnly Property ReturnString As String
Get
Return _StrReturn
End Get
End Property
Private Property _StrReturn As String
''' <summary>
''' Get the returned downloaded byte array
''' </summary>
''' <value></value>
''' <returns></returns>
''' <remarks></remarks>
Public ReadOnly Property ReturnBytes As Byte()
Get
Return _FSReturn
End Get
End Property
Private Property _FSReturn As Byte()
Private Property _UserAgent As String = "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2.13) Gecko/20101203 Firefox/3.6.13"
Private Property DataReceived As Boolean = False
''' <summary>
''' Download a string, but do not block the calling thread
''' </summary>
''' <param name="_Path"></param>
''' <remarks></remarks>
Public Sub DownloadString(ByVal _Path As String, Optional ByVal _Worker As ComponentModel.BackgroundWorker = Nothing)
SetAllowUnsafeHeaderParsing20()
Using wc As New Net.WebClient()
With wc
Dim _ct As Long = 0
DataReceived = False
.Headers.Add("user-agent", _UserAgent)
.DownloadStringAsync(New System.Uri(_Path))
AddHandler .DownloadStringCompleted, AddressOf StringDownloaded
Do While Not DataReceived
If _Worker IsNot Nothing Then
_ct += 1
ReportProgress(_ct, _Worker)
End If
Loop
End With
End Using
End Sub
''' <summary>
''' Download a file, but do not block the calling thread
''' </summary>
''' <param name="_Path"></param>
''' <remarks></remarks>
Public Sub DownloadFile(ByVal _Path As String, Optional ByVal _Worker As ComponentModel.BackgroundWorker = Nothing)
SetAllowUnsafeHeaderParsing20()
Using wc As New Net.WebClient()
With wc
Dim _ct As Long = 0
DataReceived = False
.Headers.Add("user-agent", _UserAgent)
.DownloadDataAsync(New System.Uri(_Path))
AddHandler .DownloadDataCompleted, AddressOf FileStreamDownload
Do While Not DataReceived
If _Worker IsNot Nothing Then
_ct += 1
ReportProgress(_ct, _Worker)
End If
Loop
End With
End Using
End Sub
''' <summary>
''' Download a parsable HDocument, for using HtmlToLinq
''' </summary>
''' <param name="_Path"></param>
''' <returns></returns>
''' <remarks></remarks>
Public Function DownloadHDoc(ByVal _Path As String, Optional ByVal _Worker As ComponentModel.BackgroundWorker = Nothing) As HDocument
Try
'
'
'
'
'
'
'StackOverFlowException Occurring Here!
DownloadString(_Path, _Worker)
Return HDocument.Parse(_StrReturn)
Catch soex As StackOverflowException
'put some logging in here, with the path attempted
Return Nothing
Catch ex As Exception
SetAllowUnsafeHeaderParsing20()
Return HDocument.Load(_Path)
End Try
End Function
#Region "Internals"
Private Sub SetAllowUnsafeHeaderParsing20()
Dim a As New System.Net.Configuration.SettingsSection
Dim aNetAssembly As System.Reflection.Assembly = Assembly.GetAssembly(a.GetType)
Dim aSettingsType As Type = aNetAssembly.GetType("System.Net.Configuration.SettingsSectionInternal")
Dim args As Object() = Nothing
Dim anInstance As Object = aSettingsType.InvokeMember("Section", BindingFlags.Static Or BindingFlags.GetProperty Or BindingFlags.NonPublic, Nothing, Nothing, args)
Dim aUseUnsafeHeaderParsing As FieldInfo = aSettingsType.GetField("useUnsafeHeaderParsing", BindingFlags.NonPublic Or BindingFlags.Instance)
aUseUnsafeHeaderParsing.SetValue(anInstance, True)
End Sub
Private Sub FileStreamDownload(ByVal sender As Object, ByVal e As DownloadDataCompletedEventArgs)
If e.Cancelled = False AndAlso e.Error Is Nothing Then
DataReceived = True
_FSReturn = DirectCast(e.Result, Byte())
Else
_FSReturn = Nothing
End If
End Sub
Private Sub StringDownloaded(ByVal sender As Object, ByVal e As DownloadStringCompletedEventArgs)
If e.Cancelled = False AndAlso e.Error Is Nothing Then
DataReceived = True
_StrReturn = DirectCast(e.Result, String)
Else
_StrReturn = String.Empty
End If
End Sub
#End Region
#Region "IDisposable Support"
Private disposedValue As Boolean ' To detect redundant calls
' IDisposable
Protected Overridable Sub Dispose(disposing As Boolean)
If Not Me.disposedValue Then
If disposing Then
End If
_StrReturn = String.Empty
_FSReturn = Nothing
End If
Me.disposedValue = True
End Sub
Public Sub Dispose() Implements IDisposable.Dispose
Dispose(True)
GC.SuppressFinalize(Me)
End Sub
#End Region
End Class
而且調用此當計算器被happenning
Private Function PopulateSEOList(Optional ByVal _Worker As ComponentModel.BackgroundWorker = Nothing) As List(Of Typing.SEO)
Dim _L = LinkList, _Ct As Long = 0
Dim _NL As New List(Of Typing.SEO)
Dim _EL As Typing.SEO.Elements = Nothing
Dim _Doc As HDocument = Nothing, _Keywords As String = String.Empty, _Description As String = String.Empty, _Content As HElement = Nothing
For i As Long = 0 To _L.Count - 1
Try
_Ct += 1
Using _HDoc As New Downloader
With _HDoc
_Doc = .DownloadHDoc(_L(i).SiteUrl)
End With
End Using
Tasks.Parallel.Invoke(Sub()
'Keywords
For Each Item In _Doc.Descendants("meta")
If Item.Attribute("name") = "keywords" Then
_Keywords = Item.Attribute("content").Value
'Exit For
End If
Next
End Sub,
Sub()
'Description
For Each Item In _Doc.Descendants("meta")
If Item.Attribute("name") = "description" Then
_Description = Item.Attribute("content").Value
'Exit For
End If
Next
End Sub,
Sub()
If _Doc.Descendants("body") IsNot Nothing Then
_Content = _Doc.Descendants("body").FirstOrDefault
End If
End Sub,
Sub()
_EL = New Typing.SEO.Elements() With {
.H1 = If(_Doc.Descendants("h1") IsNot Nothing, (From n In _Doc.Descendants("h1").AsParallel()
Select n.Value).ToList(), Nothing),
.H2 = If(_Doc.Descendants("h2") IsNot Nothing, (From n In _Doc.Descendants("h2").AsParallel()
Select n.Value).ToList(), Nothing),
.H3 = If(_Doc.Descendants("h3") IsNot Nothing, (From n In _Doc.Descendants("h3").AsParallel()
Select n.Value).ToList(), Nothing),
.H4 = If(_Doc.Descendants("h4") IsNot Nothing, (From n In _Doc.Descendants("h4").AsParallel()
Select n.Value).ToList(), Nothing),
.H5 = If(_Doc.Descendants("h5") IsNot Nothing, (From n In _Doc.Descendants("h5").AsParallel()
Select n.Value).ToList(), Nothing),
.H6 = If(_Doc.Descendants("h6") IsNot Nothing, (From n In _Doc.Descendants("h6").AsParallel()
Select n.Value).ToList(), Nothing),
.UL = If(_Doc.Descendants("ul") IsNot Nothing, (From n In _Doc.Descendants("ul").AsParallel()
Select n.Value).ToList(), Nothing),
.OL = If(_Doc.Descendants("ol") IsNot Nothing, (From n In _Doc.Descendants("ol").AsParallel()
Select n.Value).ToList(), Nothing),
.STRONG = If(_Doc.Descendants("strong") IsNot Nothing OrElse _Doc.Descendants("b") IsNot Nothing,
(From n In _Doc.Descendants("strong").AsParallel()
Select n.Value).Union(From n In _Doc.Descendants("b").AsParallel()
Select n.Value).ToList(), Nothing),
.BLOCKQUOTE = If(_Doc.Descendants("blockquote") IsNot Nothing, (From n In _Doc.Descendants("blockquote").AsParallel()
Select n.Value).ToList(), Nothing),
.EM = If(_Doc.Descendants("em") IsNot Nothing OrElse _Doc.Descendants("i") IsNot Nothing,
(From n In _Doc.Descendants("em").AsParallel()
Select n.Value).Union(From n In _Doc.Descendants("i").AsParallel()
Select n.Value).ToList(), Nothing),
.A = If(_Doc.Descendants("a") IsNot Nothing, (From n In _Doc.Descendants("a").AsParallel()
Select New Typing.SEO.Elements.Links() With {
.Content = n.Value,
.Title = If(n.Attribute("title") IsNot Nothing,
n.Attribute("title").Value,
Nothing),
.Target = If(n.Attribute("target") IsNot Nothing,
n.Attribute("target").Value,
Nothing),
.Rel = If(n.Attribute("rel") IsNot Nothing,
n.Attribute("rel").Value,
Nothing),
.Href = If(n.Attribute("href") IsNot Nothing,
n.Attribute("href").Value,
Nothing)
}).ToList(), Nothing),
.IMG = If(_Doc.Descendants("img") IsNot Nothing,
(From n In _Doc.Descendants("img").AsParallel()
Select New Typing.SEO.Elements.Images() With {
.Alt = If(n.Attribute("alt") IsNot Nothing,
n.Attribute("alt").Value,
Nothing),
.Source = If(n.Attribute("src") IsNot Nothing,
n.Attribute("src").Value,
Nothing),
.Title = If(n.Attribute("title") IsNot Nothing,
n.Attribute("title").Value,
Nothing)
}).ToList(),
Nothing)
}
End Sub)
_NL.Add(New Typing.SEO() With {
.Link = _L(i).SiteUrl,
.Title = _Doc.Descendants("title").First().Value,
.Keywords = _Keywords,
.Description = _Description,
.Content = _Content,
.ContentElements = _EL
})
_L.RemoveAt(i)
_EL = Nothing : _Doc = Nothing
ReportProgress((_Ct/_L.Count) * 100, _Worker)
Catch ex As Exception
'Put logging in here
End Try
Next
Return _NL
End Function
我們能否看到一些示例代碼? –
你可以發佈一些針對蜘蛛算法的僞代碼嗎?也許方法調用蜘蛛鏈? –
-1無堆棧跟蹤。這是分析[一般可以從任何地方] StackOverflowException所需的絕對*最小值*。 – 2012-12-28 04:03:58