你可以做到這一點使用JMeter(JMeter是可以說是提供最好的免費和開源的負載測試工具)。您可以創建一個請求並使用正則表達式提取器來解析響應,提取任何href值,然後循環遍歷每個找到的url。
下面是一個JMeter測試計劃,它完全符合您的需求。只需將此xml複製到一個文本文件並將其保存爲something.jmx。在JMeter中打開它並將演示主機名更改爲您自己的站點(不要垃圾郵件,這是不好的形式)。 CTRL-R運行。
對於更多用戶(線程)或迭代(循環),只需單擊線程組名稱並將值更改爲所需。
非常模糊的建議:不要在一臺PC上使用多於300個線程。
如果你想在「雲」,然後租一個Amazon AWS機,install jmeter,這JMX文件複製到它,並且使用命令行運行它運行如下命令:
$ cd /to/where/jmeter/is/apache-blah/bin/
$ ./jmeter -n -t /path/to/my/testplan.jmx -l /where/to/put/results.jtl
演示抓取測試計劃:
<?xml version="1.0" encoding="UTF-8"?>
<jmeterTestPlan version="1.2" properties="2.3">
<hashTree>
<TestPlan guiclass="TestPlanGui" testclass="TestPlan" testname="crawl" enabled="true">
<stringProp name="TestPlan.comments"></stringProp>
<boolProp name="TestPlan.functional_mode">false</boolProp>
<boolProp name="TestPlan.serialize_threadgroups">false</boolProp>
<elementProp name="TestPlan.user_defined_variables" elementType="Arguments" guiclass="ArgumentsPanel" testclass="Arguments" testname="User Defined Variables" enabled="true">
<collectionProp name="Arguments.arguments"/>
</elementProp>
<stringProp name="TestPlan.user_define_classpath"></stringProp>
</TestPlan>
<hashTree>
<CookieManager guiclass="CookiePanel" testclass="CookieManager" testname="HTTP Cookie Manager" enabled="true">
<collectionProp name="CookieManager.cookies"/>
<boolProp name="CookieManager.clearEachIteration">false</boolProp>
</CookieManager>
<hashTree/>
<ThreadGroup guiclass="ThreadGroupGui" testclass="ThreadGroup" testname="Thread Group Name" enabled="true">
<stringProp name="ThreadGroup.on_sample_error">startnextloop</stringProp>
<elementProp name="ThreadGroup.main_controller" elementType="LoopController" guiclass="LoopControlPanel" testclass="LoopController" testname="Loop Controller" enabled="true">
<boolProp name="LoopController.continue_forever">false</boolProp>
<stringProp name="LoopController.loops">1</stringProp>
</elementProp>
<stringProp name="ThreadGroup.num_threads">1</stringProp>
<stringProp name="ThreadGroup.ramp_time">1</stringProp>
<longProp name="ThreadGroup.start_time">1346172541000</longProp>
<longProp name="ThreadGroup.end_time">1346172541000</longProp>
<boolProp name="ThreadGroup.scheduler">false</boolProp>
<stringProp name="ThreadGroup.duration"></stringProp>
<stringProp name="ThreadGroup.delay"></stringProp>
</ThreadGroup>
<hashTree>
<HTTPSamplerProxy guiclass="HttpTestSampleGui" testclass="HTTPSamplerProxy" testname="homepage" enabled="true">
<elementProp name="HTTPsampler.Arguments" elementType="Arguments" guiclass="HTTPArgumentsPanel" testclass="Arguments" enabled="true">
<collectionProp name="Arguments.arguments"/>
</elementProp>
<stringProp name="HTTPSampler.domain">www.bbc.co.uk</stringProp>
<stringProp name="HTTPSampler.port"></stringProp>
<stringProp name="HTTPSampler.connect_timeout"></stringProp>
<stringProp name="HTTPSampler.response_timeout"></stringProp>
<stringProp name="HTTPSampler.protocol"></stringProp>
<stringProp name="HTTPSampler.contentEncoding"></stringProp>
<stringProp name="HTTPSampler.path">/</stringProp>
<stringProp name="HTTPSampler.method">GET</stringProp>
<boolProp name="HTTPSampler.follow_redirects">true</boolProp>
<boolProp name="HTTPSampler.auto_redirects">false</boolProp>
<boolProp name="HTTPSampler.use_keepalive">true</boolProp>
<boolProp name="HTTPSampler.DO_MULTIPART_POST">false</boolProp>
<boolProp name="HTTPSampler.monitor">false</boolProp>
<stringProp name="HTTPSampler.embedded_url_re"></stringProp>
<stringProp name="TestPlan.comments">enter YOUR SITE in the Server Name field</stringProp>
</HTTPSamplerProxy>
<hashTree>
<RegexExtractor guiclass="RegexExtractorGui" testclass="RegexExtractor" testname="urls" enabled="true">
<stringProp name="RegexExtractor.useHeaders">false</stringProp>
<stringProp name="RegexExtractor.refname">urls</stringProp>
<stringProp name="RegexExtractor.regex"><a href="http:\/\/www.bbc.co.uk([^"]+)"</stringProp>
<stringProp name="RegexExtractor.template">$1$</stringProp>
<stringProp name="RegexExtractor.default">NOTFOUND</stringProp>
<stringProp name="RegexExtractor.match_number">-1</stringProp>
</RegexExtractor>
<hashTree/>
<ConstantThroughputTimer guiclass="TestBeanGUI" testclass="ConstantThroughputTimer" testname="Constant Throughput Timer" enabled="true">
<doubleProp>
<name>throughput</name>
<value>6.0</value>
<savedValue>0.0</savedValue>
</doubleProp>
<stringProp name="calcMode">this thread only</stringProp>
<stringProp name="TestPlan.comments">Each thread makes 6 requests every minute (if it completes in time)</stringProp>
</ConstantThroughputTimer>
<hashTree/>
</hashTree>
<ForeachController guiclass="ForeachControlPanel" testclass="ForeachController" testname="ForEach Controller" enabled="true">
<stringProp name="ForeachController.inputVal">urls</stringProp>
<stringProp name="ForeachController.returnVal">url</stringProp>
<boolProp name="ForeachController.useSeparator">true</boolProp>
</ForeachController>
<hashTree>
<HTTPSamplerProxy guiclass="HttpTestSampleGui" testclass="HTTPSamplerProxy" testname="foundUrl" enabled="true">
<elementProp name="HTTPsampler.Arguments" elementType="Arguments" guiclass="HTTPArgumentsPanel" testclass="Arguments" enabled="true">
<collectionProp name="Arguments.arguments"/>
</elementProp>
<stringProp name="HTTPSampler.domain">www.bbc.co.uk</stringProp>
<stringProp name="HTTPSampler.port"></stringProp>
<stringProp name="HTTPSampler.connect_timeout"></stringProp>
<stringProp name="HTTPSampler.response_timeout"></stringProp>
<stringProp name="HTTPSampler.protocol"></stringProp>
<stringProp name="HTTPSampler.contentEncoding"></stringProp>
<stringProp name="HTTPSampler.path">${url}</stringProp>
<stringProp name="HTTPSampler.method">GET</stringProp>
<boolProp name="HTTPSampler.follow_redirects">true</boolProp>
<boolProp name="HTTPSampler.auto_redirects">false</boolProp>
<boolProp name="HTTPSampler.use_keepalive">true</boolProp>
<boolProp name="HTTPSampler.DO_MULTIPART_POST">false</boolProp>
<boolProp name="HTTPSampler.monitor">false</boolProp>
<stringProp name="HTTPSampler.embedded_url_re"></stringProp>
<stringProp name="TestPlan.comments">enter YOUR SITE in the Server Name field</stringProp>
</HTTPSamplerProxy>
<hashTree>
<GaussianRandomTimer guiclass="GaussianRandomTimerGui" testclass="GaussianRandomTimer" testname="Gaussian Random Timer" enabled="true">
<stringProp name="ConstantTimer.delay">300</stringProp>
<stringProp name="RandomTimer.range">500.0</stringProp>
</GaussianRandomTimer>
<hashTree/>
</hashTree>
</hashTree>
</hashTree>
<ResultCollector guiclass="ViewResultsFullVisualizer" testclass="ResultCollector" testname="View Results Tree" enabled="true">
<boolProp name="ResultCollector.error_logging">false</boolProp>
<objProp>
<name>saveConfig</name>
<value class="SampleSaveConfiguration">
<time>true</time>
<latency>true</latency>
<timestamp>true</timestamp>
<success>true</success>
<label>true</label>
<code>true</code>
<message>true</message>
<threadName>true</threadName>
<dataType>true</dataType>
<encoding>false</encoding>
<assertions>true</assertions>
<subresults>true</subresults>
<responseData>false</responseData>
<samplerData>false</samplerData>
<xml>false</xml>
<fieldNames>true</fieldNames>
<responseHeaders>false</responseHeaders>
<requestHeaders>false</requestHeaders>
<responseDataOnError>false</responseDataOnError>
<saveAssertionResultsFailureMessage>false</saveAssertionResultsFailureMessage>
<assertionsResultsToSave>0</assertionsResultsToSave>
<bytes>true</bytes>
<hostname>true</hostname>
<threadCounts>true</threadCounts>
</value>
</objProp>
<stringProp name="filename"></stringProp>
</ResultCollector>
<hashTree/>
<ResultCollector guiclass="StatVisualizer" testclass="ResultCollector" testname="Aggregate Report" enabled="true">
<boolProp name="ResultCollector.error_logging">false</boolProp>
<objProp>
<name>saveConfig</name>
<value class="SampleSaveConfiguration">
<time>true</time>
<latency>true</latency>
<timestamp>true</timestamp>
<success>true</success>
<label>true</label>
<code>true</code>
<message>true</message>
<threadName>true</threadName>
<dataType>true</dataType>
<encoding>false</encoding>
<assertions>true</assertions>
<subresults>true</subresults>
<responseData>false</responseData>
<samplerData>false</samplerData>
<xml>false</xml>
<fieldNames>true</fieldNames>
<responseHeaders>false</responseHeaders>
<requestHeaders>false</requestHeaders>
<responseDataOnError>false</responseDataOnError>
<saveAssertionResultsFailureMessage>false</saveAssertionResultsFailureMessage>
<assertionsResultsToSave>0</assertionsResultsToSave>
<bytes>true</bytes>
<hostname>true</hostname>
<threadCounts>true</threadCounts>
</value>
</objProp>
<stringProp name="filename"></stringProp>
</ResultCollector>
<hashTree/>
</hashTree>
</hashTree>
</jmeterTestPlan>
新增面露JMeter的標籤,以提高反饋 –