2016-06-18 23 views
-2
require "openssl" 
require "nokogiri" 
require 'csv' 
require "open-uri" 
# TLS certificate verification is intentionally left at the OpenSSL default.
# The previous line reassigned OpenSSL::SSL::VERIFY_PEER to VERIFY_NONE, which
# turned off peer verification for every connection made by this process.
# If a certificate error appears, fix the local CA bundle rather than
# disabling verification. The old global counter ($n) is not needed either.

#~ Open_Page
# Kernel#open no longer fetches URLs on Ruby 3.0+; URI.open (provided by
# open-uri) is the supported way to read a remote page.
page = 'http://www.residentadvisor.net/dj/aguycalledgerald/tracks?sort=mostcharted'
html = Nokogiri::HTML(URI.open(page))

#~ Array
# Link texts that should not end up in the result (navigation, footer, A-Z
# index, column headers, ...). Some entries, such as the label names and
# '51 chartings', look specific to this one page -- NOTE(review): a narrower
# CSS selector would make this list unnecessary.
IGNORED_LINK_TEXTS = [
  'RA on YouTube', 'Login', 'Register', 'Resident Advisor', 'Submit',
  'Listings', 'Clubs', 'News', 'Reviews', 'Features', 'Films',
  'Submit event', 'Artists', 'Photos', 'DJ Charts', 'Labels', 'Podcasts',
  'Search', 'Top 1000', 'Top 100', 'Local', 'Favourites',
  'Create an artist profile',
  *('A'..'Z'), '0-9',
  'RA', 'About', 'Advertise', 'Jobs', 'RA In Residence', 'Ticketing FAQ',
  'Sell tickets on RA', 'Privacy', 'Terms',
  'RA is also available in Japanese. 日本版',
  'Download the RA Guide', 'RA on Twitter', 'RA on Facebook',
  'RA on Google+', 'RA on Instagram', 'RA on Soundcloud',
  'Biography', 'Events', 'Tracks', 'RA News', 'RA Editorial',
  'Remixes', 'Solo productions', 'Collaborations',
  'Laboratory Instinct', 'Highgrade Records', 'Bosconi', '!K7', 'Perlon',
  'Beatstreet', 'Title', 'Label', 'Release Date', '51 chartings'
].freeze

# Collect the text of every <a> element (surrounding whitespace stripped,
# tab characters removed) and drop the ignored entries. Filtering once after
# collecting yields the same array as deleting after every append, without
# re-running the whole delete list for each link.
names = html.css('a')
            .map { |link| link.text.strip.delete("\t") }
            .reject { |text| IGNORED_LINK_TEXTS.include?(text) }

# Print the final list once instead of the growing list on every iteration.
puts names

#~ To_CSV
# Append one single-column row per collected name, opening the file once.
# Iterating the array directly replaces the inclusive range 0..names.count,
# which ran one index past the end and appended a blank row, and it needs no
# global loop counter.
# To export only the first five entries, iterate names.first(5) instead.
CSV.open('Most_Charted.csv', 'a+') do |csv|
  names.each { |name| csv << [name] }
end

這會創建一個內容如下的CSV文件。問題是:如何只將前五首曲目寫入CSV文件?

PositiveNoise (Carl Craig remix) System 7 & Guy Called Gerald A-Wave 22 chartings 

Voodoo Ray (Shield Re-Edit) A Guy Called Gerald 18 chartings 

Falling (D. Digglers Cleptomania remix) Tom Clark & Benno Blome feat. 
    A Guy Called Gerald 18 chartings 

How Long Is Now A Guy Called Gerald 14 chartings 

Groove Of The Ghetto A Guy Called Gerald 12 chartings 

Voodoo Ray A Guy Called Gerald 10 chartings 

Falling (D Diggler's Rescue remix) Tom Clark & Benno Blome feat. A 
Guy Called Gerald 9 chartings 

等。

我如何只將前5首歌曲的名稱寫入CSV文件?

+0

這看起來是一個非常糟糕的主意:`OpenSSL::SSL::VERIFY_PEER = OpenSSL::SSL::VERIFY_NONE`。另請參閱[世界上最危險的代碼:在非瀏覽器軟件中驗證SSL證書](http://crypto.stanford.edu/~dabo/pubs/abstracts/ssl-client-bugs.html)。 – jww

+0

你的代碼有很多問題:除非你明白爲什麼以及何時該用全局變量,否則不要使用`$n`這樣的全局變量。它們可能會導致調試噩夢。你那一長串`delete`調用可以寫得更優雅。 –

回答

1

禁用SSL檢查時,一定要清楚自己在做什麼。

你可以爲曲目列表找一個更好的選擇器,這樣就不需要所有這些`delete`調用了。曲目都在 `ul.tracks` 裏面。

然後,我建議你把整個事情做成一個類,這樣就可以封裝行爲。另外,不要使用 `$` 全局變量:它們沒有必要,而且通常是糟糕代碼的標誌。

這裏是工作示例:

require "openssl" 
require "nokogiri" 
require 'csv' 
require "open-uri" 
# TLS certificate verification is intentionally left at the OpenSSL default.
# Reassigning OpenSSL::SSL::VERIFY_PEER to VERIFY_NONE would disable peer
# verification for every connection made by this process.

# Fetches a track-listing page and extracts track titles from it.
class Tracklist
  # @param url [String] address of the page to fetch when #parse is called
  def initialize(url)
    @url = url
  end

  # Downloads the page and returns the text of the first link inside
  # `div.title` for each `ul.tracks li` item, in document order. Items
  # without such a link are skipped.
  #
  # @param top [Integer, nil] maximum number of titles to return; nil returns
  #   every title found
  # @return [Array<String>] the collected titles
  # @raise [ArgumentError] if top is negative
  def parse(top = nil)
    # Kernel#open no longer fetches URLs on Ruby 3.0+; URI.open (from
    # open-uri) is the supported call.
    html = Nokogiri::HTML(URI.open(url))

    # Evaluate lazily so that list items past the requested limit are never
    # inspected.
    titles = html.css('ul.tracks li').lazy
                 .map { |node| node.at_css('div.title a:nth-child(1)') }
                 .reject(&:nil?)
                 .map(&:text)

    top ? titles.first(top) : titles.to_a
  end

  private

  attr_reader :url
end


# Build a track list for the artist's most-charted page and print the result
# of parsing it with a limit of five.
tracklist = Tracklist.new("https://www.residentadvisor.net/dj/aguycalledgerald/tracks?sort=mostcharted")
top_five = tracklist.parse(5)
p top_five

如果您需要有關曲目的更多詳細信息,可以在parse方法的循環內提取更多細節。

此代碼在收集到top指定數量的曲目後就停止解析。之後,您可以隨意構建自己的CSV文件。

相關問題