2015-05-14 145 views
2

Swift解析HTML表格

我想將一個網頁上的這些信息導入iOS應用程序。

HTML代碼:

<table border="0" cellpadding="3" cellspacing="0" width="85%"><tr><td width="100%" colspan="3" bgcolor="#C9C9E7"><b>Update as of 3:57:00 PM (CDT) Thu., Apr. 16, 2015</b><br></td></tr><tr> 
<td width="50%" bgcolor="#FFFFFF">Production Line 1</td> 
<td width="35%" bgcolor="#FFFFFF">9:00 minutes (10 min)&nbsp;</td> 
<td width="15%" bgcolor="#FFFFFF">No delay</td> 
</tr><tr> 
<td width="50%" bgcolor="#FFFFFF"><b>Production Line 2</b></td> 
<td width="35%" bgcolor="#FFFFFF"><b>7:57 minutes </b><b>(4 min)&nbsp;</b></td> 
<td width="15%" bgcolor="#FFFFFF"><b>+3:57</b></td> 
</tr><tr> 
<td width="50%" bgcolor="#FFFFFF"><b>Production Line 3</b></td> 
<td width="35%" bgcolor="#FFFFFF"><b>10:35 minutes </b><b>(8 min)&nbsp;</b></td> 
<td width="15%" bgcolor="#FFFFFF"><b>+2:35</b></td> 
</tr></table> 

這是我目前嘗試的代碼。我已經在代碼中用註釋標記了我認爲有問題的那一行。我得到的是Optional的nil值。我正在使用Swift-HTML-Parser來幫助實現此目的。我只對它做了一處修改，那就是爲table添加了一個對應的部分。

import UIKit 
extension String {
    /// Plain-text rendering of the receiver, interpreted as UTF-8 HTML.
    ///
    /// The conversion goes through `NSAttributedString`'s HTML importer.
    /// If the string cannot be encoded as UTF-8, or the importer fails,
    /// the receiver is returned unchanged instead of crashing on a
    /// force-unwrap.
    ///
    /// NOTE(review): Apple documents that the HTML importer should not be
    /// used from a background thread - call this on the main thread.
    var html2String: String {
        if let encoded = dataUsingEncoding(NSUTF8StringEncoding),
            rendered = NSAttributedString(data: encoded, options: [NSDocumentTypeDocumentAttribute:NSHTMLTextDocumentType,NSCharacterEncodingDocumentAttribute:NSUTF8StringEncoding], documentAttributes: nil, error: nil) {
            return rendered.string
        }
        // Best effort: hand back the unconverted markup rather than trapping.
        return self
    }
}

/// Displays the production-time table fetched from
/// `http://localhost/productiontimes.html` in a text view.
/// Tapping the text view reloads the data.
class mainViewController: UIViewController {

    @IBOutlet var textView: UITextView!

    override func viewDidLoad() {
        super.viewDidLoad()
        updateTrafficInfo()

        // A single tap on the text view triggers a reload.
        let singleFingerTap = UITapGestureRecognizer(target: self, action: "handleSingleTap:")
        self.textView.addGestureRecognizer(singleFingerTap)
    }

    // MARK: gesture recognizer

    /// Tap handler: re-fetches the page and refreshes the text view.
    func handleSingleTap(recognizer: UITapGestureRecognizer) {
        updateTrafficInfo()
    }

    /// Downloads the page, extracts one " - "-separated line per table row
    /// and shows the result in `textView`.
    ///
    /// If no row can be extracted, the whole page converted to plain text is
    /// shown instead (this is what the previous version always ended up
    /// displaying, because it overwrote the parsed text unconditionally).
    func updateTrafficInfo(){
        var request = HTTPTask()
        request.GET("http://localhost/productiontimes.html", parameters: nil, success: {(response: HTTPResponse) in
            if let data = response.responseObject as? NSData {
                // NSString(data:encoding:) is failable; a forced cast would
                // crash on a body that is not valid UTF-8.
                if let decoded = NSString(data: data, encoding: NSUTF8StringEncoding) {
                    let rawHTML = decoded as String
                    let lines = self.tableLines(rawHTML)

                    // One UI update for the whole table instead of a blocking
                    // dispatch per row. `html2String` is evaluated inside the
                    // main-queue block because it uses the HTML importer.
                    dispatch_async(dispatch_get_main_queue(), {
                        let text = lines.isEmpty ? rawHTML.html2String : "\n".join(lines)
                        self.textView.text = text
                    })
                } else {
                    println("error: response body is not valid UTF-8")
                }
            }
        },failure: {(error: NSError, response: HTTPResponse?) in
            println("error: \(error)")
        })
    }

    /// Parses `rawHTML` and returns one line per `<tr>`, with the cell texts
    /// joined by " - ".
    ///
    /// This replaces the lookup that was originally flagged as the issue:
    /// `node.xpath("//td")` is an absolute XPath, so it addressed the first
    /// cell of the whole document for every row. Here every query is written
    /// as an explicit document-level path, `(//tr)[n]/td`, so the result does
    /// not depend on how the parser sets the XPath context node.
    ///
    /// NOTE(review): assumes `contents` is a non-optional `String` that holds
    /// the text of an element whose first child is a text node, as the usage
    /// elsewhere on this page suggests - confirm against the parser version
    /// in use.
    private func tableLines(rawHTML: String) -> [String] {
        var err: NSError?
        let option = CInt(HTML_PARSE_NOERROR.value | HTML_PARSE_RECOVER.value)
        var parser = HTMLParser(html: rawHTML, encoding: NSUTF8StringEncoding, option: option, error: &err)
        if err != nil {
            // Logged only: with HTML_PARSE_RECOVER the parser may still be usable.
            println(err)
        }

        var lines = [String]()
        let tableNode = parser.table
        if let rowNodes = tableNode?.xpath("//tr") {
            for (rowIndex, _) in enumerate(rowNodes) {
                // XPath positions are 1-based.
                let rowPath = "(//tr)[\(rowIndex + 1)]"
                if let cellNodes = tableNode?.xpath(rowPath + "/td") {
                    var cells = [String]()
                    for (cellIndex, cellNode) in enumerate(cellNodes) {
                        var text = cellNode.contents
                        // Cells such as <td><b>7:57 minutes </b><b>(4 min)</b></td>
                        // keep their text inside <b> children; prefer those
                        // when present and glue the fragments back together.
                        if let boldNodes = tableNode?.xpath(rowPath + "/td[\(cellIndex + 1)]/b") {
                            if !boldNodes.isEmpty {
                                text = "".join(boldNodes.map { $0.contents })
                            }
                        }
                        // Drops the trailing &nbsp; / spaces present in the markup.
                        cells.append(text.stringByTrimmingCharactersInSet(NSCharacterSet.whitespaceAndNewlineCharacterSet()))
                    }
                    if !cells.isEmpty {
                        lines.append(" - ".join(cells))
                    }
                }
            }
        }
        return lines
    }

    override func didReceiveMemoryWarning() {
        super.didReceiveMemoryWarning()
        // Dispose of any resources that can be recreated.
    }

}

目標是將輸出格式化爲這樣的格式。

Production Line -  Time -      Delay 
Production Line 1 -  9:00 minutes (10 min) - No delay 
Production Line 2 -  7:57 minutes (4 min) -  +3:57 
+0

你真的沒有其他方式取得這些數據嗎？HTML不是數據交換格式，而是用於佈局和顯示內容的標記語言。根據我的經驗，從任何網站（甚至你自己的網站）解析HTML都很痛苦，因爲每一次設計改動都會破壞你的導入邏輯。 – gutenmorgenuhu

+0

不，我沒有其他方式（我也希望有）。HTML的佈局不會改變。如果有人能夠幫忙把它轉換成CSV、XML或其他我可以放入表格的格式，我會很高興。 – MwcsMac

+0

我見過這個擴展（extension）:) –

回答

2
extension NSData {
    /// Plain-text rendering of the receiver, interpreted as UTF-8 HTML.
    ///
    /// Returns an empty string when `NSAttributedString`'s HTML importer
    /// cannot read the data, instead of crashing on a force-unwrap.
    ///
    /// NOTE(review): Apple documents that the HTML importer should not be
    /// used from a background thread - call this on the main thread.
    var htmlData2String:String {
        if let rendered = NSAttributedString(data: self, options: [NSDocumentTypeDocumentAttribute : NSHTMLTextDocumentType, NSCharacterEncodingDocumentAttribute : NSUTF8StringEncoding], documentAttributes: nil, error: nil) {
            return rendered.string
        }
        return ""
    }
}

// Convert the downloaded HTML to plain text and split it into lines.
// Assumes each table cell renders on its own line - TODO confirm for the
// real page.
let output = data.htmlData2String
let components = output.componentsSeparatedByString("\n")
// Component 0 is the "Update as of ..." banner; after it every row
// contributes three lines (line name, time, delay). The upper bound is
// derived from the number of components, so any number of rows is handled
// and a short or empty result no longer indexes out of range.
for index in stride(from: 1, to: components.count - 2, by: 3) {
    let line = components[index]
    let time = components[index+1]
    let delay = components[index+2]
    println(line + " - " + time + " - " + delay)
}
+1

我希望我可以標記兩個正確的答案。我喜歡這兩種方法。 – MwcsMac

2

您的XPath有一些問題，請參閱下面的代碼：

// Sample page: one banner row followed by three production-line rows.
let html = "<table border='0' cellpadding='3' cellspacing='0' width='85%'><tr><td width='100%' colspan='3' bgcolor='#C9C9E7'><b>Update as of 3:57:00 PM (CDT) Thu., Apr. 16, 2015</b><br></td></tr><tr>" +
    "<td width='50%' bgcolor='#FFFFFF'><b>Production Line 1</b></td>" +
    "<td width='35%' bgcolor='#FFFFFF'><b>9:00 minutes</b><b>(10 min)&nbsp;</b></td>" +
    "<td width='15%' bgcolor='#FFFFFF'><b>No delay</b></td>" +
    "</tr><tr>" +
    "<td width='50%' bgcolor='#FFFFFF'><b>Production Line 2</b></td>" +
    "<td width='35%' bgcolor='#FFFFFF'><b>7:57 minutes </b><b>(4 min)&nbsp;</b></td>" +
    "<td width='15%' bgcolor='#FFFFFF'><b>+3:57</b></td>" +
    "</tr><tr>" +
    "<td width='50%' bgcolor='#FFFFFF'><b>Production Line 3</b></td>" +
    "<td width='35%' bgcolor='#FFFFFF'><b>10:35 minutes </b><b>(8 min)&nbsp;</b></td>" +
    "<td width='15%' bgcolor='#FFFFFF'><b>+2:35</b></td>" +
    "</tr></table>"

var err : NSError?
var parser = HTMLParser(html: html, error: &err)
if err != nil {
    println(err)
    exit(1)
}

let table = parser.html

// Skip only the first <td> of the document (the banner cell).
// The parentheses matter: in XPath 1.0 `//td[position() > 1]` applies the
// predicate per parent <tr> and would drop the first cell of every row,
// whereas `(//td)[position() > 1]` numbers the cells across the document.
if let inputNodes = table?.xpath("(//td)[position() > 1]/b") {

    println("Production Line -  Time -      Delay")
    for (index, node) in enumerate(inputNodes) {
        // Each row holds four <b> elements (name, time, estimate, delay),
        // so a new output line starts at every fourth element.
        if index % 4 == 0 {
            println("\n")
        }
        print(node.contents + "- ")
    }
}

輸出如下:

Production Line -  Time -      Delay 

Production Line 1- 9:00 minutes- (10 min) - No delay-  

Production Line 2- 7:57 minutes - (4 min) - +3:57-  

Production Line 3- 10:35 minutes - (8 min) - +2:35- 

您可以按照自己的需要調整輸出格式。我希望這對你有所幫助。

+1

我希望我可以將兩者都標記爲正確的答案。我喜歡這兩種方法。 – MwcsMac

+0

哈哈哈，不錯的競爭 –