2011-12-07 13 views
1

我使用 JSoup 解析器來查找 HTML 文檔中的特定部分(由正則表達式定義),並通過把找到的字符串包裝在 <span> 標記中來突出顯示它。下面是我用來實現高亮的代碼 —— 在 Android 上的 JSoup 中使用正則表達式進行高亮:

/**
 * Parses {@code htmlContent}, walks every node below the document body and, for each
 * element whose own text matches {@code pat}, rewrites that element's text so that each
 * match is surrounded by a literal {@code <span>...</span>} string.
 *
 * <p>{@code htmlContent}, {@code pat} and {@code mat} are not declared in this snippet;
 * they are presumably fields of the enclosing class.
 *
 * @return the string form of the modified document ({@code doc.toString()})
 */
public String highlightRegex() { 
Document doc = Jsoup.parse(htmlContent); 

     NodeTraversor nd = new NodeTraversor(new NodeVisitor() { 

      @Override 
      public void tail(Node node, int depth) { 
       if (node instanceof Element) { 

        Element elem = (Element) node; 

        StringBuffer obtainedText; 
        // NOTE(review): getElementsMatchingOwnText presumably searches the whole subtree of
        // elem; since this runs for every visited element, the same descendants may be
        // processed more than once - confirm against the jsoup documentation.
        for(Element tn : elem.getElementsMatchingOwnText(pat)) { 

         Log.e("HELLO", tn.baseUri()); 
         Log.e("HELLO", tn.text()); 
         obtainedText = new StringBuffer(tn.ownText()); 
         // The matcher works on a String snapshot of the buffer, so the offsets it reports
         // always refer to the ORIGINAL text, not to the buffer as it is edited below.
         mat = pat.matcher(obtainedText.toString()); 
         int nextStart = 0; 
         while(mat.find(nextStart)) { 
          // StringBuffer.replace edits the buffer in place; after the first replacement the
          // buffer is longer than the snapshot, so later start()/end() offsets are stale.
          obtainedText = obtainedText.replace(mat.start(), mat.end(), "<span>" + mat.group() + "</span>"); 
          // Resuming at end() + 1 skips one character after each match; Matcher.find(int)
          // throws IndexOutOfBoundsException when the index exceeds the input length,
          // which happens here when a match ends at the very end of the text.
          nextStart = mat.end() + 1; 
         } 
         // NOTE(review): Element.text(String) presumably stores its argument as plain text
         // (escaping the angle brackets), which would explain why the <span> tags show up
         // literally in the WebView - confirm against the jsoup documentation.
         tn.text(obtainedText.toString()); 
         Log.e("HELLO" , "AFTER:" + tn.text()); 

        } 
       } 
      } 

      @Override 
      public void head(Node node, int depth) {   
       // Intentionally empty: all work is done in tail().
      } 
     }); 

     nd.traverse(doc.body()); 
     return doc.toString(); 
    } 

它可以運行,但是 <span> 標籤會在 WebView 中以文字形式直接顯示出來。我究竟做錯了什麼?

回答

0

看起來沒有人知道。下面是我自己想出的一些代碼。不管怎樣,它的效率很低。歡迎提出建議 :)

這個類可以用正則表達式來突出顯示任意 HTML 中的匹配內容。

/**
 * Highlights search terms inside an HTML document by wrapping every match in
 * {@code <b>...</b>}.
 *
 * <p>The search string is split into terms. Every term is matched as a whole word, except
 * the last one, which is matched as a word <em>prefix</em> (useful while the user is still
 * typing). Matching is case-insensitive and is applied to text nodes only, so existing
 * markup is never altered.
 *
 * <p>Usage:
 * <pre>
 *   String html = new Highlighter("abc def", htmlString).getHighlightedHtml();
 * </pre>
 */
public class Highlighter {

    /** Regex that can never match; used when the query contains no usable term. */
    private static final String MATCH_NOTHING = "(?!)";

    private final String regex;
    private final String htmlContent;
    Pattern pat;
    /** Retained for source compatibility only; matching now uses method-local matchers. */
    Matcher mat;

    /**
     * @param searchString user query; punctuation is ignored and terms are separated by
     *                     whitespace. {@code null} or blank highlights nothing.
     * @param htmlString   HTML to highlight; {@code null} is treated as an empty document.
     */
    public Highlighter(String searchString, String htmlString) {
        regex = buildRegexFromQuery(searchString);
        htmlContent = (htmlString == null) ? "" : htmlString;
        pat = Pattern.compile(regex, Pattern.CASE_INSENSITIVE);
    }

    /**
     * Parses the HTML, wraps every match found in the body's text nodes in {@code <b>} and
     * serializes the document again.
     *
     * @return the string form of the highlighted document
     */
    public String getHighlightedHtml() {
        Document doc = Jsoup.parse(htmlContent);

        // Nodes are collected first and replaced afterwards: changing the tree while it is
        // being traversed is not safe.
        final List<TextNode> nodesToChange = new ArrayList<TextNode>();

        NodeTraversor traversor = new NodeTraversor(new NodeVisitor() {

            @Override
            public void head(Node node, int depth) {
                // Nothing to do on entry; text nodes are handled in tail().
            }

            @Override
            public void tail(Node node, int depth) {
                if (node instanceof TextNode) {
                    TextNode textNode = (TextNode) node;
                    if (pat.matcher(textNode.getWholeText()).find()) {
                        nodesToChange.add(textNode);
                    }
                }
            }
        });

        traversor.traverse(doc.body());

        for (TextNode textNode : nodesToChange) {
            textNode.replaceWith(buildElementForText(textNode));
        }
        return doc.toString();
    }

    /**
     * Turns a free-text query into the regex used for highlighting.
     *
     * <p>Punctuation is replaced by spaces and whitespace runs are collapsed before the
     * query is split into terms. Each term is quoted so it is always matched literally.
     *
     * @param queryString raw user query, may be {@code null}
     * @return a regex matching any term as a whole word and the last term as a word
     *         prefix; a never-matching regex when the query has no terms
     */
    private static String buildRegexFromQuery(String queryString) {
        if (queryString == null) {
            return MATCH_NOTHING;
        }

        // '+' rather than '*': a '*' pattern also matches the empty string and would
        // insert a space between every character of the query.
        String cleaned = queryString
                .replaceAll("\\p{Punct}+", " ")
                .replaceAll("\\s+", " ")
                .trim();
        if (cleaned.length() == 0) {
            return MATCH_NOTHING;
        }

        String[] terms = cleaned.split(" ");

        StringBuilder pattern = new StringBuilder("\\b(?:");
        for (int i = 0; i < terms.length; i++) {
            if (i > 0) {
                pattern.append('|');
            }
            pattern.append(Pattern.quote(terms[i]));
        }
        // Only the last term may be followed by further letters/digits (prefix match).
        pattern.append("[a-zA-Z0-9]*?)\\b");
        return pattern.toString();
    }

    /**
     * Builds the replacement for a text node: the same text, HTML-escaped, with every match
     * wrapped in {@code <b>...</b>}.
     *
     * <p>The result is returned as a {@link DataNode} so that the inserted tags are emitted
     * verbatim. Because a data node is not escaped on output, the text itself is escaped
     * here; otherwise a literal {@code <} or {@code &} in the page text would turn into
     * markup. The text is deliberately not trimmed: leading and trailing whitespace
     * separates this node from neighbouring inline elements.
     *
     * @param textNode node whose text contains at least one match
     * @return node to put in place of {@code textNode}
     */
    private Node buildElementForText(TextNode textNode) {
        String text = textNode.getWholeText();

        List<MatchedWord> matches = new ArrayList<MatchedWord>();
        Matcher matcher = pat.matcher(text);
        while (matcher.find()) {
            matches.add(new MatchedWord(matcher.start(), matcher.end()));
        }

        StringBuilder html = new StringBuilder(text.length() + matches.size() * 7);
        int cursor = 0;
        for (MatchedWord match : matches) {
            appendEscaped(html, text, cursor, match.start);
            html.append("<b>");
            appendEscaped(html, text, match.start, match.end);
            html.append("</b>");
            cursor = match.end;
        }
        appendEscaped(html, text, cursor, text.length());

        return new DataNode(html.toString(), textNode.baseUri());
    }

    /** Appends {@code text[from, to)} to {@code out}, escaping {@code &}, {@code <} and {@code >}. */
    private static void appendEscaped(StringBuilder out, String text, int from, int to) {
        for (int i = from; i < to; i++) {
            char c = text.charAt(i);
            switch (c) {
                case '&':
                    out.append("&amp;");
                    break;
                case '<':
                    out.append("&lt;");
                    break;
                case '>':
                    out.append("&gt;");
                    break;
                default:
                    out.append(c);
                    break;
            }
        }
    }

    /** Half-open character range {@code [start, end)} of one match inside a text node. */
    class MatchedWord {
        public int start;
        public int end;

        public MatchedWord(int start, int end) {
            this.start = start;
            this.end = end;
        }
    }
}

你需要依次執行下面這兩個調用(構造函數和 getHighlightedHtml())來獲取高亮顯示的 HTML:

// Create a highlighter for the query "abc def" over the HTML held in htmlString.
Highlighter hl = new Highlighter("abc def", htmlString); 
// Parse the HTML, mark the matches and get the resulting document back as a string.
String newhtmlString = hl.getHighlightedHtml(); 

這將突出顯示所有與正則表達式 (abc)|(def)* 匹配的內容。您可以通過修改 buildRegexFromQuery() 函數來更改構建正則表達式的方式。

相關問題