2015-11-01 44 views
0

jsoup:文本操作

我有一個 HTML 文件,例如:

<!DOCTYPE html> 
<html> 
<body> 

<h1>My First Heading</h1> 

<p>My first paragraph.</p> 

</body> 
</html> 

我已經用 Java 編寫了一個方法,把文本中的字符從拉丁字母轉換爲西里爾字母,如下所示:

/**
 * Converts a single Latin letter to its Cyrillic counterpart.
 *
 * Only the letters {@code A}, {@code a}, {@code B} and {@code b} are mapped;
 * every other character is returned unchanged.
 *
 * @param charSent the character to convert
 * @return the mapped Cyrillic character, or {@code charSent} itself when no
 *         mapping exists
 */
public static char changeLetterLatCyr(char charSent) {
    switch (charSent) {
        case 'A':
            return 'А';
        case 'a':
            return 'а';
        case 'B':
            return 'Б';
        case 'b':
            return 'б';
        default:
            // No mapping defined: pass the character through untouched.
            return charSent;
    }
}

如何在保留所有標籤結構的前提下,把我的函數應用到文檔中的所有文本上?該函數會把每個字符替換爲特定的對應字符。

我需要這樣的結果:

<!DOCTYPE html> 
<html> 
<body> 

<h1>some manipulation on text</h1> 

<p>some manipulation on text</p> 

</body> 
</html> 
+0

你可以把你自己的函數加到問題裏嗎? –

+0

public static char changeLetterLatCyr(char charSent){ char l_A ='A', l_a ='a', l_C ='C', l_c ='c'; if(charSent == l_A){ result = r_A; else if(charSent == l_a){ result = r_a; result = r_b; else if(charSent == l_C){ result = r_C; else if(charSent == l_c){ result = r_c; return result; } 它將拉丁字母轉換爲西里爾字母。 – Seomat

+0

更新您的問題,添加此代碼。 –

回答

1

你可以這樣做。NodeVisitor 在這裏起了關鍵作用。

Java

package com.github.davidepastore.stackoverflow33463949; 

import java.io.IOException; 
import java.io.InputStream; 

import org.jsoup.Jsoup; 
import org.jsoup.nodes.Document; 
import org.jsoup.nodes.Node; 
import org.jsoup.nodes.TextNode; 
import org.jsoup.select.Elements; 
import org.jsoup.select.NodeVisitor; 

/** 
* Stackoverflow 33463949 question. 
* 
*/ 
public class App { 

    /** 
    * Starts the app here. 
    * @param args 
    * @throws IOException 
    */ 
    public static void main(String[] args) throws IOException { 
     ClassLoader classloader = Thread.currentThread() 
       .getContextClassLoader(); 
     InputStream is = classloader.getResourceAsStream("file.html"); 
     Document document = Jsoup.parse(is, "UTF-8", ""); 
     Elements elements = document.select("body"); 
     manipulateElements(elements); 

     System.out.println("Result: " + document.toString()); 
    } 

    /** 
    * Manipulate the {@link Elements}. 
    * @param elements The {@link Elements} to manipulate. 
    */ 
    private static void manipulateElements(Elements elements) { 
     elements.traverse(new NodeVisitor() { 

      public void tail(Node node, int depth) { 
      } 

      public void head(Node node, int depth) { 
       if (node instanceof TextNode) { 
        TextNode textNode = (TextNode) node; 
        String text = textNode.text().trim(); 
        if (!text.isEmpty()) { 
         char[] newChars = new char[text.length()]; 
         for (int i = 0; i < text.length(); i++) { 
          newChars[i] = changeLetterLatCyr(text.charAt(i)); 
         } 
         textNode.text(new String(newChars)); 
        } 
       } 
      } 
     }); 
    } 

    /** 
    * Your own custom change letter method. 
    * @param charSent The char to convert. 
    * @return Returns the converted char. 
    */ 
    public static char changeLetterLatCyr(char charSent) { 
     char l_A = 'A', 
      l_a = 'a', 
      l_B = 'B', 
      l_b = 'b', 
      r_A = 'А', 
      r_a = 'а', 
      r_B = 'Б', 
      r_b = 'б', 
      result = ' '; 

     if (charSent == l_A) { 
      result = r_A; 
     } else if (charSent == l_a) { 
      result = r_a; 
     } else if (charSent == l_B) { 
      result = r_B; 
     } else if (charSent == l_b) { 
      result = r_b; 
     } else { 
      result = charSent; 
     } 
     return result; 
    } 
} 

HTML

<!DOCTYPE html> 
<html> 
<body> 

<h1>My First Heading</h1> 

<p>My first paragraph.</p> 
<div> 
    <p>A a B b Complex structure</p> 
</div> 

</body> 
</html> 

輸出

Result: <!doctype html> 
<html> 
<head></head> 
<body> 
    <h1>My First Heаding</h1> 
    <p>My first pаrаgrаph.</p> 
    <div> 
    <p>А а Б б Complex structure</p> 
    </div> 
</body> 
</html> 
+0

我遇到了這些錯誤:Exception in thread "main" java.lang.ClassNotFoundException … at java.lang.ClassLoader.loadClass(ClassLoader.java:424) 	at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:331) 	at java.lang.ClassLoader.loadClass(ClassLoader.java:357) 	at java.lang.Class.forName0(Native Method) 	at java.lang.Class.forName(Class.java:264) 	at com.intellij.rt.execution.application.AppMain.main(AppMain.java:122) – Seomat

+0

你是如何讀取 HTML 的? –

+0

非常感謝!效果很好!能否請你也看看我的這個相關問題:http://stackoverflow.com/questions/33466932/jsoup-baseurl-in-connect-method – Seomat