我想在MySQL數據庫中存儲一些印地語詞。爲此我寫了一個網絡爬蟲。我能夠從HTML頁面成功讀取這些單詞,並將它們顯示在NetBeans控制檯中。但是當我在MySQL中插入它們時,它們變成了???????。另外,如果我在PHPMyAdmin中使用SQL查詢插入相同的單詞,它們會正確存儲。在MYSQL中使用JAVA網絡爬蟲存儲印地文詞
我在Google和各種論壇上搜索了很多,並且在大多數地方處理Unicode時都採取了適當的預防措施。我們是否必須在SQL語句(JDBC)中明確提及是否輸入Unicode?
這是我的整個代碼。
import java.io.*;
import java.net.URL;
import java.net.URLConnection;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.Statement;
import java.util.*;
public class TestDataParsing2 {
public int counter = 1;
private String ID = "";
private String title = "";
private String owner = "";
private String s="";
private Connection conn = null;
private String url = "jdbc:mysql://localhost:3306";
private String dbName = "/hindi-eng";
private String driver = "com.mysql.jdbc.Driver";
private String userName = "root";
private String password = "";
private String TABLE = "dict";
private void initdb(){
try {
Class.forName(driver).newInstance();
conn = DriverManager.getConnection(url+dbName,userName,password);
} catch (Exception e) {
e.printStackTrace();
}
}
private void closedb(){
try {
conn.close();
} catch (Exception e) {
e.printStackTrace();
}
}
public void process(String content){
try{
BufferedReader reader = new BufferedReader(new StringReader(content));
String text = "";
boolean start1 = false;
boolean start2 = false;
while ((text = reader.readLine()) != null) {
if(text.contains("\"a")) {
System.out.println("______________________________________________________________");
String id = getID(text);
this.ID = id;
String title = getTitle(text);
this.title = title;
String owner = getOwner(text);
this.owner = owner;
start1 = true;
}
if(start1 && start2) {
String s = getS(text);
this.s = s;
counter++;
insert();
start2=false;
start1= false;
}
if(start1) {
start2= true;
}
}
}catch(Exception e){
System.out.println(e);
}
}
public void insert(){
String insertString = "INSERT INTO " + TABLE + " VALUES (" + this.counter + ",'" +
this.ID + "','" + this.title + "','" + this.owner + "','" + this.s + "')";
System.out.println(insertString);
try {
Statement stmt = conn.createStatement();
stmt.executeUpdate(insertString);
stmt.close();
} catch(Exception e) {
System.out.println(e);
}
}
public String getID(String text){
String id = "";
id = text.substring(text.indexOf("\"")+1, text.indexOf("\","));
return id;
}
public String getTitle(String text){
String title = "";
title = text.substring(text.indexOf(",\"")+2, text.indexOf("\",\"1."));
return title;
}
public String getOwner(String text){
try{
String owner = "";
owner = text.substring(text.indexOf("\",\"1.")+5, text.indexOf("\"<br>"));
int i;
for(i=0;i<owner.length();i++) {
String fifthChar = "\u00AE";
int codePoint = owner.codePointAt(i);
}
return owner;
} catch(Exception e) {
System.out.println(e);
System.out.println("eeee");
}
return owner;
}
public String getS(String text){
String s = "";
s = text.substring(0, text.indexOf("<br>"));
return s;
}
public String download(String path) {
String result = "";
try {
URL url = new URL(path);
URLConnection conn = url.openConnection();
conn.setDoOutput(true);
InputStream in = null;
in = url.openStream();
String content = pipe(in,"utf-8");
result = content;
} catch (Exception e) {
e.printStackTrace();
}
return result;
}
public String pipe(InputStream in,String charset) throws IOException {
StringBuffer s = new StringBuffer();
if(charset==null||"".equals(charset)){
charset="utf-8";
}
String rLine = null;
BufferedReader bReader = new BufferedReader(new InputStreamReader(in,"UTF-8"));
FileOutputStream("C:\\Research\\MiningSoftwareRepositories\\Traceability-Link-Recovery\\EXPERIMENTS\\BR\\"
+ bugid + ".txt");
while ((rLine = bReader.readLine()) != null) {
String tmp_rLine = rLine;
s.append(tmp_rLine+"\n");
}
tmp_rLine = null;
}
in.close();
return s.toString();
}
public static void main(String[] args) {
TestDataParsing2 tdp = new TestDataParsing2();
tdp.initdb();
System.out.println("process started");
String urlPath = "file:///C:/Users/Abhinav/Downloads/Compressed/eng-hindi-dict-utf8/sa.htm";
String content = tdp.download(urlPath);
tdp.process(content);
tdp.closedb();
}
而且當我添加 「?了useUnicode = YES &的characterEncoding = UTF-8」 到conn_url我收到以下錯誤,這不是沒有它的到來。
java.sql.SQLException:不支持的字符編碼'UTF-8/hindi-eng'。 過程在com.mysql.jdbc.SQLError.createSQLException(SQLError.java:1074) 開始 在com.mysql.jdbc.SQLError.createSQLException(SQLError.java:988) 在com.mysql.jdbc.SQLError.createSQLException (SQLError.java:974) at com.mysql.jdbc.SQLError.createSQLException(SQLError.java:919) at com.mysql.jdbc.StringUtils.getBytes(StringUtils.java:574) at com.mysql.jdbc .StringUtils.getBytes(StringUtils.java:719) 在com.mysql.jdbc.Buffer.writeStringNoNull(Buffer.java:704) 在com.mysql.jdbc.MysqlIO.sqlQueryDirect(MysqlIO.java:2573) 在玉米.mysql.jdbc.ConnectionImpl.execSQL(ConnectionImpl.java:2713) at com.mysql.jdbc.ConnectionImpl.execSQL(ConnectionImpl.java:2 663) 在com.mysql.jdbc.StatementImpl.executeQuery(StatementImpl.java:1599) 在com.mysql.jdbc.ConnectionImpl.loadServerVariables(ConnectionImpl.java:3928) 在com.mysql.jdbc.ConnectionImpl.initializePropsFromServer( ConnectionImpl.java:3473) 在com.mysql.jdbc.ConnectionImpl.connectOneTryOnly(ConnectionImpl.java:2445) 在com.mysql.jdbc.ConnectionImpl.createNewIO(ConnectionImpl.java:2215) 在com.mysql.jdbc。 ConnectionImpl(ConnectionImpl.java:813) 在com.mysql.jdbc.JDBC4Connection。(JDBC4Connection.java:47) 在sun.reflect.NativeConstructorAccessorImpl.newInstance0(本機方法) 在sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl .java:57) at su n.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45) 在java.lang.reflect.Constructor.newInstance(Constructor.java:525) 在com.mysql.jdbc.Util.handleNewInstance(Util.java:411) at com.mysql.jdbc.ConnectionImpl.getInstance(ConnectionImpl.java:399) at com.mysql.jdbc.NonRegisteringDriver.connect(NonRegisteringDriver.java:334) at java.sql.DriverManager.getConnection(DriverManager.java: 579) 在java.sql.DriverManager.getConnection(DriverManager.java:221) 在TestDataParsing2.initdb(TestDataParsing2.java:29) 在TestDataParsing2.main(TestDataParsing2.java:239)
INSERT INTO dict VALUES(2,'a','Art','एक','我買了一支筆。') java.lang。NullPointerException異常 _ __ _ __ _ __ _ __ _ __ _ __ _ __ _ __ _ __ _ __ _ __ _ INSERT INTO dict VALUES(3,'aback','Adv','पीछे/हतप्रभ','I was some ') java.lang.NullPointerException
工作。我的意思是我知道我們必須使用它,但我正在混合的語法。但今天的最後一行是在今天。非常感謝。乾杯。 – Abhinav 2012-07-21 09:36:58