我已經創建了一個程序來執行如下操作:使用Java計算基於mysql表中值的值
我有22000個作者分成22000個文件。每個文件都包含一組關鍵字(可以是一個或幾百個)。 例如AUTHOR-22945.txt
由關鍵字如下:
algorithm
problem
computational
solution
general
application
base
然後,我有在MySQL 100個表(topic0
直到topic99
)。每個表格由keywords
和value
組成。每個主題表格包含20行。 例如:
keywords | value
us 0.021263
base 0.019618
within 0.014545
new 0.010844
apply 0.010296
process 0.010158
propose 0.009199
provide 0.008239
analysis 0.007005
three 0.006594
approach 0.006183
domain 0.005634
structural 0.004126
adapt 0.003989
aspect 0.003989
product 0.003441
efficient 0.003441
information 0.003167
associate 0.003167
class 0.00303
我試圖創建將產生從全部100個主題拍攝的每張作者設定的值的節目。我想要將作者的平均值從topic0
改爲topic99
。因此,如果在topic0中存在來自作者文件的關鍵字,我會採取這些值並對其進行求和。依此類推,直到topic99
。因此,作者可以有一個輸出,包括每個主題的總和[t0, t1, t2, ..., t99]
這將是例如[0, 0.3452, 0.2343, .... , 0.08342].
目前我的程序所做的是安靜混亂的方式。當我用一個作者數據對它進行測試時,它工作併產生了正確的數字。但是,當我輸入22000個作者來產生輸出時,花了超過5天的時間來完成這個任務。當我抽取樣品並隨機檢查時,結果是錯誤的。
我想問你,什麼是正確的算法或工具或邏輯方法來做到這一點,這將產生一個準確的輸出,也沒有那麼長的編譯時間呢?僅供參考,我也擁有所有作者的數據和每個作者在mysql表中的關鍵字。在這種情況下使用mysql innerjoin或java會更好嗎?
非常感謝。
僅供參考,我目前的雜亂的代碼如下:
package counttopicscore;
import java.sql.*;
import java.io.*;
import java.util.Arrays;
public class CountTopicScore{
public static void main(String[] args) {
try{
String myDriver = "org.gjt.mm.mysql.Driver";
String myUrl = "jdbc:mysql://localhost/arnetminer";
Class.forName(myDriver);
Connection conn = DriverManager.getConnection(myUrl, "root", "1234");
String query0 = "SELECT DISTINCT author_key FROM authorkeywords";
Statement st0 = conn.createStatement();
ResultSet rs0 = st0.executeQuery(query0);
while(rs0.next())
{
String authorkey = rs0.getString("author_key");
String query = "SELECT keywords FROM authorkeywords WHERE author_key ='"+ authorkey +"'"+";";
Statement st = conn.createStatement();
ResultSet rs = st.executeQuery(query);
double Value0, Value1, Value2, Value3, Value4, Value5, Value6, Value7, Value8, Value9, Value10,
Value11, Value12, Value13, Value14, Value15, Value16, Value17, Value18, Value19, Value20,
Value21, Value22, Value23, Value24, Value25, Value26, Value27, Value28, Value29, Value30,
Value31, Value32, Value33, Value34, Value35, Value36, Value37, Value38, Value39, Value40,
Value41, Value42, Value43, Value44, Value45, Value46, Value47, Value48, Value49, Value50,
Value51, Value52, Value53, Value54, Value55, Value56, Value57, Value58, Value59, Value60,
Value61, Value62, Value63, Value64, Value65, Value66, Value67, Value68, Value69, Value70,
Value71, Value72, Value73, Value74, Value75, Value76, Value77, Value78, Value79, Value80,
Value81, Value82, Value83, Value84, Value85, Value86, Value87, Value88, Value89, Value90,
Value91, Value92, Value93, Value94, Value95, Value96, Value97, Value98, Value99;
double topic0Value = 0.000000, topic1Value = 0.000000, topic2Value = 0.000000, topic3Value = 0.000000,
topic4Value = 0.000000, topic5Value = 0.000000, topic6Value = 0.000000, topic7Value = 0.000000,
topic8Value = 0.000000, topic9Value = 0.000000, topic10Value = 0.000000, topic11Value = 0.000000,
topic12Value = 0.000000, topic13Value = 0.000000;
double topic14Value = 0.000000;
double topic15Value = 0.000000;
double topic16Value = 0.000000;
double topic17Value = 0.000000;
double topic18Value = 0.000000;
double topic19Value = 0.000000;
double topic20Value = 0.000000;
double topic21Value = 0.000000;
double topic22Value = 0.000000;
double topic23Value = 0.000000;
double topic24Value = 0.000000;
double topic25Value = 0.000000;
double topic26Value = 0.000000;
double topic27Value = 0.000000;
double topic28Value = 0.000000;
double topic29Value = 0.000000;
double topic30Value = 0.000000;
double topic31Value = 0.000000;
double topic32Value = 0.000000;
double topic33Value = 0.000000;
double topic34Value = 0.000000;
double topic35Value = 0.000000;
double topic36Value = 0.000000;
double topic37Value = 0.000000;
double topic38Value = 0.000000;
double topic39Value = 0.000000;
double topic40Value = 0.000000;
double topic41Value = 0.000000;
double topic42Value = 0.000000;
double topic43Value = 0.000000;
double topic44Value = 0.000000;
double topic45Value = 0.000000;
double topic46Value = 0.000000;
double topic47Value = 0.000000;
double topic48Value = 0.000000;
double topic49Value = 0.000000;
double topic50Value = 0.000000;
double topic51Value = 0.000000;
double topic52Value = 0.000000;
double topic53Value = 0.000000;
double topic54Value = 0.000000;
double topic55Value = 0.000000;
double topic56Value = 0.000000;
double topic57Value = 0.000000;
double topic58Value = 0.000000;
double topic59Value = 0.000000;
double topic60Value = 0.000000;
double topic61Value = 0.000000;
double topic62Value = 0.000000;
double topic63Value = 0.000000;
double topic64Value = 0.000000;
double topic65Value = 0.000000;
double topic66Value = 0.000000;
double topic67Value = 0.000000;
double topic68Value = 0.000000;
double topic69Value = 0.000000;
double topic70Value = 0.000000;
double topic71Value = 0.000000;
double topic72Value = 0.000000;
double topic73Value = 0.000000;
double topic74Value = 0.000000;
double topic75Value = 0.000000;
double topic76Value = 0.000000;
double topic77Value = 0.000000;
double topic78Value = 0.000000;
double topic79Value = 0.000000;
double topic80Value = 0.000000;
double topic81Value = 0.000000;
double topic82Value = 0.000000;
double topic83Value = 0.000000;
double topic84Value = 0.000000;
double topic85Value = 0.000000;
double topic86Value = 0.000000;
double topic87Value = 0.000000;
double topic88Value = 0.000000;
double topic89Value = 0.000000;
double topic90Value = 0.000000;
double topic91Value = 0.000000;
double topic92Value = 0.000000;
double topic93Value = 0.000000;
double topic94Value = 0.000000;
double topic95Value = 0.000000;
double topic96Value = 0.000000;
double topic97Value = 0.000000;
double topic98Value = 0.000000;
double topic99Value = 0.000000;
while (rs.next())
{
String keyword = rs.getString("keywords");
Value0 = findTopic0(keyword);
Value1 = findTopic1(keyword);
Value2 = findTopic2(keyword);
Value3 = findTopic3(keyword);
Value4 = findTopic4(keyword);
Value5 = findTopic5(keyword);
Value6 = findTopic6(keyword);
Value7 = findTopic7(keyword);
Value8 = findTopic8(keyword);
Value9 = findTopic9(keyword);
Value10 = findTopic10(keyword);
Value11 = findTopic11(keyword);
Value12 = findTopic12(keyword);
Value13 = findTopic13(keyword);
Value14 = findTopic14(keyword);
Value15 = findTopic15(keyword);
Value16 = findTopic16(keyword);
Value17 = findTopic17(keyword);
Value18 = findTopic18(keyword);
Value19 = findTopic19(keyword);
Value20 = findTopic20(keyword);
Value21 = findTopic21(keyword);
Value22 = findTopic22(keyword);
Value23 = findTopic23(keyword);
Value24 = findTopic24(keyword);
Value25 = findTopic25(keyword);
Value26 = findTopic26(keyword);
Value27 = findTopic27(keyword);
Value28 = findTopic28(keyword);
Value29 = findTopic29(keyword);
Value30 = findTopic30(keyword);
Value31 = findTopic31(keyword);
Value32 = findTopic32(keyword);
Value33 = findTopic33(keyword);
Value34 = findTopic34(keyword);
Value35 = findTopic35(keyword);
Value36 = findTopic36(keyword);
Value37 = findTopic37(keyword);
Value38 = findTopic38(keyword);
Value39 = findTopic39(keyword);
Value40 = findTopic40(keyword);
Value41 = findTopic41(keyword);
Value42 = findTopic42(keyword);
Value43 = findTopic43(keyword);
Value44 = findTopic44(keyword);
Value45 = findTopic45(keyword);
Value46 = findTopic46(keyword);
Value47 = findTopic47(keyword);
Value48 = findTopic48(keyword);
Value49 = findTopic49(keyword);
Value50 = findTopic50(keyword);
Value51 = findTopic51(keyword);
Value52 = findTopic52(keyword);
Value53 = findTopic53(keyword);
Value54 = findTopic54(keyword);
Value55 = findTopic55(keyword);
Value56 = findTopic56(keyword);
Value57 = findTopic57(keyword);
Value58 = findTopic58(keyword);
Value59 = findTopic59(keyword);
Value60 = findTopic60(keyword);
Value61 = findTopic61(keyword);
Value62 = findTopic62(keyword);
Value63 = findTopic63(keyword);
Value64 = findTopic64(keyword);
Value65 = findTopic65(keyword);
Value66 = findTopic66(keyword);
Value67 = findTopic67(keyword);
Value68 = findTopic68(keyword);
Value69 = findTopic69(keyword);
Value70 = findTopic70(keyword);
Value71 = findTopic71(keyword);
Value72 = findTopic72(keyword);
Value73 = findTopic73(keyword);
Value74 = findTopic74(keyword);
Value75 = findTopic75(keyword);
Value76 = findTopic76(keyword);
Value77 = findTopic77(keyword);
Value78 = findTopic78(keyword);
Value79 = findTopic79(keyword);
Value80 = findTopic80(keyword);
Value81 = findTopic81(keyword);
Value82 = findTopic82(keyword);
Value83 = findTopic83(keyword);
Value84 = findTopic84(keyword);
Value85 = findTopic85(keyword);
Value86 = findTopic86(keyword);
Value87 = findTopic87(keyword);
Value88 = findTopic88(keyword);
Value89 = findTopic89(keyword);
Value90 = findTopic90(keyword);
Value91 = findTopic91(keyword);
Value92 = findTopic92(keyword);
Value93 = findTopic93(keyword);
Value94 = findTopic94(keyword);
Value95 = findTopic95(keyword);
Value96 = findTopic96(keyword);
Value97 = findTopic97(keyword);
Value98 = findTopic98(keyword);
Value99 = findTopic99(keyword);
topic0Value = topic0Value + Value0;
topic1Value = topic1Value + Value1;
topic2Value = topic2Value + Value2;
topic3Value = topic3Value + Value3;
topic4Value = topic4Value + Value4;
topic5Value = topic5Value + Value5;
topic6Value = topic6Value + Value6;
topic7Value = topic7Value + Value7;
topic8Value = topic8Value + Value8;
topic9Value = topic9Value + Value9;
topic10Value = topic10Value + Value10;
topic11Value = topic11Value + Value11;
topic12Value = topic12Value + Value12;
topic13Value = topic13Value + Value13;
topic14Value = topic14Value + Value14;
topic15Value = topic15Value + Value15;
topic16Value = topic16Value + Value16;
topic17Value = topic17Value + Value17;
topic18Value = topic18Value + Value18;
topic19Value = topic19Value + Value19;
topic20Value = topic20Value + Value20;
topic21Value = topic21Value + Value21;
topic22Value = topic22Value + Value22;
topic23Value = topic23Value + Value23;
topic24Value = topic24Value + Value24;
topic25Value = topic25Value + Value25;
topic26Value = topic26Value + Value26;
topic27Value = topic27Value + Value27;
topic28Value = topic28Value + Value28;
topic29Value = topic29Value + Value29;
topic30Value = topic30Value + Value30;
topic31Value = topic31Value + Value31;
topic32Value = topic32Value + Value32;
topic33Value = topic33Value + Value33;
topic34Value = topic34Value + Value34;
topic35Value = topic35Value + Value35;
topic36Value = topic36Value + Value36;
topic37Value = topic37Value + Value37;
topic38Value = topic38Value + Value38;
topic39Value = topic39Value + Value39;
topic40Value = topic40Value + Value40;
topic41Value = topic41Value + Value41;
topic42Value = topic42Value + Value42;
topic43Value = topic43Value + Value43;
topic44Value = topic44Value + Value44;
topic45Value = topic45Value + Value45;
topic46Value = topic46Value + Value46;
topic47Value = topic47Value + Value47;
topic48Value = topic48Value + Value48;
topic49Value = topic49Value + Value49;
topic50Value = topic50Value + Value50;
topic51Value = topic51Value + Value51;
topic52Value = topic52Value + Value52;
topic53Value = topic53Value + Value53;
topic54Value = topic54Value + Value54;
topic55Value = topic55Value + Value55;
topic56Value = topic56Value + Value56;
topic57Value = topic57Value + Value57;
topic58Value = topic58Value + Value58;
topic59Value = topic59Value + Value59;
topic60Value = topic60Value + Value60;
topic61Value = topic61Value + Value61;
topic62Value = topic62Value + Value62;
topic63Value = topic63Value + Value63;
topic64Value = topic64Value + Value64;
topic65Value = topic65Value + Value65;
topic66Value = topic66Value + Value66;
topic67Value = topic67Value + Value67;
topic68Value = topic68Value + Value68;
topic69Value = topic69Value + Value69;
topic70Value = topic70Value + Value70;
topic71Value = topic71Value + Value71;
topic72Value = topic72Value + Value72;
topic73Value = topic73Value + Value73;
topic74Value = topic74Value + Value74;
topic75Value = topic75Value + Value75;
topic76Value = topic76Value + Value76;
topic77Value = topic77Value + Value77;
topic78Value = topic78Value + Value78;
topic79Value = topic79Value + Value79;
topic80Value = topic80Value + Value80;
topic81Value = topic81Value + Value81;
topic82Value = topic82Value + Value82;
topic83Value = topic83Value + Value83;
topic84Value = topic84Value + Value84;
topic85Value = topic85Value + Value85;
topic86Value = topic86Value + Value86;
topic87Value = topic87Value + Value87;
topic88Value = topic88Value + Value88;
topic89Value = topic89Value + Value89;
topic90Value = topic90Value + Value90;
topic91Value = topic91Value + Value91;
topic92Value = topic92Value + Value92;
topic93Value = topic93Value + Value93;
topic94Value = topic94Value + Value94;
topic95Value = topic95Value + Value95;
topic96Value = topic96Value + Value96;
topic97Value = topic97Value + Value97;
topic98Value = topic98Value + Value98;
topic99Value = topic99Value + Value99;
}
st.close();
double[] score = new double[100];
score[0]=topic0Value;
score[1]=topic1Value;
score[2]=topic2Value;
score[3]=topic3Value;
score[4]=topic4Value;
score[5]=topic5Value;
score[6]=topic6Value;
score[7]=topic7Value;
score[8]=topic8Value;
score[9]=topic9Value;
score[10]=topic10Value;
score[11]=topic11Value;
score[12]=topic12Value;
score[13]=topic13Value;
score[14]=topic14Value;
score[15]=topic15Value;
score[16]=topic16Value;
score[17]=topic17Value;
score[18]=topic18Value;
score[19]=topic19Value;
score[20]=topic20Value;
score[21]=topic21Value;
score[22]=topic22Value;
score[23]=topic23Value;
score[24]=topic24Value;
score[25]=topic25Value;
score[26]=topic26Value;
score[27]=topic27Value;
score[28]=topic28Value;
score[29]=topic29Value;
score[30]=topic30Value;
score[31]=topic31Value;
score[32]=topic32Value;
score[33]=topic33Value;
score[34]=topic34Value;
score[35]=topic35Value;
score[36]=topic36Value;
score[37]=topic37Value;
score[38]=topic38Value;
score[39]=topic39Value;
score[40]=topic40Value;
score[41]=topic41Value;
score[42]=topic42Value;
score[43]=topic43Value;
score[44]=topic44Value;
score[45]=topic45Value;
score[46]=topic46Value;
score[47]=topic47Value;
score[48]=topic48Value;
score[49]=topic49Value;
score[50]=topic50Value;
score[51]=topic51Value;
score[52]=topic52Value;
score[53]=topic53Value;
score[54]=topic54Value;
score[55]=topic55Value;
score[56]=topic56Value;
score[57]=topic57Value;
score[58]=topic58Value;
score[59]=topic59Value;
score[60]=topic60Value;
score[61]=topic61Value;
score[62]=topic62Value;
score[63]=topic63Value;
score[64]=topic64Value;
score[65]=topic65Value;
score[66]=topic66Value;
score[67]=topic67Value;
score[68]=topic68Value;
score[69]=topic69Value;
score[70]=topic70Value;
score[71]=topic71Value;
score[72]=topic72Value;
score[73]=topic73Value;
score[74]=topic74Value;
score[75]=topic75Value;
score[76]=topic76Value;
score[77]=topic77Value;
score[78]=topic78Value;
score[79]=topic79Value;
score[80]=topic80Value;
score[81]=topic81Value;
score[82]=topic82Value;
score[83]=topic83Value;
score[84]=topic84Value;
score[85]=topic85Value;
score[86]=topic86Value;
score[87]=topic87Value;
score[88]=topic88Value;
score[89]=topic89Value;
score[90]=topic90Value;
score[91]=topic91Value;
score[92]=topic92Value;
score[93]=topic93Value;
score[94]=topic94Value;
score[95]=topic95Value;
score[96]=topic96Value;
score[97]=topic97Value;
score[98]=topic98Value;
score[99]=topic99Value;
PrintWriter pr = new PrintWriter("E:/scoring/"+authorkey+".txt");
for (int j=0; j<100 ; j++)
{
pr.println(score[j]);
}
pr.close();
}
}
catch (Exception e){
System.err.println("Got an exception! ");
System.err.println(e.getMessage());
}
}
private static double findTopic0(String keyword) {
double value = 0;
try{
String myDriver = "org.gjt.mm.mysql.Driver";
String myUrl = "jdbc:mysql://localhost/arnetminer";
Class.forName(myDriver);
Connection conn = DriverManager.getConnection(myUrl, "root", "1234");
Statement st = conn.createStatement();
String queryToTopicTable = ("SELECT value FROM topic0 where word="+"'"+keyword+"'"+";");
ResultSet valueResult = st.executeQuery(queryToTopicTable);
while (valueResult.next())
{
value = valueResult.getDouble("value");
}
st.close();
}
catch (Exception e){
System.err.println("exception");
System.err.println(e.getMessage()); }
return value;
}
private static double findTopic1(String keyword) {
double value = 0;
try{
String myDriver = "org.gjt.mm.mysql.Driver";
String myUrl = "jdbc:mysql://localhost/arnetminer";
Class.forName(myDriver);
Connection conn = DriverManager.getConnection(myUrl, "root", "1234");
Statement st = conn.createStatement();
String queryToTopicTable = ("SELECT value FROM topic1 where word="+"'"+keyword+"'"+";");
ResultSet valueResult = st.executeQuery(queryToTopicTable);
while (valueResult.next())
{
value = valueResult.getDouble("value");
}
st.close();
}
catch (Exception e){
System.err.println("exception");
System.err.println(e.getMessage()); }
return value;
}
private static double findTopic2(String keyword) {
double value = 0;
try{
String myDriver = "org.gjt.mm.mysql.Driver";
String myUrl = "jdbc:mysql://localhost/arnetminer";
Class.forName(myDriver);
Connection conn = DriverManager.getConnection(myUrl, "root", "1234");
Statement st = conn.createStatement();
String queryToTopicTable = ("SELECT value FROM topic2 where word="+"'"+keyword+"'"+";");
ResultSet valueResult = st.executeQuery(queryToTopicTable);
while (valueResult.next())
{
value = valueResult.getDouble("value");
}
st.close();
}
catch (Exception e){
System.err.println("exception");
System.err.println(e.getMessage()); }
return value;
}
private static double findTopic3(String keyword) {
double value = 0;
try{
String myDriver = "org.gjt.mm.mysql.Driver";
String myUrl = "jdbc:mysql://localhost/arnetminer";
Class.forName(myDriver);
Connection conn = DriverManager.getConnection(myUrl, "root", "1234");
Statement st = conn.createStatement();
String queryToTopicTable = ("SELECT value FROM topic3 where word="+"'"+keyword+"'"+";");
ResultSet valueResult = st.executeQuery(queryToTopicTable);
while (valueResult.next())
{
value = valueResult.getDouble("value");
}
st.close();
}
catch (Exception e){
System.err.println("exception");
System.err.println(e.getMessage()); }
return value;
}
// .... until topic99
}