2012-04-07 87 views
2

我想實現一個百萬個節點作爲輸入的pagerank算法。pagerank:如何解決java.lang.StackOverflowError?

但是,我有這個錯誤。我很確定這是因爲一個開放的遞歸調用問題,從generateParamList()。

我試圖使這種方法不使用遞歸,但一次又一次地失敗。如果你讓我知道如何修改「generateParamList()」,我會明白這一點!

Exception in thread "main" java.lang.StackOverflowError 
    at Ranking.getInboundLinks(Ranking.java:200) 
    at Ranking.generateParamList(Ranking.java:172) 


    import Jama.Matrix; 

    public class Ranking { 

    private final double DAMPING_FACTOR = 0.85; 

    private List params = new ArrayList(); 
    private static Map<String, String[]> outlinkMap = new HashMap(); 
    private static Map<String, String[]> inlinkMap = new HashMap(); 

    public static void main(String[] args) throws IOException { 

     Ranking ranking = new Ranking(); 

     FileInputStream in = new FileInputStream("linkgraph_test.txt"); 
     BufferedReader br = new BufferedReader(new InputStreamReader(in)); 

     String strLine; 
     String[] tempStringArray = null; 

     int iTemp = 0; 

     while ((strLine = br.readLine()) != null) { 

      tempStringArray = strLine.split(" "); 

      String[] tempValueArray = new String[tempStringArray.length-1]; 

      for(iTemp=1;iTemp<tempStringArray.length;iTemp++){ 
       tempValueArray[iTemp-1] = tempStringArray[iTemp]; 
      } 

      outlinkMap.put(tempStringArray[0], tempValueArray); 
     } 
     in.close(); 

     setInboundLinks(); 


     for(int j=1;j<=outlinkMap.size();j++){ 
      String keytoString = Integer.toString(j); 
      System.out.println(keytoString+" "+ranking.rank(keytoString)); 
     } 
    } 

    /* 
    * 
    * Solve the equation of ax=b, which : a is the generated matrix based on the parameter constants. x is the page ranks matrix. b is a n*1 matrix which all the values are equal to the damping factor. 
    * 
    */ 

    public double rank(String pageId) { 

     generateParamList(pageId); 
     Matrix a = new Matrix(generateMatrix()); 
     double[][] arrB = new double[params.size()][1]; 
     for (int i = 0; i < params.size(); i++) { 
      arrB[i][0] = 1 - DAMPING_FACTOR; 
     } 
     Matrix b = new Matrix(arrB); 

     // Solve the equation and get the page ranks 
     Matrix x = a.solve(b); 
     int ind = 0; 
     int cnt = 0; 
     for (Iterator it = params.iterator(); it.hasNext();) { 
      String curPage = (String) it.next(); 
      if (curPage.equals(pageId)) 
       ind = cnt; 
      cnt++; 
     } 
     return x.getArray()[ind][0]; 
    } 

    /* 
    * 
    * This method generates the matrix of the linear equations. The generated 
    * 
    * matrix is a n*n matrix where n is number of the related pages. 
    * 
    */ 
    private double[][] generateMatrix() { 
     double[][] arr = new double[params.size()][params.size()]; 
     for (int i = 0; i < params.size(); i++) { 
      for (int j = 0; j < params.size(); j++) { 
       arr[i][j] = getMultiFactor((String) params.get(i),(String) params.get(j)); 

      } 

     } 

     return arr; 

    } 

    /* 
    * 
    * This method returns the constant of the given variable in the linear equation. 
    * 
    */ 

    private double getMultiFactor(String sourceId, String linkId) { 
     if (sourceId.equals(linkId)) 
      return 1; 
     if(!sourceId.equals(linkId) && getInboundLinks(sourceId)!=null) { 
      //System.out.println("source"+sourceId+"\tlinkInd"+linkId); 
      String[] inc = getInboundLinks(sourceId); 
      for (int i = 0; i < inc.length; i++) { 
       if (inc[i].equals(linkId)) { 
        return -1*(DAMPING_FACTOR/getOutboundLinks(linkId).length); 
       } 
      } 
     } 

     return 0; 

    } 

    /* 
    * 
    * This method returns list of the related pages. This list is also the parameters in the linear equation. 
    * 
    */ 

    private void generateParamList(String pageId) { 

     System.out.println("start"+pageId); 
     if(getInboundLinks(pageId)!=null){ 
     // Add the starting page. 
     if (!params.contains(pageId)) params.add(pageId); 

     // Get list of the inbound pages 

      String[] inc = getInboundLinks(pageId); 
     // Add the inbound links to the params list and do same for inbound links 

      for (int i = 0; i < inc.length; i++) { 

       if (!params.contains(inc[i])){ 
        generateParamList(inc[i]); 

       } 

      } 
     } 

    } 

    /* 
    * 
    * Return list of the inbound links to a given page. 
    * 
    */ 

    private String[] getInboundLinks(String pageId) { 

     return (String[]) inlinkMap.get(pageId); 

    } 

private static void setInboundLinks(){ 

     int valueLength; 
     String keytoString; 
     String[] outlinkValueArray, inlinkValueArrayTemp, tempCopyArray, resultArray; 


     for(int i=1; i<=outlinkMap.size();i++){ 

      keytoString = Integer.toString(i); 
      outlinkValueArray= outlinkMap.get(keytoString); 
      if(outlinkValueArray!=null){ 
       valueLength = outlinkValueArray.length;   
       for(int j=0; j<valueLength;j++){ 
        String[] tempValueArray = new String[1]; 
        if(!inlinkMap.containsKey(outlinkValueArray[j])){ 


         tempValueArray[0] = keytoString; 
         inlinkMap.put(outlinkValueArray[j],tempValueArray); 

        } 
        else{ 

         tempCopyArray = inlinkMap.get(outlinkValueArray[j]); 
         tempValueArray[0] = keytoString; 

         resultArray = mergeArray(tempCopyArray, tempValueArray); 
         inlinkMap.put(outlinkValueArray[j],resultArray); 
        } 
       } 
      } 
     } 

    } 

    public static String[] mergeArray(String[] ar1, String[] ar2){ 
     String[] ar3 = Arrays.copyOf(ar1, ar1.length+ar2.length); 
     System.arraycopy(ar2, 0, ar3, ar1.length, ar2.length); 
     return ar3; 
    } 
    /* 
    * 
    * Returns list of the outbound links from a page. 
    * 
    */ 

    private String[] getOutboundLinks(String pageId) { 

     // This simulates a simple page collection 
     return (String[]) outlinkMap.get(pageId); 

    } 
} 

輸入格式: 1 126645 320349 144068 635538​​ 37354 141327 53842 121180 4786 31658 285466 526963 93938 9097 32341 ............. 2 ......

+0

遞歸函數不適用於大數據,因爲它會做很多遞歸調用。 在這種情況下,你應該使用循環。 – LawfulHacker 2012-04-07 19:34:59

+0

@SergioGarcia謝謝塞爾吉奧,但你能告訴更具體的嗎?那麼我該如何修改:「private void generateParamList(String pageId){........ for(int i = 0; i 2012-04-07 19:51:43

回答

0

改變你的功能generateParamList爲此,它是一個非遞歸的版本。 我認爲它做同樣的工作,請測試。我提出了一些意見來幫助你理解這個概念。

private void generateParamList(String pageId) { 
    // Don't process duplicates 
    if (params.contains(pageId)) return; 

    // Store links not processed 
    Stack<String> links = new Stack<String>(); 

    // Store current pageId to be processed 
    links.push(pageId); 

    // Loop, processed the Stack link, not a recursive call 
    while (!links.empty()) { 
     pageId = links.pop(); 

     // Link already processed 
     if (params.contains(pageId)) continue; 

     params.add(pageId); 

     // Get list of the inbound pages 
     String[] inc = getInboundLinks(pageId); 

     for (int i = 0; i < inc.length; i++) { 
      // Store link to be processed by the loop 
      links.push(inc[i]); 
     } 
    } 
}