2011-08-08 60 views
1

如何使用Java找出字符串之間的循環依賴關係

假設我有2個文件,分別是ABCD.txt和DEF.txt。我需要檢查DEF.txt中是否存在字符串「ABCD」,以及ABCD.txt中是否存在字符串「DEF」,並將這樣的組合寫入文件。

我總共有大約15000個文件,每個文件包含大約50至3000行需要搜索的內容。我寫了一段代碼,它可以工作……但要花一個小時才能輸出整個列表……

是否有更好的方式來執行此操作?請建議我。

/**
 * Finds pairs of files that name each other and writes one XML report per package.
 *
 * <p>Every entry in the listing of {@code strInputPath} that is a directory whose name ends
 * in {@code "Plus"} is treated as a software package. For each package a report is written
 * to {@code strOutputPath + "_" + <package name> + "_CyclicDependency.xml"}. A pair is
 * reported when a line of file A equals the base name of file B (the file name minus its
 * last four characters, trimmed) and some line of B equals the base name of A.
 *
 * <p>Lines containing "INDICES" in the referencing file, or "EXTRA" in the referenced file,
 * are ignored (case-insensitive). Any exception is printed and ends the whole scan.
 *
 * <p>NOTE(review): both lookup tables are shared by all packages, as in the original code,
 * so a pair recorded for one package can suppress a pair in a later one - confirm that this
 * is intended.
 */
public void findCyclicDependency(){

    // Upper-cased base names of every file seen so far (key and value are the same string).
    Hashtable<String, String> htFileNameList_1 = new Hashtable<String, String>();
    // Pairs already reported: reporting file -> the file it depends on.
    Hashtable<String, String> htCyclicNameList = new Hashtable<String, String>();

    try {
        File baseInputDirectory = new File(strInputPath);
        List<File> baseInputDirListing = FileListing.getFileListing(baseInputDirectory);

        for (File swPackage : baseInputDirListing) {
            if (!swPackage.isDirectory() || !swPackage.getName().endsWith("Plus")) {
                continue;
            }

            List<File> currSwPackageFileListing = FileListing.getFileListing(swPackage);
            System.out.println("\n swPackage File --> " + swPackage.getName());
            String strCyclicDependencyOut = strOutputPath + "_"+ swPackage.getName() + "_CyclicDependency.xml";
            System.out.println("\n strCyclicDependencyOut File --> " + strCyclicDependencyOut);

            // The writer lives entirely inside the package branch, so it can never be null
            // when it is closed, and it is closed even if reading one of the files fails.
            PrintWriter outFile = new PrintWriter(new BufferedWriter(new FileWriter(strCyclicDependencyOut)));
            try {
                outFile.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
                outFile.write("<CyclicDependencyFile>");

                // Exact-case base name -> file, first occurrence wins. This replaces the
                // linear search over the listing that ran for every matching line.
                Hashtable<String, File> filesByBaseName = new Hashtable<String, File>();
                for (File dependentFile : currSwPackageFileListing) {
                    String fileName = dependentFile.getName();
                    String baseName = fileName.substring(0, fileName.length() - 4).trim();
                    htFileNameList_1.put(baseName.toUpperCase(), baseName.toUpperCase());
                    if (!filesByBaseName.containsKey(baseName)) {
                        filesByBaseName.put(baseName, dependentFile);
                    }
                }

                for (File dependentFile : currSwPackageFileListing) {
                    String currentFileName = dependentFile.getName();
                    String currentName = currentFileName.substring(0, currentFileName.length() - 4).trim();

                    BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(dependentFile)));
                    try {
                        String line;
                        while ((line = br.readLine()) != null) {
                            if (line.toUpperCase().indexOf("INDICES") != -1) {
                                continue;
                            }
                            // containsKey gives the same answer as the former contains()
                            // because key == value, but without scanning every entry.
                            if (!htFileNameList_1.containsKey(line.trim().toUpperCase())
                                    || line.equals(currentName)) {
                                continue;
                            }

                            // The lookup is exact-case on the untrimmed line. When it fails,
                            // skip the line instead of re-reading a stale file or passing
                            // null to FileInputStream.
                            File fileToSearch = filesByBaseName.get(line);
                            if (fileToSearch == null) {
                                continue;
                            }

                            // The remaining original conditions (name is known, line differs
                            // from the dependency) follow from otherLine.equals(currentName).
                            boolean referencesBack = false;
                            BufferedReader br_1 = new BufferedReader(new InputStreamReader(new FileInputStream(fileToSearch)));
                            try {
                                String otherLine;
                                while ((otherLine = br_1.readLine()) != null) {
                                    if (otherLine.toUpperCase().indexOf("EXTRA") == -1
                                            && otherLine.equals(currentName)) {
                                        referencesBack = true;
                                        break;
                                    }
                                }
                            } finally {
                                br_1.close();
                            }

                            if (referencesBack
                                    && !htCyclicNameList.containsKey(line)
                                    && !htCyclicNameList.containsValue(currentName)) {
                                htCyclicNameList.put(currentName, line);
                                outFile.write("<CyclicDepedency FileName = \"" + currentName + "\""+ " CyclicFileName = \"" +
                                        line + "\" />");
                            }
                        }
                    } finally {
                        br.close();
                    }
                }

                outFile.write("</CyclicDependencyFile>");
            } finally {
                // close() also flushes the buffered output.
                outFile.close();
            }
        }
    }
    catch(Exception e){
        e.printStackTrace();
    }
}

感謝 拉姆

+0

在這裏貼出的代碼片段,應該先清除其他不相關的部分,只專注於問題本身,這一點至關重要。當前的代碼承擔了太多的職責,很難說出緩慢的真正原因是什麼。 – pcjuzer

+0

@pcjuzer:我複製了整段代碼,是爲了確認我寫的內容是否正確。如果讓你感到困惑,請原諒。其實我是在讀取文件夾中的每個文件,並檢查文件中的每一行。如果某一行是一個文件名,我就打開那個文件,檢查其中是否存在第一個文件的名稱;如果存在,就把這一對寫入一個XML文件。 – Ramm

回答

1

您的設計有幾個問題。最重要的是重複掃描文件系統。嘗試下面的代碼。

/**
 * Single-pass variant of the cyclic-dependency scan: every file is read exactly once.
 *
 * <p>Every entry in the listing of {@code strInputPath} that is a directory whose name ends
 * in {@code "Plus"} is treated as a package. Each file's lines are upper-cased, trimmed and
 * split into two sets: lines that sort after the file's own name ("forward") and lines that
 * sort before it ("backward"). Files A and B with A &lt; B reference each other when B is in
 * A's forward set and A is in B's backward set, so each pair is reported once.
 *
 * <p>The report for a package is written to
 * {@code strOutputPath + "_" + <package name> + "_CyclicDependency.xml"}. Any exception is
 * printed and ends the whole scan.
 */
static public void findCyclicDependency2() {
    try {
        File baseInputDirectory = new File(strInputPath);
        List<File> baseInputDirListing = getFileListing(baseInputDirectory);

        for (File swPackage : baseInputDirListing) {
            // A package must be a directory AND end in "Plus"; the former "||" test let
            // every directory (and every plain file ending in "Plus") through.
            if (!swPackage.isDirectory() || !swPackage.getName().endsWith("Plus")) {
                continue;
            }

            // These maps are rebuilt for every package so that pairs found in an earlier
            // package are not written again into later reports.
            Map<String,File> fileNames = new HashMap<String,File>();
            Map<String,Set<String>> fileBackward = new HashMap<String,Set<String>>();
            Map<String,Set<String>> fileForward = new HashMap<String,Set<String>>();

            System.out.println("Loading file names");
            List<File> currSwPackageFileListing = getFileListing(swPackage);
            for (File dependentFile : currSwPackageFileListing) {
                // Lines are upper-cased below, so the key must be too; otherwise no lookup
                // by line content can ever hit. Presumably trimName strips the extension -
                // TODO confirm against its definition.
                String name = trimName(dependentFile).toUpperCase();
                fileNames.put(name, dependentFile);

                Set<String> contFor = new HashSet<String>();
                Set<String> contBack = new HashSet<String>();
                BufferedReader br = new BufferedReader(new FileReader(dependentFile));
                try {
                    String line;
                    while ((line = br.readLine()) != null) {
                        line = line.toUpperCase().trim();
                        if (line.equals("EXTRA")) continue;
                        if (line.equals("INDICES")) continue;
                        if (line.equals(name)) continue;

                        // compareTo only guarantees the sign of its result, never the
                        // value 1, so test for "greater than zero".
                        if (line.compareTo(name) > 0) {
                            contFor.add(line);
                        } else {
                            contBack.add(line);
                        }
                    }
                } finally {
                    br.close();
                }
                fileBackward.put(name, contBack);
                fileForward.put(name, contFor);
            }

            String strCyclicDependencyOut = strOutputPath + "_"
                    + swPackage.getName() + "_CyclicDependency.xml";
            PrintWriter outFile = new PrintWriter(new BufferedWriter(new FileWriter(strCyclicDependencyOut)));
            try {
                outFile.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
                outFile.write("<CyclicDependencyFile>");

                for (Entry<String,Set<String>> entry : fileForward.entrySet()) {
                    String curr = entry.getKey();
                    for (String other : entry.getValue()) {
                        // A line that is not the name of a file in this package has no
                        // entry here and is skipped.
                        Set<String> otherRefs = fileBackward.get(other);
                        if (otherRefs == null) continue;
                        if (otherRefs.contains(curr)) {
                            outFile.write("<CyclicDepedency FileName = \""
                                    + fileNames.get(curr).getPath()
                                    + "\""
                                    + " CyclicFileName = \""
                                    + fileNames.get(other).getPath()
                                    + "\" />");
                        }
                    }
                }

                outFile.write("</CyclicDependencyFile>");
            } finally {
                // close() also flushes the buffered output.
                outFile.close();
            }
        }

    } catch (Exception e) {
        e.printStackTrace();
    }
}
+0

嗨,西蒙,感謝你更新的代碼。但是在「if(otherRefs.contains(curr))」這一行出現了 java.lang.NullPointerException,而且打印出來的「otherRefs」值爲 null。 – Ramm

+0

這是因爲當前文件中的某一行沒有引用任何現有的文件。我已經在獲取集合和使用集合之間添加了空值檢查,來處理這種情況。 –

+0

我加上了返回空值時的檢查代碼,但不知爲何,程序始終不會進入 if(otherRefs.contains(curr)) 這個分支……所以輸出文件中除了標題部分之外沒有任何內容,「otherRefs」的值始終爲空。謝謝 – Ramm

1

我想到了Lucene。

也許先爲所有文件建立索引,然後查詢文件名,並用查詢結果來檢測循環依賴關係,會更高效。

+0

Lucene可能是一個好主意,但前提是文件語料庫是靜態的、不會經常更改。否則,搜索時節省下來的時間又會在創建Lucene索引時損失掉。 – fgysin

+0

完整的索引應該能在3分鐘之內創建完成。這樣我們還剩下57分鐘左右的時間來做循環檢測 ;) –