2012-01-23 155 views
4

給定一組字符串(大集合)和一個輸入字符串,您需要高效地查找輸入字符串的所有字母。你會使用什麼數據結構。使用它,你將如何找到字形?我想到的在一組字符串中查找輸入字符串..?

事情是這些:

  1. 使用地圖

    一)消除了比輸入更多/更少字母的所有單詞。

    二)把輸入的字符在地圖

    C)遍歷地圖每個字符串,看看是否所有字母都存在與他們的計數。

  2. 使用嘗試次數

    一)把具有字符正確的號碼爲線索的所有字符串。

    b)如果輸入中包含字母,則遍歷每個分支並進一步深入。

    c)若葉達到這個詞是字謎

任何人都可以找到一個更好的解決方案?

在上述方法中是否有任何問題?

+0

比方說,你有一個字謎的候選人。您可以嘗試對輸入字符串和此字符串進行排序 - 排序後它們應該是相同的。你有沒有考慮過這種方法? – user998692

+0

排序會給我額外的時間消耗。而我的上述方法是線性的,無需排序 –

+0

假設平均字長在範圍[3,20]字符中......當對一個字進行排序時,您的數據比較有限。此外,一旦使用散列表預處理了整個字典,則每個後續對getAnagrams的調用都將是O(1),而在trie方法中不是這樣。 –

回答

5

從每個單詞構建一個頻率圖並比較這些圖。

僞代碼:

class Word 

    string word 
    map<char, int> frequency 

    Word(string w) 
    word = w 
    for char in word 
     int count = frequency.get(char) 
     if count == null 
     count = 0 
     count++ 
     frequency.put(char, count) 

    boolean is_anagram_of(that) 
    return this.frequency == that.frequency 
+0

聽起來不錯!但它比trie方法更快嗎?此外,由於某些字母會很常見,因此字典會使用較少的內存。 –

+0

@KshitijBanerjee爲了工作,你需要對這些字符進行排序並且從那些已排序的字符中構建字典句柄(你會如何確定「mary」和「army」是字謎)?對字詞排序需要'O(n * log(n))',同時構建一個基於散列的映射將採用'O(n)'。 –

+0

爲什麼我會排序來製作特里?考慮瑪麗作爲輸入詞和包含軍隊的列表。我在mary上創建一個hashmap。現在在列表中的所有單詞上建立一個trie。並遍歷每個分支。當分支是軍隊時,我會順利地到達葉子,並且我有一場比賽。不需要排序..對嗎? –

4

你可以建立其中關鍵的排序的的HashMap(字),並將該值是,排序的所有單詞的列表,給予相應的鍵:

private Map<String, List<String>> anagrams = new HashMap<String, List<String>>(); 

void buildIndex(){ 
    for(String word : words){ 
     String sortedWord = sortWord(word); 
     if(!anagrams.containsKey(sortedWord)){ 
      anagrams.put(sortedWord, new ArrayList<String>()); 
     } 
     anagrams.get(sortedWord).add(word); 
    } 
} 

然後,您只需在剛剛構建的散列映射中查找已排序的單詞,即可得到所有字母表的列表。

0
import java.util.HashMap; 
import java.util.HashSet; 
import java.util.Map; 
import java.util.Set; 
/* 
*Program for Find Anagrams from Given A string of Arrays. 
* 
*Program's Maximum Time Complexity is O(n) + O(klogk), here k is the length of word. 
* 
* By removal of Sorting, Program's Complexity is O(n) 
* **/ 
public class FindAnagramsOptimized { 
    public static void main(String[] args) { 
     String[] words = { "gOd", "doG", "doll", "llod", "lold", "life", 
"sandesh", "101", "011", "110" }; 
     System.out.println(getAnaGram(words)); 
    } 
    // Space Complexity O(n) 
    // Time Complexity O(nLogn) 
    static Set<String> getAnaGram(String[] allWords) { 
     // Internal Data Structure for Keeping the Values 
     class OriginalOccurence { 
      int occurence; 
      int index; 
     } 
     Map<String, OriginalOccurence> mapOfOccurence = new HashMap<>(); 
     int count = 0; 
     // Loop Time Complexity is O(n) 
    // Space Complexity O(K+2K), here K is unique words after sorting on a 

    for (String word : allWords) { 
     String key = sortedWord(word); 

     if (key == null) { 
      continue; 
     } 
     if (!mapOfOccurence.containsKey(key)) { 
      OriginalOccurence original = new OriginalOccurence(); 
      original.index = count; 
      original.occurence = 1; 
      mapOfOccurence.put(key, original); 
     } else { 
      OriginalOccurence tempVar = mapOfOccurence.get(key); 
      tempVar.occurence += 1; 
      mapOfOccurence.put(key, tempVar); 
     } 
     count++; 
    } 

    Set<String> finalAnagrams = new HashSet<>(); 

    // Loop works in O(K), here K is unique words after sorting on 
    // characters 
    for (Map.Entry<String, OriginalOccurence> anaGramedWordList : mapOfOccurence.entrySet()) { 
     if (anaGramedWordList.getValue().occurence > 1) { 
      finalAnagrams.add(allWords[anaGramedWordList.getValue().index]); 
     } 
    } 

    return finalAnagrams; 
} 

// Array Sort works in O(nLogn) 
// Customized Sorting for only chracter's works in O(n) time. 
private static String sortedWord(String word) { 

    // int[] asciiArray = new int[word.length()]; 
    int[] asciiArrayOf26 = new int[26]; 
    // char[] lowerCaseCharacterArray = new char[word.length()]; 
    // int characterSequence = 0; 
    // Ignore Case Logic written in lower level 
    for (char character : word.toCharArray()) { 
     if (character >= 97 && character <= 122) { 
      // asciiArray[characterSequence] = character; 
      if (asciiArrayOf26[character - 97] != 0) { 
       asciiArrayOf26[character - 97] += 1; 
      } else { 
       asciiArrayOf26[character - 97] = 1; 
      } 
     } else if (character >= 65 && character <= 90) { 
      // asciiArray[characterSequence] = character + 32; 
      if (asciiArrayOf26[character + 32 - 97] != 0) { 
       asciiArrayOf26[character + 32 - 97] += 1; 
      } else { 
       asciiArrayOf26[character + 32 - 97] = 1; 
      } 
     } else { 
      return null; 
     } 

     // lowerCaseCharacterArray[characterSequence] = (char) 
     // asciiArray[characterSequence]; 
     // characterSequence++; 
    } 
    // Arrays.sort(lowerCaseCharacterArray); 

    StringBuilder sortedWord = new StringBuilder(); 
    int asciiToIndex = 0; 
    // This Logic uses for reading the occurrences from array and copying 
    // back into the character array 
    for (int asciiValueOfCharacter : asciiArrayOf26) { 
     if (asciiValueOfCharacter != 0) { 
      if (asciiValueOfCharacter == 1) { 
       sortedWord.append((char) (asciiToIndex + 97)); 
      } else { 
       for (int i = 0; i < asciiValueOfCharacter; i++) { 
        sortedWord.append((char) (asciiToIndex + 97)); 
       } 
      } 
     } 
     asciiToIndex++; 
    } 
    // return new String(lowerCaseCharacterArray); 
    return sortedWord.toString(); 
} 
}