2017-09-28 76 views
0

查找陣列中重複的短語(而不只是單詞)

比方說,我有一個陣列:

[ 
"I want **a dog**", 
"**A dog** is here", 
"Pet **a dog**", 
"A **red cat**", 
"**red cat** is cute" 
... 
] 

如何找出重複的短語,而不只是重複的單詞?例如,我想要返回「a dog」和「red cat」。

我發現的大多數現有帖子只是關於獲取單個詞,而不是詞組(多個詞)。

+0

你有沒有嘗試過任何東西?至少是一種合乎邏輯的方法? – brk

+0

陣列有多大。 – Amit

+0

陣列中有星號嗎? – PHPglue

回答

0

你給我們的信息太少。我假設你是以空格來分割句子。ES6 來拯救 :)。在尋找重複的短語時,Set(集合)的查找是 O(1)。

編輯:我剛意識到,只要做一些小修改,就能大幅降低空間複雜度。如果你需要我這樣做,請告訴我。

/**
 * Lists every contiguous run of words in a sentence.
 *
 * The sentence is split on single spaces. Runs are produced longest first
 * (the whole sentence), down to single words; runs of equal length appear
 * in left-to-right order.
 *
 * @param {string} sentence - Text to break into word runs.
 * @returns {string[]} Every contiguous word run, re-joined with single spaces.
 */
const buildAllPhrases = sentence => {
    const words = sentence.split(" ");
    const wordCount = words.length;
    const collected = [];
    let width = wordCount;
    while (width > 0) {
        // Last index at which a window of this width still fits.
        const lastStart = wordCount - width;
        for (let start = 0; start <= lastStart; start += 1) {
            collected.push(words.slice(start, start + width).join(" "));
        }
        width -= 1;
    }
    return collected;
};

/**
 * Collects every phrase (contiguous word run) that is produced more than once
 * across the given sentences.
 *
 * Each sentence is expanded with buildAllPhrases; a phrase counts as repeated
 * the second time it is produced, whether that is in another sentence or in
 * the same one. Comparison is exact (case-sensitive).
 *
 * @param {string[]} sentences - Sentences to scan.
 * @returns {string[]} Repeated phrases, in the order each was first seen twice.
 */
const findRepeats = sentences => {
    const seen = new Set();
    const repeated = new Set();

    // First sighting goes into `seen`; any later sighting marks a repeat.
    const record = candidate => {
        if (!seen.has(candidate)) {
            seen.add(candidate);
            return;
        }
        repeated.add(candidate);
    };

    sentences.forEach(sentence => {
        for (const candidate of buildAllPhrases(sentence)) {
            record(candidate);
        }
    });

    return Array.from(repeated);
};

// Example input taken from the question; the asterisks are part of the strings.
const sample = [
    "I want **a dog**",
    "**A dog** is here",
    "Pet **a dog**",
    "A **red cat**",
    "**red cat** is cute",
];

findRepeats(sample);
// => ['dog**', '**a dog**', '**a', '**red cat**', '**red', 'cat**', 'is']
0

這不是JavaScript函數的最終版本,它可以進一步優化。可能還需要進行一些更改,但可以根據您的要求進行調整。

/**
 * Finds multi-word phrases (two or more consecutive words) that occur in at
 * least two of the given sentences.
 *
 * Every sentence is lower-cased and split on single spaces; empty tokens
 * produced by repeated spaces are dropped. Each sentence is then compared with
 * every earlier sentence, and every maximal run of two or more consecutive
 * words of the later sentence that also appears, contiguously and word for
 * word, in the earlier sentence is reported.
 *
 * Matching is done on whole words. Comparing joined text instead would let
 * "a,dog" match inside "banana,dogs", and looking words up with indexOf would
 * only ever see the first occurrence of a repeated word.
 *
 * @param {string[]} stringsArray - Sentences to compare.
 * @returns {string[]} Unique shared phrases in order of first discovery. Each
 *     phrase is lower-cased, its words are joined by single spaces, and it
 *     ends with one trailing space (kept so existing callers see the same
 *     output format as before).
 */
function GetPhrases(stringsArray) {
    // Lower-cased word list of one sentence, without empty tokens.
    function splitIntoWords(sentence) {
        return sentence
            .toLowerCase()
            .split(" ")
            .filter(function (word) {
                return word.length > 0;
            });
    }

    // For each index i of `words`, the length of the longest run starting at
    // words[i] that also occurs contiguously somewhere in `other`.
    function longestMatchLengths(words, other) {
        const best = new Array(words.length).fill(0);
        // nextRow[j] = common-prefix length of words[i + 1..] and other[j..].
        let nextRow = new Array(other.length + 1).fill(0);
        for (let i = words.length - 1; i >= 0; i--) {
            const row = new Array(other.length + 1).fill(0);
            for (let j = other.length - 1; j >= 0; j--) {
                if (words[i] === other[j]) {
                    row[j] = nextRow[j + 1] + 1;
                    if (row[j] > best[i]) {
                        best[i] = row[j];
                    }
                }
            }
            nextRow = row;
        }
        return best;
    }

    // Maximal runs (>= 2 words) of `later` that occur contiguously in `earlier`.
    function sharedRuns(later, earlier) {
        const best = longestMatchLengths(later, earlier);
        const runs = [];
        let coveredUntil = 0;
        for (let start = 0; start < later.length; start++) {
            const end = start + best[start];
            // Skip runs that lie entirely inside one already reported.
            if (best[start] >= 2 && end > coveredUntil) {
                runs.push(later.slice(start, end));
                coveredUntil = end;
            }
        }
        return runs;
    }

    const sentences = stringsArray.map(splitIntoWords);
    const seen = new Set();
    const phrases = [];

    for (let earlier = 0; earlier < sentences.length; earlier++) {
        for (let later = earlier + 1; later < sentences.length; later++) {
            const runs = sharedRuns(sentences[later], sentences[earlier]);
            for (const run of runs) {
                const phrase = run.join(" ") + " ";
                if (!seen.has(phrase)) {
                    seen.add(phrase);
                    phrases.push(phrase);
                }
            }
        }
    }

    return phrases;
}
 

 
// Example sentences used to exercise GetPhrases.
var stringsArray = [
    "I want a dog",
    "A dog is here",
    "Pet a dog is cute",
    "A red cat is here",
    "red cat is cute"
];

var result = GetPhrases(stringsArray);

// Write each phrase found to the console, one per line.
var iPhrase = 0;
while (iPhrase < result.length) {
    console.log(result[iPhrase]);
    iPhrase++;
}

0

使用正則表達式,您可以檢測字符串中的重複項。
根據這個表達式: (?:.*?)(\b\w.{3,}\b)(?:.*?)(\1)

只有當你要找的是出現兩次的相同模式時,這個表達式才會起作用。
注意:您可以用任何其他整數替換 {3,} 中的 3,並觀察結果的變化。該參數限定了要查找的重複字符串的最小長度。