把下面的代碼保存到名爲 THISfile.py 的文件中並執行它,看看它會輸出什麼:
# myFile = input("Enter file name: ")
# line No 2: line with with double 'with'
# line No 3: double (word , word) is not a double word
myFile = "THISfile.py"

# NOTE: when saved as THISfile.py this script scans its own source, so the
# two comment lines below intentionally contain repeated words that the scan
# is expected to report -- do not "correct" them:
#   a 'visited' word word is added to the unique set only once
#   after all all work on a word is done, it becomes the previous word


def find_word_doubles(path):
    """Scan a text file for words that are immediately repeated on a line.

    A "word" is any whitespace-separated token as produced by ``str.split()``;
    punctuation stays attached and the comparison is case-sensitive, so
    ``(word`` and ``word)`` are different words.  Repetitions are only looked
    for within a single line: the last word of one line is never compared
    with the first word of the next.

    Args:
        path: Path of the file to scan.

    Returns:
        A ``(doubles, total_words, unique_count)`` tuple where ``doubles`` is
        a list of ``(line_number, word)`` pairs in file order (line numbers
        start at 1, one pair per adjacent repetition, so ``"a a a"`` yields
        two pairs), ``total_words`` is the number of tokens in the file and
        ``unique_count`` is the number of distinct tokens.

    Raises:
        OSError: If the file cannot be opened.
    """
    unique_words = set()  # set membership is O(1); a list would be O(n) per word
    total_words = 0
    doubles = []

    # Python source is UTF-8 by default, so do not depend on the locale here.
    # A distinct name for the handle keeps the path variable intact.
    with open(path, "r", encoding="utf-8") as handle:
        for line_no, line in enumerate(handle, start=1):
            previous = None  # reset per line: doubles never span a line break
            for word in line.split():
                total_words += 1
                unique_words.add(word)
                if word == previous:
                    doubles.append((line_no, word))
                previous = word

    return doubles, total_words, len(unique_words)


def main():
    """Scan ``myFile`` and print each double found, followed by a summary."""
    doubles, total_words, unique_count = find_word_doubles(myFile)

    for line_no, word in doubles:
        print("Found double word: ['{}'] in line {}".format(word, line_no))

    # One list entry per double found, so a line holding several doubles is
    # listed several times.
    print(
        "noOfDoubles", len(doubles), "\n",
        "totalNoOfWords", total_words, "uniqueWords", unique_count, "\n",
        "linesWithDoubles", [line_no for line_no, _ in doubles]
    )


if __name__ == "__main__":
    main()
輸出應該類似於下面這樣(腳本掃描的是它自己的源文件,因此具體的行號和計數取決於文件的確切內容):
Found double word: ['with'] in line 2
Found double word: ['word'] in line 19
Found double word: ['all'] in line 33
noOfDoubles 3
totalNoOfWords 221 uniqueWords 111
linesWithDoubles [2, 19, 33]
現在您可以查看代碼中的註釋,以更好地瞭解它的工作原理。祝您編碼愉快 :)
只需在每次行迭代時重置 'LST = []' 即可。 –
@ Jean-FrançoisFabre,它可以檢測任何重複的單詞,而不僅僅是相鄰的單詞。 – Maciek