我知道這個帖子已經很舊了,但我上週試著實作了這個功能,過程相當痛苦。這遠非完美,但這是我想出來的做法。
片段生成器:
/// <summary>
/// Builds an excerpt of <paramref name="StringToSnip"/> from the text surrounding each keyword occurrence.
/// </summary>
/// <param name="StringToSnip">The text to take snippets from.</param>
/// <param name="Keywords">The keywords to look for (matched with <see cref="StringComparison.CurrentCultureIgnoreCase"/>). Null or empty entries are ignored.</param>
/// <param name="SnippetLength">The approximate width, in characters, of each snippet; half of it is taken on each side of a keyword location.</param>
/// <returns>
/// The snippets joined by " ... ", with a leading "... " when the first snippet does not start at the beginning
/// of the text. Returns an empty string when the text or keyword array is null/empty, when no keyword is found,
/// or when <paramref name="SnippetLength"/> is less than 2.
/// </returns>
/// <remarks>
/// Snippets stop being added once the result is longer than 200 characters, so the result may exceed that
/// length by up to one snippet plus a separator.
/// </remarks>
public static string SelectKeywordSnippets(string StringToSnip, string[] Keywords, int SnippetLength)
{
    const string Separator = " ... ";
    const int MaxResultLength = 200;

    int halfLength = SnippetLength / 2;
    if (string.IsNullOrEmpty(StringToSnip) || Keywords == null || halfLength <= 0)
    {
        // Nothing to search, nothing to search for, or no room for any text around a keyword.
        return "";
    }

    // Get the locations of all keywords.
    List<int> keywordLocations = new List<int>();
    foreach (string keyword in Keywords)
    {
        // Null or empty keywords cannot produce a meaningful hit; skip them rather than searching for them.
        if (string.IsNullOrEmpty(keyword))
        {
            continue;
        }

        keywordLocations.AddRange(SharedTools.IndexOfAll(StringToSnip, keyword, StringComparison.CurrentCultureIgnoreCase));
    }

    if (keywordLocations.Count == 0)
    {
        return "";
    }

    // Sort locations.
    keywordLocations.Sort();

    // Merge locations that are closer to each other than half the snippet length:
    // the pair is replaced by its midpoint. Repeat until a full pass merges nothing.
    bool merged = true;
    while (merged)
    {
        merged = false;
        for (int i = keywordLocations.Count - 1; i > 0; i--)
        {
            if (keywordLocations[i] - keywordLocations[i - 1] < halfLength)
            {
                keywordLocations[i - 1] = (keywordLocations[i] + keywordLocations[i - 1]) / 2;
                keywordLocations.RemoveAt(i);
                merged = true;
            }
        }
    }

    // Make the snippets.
    System.Text.StringBuilder snippets = new System.Text.StringBuilder();
    if (keywordLocations[0] - halfLength > 0)
    {
        snippets.Append("... ");
    }

    int previousEnd = 0;            // exclusive end of the text emitted so far
    bool endsWithSeparator = false; // whether the builder currently ends with Separator
    foreach (int location in keywordLocations)
    {
        int start = Math.Max(0, location - halfLength);
        int end = Math.Min(location + halfLength, StringToSnip.Length);

        // Locations may be as close as half a snippet apart, so this window can touch or overlap
        // the previous one. In that case continue where the previous snippet stopped instead of
        // repeating text, and drop the separator because nothing was skipped.
        if (start <= previousEnd)
        {
            if (endsWithSeparator)
            {
                snippets.Length -= Separator.Length;
            }

            start = previousEnd;
        }

        if (end > start)
        {
            snippets.Append(StringToSnip, start, end - start);
        }

        endsWithSeparator = end < StringToSnip.Length;
        if (endsWithSeparator)
        {
            snippets.Append(Separator);
        }

        previousEnd = Math.Max(previousEnd, end);

        if (snippets.Length > MaxResultLength)
        {
            break;
        }
    }

    return snippets.ToString();
}
下面這個函數會找出所有關鍵字在示例文本中的索引:
/// <summary>
/// Finds the start index of every non-overlapping occurrence of <paramref name="needle"/> in
/// <paramref name="haystack"/>, scanning left to right.
/// </summary>
/// <param name="haystack">The text to search.</param>
/// <param name="needle">The text to look for.</param>
/// <param name="Comparison">The comparison rule passed to <c>string.IndexOf</c>.</param>
/// <returns>
/// The match positions in ascending order; an empty list when either string is null or empty
/// or when there is no match.
/// </returns>
private static List<int> IndexOfAll(string haystack, string needle, StringComparison Comparison)
{
    List<int> positions = new List<int>();

    // An empty needle matches at the search offset without advancing it, which would loop forever.
    if (string.IsNullOrEmpty(haystack) || string.IsNullOrEmpty(needle))
    {
        return positions;
    }

    int offset = 0;

    // The bound check keeps the start index valid for IndexOf even if a match reported near the
    // end of the text is shorter than the needle (possible with culture-sensitive comparisons).
    while (offset < haystack.Length)
    {
        int pos = haystack.IndexOf(needle, offset, Comparison);
        if (pos < 0)
        {
            break;
        }

        positions.Add(pos);

        // Resume after this match so occurrences do not overlap.
        offset = pos + needle.Length;
    }

    return positions;
}
它的實現方式有點笨拙。它的工作方式是先找出字符串中所有關鍵字的位置,然後檢查沒有任何兩個關鍵字之間的距離小於期望的片段長度,以便片段不會重疊(這正是它不太可靠的地方……)。接著以每個關鍵字位置爲中心,截取所需長度的子串,再把所有子串拼接在一起。
我知道這個回答晚了好幾年,但還是把它貼出來,說不定能幫到碰巧遇到這個問題的人。
感嘆......又浪費了150分...... – 2010-07-27 20:44:26
?那是什麼意思? – 2010-08-17 18:57:30
對於任何對這個問題感興趣的人:有一個更新的、與語言無關的問題,其回答的評分比這個問題的任何回答都高:** [給定一個文檔,選擇一個相關的片段](http://stackoverflow.com/questions/2829303)** – hippietrail 2012-10-20 18:02:06