0
我正在開發一個非常基本的網絡搜索引擎,它由幾個部分組成。根據用戶查詢檢索到結果後,我想爲每個結果計算一個評分(rate),然後按計算出的評分對結果進行排序。(問題標題:根據用戶查詢對結果進行排序的簡單評級算法)這是我的查詢:
// Stage 1: pair every url with its document and attach the text of every word
// that has a hit recorded for that document.
// NOTE(review): the join matches urls.UrlId against documents.DocumentId — confirm
// that these two keys are really meant to line up.
var tmpQuery = (from urls in _context.Urls
                join documents in _context.Documents
                    on urls.UrlId equals documents.DocumentId
                let words = (from words in _context.Words
                             join hits in _context.Hits
                                 on words.WordId equals hits.WordId
                             where hits.DocumentId == documents.DocumentId
                             select words.Text)
                select new { urls, documents, words });

// Stage 2: AsEnumerable() switches to LINQ to Objects, so the keyword filter and the
// rating below run in memory over every url/document pair produced by stage 1.
// NOTE(review): r.words is consumed by both the filter and CalculateRating; if it is a
// lazy per-row query it is presumably re-executed each time — worth profiling.
var results = (from r in tmpQuery.AsEnumerable()
               where r.urls.ResolvedPath.Contains(breakedQuery, KeywordParts.Url, part) ||
                     r.documents.Title.Contains(breakedQuery, KeywordParts.Title, part) ||
                     r.documents.Keywords.Contains(breakedQuery, KeywordParts.Keywords, part) ||
                     // Fixed: was a bare `Description`; every sibling call passes a KeywordParts member.
                     r.documents.Description.Contains(breakedQuery, KeywordParts.Description, part) ||
                     r.words.Contains(breakedQuery, KeywordParts.Content, part)
               select new SearchResult()
               {
                   UrlId = r.urls.UrlId,
                   Url = r.urls.ResolvedPath,
                   IndexedOn = r.documents.IndexedOn,
                   Title = r.documents.Title,
                   Description = r.documents.Description,
                   Host = new Uri(r.urls.ResolvedPath).Host,
                   Length = r.documents.Length,
                   // Fixed: a stray "0" was fused onto the call and made the initializer invalid.
                   Rate = CalculateRating(breakedQuery, r.urls.ResolvedPath, r.documents.Title, r.documents.Keywords, r.documents.Description, r.words)
               }).AsEnumerable()
              .OrderByDescending(result => result.Rate)
              .Distinct(new SearchResultEqualityComparer());
評分(Rate)是通過以下方法計算的:
/// <summary>
/// Computes the rating of a single search result for the given broken-up user query.
/// </summary>
/// <param name="breakedQuery">Individual query terms; a term starting with "-" is an exclusion term.</param>
/// <param name="resolvedPath">Resolved URL of the result (weight 100 per match).</param>
/// <param name="title">Document title (weight 50 per match).</param>
/// <param name="keywords">Document keywords (weight 25 per match).</param>
/// <param name="description">Document description (weight 10 per match).</param>
/// <param name="words">Content words of the document (weight 1 per match); may be null.</param>
/// <returns>
/// The sum of the weighted match counts of all regular terms minus the weighted match
/// counts of all exclusion terms. The value can be negative.
/// </returns>
private int CalculateRating(IEnumerable<string> breakedQuery, string resolvedPath, string title, string keywords, string description, IEnumerable<string> words)
{
    const int PathWeight = 100;
    const int TitleWeight = 50;
    const int KeywordsWeight = 25;
    const int DescriptionWeight = 10;

    // Materialize once: words may be a lazy sequence, and it is scanned for every term.
    var wordList = words == null ? null : words.ToList();

    var baseRate = 0;
    foreach (var query in breakedQuery)
    {
        /* The raw user query (Microsoft -Apple) has already been broken into terms
           (Microsoft, -Apple). A term starting with "-" means the results should not
           contain it, so its score is subtracted instead of added. */
        var none = query.StartsWith("-", StringComparison.Ordinal);

        // Strip only the leading marker; hyphens inside the term ("e-mail") are part of it.
        var term = none ? query.Substring(1) : query;
        if (term.Length == 0)
        {
            // A bare "-" carries no term to score.
            continue;
        }

        var pathCount = Calculate(resolvedPath, term);
        var titleCount = Calculate(title, term);
        var keywordsCount = Calculate(keywords, term);
        var descriptionCount = Calculate(description, term);
        var wordsCount = Calculate(wordList, term);

        var result = (pathCount * PathWeight)
                     + (titleCount * TitleWeight)
                     + (keywordsCount * KeywordsWeight)
                     + (descriptionCount * DescriptionWeight)
                     + wordsCount;

        if (none)
        {
            baseRate -= result;
        }
        else
        {
            baseRate += result;
        }
    }

    return baseRate;
}
/// <summary>
/// Scores how often <paramref name="query"/> occurs among the space-separated tokens of
/// <paramref name="source"/> by delegating to the sequence overload.
/// </summary>
/// <param name="source">Text to search; null, empty or whitespace-only text scores 0.</param>
/// <param name="query">Term to look for.</param>
/// <returns>The score produced by the sequence overload, or 0 when there is no text.</returns>
private int Calculate(string source, string query)
{
    if (string.IsNullOrWhiteSpace(source))
    {
        return 0;
    }

    // Split on single spaces only; the typed local selects the IEnumerable<string> overload.
    IEnumerable<string> tokens = source.Split(' ');
    return Calculate(tokens, query);
}
/// <summary>
/// Scores how often <paramref name="query"/> occurs in <paramref name="sources"/>.
/// </summary>
/// <param name="sources">Tokens to inspect; null or empty yields 0. Null tokens are ignored.</param>
/// <param name="query">Term to look for; null yields 0.</param>
/// <returns>
/// One point per exact (case-sensitive) match plus half a point, rounded down over the
/// total, per match that differs only by case.
/// </returns>
private int Calculate(IEnumerable<string> sources, string query)
{
    if (sources == null || query == null)
    {
        return 0;
    }

    var exactMatches = 0;
    var caseOnlyMatches = 0;

    // Single pass. The previous Except(...) was a set difference, which collapsed
    // repeated tokens and therefore under-counted case-insensitive matches.
    foreach (var source in sources)
    {
        if (source == null)
        {
            continue;
        }

        if (string.Equals(source, query, StringComparison.Ordinal))
        {
            exactMatches++;
        }
        else if (string.Equals(source, query, StringComparison.OrdinalIgnoreCase))
        {
            caseOnlyMatches++;
        }
    }

    // Case-insensitive matches are worth half of an exact match (integer division).
    return exactMatches + (caseOnlyMatches / 2);
}
請指導我如何提高性能(我的搜索引擎速度非常非常慢)。
是的,真正的過濾實際上是在第二個查詢中執行的。請看這裏,瞭解我爲什麼使用這個簽名: http://stackoverflow.com/questions/3274648/method-boolean-contains-has-no-supported-translation-to-sql – Sadegh 2010-07-18 09:11:08
出於測試目的,目前的數據量超過 1176 個頁面、57283 個網址、35733 個單詞和 330621 條命中記錄(hits,用於保存單詞與文檔之間的關係)。 – Sadegh 2010-07-18 09:12:20
我認爲儘可能多地在存儲過程中完成這項工作會更好。 – 2010-07-18 21:57:13