2009-10-02 30 views
2

我想創建一個搜索網站來搜索文檔(各種格式,包括pdf),圖像,視頻和音頻。我也希望能夠根據作者姓名,日期等條件篩選我的搜索結果。應該選擇Lucene還是SQL全文搜索?

我在.NET中這樣做,那麼啓動和運行最簡單的方法是什麼? SQL全文搜索似乎很誘人,因爲我熟悉sql,而且由於我想過濾搜索結果,所以很容易爲每個項目存儲過濾字段。

+0

我還需要從每個搜索結果獲得片段 – Prabhu

回答

4

如果您主要關心的是如何快速輕鬆地啓動並運行SQL搜索,那麼SQL全文搜索無疑是最佳選擇。

Lucene.NET有其優點,但要把它正確設置好絕非易事。文檔有點缺乏,網絡上的示例數量也非常有限。

+0

感謝......你知道SQL全文搜索是否可以返回搜索結果的摘要(片段)嗎? – Prabhu

+0

是的,你可以做到這一點。但是你需要一個存儲過程。看看這個例子: –

0

內容片斷存儲過程:

-- SimpleCommentar
--
-- Full-text searches dbo.Commentary and returns one highlighted snippet per
-- matching commentary, together with the primary title of its book.
--
-- Parameters:
--   @SearchTerm  Free-text search expression. Used both for the FREETEXT
--                predicate and, wrapped in FORMSOF(FREETEXT, "..."), to
--                enumerate the word forms that are highlighted.
--   @Style       Inline CSS placed in the style attribute of the <span>
--                wrapped around every highlighted term.
--
-- Result set:
--   Commentary_ID, Title, Snippet
--   Snippet is a window of the highlighted text around a matched term,
--   wrapped in leading and trailing '...'. When several terms match the same
--   commentary, MIN() keeps a single snippet per (Commentary_ID, Title).
--
-- NOTE(review): @SearchTerm is concatenated into the sys.dm_fts_parser query
-- string, so a term containing a double quote closes the quoted phrase early
-- and presumably makes the parser call fail -- confirm and sanitise at the
-- caller if needed.
-- NOTE(review): @Style is emitted into HTML without encoding; it should come
-- from trusted code only, never from user input.
CREATE PROCEDURE SimpleCommentar
    @SearchTerm nvarchar(100),
    @Style nvarchar(200)
AS
BEGIN
    -- Suppress the "rows affected" message of the temp-table insert so that
    -- clients only see the final result set.
    SET NOCOUNT ON;

    -- Ids of the commentaries that satisfy the full-text predicate.
    CREATE TABLE #match_docs
    (
        doc_id bigint NOT NULL PRIMARY KEY
    );

    INSERT INTO #match_docs
    (
        doc_id
    )
    SELECT DISTINCT
        Commentary_ID
    FROM dbo.Commentary
    WHERE FREETEXT
    (
        Commentary,
        @SearchTerm,
        LANGUAGE N'English'
    );

    -- Identifiers passed to the full-text DMVs below. Declared one per
    -- statement so the column lookup can reuse @table_id.
    DECLARE @db_id int = DB_ID();
    DECLARE @table_id int = OBJECT_ID(N'dbo.Commentary');
    DECLARE @column_id int =
    (
        SELECT
            column_id
        FROM sys.columns
        WHERE object_id = @table_id
            AND name = N'Commentary'
    );

    SELECT
        s.Commentary_ID,
        t.Title,
        MIN
        (
            N'...' + SUBSTRING
            (
                -- Wrap every occurrence of the matched term in a styled span.
                REPLACE
                (
                    c.Commentary,
                    s.Display_Term,
                    N'<span style="' + @Style + '">' + s.Display_Term + '</span>'
                ),
                -- Window starting 512 characters before the match position.
                -- The position was measured on the text before the span
                -- markup was inserted, so the window is approximate.
                s.Pos - 512,
                s.Length + 1024
            ) + N'...'
        ) AS Snippet
    FROM
    (
        -- One row per (commentary, indexed term) where the term is one of the
        -- forms produced by parsing @SearchTerm.
        SELECT DISTINCT
            c.Commentary_ID,
            w.Display_Term,
            -- Position of the non-letter character preceding the term.
            -- PATINDEX returns 0 when the term is not surrounded by
            -- non-letter characters on both sides, e.g. when it occurs only
            -- at the very start or end of the text.
            PATINDEX
            (
                N'%[^a-z]' + w.Display_Term + N'[^a-z]%',
                c.Commentary
            ) AS Pos,
            LEN(w.Display_Term) AS Length
        FROM sys.dm_fts_index_keywords_by_document
        (
            @db_id,
            @table_id
        ) w
        INNER JOIN dbo.Commentary c
            ON w.document_id = c.Commentary_ID
        WHERE w.column_id = @column_id
            AND EXISTS
            (
                SELECT 1
                FROM #match_docs m
                WHERE m.doc_id = w.document_id
            )
            AND EXISTS
            (
                SELECT 1
                -- Arguments: query string, LCID 1033, stoplist id 0,
                -- accent sensitivity 1.
                FROM sys.dm_fts_parser
                (
                    N'FORMSOF(FREETEXT, "' + @SearchTerm + N'")',
                    1033,
                    0,
                    1
                ) p
                WHERE p.Display_Term = w.Display_Term
            )
    ) s
    INNER JOIN dbo.Commentary c
        ON s.Commentary_ID = c.Commentary_ID
    INNER JOIN dbo.Book_Commentary bc
        ON c.Commentary_ID = bc.Commentary_ID
    INNER JOIN dbo.Book_Title bt
        ON bc.Book_ID = bt.Book_ID
    INNER JOIN dbo.Title t
        ON bt.Title_ID = t.Title_ID
    WHERE t.Is_Primary_Title = 1
    GROUP BY
        s.Commentary_ID,
        t.Title;

    DROP TABLE #match_docs;
END;