2017-01-18 85 views
1

我的印象是,當搜索短語用雙引號括起來時,它會執行精確搜索。但我也得到了部分匹配的結果(儘管得分很低)。我期望它只返回完全匹配的結果。以下是我的示例代碼……我是否遺漏了什麼?(標題:MarkLogic 搜索語法問題)

xquery version "1.0-ml"; 
declare namespace html = "http://www.w3.org/1999/xhtml"; 
import module namespace search = "http://marklogic.com/appservices/search" at "/MarkLogic/appservices/search/search.xqy"; 
import module namespace functx = "http://www.functx.com" at "/MarkLogic/functx/functx-1.0-doc-2007-01.xqy"; 

(: Reproduction script: runs a Search API string query against the options
   defined below and returns the first page of results.
   NOTE(review): the html namespace and the functx module are declared but
   not referenced anywhere in the visible script. :)

(: Search string: a phrase, a "context" constraint value, and a "sort"
   operator state.
   NOTE(review): the phrase is wrapped in the characters 「 and 」 rather than
   ASCII double quotes. The search:parse output posted for this query shows
   two separate word-queries whose text still contains those characters, so
   the phrase is not being parsed as a single quoted phrase. :)
let $q := '(「protein degradation」) AND ((context:PCS)) AND (sort:date_desc)' 

(: Search options: an additional collection query, a "sort" operator with
   date_desc / date_asc states on the upload_date field, string range
   constraints "context" and "type", term options applied to parsed terms,
   and the "unfiltered" search option.
   NOTE(review): the term options include "wildcarded" and the three
   "-insensitive" options, so terms are not matched exactly; confirm whether
   that is intended for a query that expects exact phrase matching. :)
let $options := 
    <options xmlns="http://marklogic.com/appservices/search"> 
    <additional-query> 
     <cts:collection-query xmlns:cts="http://marklogic.com/cts"> 
      <cts:uri>http://XXXXX/type/envelope</cts:uri> 
     </cts:collection-query> 
    </additional-query> 
    <operator name="sort"> 
     <state name="date_desc"> 
      <sort-order type="xs:dateTime" direction="descending"> 
       <field name="upload_date"/> 
      </sort-order> 
     </state> 
     <state name="date_asc"> 
      <sort-order type="xs:dateTime" direction="ascending"> 
       <field name="upload_date"/> 
      </sort-order> 
     </state> 
    </operator> 
    <constraint name="context"> 
     <range type="xs:string" facet="true"> 
      <element name="context" ns="http://XXXXX/metadata"/> 
      <facet-option>frequency-order</facet-option> 
      <facet-option>descending</facet-option> 
     </range> 
    </constraint> 
    <constraint name="type"> 
     <range type="xs:string" facet="true"> 
      <element name="type" ns="http://XXXXX/metadata"/> 
      <facet-option>frequency-order</facet-option> 
      <facet-option>descending</facet-option> 
     </range> 
    </constraint> 
    <term> 
     <term-option>case-insensitive</term-option> 
     <term-option>punctuation-insensitive</term-option> 
     <term-option>whitespace-insensitive</term-option> 
     <term-option>wildcarded</term-option> 
    </term> 
    <search-option>unfiltered</search-option> 
</options> 

(: Paging: start at the first result and request a page of length 1. :)
let $start := 1 
let $page-length :=1 

(: Run the search and return the search response unchanged. :)
let $result := search:search($q, $options, $start, $page-length) 
return $result 

以下是我得到的結果。我很困惑:我不明白爲什麼下面的結果會作爲一個命中返回。

<search:result index="1" uri="/documents/PCS/0ba1e4a0190b77a3962e1218c3c1a7f4cb233ddf.xml" path="fn:doc(&quot;/documents/PCS/0ba1e4a0190b77a3962e1218c3c1a7f4cb233ddf.xml&quot;)" score="58624" confidence="0.329381" fitness="0.5856407"> 
    <search:snippet> 
    <search:match path="fn:doc(&quot;/documents/PCS/0ba1e4a0190b77a3962e1218c3c1a7f4cb233ddf.xml&quot;)/*:document-envelope/*:metadata/*:context"> 
     <search:highlight>PCS</search:highlight> 
    </search:match> 
    <search:match path="fn:doc(&quot;/documents/PCS/0ba1e4a0190b77a3962e1218c3c1a7f4cb233ddf.xml&quot;)/*:document-envelope/*:extractedText/*:html/*:body/*:p[1]"> 
Analysis of the Safety Risks Associated with Hydrazine as a <search:highlight>Degradation</search:highlight> Product in LCIG RD12714 ra-rd12714-hydrazine</search:match> 
    <search:match path="fn:doc(&quot;/documents/PCS/0ba1e4a0190b77a3962e1218c3c1a7f4cb233ddf.xml&quot;)/*:document-envelope/*:extractedText/*:html/*:body/*:p[9]">...of the Safety Risks Associated with Hydrazine as a <search:highlight>Degradation</search:highlight> Product in...</search:match> 
    </search:snippet> 
</search:result> 

請注意,上面的結果只匹配了 <search:highlight>Degradation</search:highlight>……既然我們嘗試進行的是精確搜索,爲什麼它會進行部分匹配?

-----補充:search:parse 的輸出------

<cts:and-query xmlns:cts="http://marklogic.com/cts" xmlns:search="http://marklogic.com/appservices/search"> 
    <cts:word-query> 
    <cts:text xml:lang="en">「protein</cts:text> 
    <cts:option>case-insensitive</cts:option> 
    <cts:option>punctuation-insensitive</cts:option> 
    <cts:option>whitespace-insensitive</cts:option> 
    <cts:option>wildcarded</cts:option> 
    </cts:word-query> 
    <cts:word-query> 
    <cts:text xml:lang="en">degradation」</cts:text> 
    <cts:option>case-insensitive</cts:option> 
    <cts:option>punctuation-insensitive</cts:option> 
    <cts:option>whitespace-insensitive</cts:option> 
    <cts:option>wildcarded</cts:option> 
    </cts:word-query> 
    <cts:element-range-query operator="="> 
    <cts:element xmlns:_1="http://XXXXX/metadata">_1:context</cts:element> 
    <cts:value xsi:type="xs:string" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">PCS</cts:value> 
    <cts:option>collation=http://marklogic.com/collation/</cts:option> 
    </cts:element-range-query> 
    <cts:annotation operator-ref="sort" state-ref="date_desc"> 
    </cts:annotation> 
</cts:and-query> 
+0

請分享用您的查詢和選項調用 search:parse 得到的輸出。這是調試字符串搜索的好方法。 –

+0

已添加 search:parse 的輸出 – Ravi

回答

2

我認爲問題出在花式引號(非 ASCII 的引號字符)上:

import module namespace search = "http://marklogic.com/appservices/search" at "/MarkLogic/appservices/search/search.xqy"; 

(: Phrase delimited with ASCII double quotes; the output shown for this call
   is a single word-query for the whole phrase. :)
search:parse('"protein degradation"') 

給出:

<cts:word-query xmlns:cts="http://marklogic.com/cts"> 
    <cts:text xml:lang="en">protein degradation</cts:text> 
</cts:word-query> 

同時:

import module namespace search = "http://marklogic.com/appservices/search" at "/MarkLogic/appservices/search/search.xqy"; 

(: Same phrase delimited with 「 and 」 instead of ASCII double quotes; the
   output shown for this call is an and-query of two word-queries whose text
   still contains the quote characters. :)
search:parse('「protein degradation」') 

給出:

<cts:and-query xmlns:cts="http://marklogic.com/cts"> 
    <cts:word-query> 
    <cts:text xml:lang="en">「protein</cts:text> 
    </cts:word-query> 
    <cts:word-query> 
    <cts:text xml:lang="en">degradation」</cts:text> 
    </cts:word-query> 
</cts:and-query> 
+0

謝謝……我本應先檢查 search:parse 函數的輸出 – Ravi

0

我建議你使用名爲 "exact" 的 term-option(詞項選項)。

摘自文檔:"exact"——精確匹配查詢。它是 "case-sensitive"、"diacritic-sensitive"、"punctuation-sensitive"、"whitespace-sensitive"、"unstemmed" 和 "unwildcarded" 的簡寫。

+0

嗯……所以你的建議是:如果搜索字符串中有引號,我就添加 "exact" 選項? – Ravi

+0

我認爲 term-option 適用於整個搜索字符串,而不僅僅是其中的短語,對嗎?所以這只適用於雙引號括住整個搜索字符串的情況…… –

+0

是的,這是我的想法...如果只有部分搜索字符串有引號,那麼會發生什麼? – Ravi