2017-03-02 43 views
0

ElasticSearch NEST 2.4 中的自定義「分隔符」分詞器(Tokenizer)

我有一個包含許多字段的索引,其中一個字段「ServiceCategories」的數據類似這樣:

|Case Management|Developmental Disabilities

我需要按分隔符「|」拆分這些數據,我嘗試用下面的方式來實現:

// First attempt: create an index named after _DataSource (lower-cased) whose
// ServiceCategories field is analyzed by a custom "tab_delim_analyzer"
// (pattern tokenizer "tab_delim_tokenizer" + "lowercase" token filter).
// OrganizationName additionally gets a not-analyzed "raw" sub-field and
// Location is mapped as a geo_point.
var descriptor = new CreateIndexDescriptor(_DataSource.ToLower()) 
     .Mappings(ms => ms 
      .Map<ProviderContent>(m => m 
       .AutoMap() 
       .Properties(p => p 
        .String(s => s 
         .Name(n => n.OrganizationName) 
         .Fields(f => f 
          .String(ss => ss.Name("raw").NotAnalyzed()))) 
        .String(s => s 
         .Name(n => n.ServiceCategories) 
         .Analyzer("tab_delim_analyzer")) 
        .GeoPoint(g => g.Name(n => n.Location).LatLon(true))))) 
     .Settings(st => st 
      .Analysis(an => an 
       .Analyzers(anz => anz 
        .Custom("tab_delim_analyzer", td => td 
         .Filters("lowercase") 
        .Tokenizer("tab_delim_tokenizer"))) 
       .Tokenizers(t => t 
        .Pattern("tab_delim_tokenizer", tdt => tdt 
         // NOTE(review): the tokenizer pattern is a regular expression, and an
         // unescaped "|" is the alternation operator rather than a literal pipe,
         // so this does not split on '|'. Escaping it (@"\|") is presumably what
         // is intended -- confirm against the pattern tokenizer documentation.
         .Pattern("|"))))); 
    _elasticClientWrapper.CreateIndex(descriptor); 

我用來搜索 ServiceCategories(在 ES 中爲 serviceCategories)的代碼使用一個簡單的 TermQuery,並把搜索值設爲小寫。

使用這個搜索參數時得不到任何結果(其他參數工作正常)。預期的結果是至少與上述其中一個詞條完全匹配。

我也嘗試過改用 classic 分詞器來讓它工作:

// Second attempt: same field mappings, but ServiceCategories is indexed with a
// custom analyzer named "classic_tokenizer" (built only from the built-in
// "classic" tokenizer, no token filters) and searched with the "standard"
// analyzer.
// NOTE(review): no lowercase filter is configured for the index-time analyzer,
// and the search shown in this document uses a term query with a lower-cased
// multi-word value -- presumably that is why this variant finds nothing; confirm.
var descriptor = new CreateIndexDescriptor(_DataSource.ToLower()) 
     .Mappings(ms => ms 
      .Map<ProviderContent>(m => m 
       .AutoMap() 
       .Properties(p => p 
        .String(s => s 
         .Name(n => n.OrganizationName) 
         .Fields(f => f 
          .String(ss => ss.Name("raw").NotAnalyzed()))) 
        .String(s => s 
         .Name(n => n.ServiceCategories) 
         .Analyzer("classic_tokenizer") 
         .SearchAnalyzer("standard")) 
        .GeoPoint(g => g.Name(n => n.Location).LatLon(true))))) 
     .Settings(s => s 
      .Analysis(an => an 
       .Analyzers(a => a.Custom("classic_tokenizer", ca => ca 
        .Tokenizer("classic"))))); 

這同樣不起作用。有人能幫我找出我哪裏做錯了嗎?

這是搜索請求:

### ES REQUEST ### 
{ 
    "from": 0, 
    "size": 10, 
    "sort": [ 
    { 
     "organizationName": { 
     "order": "asc" 
     } 
    } 
    ], 
    "query": { 
    "bool": { 
     "must": [ 
     { 
      "match_all": {} 
     }, 
     { 
      "term": { 
      "serviceCategories": { 
       "value": "developmental disabilities" 
      } 
      } 
     } 
     ] 
    } 
    } 
} 

回答

1

您的 tab_delim_tokenizer 的模式已經很接近,但並不完全正確 :) 要看出這一點,最簡單的方法是使用 Analyze API 來了解分析器會如何把一段文本切分成詞條。使用您的第一個映射,我們可以檢查自定義分析器實際做了什麼:

// Run the index's "tab_delim_analyzer" over a sample ServiceCategories value
// through the Analyze API so the tokens it emits can be inspected.
client.Analyze(a => a 
    .Index(_DataSource.ToLower()) 
    .Analyzer("tab_delim_analyzer") 
    .Text("|Case Management|Developmental Disabilities") 
); 

返回(爲了簡潔,已截斷)

{ 
    "tokens" : [ { 
    "token" : "|", 
    "start_offset" : 0, 
    "end_offset" : 1, 
    "type" : "word", 
    "position" : 0 
    }, { 
    "token" : "c", 
    "start_offset" : 1, 
    "end_offset" : 2, 
    "type" : "word", 
    "position" : 1 
    }, { 
    "token" : "a", 
    "start_offset" : 2, 
    "end_offset" : 3, 
    "type" : "word", 
    "position" : 2 
    }, { 
    "token" : "s", 
    "start_offset" : 3, 
    "end_offset" : 4, 
    "type" : "word", 
    "position" : 3 
    }, ... ] 
} 

這證明 tab_delim_tokenizer 並沒有按我們期望的方式進行分詞。只需一個小改動即可修復此問題:在模式中用 \ 轉義 |,並加上 @ 前綴使該模式成爲逐字字符串字面量(即 @"\|")。

這裏有一個完整的例子

// Script-style entry point demonstrating the corrected pattern tokenizer:
// recreates "default-index" on a local node, indexes one sample document and
// runs a term query against the pipe-delimited ServiceCategories field.
void Main()
{
    const string sampleCategories = "|Case Management|Developmental Disabilities";
    var defaultIndex = "default-index";

    var connectionSettings = new ConnectionSettings(
            new SingleNodeConnectionPool(new Uri("http://localhost:9200")))
        .DefaultIndex(defaultIndex);
    var client = new ElasticClient(connectionSettings);

    // Drop any index left over from a previous run so the mapping below applies.
    if (client.IndexExists(defaultIndex).Exists)
    {
        client.DeleteIndex(defaultIndex);
    }

    var descriptor = new CreateIndexDescriptor(defaultIndex)
        .Mappings(mappings => mappings
            .Map<ProviderContent>(typeMap => typeMap
                .AutoMap()
                .Properties(props => props
                    // Analyzed name plus a not-analyzed "raw" sub-field.
                    .String(org => org
                        .Name(doc => doc.OrganizationName)
                        .Fields(sub => sub
                            .String(raw => raw.Name("raw").NotAnalyzed())))
                    // Pipe-delimited categories use the custom analyzer defined below.
                    .String(cats => cats
                        .Name(doc => doc.ServiceCategories)
                        .Analyzer("tab_delim_analyzer"))
                    .GeoPoint(geo => geo
                        .Name(doc => doc.Location)
                        .LatLon(true)))))
        .Settings(indexSettings => indexSettings
            .Analysis(analysis => analysis
                .Analyzers(analyzers => analyzers
                    .Custom("tab_delim_analyzer", custom => custom
                        .Filters("lowercase")
                        .Tokenizer("tab_delim_tokenizer")))
                .Tokenizers(tokenizers => tokenizers
                    .Pattern("tab_delim_tokenizer", pattern => pattern
                        // Verbatim string with an escaped pipe: match the literal '|'.
                        .Pattern(@"\|")))));

    client.CreateIndex(descriptor);

    // Sanity check: see what the custom analyzer emits for the sample value.
    client.Analyze(analyze => analyze
        .Index(defaultIndex)
        .Analyzer("tab_delim_analyzer")
        .Text(sampleCategories));

    // Index one document and refresh so it is searchable straight away.
    var document = new ProviderContent
    {
        OrganizationName = "Elastic",
        ServiceCategories = sampleCategories
    };
    client.Index(document, idx => idx.Refresh());

    // Look the document up again. The unary plus wraps the term query in a
    // bool filter clause, as we don't need scoring (probably).
    client.Search<ProviderContent>(search => search
        .From(0)
        .Size(10)
        .Sort(sort => sort
            .Ascending(doc => doc.OrganizationName))
        .Query(query => +query
            .Term(doc => doc.ServiceCategories, "developmental disabilities")));
}

/// <summary>
/// Document type stored in the index; each property is mapped explicitly
/// in the create-index descriptor in addition to <c>AutoMap()</c>.
/// </summary>
public class ProviderContent 
{ 
    /// <summary>Organization name; mapped as a string with a not-analyzed "raw" sub-field.</summary>
    public string OrganizationName { get; set; } 

    /// <summary>
    /// Pipe-delimited category list, e.g. "|Case Management|Developmental Disabilities";
    /// mapped with the "tab_delim_analyzer" analyzer.
    /// </summary>
    public string ServiceCategories { get; set; } 

    /// <summary>Geographic location; mapped as a geo_point.</summary>
    public GeoLocation Location { get; set; } 
} 

搜索結果返回

{ 
    "took" : 2, 
    "timed_out" : false, 
    "_shards" : { 
    "total" : 5, 
    "successful" : 5, 
    "failed" : 0 
    }, 
    "hits" : { 
    "total" : 1, 
    "max_score" : null, 
    "hits" : [ { 
     "_index" : "default-index", 
     "_type" : "providercontent", 
     "_id" : "AVqNNqlQpAW_5iHrnIDQ", 
     "_score" : null, 
     "_source" : { 
     "organizationName" : "Elastic", 
     "serviceCategories" : "|Case Management|Developmental Disabilities" 
     }, 
     "sort" : [ "elastic" ] 
    } ] 
    } 
} 
+0

完美又簡單!我在這個問題上絞盡腦汁好一陣子了!謝謝。最後一個問題……Analyze——它返回的對象是什麼?這樣我就知道將來如何最好地處理它。 – Michael

+0

@Michael 不確定你的意思——是問 `.Analyze()` 方法調用返回的是什麼嗎?它返回一個 `IAnalyzeResponse` –