2017-02-08 72 views
0

我試圖索引一個包含 1400 個文檔的 XML 文件,數據格式如下(每個 <information> 元素的開始和結束標記界定一個獨立的文檔)。我希望 XML 內的每個文檔(連同其各個字段)分別被索引爲單獨的文檔。

<add> 
<information> 
    <id>a1a</id> 
    <author>abcd</author> 
    <bibliography>a. b. ,c</bibliography> 
    <body>This sample one.</body> 
    <title>Sample one</title> 
</information> 
<information> 
    <id>a2a</id> 
    <author>xyz</author> 
    <bibliography>x. y.x</bibliography> 
    <body>This is sample two</body> 
    <title>Sample Two</title> 
</information> 
</add> 

我在命令提示符下使用 post 命令提交文件:java -Durl=http://localhost:8983/solr/update/extract?literal.id=VR -Dtype=application/xml -jar post.jar VR.xml

schema.xml(到字段聲明部分爲止):

<field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false" /> 

    <field name="bibliography" type="string" indexed="true" stored="true"/> 
    <field name="body" type="text_en" indexed="true" stored="true"/> 

    <field name="sku" type="text_en_splitting_tight" indexed="true" stored="true" omitNorms="true"/> 
    <field name="name" type="text_general" indexed="true" stored="true"/> 
    <field name="manu" type="text_general" indexed="true" stored="true" omitNorms="true"/> 
    <field name="cat" type="string" indexed="true" stored="true" multiValued="true"/> 
    <field name="features" type="text_general" indexed="true" stored="true" multiValued="true"/> 
    <field name="includes" type="text_general" indexed="true" stored="true" termVectors="true" termPositions="true" termOffsets="true" /> 

    <field name="weight" type="float" indexed="true" stored="true"/> 
    <field name="price" type="float" indexed="true" stored="true"/> 
    <field name="popularity" type="int" indexed="true" stored="true" /> 
    <field name="inStock" type="boolean" indexed="true" stored="true" /> 

    <field name="store" type="location" indexed="true" stored="true"/> 

    <field name="title" type="text_general" indexed="true" stored="true" multiValued="true"/> 
    <field name="subject" type="text_general" indexed="true" stored="true"/> 
    <field name="description" type="text_general" indexed="true" stored="true"/> 
    <field name="comments" type="text_general" indexed="true" stored="true"/> 
    <field name="author" type="text_general" indexed="true" stored="true"/> 
    <field name="keywords" type="text_general" indexed="true" stored="true"/> 
    <field name="category" type="text_general" indexed="true" stored="true"/> 
    <field name="resourcename" type="text_general" indexed="true" stored="true"/> 
    <field name="url" type="text_general" indexed="true" stored="true"/> 
    <field name="content_type" type="string" indexed="true" stored="true" multiValued="true"/> 
    <field name="last_modified" type="date" indexed="true" stored="true"/> 
    <field name="links" type="string" indexed="true" stored="true" multiValued="true"/> 


    <field name="content" type="text_general" indexed="false" stored="true" multiValued="true"/> 


    <!-- catchall field, containing all other searchable text fields (implemented 
     via copyField further on in this schema --> 
    <field name="text" type="text_general" indexed="true" stored="false" multiValued="true"/> 

    <!-- catchall text field that indexes tokens both normally and in reverse for efficient 
     leading wildcard queries. --> 
    <field name="text_rev" type="text_general_rev" indexed="true" stored="false" multiValued="true"/> 

    <!-- non-tokenized version of manufacturer to make it easier to sort or group 
     results by manufacturer. copied from "manu" via copyField --> 
    <field name="manu_exact" type="string" indexed="true" stored="false"/> 

    <field name="payloads" type="payloads" indexed="true" stored="true"/> 



    <dynamicField name="*_i" type="int" indexed="true" stored="true"/> 
    <dynamicField name="*_is" type="int" indexed="true" stored="true" multiValued="true"/> 
    <dynamicField name="*_s" type="string" indexed="true" stored="true" /> 
    <dynamicField name="*_ss" type="string" indexed="true" stored="true" multiValued="true"/> 
    <dynamicField name="*_l" type="long" indexed="true" stored="true"/> 
    <dynamicField name="*_ls" type="long" indexed="true" stored="true" multiValued="true"/> 
    <dynamicField name="*_t" type="text_general" indexed="true" stored="true"/> 
    <dynamicField name="*_txt" type="text_general" indexed="true" stored="true" multiValued="true"/> 
    <dynamicField name="*_en" type="text_en" indexed="true" stored="true" multiValued="true"/> 
    <dynamicField name="*_b" type="boolean" indexed="true" stored="true"/> 
    <dynamicField name="*_bs" type="boolean" indexed="true" stored="true" multiValued="true"/> 
    <dynamicField name="*_f" type="float" indexed="true" stored="true"/> 
    <dynamicField name="*_fs" type="float" indexed="true" stored="true" multiValued="true"/> 
    <dynamicField name="*_d" type="double" indexed="true" stored="true"/> 
    <dynamicField name="*_ds" type="double" indexed="true" stored="true" multiValued="true"/> 

    <!-- Type used to index the lat and lon components for the "location" FieldType --> 
    <dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false" /> 

    <dynamicField name="*_dt" type="date" indexed="true" stored="true"/> 
    <dynamicField name="*_dts" type="date" indexed="true" stored="true" multiValued="true"/> 
    <dynamicField name="*_p" type="location" indexed="true" stored="true"/> 

    <!-- some trie-coded dynamic fields for faster range queries --> 
    <dynamicField name="*_ti" type="tint" indexed="true" stored="true"/> 
    <dynamicField name="*_tl" type="tlong" indexed="true" stored="true"/> 
    <dynamicField name="*_tf" type="tfloat" indexed="true" stored="true"/> 
    <dynamicField name="*_td" type="tdouble" indexed="true" stored="true"/> 
    <dynamicField name="*_tdt" type="tdate" indexed="true" stored="true"/> 

    <dynamicField name="*_c" type="currency" indexed="true" stored="true"/> 

    <dynamicField name="ignored_*" type="ignored" multiValued="true"/> 
    <dynamicField name="attr_*" type="text_general" indexed="true" stored="true" multiValued="true"/> 

    <dynamicField name="random_*" type="random" /> 

<uniqueKey>id</uniqueKey> 

也就是說,我在默認配置的基礎上添加了 XML 中出現的所有額外字段。

索引完成之後,當我執行查詢時,得到的響應是:

{ "id": "VR", "content_type": [ "application/xml" ], "content": [ " \n \n \n \n \n \n \n \n \n \n a1a \n abcd \n a. b. ,c \n This sample one. \n Sample one \n \n \n a2a \n xyz \n x. y.x \n This is sample two \n Sample Two \n \n \n " ], "_version_": 1558754454898999300 }

預期的響應是:

{ "id": "abcd", "content_type": [ "application/xml" ], "content": [ " \n \n \n \n \n \n \n \n \n a1a \n abcd \n a. b. ,c \n This sample one. \n Sample one \n \n \n a2a \n " ],

"_version_": 1558754454898999300 

{ "id": "xyz", "content_type": [ "application/xml" ], "content": [ "xyz \n x. y.x \n This is sample two \n Sample Two \n \n \n " ], "_version_": 1558754454898999301

請告訴我遺漏了什麼,並建議所需的配置(configurations)。如果需要任何額外信息,請隨時詢問。

+0

如果你嘗試 http://localhost:8983/solr/collection_name/select?q=*%3A* ,得到的響應是什麼? –

回答

0

我認爲所有內容都被索引成了一個文檔。

將 <information></information> 標籤改爲 <doc></doc>:

<add> 
<doc> 
    <id>a1a</id> 
    <author>abcd</author> 
    <bibliography>a. b. ,c</bibliography> 
    <body>This sample one.</body> 
    <title>Sample one</title> 
</doc> 
<doc> 
    <id>a2a</id> 
    <author>xyz</author> 
    <bibliography>x. y.x</bibliography> 
    <body>This is sample two</body> 
    <title>Sample Two</title> 
</doc> 
</add> 
+0

對不起,查詢的響應仍然相同。你還有其他解決方案嗎? –

0

將您的 XML 轉換爲以下格式:

/opt/solr/bin/post -c collection_name VR.xml 
+0

對不起,查詢響應仍然相同。你還有其他解決方案嗎? –

0

的XML:

<add> 
    <doc> 
    <field name="id">a1a</field> 
    <field name="author">abcd</field> 
    <field name="bibliography">a. b. ,c</field> 
    <field name="body">This sample one.</field> 
    </doc> 

    <doc> 
    <field name="id">a2a</field> 
    <field name="author">xyz</field> 
    <field name="bibliography">x. y. ,z</field> 
    <field name="body">Sample Two.</field> 
    </doc> 
</add> 

現在你可以使用 post 命令(/opt/solr/bin/post -c collection_name VR.xml)發佈數據。

Ashraful 的答案是正確的:

<add> 
    <doc> 
    <field name="id">a1a</field> 
    <field name="author">abcd</field> 
    <field name="bibliography">a. b. ,c</field> 
    <field name="body">This sample one.</field> 
    </doc> 

    <doc> 
    <field name="id">a2a</field> 
    <field name="author">xyz</field> 
    <field name="bibliography">x. y. ,z</field> 
    <field name="body">Sample Two.</field> 
    </doc> 
</add> 

你向 /update/extract(ExtractingRequestHandler)提交時帶上了 literal.id=VR,因此只創建了一個 id 爲 VR 的單一文檔。

只需使用 java -jar post.jar VR.xml 提交(發送到默認的 /update 處理器),就會得到下面的結果:

"docs": [ 
    { 
    "id": "a1a", 
    "author": "abcd", 
    "author_s": "abcd", 
    "bibliography": "a. b. ,c", 
    "body": "This sample one.", 
    "_version_": 1558910648654495700 
    }, 
    { 
    "id": "a2a", 
    "author": "xyz", 
    "author_s": "xyz", 
    "bibliography": "x. y. ,z", 
    "body": "Sample Two.", 
    "_version_": 1558910648656593000 
    } 
+0

非常感謝你……使用新的命令後運行得很好。 –

+0

太棒了!你可以將它接受爲答案。 –

相關問題