2017-02-21 54 views
2

我一直在使用一個mongo 3.2.9安裝對一些線上數據進行調查。主要目的是找出文檔中缺少數據的記錄的一些細節。但我運行的查詢在Robomongo和Compass中都會超時。

我有一個包含超過300萬條記錄的集合(foo)。我在尋找所有具有barId的記錄,這是我向mongo發出的查詢:

db.foo.find({barId:{$exists:true}}).explain(true) 

這是從mongo shell得到的執行計劃(在Robomongo或Compass中會超時):

MongoDB Enterprise > db.foo.find({barId:{$exists:true}}).explain(true) 
{ 
    "queryPlanner" : { 
    "plannerVersion" : 1, 
    "namespace" : "myDatabase01.foo", 
    "indexFilterSet" : false, 
    "parsedQuery" : { 
     "barId" : { 
     "$exists" : true 
     } 
    }, 
    "winningPlan" : { 
     "stage" : "FETCH", 
     "filter" : { 
     "barId" : { 
      "$exists" : true 
     } 
     }, 
     "inputStage" : { 
     "stage" : "IXSCAN", 
     "keyPattern" : { 
      "barId" : 1 
     }, 
     "indexName" : "barId_1", 
     "isMultiKey" : false, 
     "isUnique" : false, 
     "isSparse" : false, 
     "isPartial" : false, 
     "indexVersion" : 1, 
     "direction" : "forward", 
     "indexBounds" : { 
      "barId" : [ 
      "[MinKey, MaxKey]" 
      ] 
     } 
     } 
    }, 
    "rejectedPlans" : [ ] 
    }, 
    "executionStats" : { 
    "executionSuccess" : true, 
    "nReturned" : 2, 
    "executionTimeMillis" : 154716, 
    "totalKeysExamined" : 3361040, 
    "totalDocsExamined" : 3361040, 
    "executionStages" : { 
     "stage" : "FETCH", 
     "filter" : { 
     "barId" : { 
      "$exists" : true 
     } 
     }, 
     "nReturned" : 2, 
     "executionTimeMillisEstimate" : 152060, 
     "works" : 3361041, 
     "advanced" : 2, 
     "needTime" : 3361038, 
     "needYield" : 0, 
     "saveState" : 27619, 
     "restoreState" : 27619, 
     "isEOF" : 1, 
     "invalidates" : 0, 
     "docsExamined" : 3361040, 
     "alreadyHasObj" : 0, 
     "inputStage" : { 
     "stage" : "IXSCAN", 
     "nReturned" : 3361040, 
     "executionTimeMillisEstimate" : 1260, 
     "works" : 3361041, 
     "advanced" : 3361040, 
     "needTime" : 0, 
     "needYield" : 0, 
     "saveState" : 27619, 
     "restoreState" : 27619, 
     "isEOF" : 1, 
     "invalidates" : 0, 
     "keyPattern" : { 
      "barId" : 1 
     }, 
     "indexName" : "barId_1", 
     "isMultiKey" : false, 
     "isUnique" : false, 
     "isSparse" : false, 
     "isPartial" : false, 
     "indexVersion" : 1, 
     "direction" : "forward", 
     "indexBounds" : { 
      "barId" : [ 
      "[MinKey, MaxKey]" 
      ] 
     }, 
     "keysExamined" : 3361040, 
     "dupsTested" : 0, 
     "dupsDropped" : 0, 
     "seenInvalidated" : 0 
     } 
    }, 
    "allPlansExecution" : [ ] 
    }, 
    "serverInfo" : { 
    "host" : "myLinuxMachine", 
    "port" : 8080, 
    "version" : "3.2.9", 
    "gitVersion" : "22ec9e93b40c85fc7cae7d56e7d6a02fd811088c" 
    }, 
    "ok" : 1 
} 

看起來它使用了我的barId_1索引,但同時它掃描了全部300萬條記錄,卻只返回2條。

我運行了一個類似的查詢,但不是查找字段是否存在,而是查找大於0的ID(全部都是)。

MongoDB Enterprise > db.foo.find({barId:{$gt:"0"}}).explain(true) 
{ 
    "queryPlanner" : { 
    "plannerVersion" : 1, 
    "namespace" : "myDatabase01.foo", 
    "indexFilterSet" : false, 
    "parsedQuery" : { 
     "barId" : { 
     "$gt" : "0" 
     } 
    }, 
    "winningPlan" : { 
     "stage" : "FETCH", 
     "inputStage" : { 
     "stage" : "IXSCAN", 
     "keyPattern" : { 
      "barId" : 1 
     }, 
     "indexName" : "barId_1", 
     "isMultiKey" : false, 
     "isUnique" : false, 
     "isSparse" : false, 
     "isPartial" : false, 
     "indexVersion" : 1, 
     "direction" : "forward", 
     "indexBounds" : { 
      "barId" : [ 
      "(\"0\", {})" 
      ] 
     } 
     } 
    }, 
    "rejectedPlans" : [ ] 
    }, 
    "executionStats" : { 
    "executionSuccess" : true, 
    "nReturned" : 2, 
    "executionTimeMillis" : 54, 
    "totalKeysExamined" : 2, 
    "totalDocsExamined" : 2, 
    "executionStages" : { 
     "stage" : "FETCH", 
     "nReturned" : 2, 
     "executionTimeMillisEstimate" : 10, 
     "works" : 3, 
     "advanced" : 2, 
     "needTime" : 0, 
     "needYield" : 0, 
     "saveState" : 0, 
     "restoreState" : 0, 
     "isEOF" : 1, 
     "invalidates" : 0, 
     "docsExamined" : 2, 
     "alreadyHasObj" : 0, 
     "inputStage" : { 
     "stage" : "IXSCAN", 
     "nReturned" : 2, 
     "executionTimeMillisEstimate" : 10, 
     "works" : 3, 
     "advanced" : 2, 
     "needTime" : 0, 
     "needYield" : 0, 
     "saveState" : 0, 
     "restoreState" : 0, 
     "isEOF" : 1, 
     "invalidates" : 0, 
     "keyPattern" : { 
      "barId" : 1 
     }, 
     "indexName" : "barId_1", 
     "isMultiKey" : false, 
     "isUnique" : false, 
     "isSparse" : false, 
     "isPartial" : false, 
     "indexVersion" : 1, 
     "direction" : "forward", 
     "indexBounds" : { 
      "barId" : [ 
      "(\"0\", {})" 
      ] 
     }, 
     "keysExamined" : 2, 
     "dupsTested" : 0, 
     "dupsDropped" : 0, 
     "seenInvalidated" : 0 
     } 
    }, 
    "allPlansExecution" : [ ] 
    }, 
    "serverInfo" : { 
    "host" : "myLinuxMachine", 
    "port" : 8080, 
    "version" : "3.2.9", 
    "gitVersion" : "22ec9e93b40c85fc7cae7d56e7d6a02fd811088c" 
    }, 
    "ok" : 1 
} 

這次同樣對barId_1做了索引掃描。它掃描了2條記錄,返回2條。

爲了完整起見,這裏是2條記錄,其他300萬條在大小和組成上非常相似。

MongoDB Enterprise > db.foo.find({barId:{$gt:"0"}}) 
{ 
    "_id" : "00002f5d-ee4a-4996-bb27-b54ea84df777", "createdDate" : ISODate("2016-11-16T02:26:48.500Z"), "createdBy" : "Exporter", "lastModifiedDate" : ISODate("2016-11-16T02:26:48.500Z"), "lastModifiedBy" : "Exporter", "rolePlayed" : "LA", "roleType" : "T", "oId" : [ "d7316944-62ed-48dc-8ee4-e3bad8c58b10" ], "barId" : "e45b3160-bbb4-24e5-82b3-ad0c28329555", "cId" : "dcc29053-7a1f-439e-9536-fb4e44ff8a51", "timestamp" : "2017-02-20T16:23:15.795Z" 
} 
{ 
    "_id" : "00002f5d-ee4a-4996-bb27-b54ea84df888", "createdDate" : ISODate("2016-11-16T02:26:48.500Z"), "createdBy" : "Exporter", "lastModifiedDate" : ISODate("2016-11-16T02:26:48.500Z"), "lastModifiedBy" : "Exporter", "rolePlayed" : "LA", "roleType" : "T", "oId" : [ "d7316944-62ed-48dc-8ee4-e3bad8c58b10" ], "barId" : "e45b3160-bbb4-24e5-82b3-ad0c28329555", "cId" : "dcc29053-7a1f-439e-9536-fb4e44ff8a51", "timestamp" : "2017-02-20T16:23:15.795Z" 
} 

當然,我在谷歌上搜索了一圈,發現過去將索引與$exists子句一起使用時曾存在問題,但在我讀過的許多討論串中都說這已經修復了。是嗎?另外,我發現在查找字段是否存在時,可以使用以下hack代替$exists子句來強制使用索引。

MongoDB Enterprise > db.foo.find({barId:{$ne:null}}).explain(true) 
{ 
    "queryPlanner" : { 
    "plannerVersion" : 1, 
    "namespace" : "myDatabase01.foo", 
    "indexFilterSet" : false, 
    "parsedQuery" : { 
     "$not" : { 
     "barId" : { 
      "$eq" : null 
     } 
     } 
    }, 
    "winningPlan" : { 
     "stage" : "FETCH", 
     "filter" : { 
     "$not" : { 
      "barId" : { 
      "$eq" : null 
      } 
     } 
     }, 
     "inputStage" : { 
     "stage" : "IXSCAN", 
     "keyPattern" : { 
      "barId" : 1 
     }, 
     "indexName" : "barId_1", 
     "isMultiKey" : false, 
     "isUnique" : false, 
     "isSparse" : false, 
     "isPartial" : false, 
     "indexVersion" : 1, 
     "direction" : "forward", 
     "indexBounds" : { 
      "barId" : [ 
      "[MinKey, null)", 
      "(null, MaxKey]" 
      ] 
     } 
     } 
    }, 
    "rejectedPlans" : [ ] 
    }, 
    "executionStats" : { 
    "executionSuccess" : true, 
    "nReturned" : 2, 
    "executionTimeMillis" : 57, 
    "totalKeysExamined" : 3, 
    "totalDocsExamined" : 2, 
    "executionStages" : { 
     "stage" : "FETCH", 
     "filter" : { 
     "$not" : { 
      "barId" : { 
      "$eq" : null 
      } 
     } 
     }, 
     "nReturned" : 2, 
     "executionTimeMillisEstimate" : 10, 
     "works" : 4, 
     "advanced" : 2, 
     "needTime" : 1, 
     "needYield" : 0, 
     "saveState" : 0, 
     "restoreState" : 0, 
     "isEOF" : 1, 
     "invalidates" : 0, 
     "docsExamined" : 2, 
     "alreadyHasObj" : 0, 
     "inputStage" : { 
     "stage" : "IXSCAN", 
     "nReturned" : 2, 
     "executionTimeMillisEstimate" : 10, 
     "works" : 4, 
     "advanced" : 2, 
     "needTime" : 1, 
     "needYield" : 0, 
     "saveState" : 0, 
     "restoreState" : 0, 
     "isEOF" : 1, 
     "invalidates" : 0, 
     "keyPattern" : { 
      "barId" : 1 
     }, 
     "indexName" : "barId_1", 
     "isMultiKey" : false, 
     "isUnique" : false, 
     "isSparse" : false, 
     "isPartial" : false, 
     "indexVersion" : 1, 
     "direction" : "forward", 
     "indexBounds" : { 
      "barId" : [ 
      "[MinKey, null)", 
      "(null, MaxKey]" 
      ] 
     }, 
     "keysExamined" : 3, 
     "dupsTested" : 0, 
     "dupsDropped" : 0, 
     "seenInvalidated" : 0 
     } 
    }, 
    "allPlansExecution" : [ ] 
    }, 
    "serverInfo" : { 
    "host" : "myLinuxMachine", 
    "port" : 8080, 
    "version" : "3.2.9", 
    "gitVersion" : "22ec9e93b40c85fc7cae7d56e7d6a02fd811088c" 
    }, 
    "ok" : 1 
} 

這樣可行,只掃描了2個文檔,也只返回了2個文檔。

因此,我的問題是:我應該在查詢中使用$exists嗎?它是否適合在線上生產環境的應用程序中使用?如果答案是否定的,那麼$exists子句當初爲什麼還要存在?

當然也有可能是這個mongo安裝本身有問題,或者是索引出了毛病。任何指點都會非常受歡迎,但現在我先繼續使用$ne:null這個hack。

回答

2

您應該在barId字段上使用部分索引(partial index,首選)或稀疏索引:

db.foo.createIndex(
    { barId: 1 }, 
    { partialFilterExpression: { barId: { $exists: true } } } 
) 
+0

謝謝,這個方法非常有效。按建議添加索引後,執行barId:{$exists:true}查詢所花費的時間減少到原來的十分之一。我只是好奇索引之間差異的原因。爲什麼不把所有索引都創建成這樣? – Damo

相關問題