2015-05-13 42 views
1

首先,這可能是一個方向錯誤的問題;如果是這樣,我將非常感謝任何關於如何繼續的指引。我的問題是:如何對包含彼此相關的複雜子文檔的對象進行mapReduce?

從我在網上找到的東西看起來好像是mongodb/mongoose mapReduce是做這件事的最好方法,但我一直試圖圍繞它來解決這個問題,並且我努力去理解它,我想知道是否有人可以幫助解釋我的問題。我不一定在尋找完整的解決方案。我真的很感謝很好解釋的僞代碼。我認爲特別困惑的是如何處理彙總和組合2個或更多子文檔。

另外,我知道這可能是一個糟糕的模型/集合設計,但不幸的是,這完全不在我的掌控之中,所以請不要建議重新設計模型。

我的具體問題是,我們有一個現成的模型,看起來像下面這樣:

survey: { 
      _id: 1111, 
      name: "name", 
      questions: [ 
       {_id: 1, text: "a,b, or c?", type: "multipleChoice", options: [a, b, c,]}, 
       {_id: 2, text: "what do you think", type: "freeform"} 
      ], 
      participants: [{_id: 1, name: "user 1"}, {_id: 2, name: "user 2"}], 
      results: [{_id: 123, userId: 1, questionId: 1, answer: "a"}, 
       {_id: 124, userId: 2, questionId: 1, answer: "b"}, 
       {_id: 125, userId: 1, questionId: 2, answer: "this is some answer"}, 
       {_id: 126, userId: 2, questionId: 2, answer: "this is another answer"}] 

     } 

,然後我們有一個被單獨開發使用的是另一種模型來追蹤用戶的用戶的整個進度調查(這僅僅是一個基本的子集,我們也追蹤不同的事件)

trackings:{ 
    _id:123, 
    surveyId: 1, 
    userId: 123, 
    starttime: "2015-05-13 10:46:20.347Z", 
    endtime: "2015-05-13 10:59:20.347Z" 
} 

什麼,我想以某種方式做的就是這樣的:

{ 
    survey: "survey name", 
    _id : 1, 
    totalAverageTime: "00:23:00", 
    fastestTime : "00:23:00", 
    slowestTime: "00:25:00", 
    questions: [ 
    { 
     _id: 1, text: "a,b, or c?", 
     type: "multipleChoice", 
     mostPopularAnswer: "a", 
     averageTime: "00:13:00", 
     answers : [{ userId: 1, answer: "a", time:"00:14:00"}, 
       { userId: 2, answer: "a", time:"00:12:00"}] 

    },{ 
     _id: 2, text:"what do you think", 
     type:"freeform", 
     averageTime : "00:10:00", 
     answers : [{ userId: 1, answer: "this is some answer", time:"00:11:00"}, 
       { userId: 2, answer: "this is another answer", time:"00:09:00"}] 


    } 

    ] 

} 
+1

我有一個解決方案,涉及創建另一個輸出集合,將兩個模式連接起來,然後使用聚合框架計算所需的聚合。但是,如果您可以指定正在使用的MongoDB版本,這將會非常有幫助,因爲這會影響聚合操作,因爲它需要使用某些在更高版本中找到的運算符。 – chridam

+0

我們目前在使用mongodb 3.8和mongoose 4 – jonnie

+0

這是一個錯字,mongodb 3.8嗎? – chridam

回答

2

以下方法使用aggregation framework(聚合框架)來提出一個更接近所需輸出的解決方案。它依賴於第三個集合,該集合可以看作是survey和trackings這兩個集合合併的結果。

首先,假設你已經根據問題中的例子,建立了以下帶有測試文檔的集合:

// survey collection: seed it with the single sample survey taken from the question.
var sampleQuestions = [
    {_id: 1, text: "a,b, or c?", type: "multipleChoice", options: ["a", "b", "c"]},
    {_id: 2, text: "what do you think", type: "freeform"}
];

var sampleParticipants = [
    {_id: 1, name: "user 1"},
    {_id: 2, name: "user 2"}
];

// One result entry per (participant, question) pair.
var sampleResults = [
    {_id: 123, userId: 1, questionId: 1, answer: "a"},
    {_id: 124, userId: 2, questionId: 1, answer: "b"},
    {_id: 125, userId: 1, questionId: 2, answer: "this is some answer"},
    {_id: 126, userId: 2, questionId: 2, answer: "this is another answer"}
];

db.survey.insert({
    _id: 1111,
    name: "name",
    questions: sampleQuestions,
    participants: sampleParticipants,
    results: sampleResults
})

// trackings collection: one timing record per participant of survey 1111.
// The start/end timestamps are stored as plain strings here.
var trackingDocs = [];

trackingDocs.push({
    _id: 1,
    surveyId: 1111,
    userId: 1,
    starttime: "2015-05-13 10:46:20.347Z",
    endtime: "2015-05-13 10:59:20.347Z"
});

trackingDocs.push({
    _id: 2,
    surveyId: 1111,
    userId: 2,
    starttime: "2015-05-13 10:13:06.176Z",
    endtime: "2015-05-13 10:46:28.176Z"
});

db.trackings.insert(trackingDocs)

要創建第三個集合(我們稱它爲output_collection),你需要對trackings集合的find()遊標使用forEach()方法進行遍歷:把存放日期字符串的字段轉換爲真正的ISODate對象,新增一個用來存放對應survey文檔的數組字段,然後把合併後的對象保存到第三個集合中。下面演示了此操作:

// Merge step: copy every tracking document into output_collection, enriched with
// the survey it refers to and with real date values instead of date strings.
var trackingCursor = db.trackings.find();

trackingCursor.forEach(function (tracking) {
    // Embed the matching survey document(s) as an array under "survey".
    tracking.survey = db.survey.find({"_id": tracking.surveyId}).toArray();

    // Convert the string timestamps to ISODate values.
    ["starttime", "endtime"].forEach(function (field) {
        tracking[field] = ISODate(tracking[field]);
    });

    db.output_collection.save(tracking);
});

把兩個集合合併到output_collection之後,使用db.output_collection.findOne()查詢會得到:

{ 
    "_id" : 1, 
    "surveyId" : 1111, 
    "userId" : 1, 
    "starttime" : ISODate("2015-05-13T10:46:20.347Z"), 
    "endtime" : ISODate("2015-05-13T10:59:20.347Z"), 
    "survey" : [ 
     { 
      "_id" : 1111, 
      "name" : "name", 
      "questions" : [ 
       { 
        "_id" : 1, 
        "text" : "a,b, or c?", 
        "type" : "multipleChoice", 
        "options" : [ 
         "a", 
         "b", 
         "c" 
        ] 
       }, 
       { 
        "_id" : 2, 
        "text" : "what do you think", 
        "type" : "freeform" 
       } 
      ], 
      "participants" : [ 
       { 
        "_id" : 1, 
        "name" : "user 1" 
       }, 
       { 
        "_id" : 2, 
        "name" : "user 2" 
       } 
      ], 
      "results" : [ 
       { 
        "_id" : 123, 
        "userId" : 1, 
        "questionId" : 1, 
        "answer" : "a" 
       }, 
       { 
        "_id" : 124, 
        "userId" : 2, 
        "questionId" : 1, 
        "answer" : "b" 
       }, 
       { 
        "_id" : 125, 
        "userId" : 1, 
        "questionId" : 2, 
        "answer" : "this is some answer" 
       }, 
       { 
        "_id" : 126, 
        "userId" : 2, 
        "questionId" : 2, 
        "answer" : "this is another answer" 
       } 
      ] 
     } 
    ] 
} 

接着就可以在這個集合上執行聚合。聚合管道應先由四個$unwind運算符階段組成,這些階段從輸入文檔中解構數組,併爲數組的每個元素輸出一個文檔;每個輸出文檔都用一個元素值替換原來的數組。

接下來的$project運算符階段會重塑流中的每個文檔,例如新增一個duration字段,該字段使用算術運算符(Arithmetic Operators)計算starttime與endtime這兩個日期字段之間以分鐘爲單位的時間差。

在此之後是$group管道階段,它按"surveyId"鍵對輸入文檔進行分組,並對每個組應用累加器表達式(accumulator expressions)。該階段會消耗所有輸入文檔,併爲每個不同的組輸出一個文檔。

所以你聚集的管道應該是這樣的:

// Summarise the merged documents per survey and write the result to survey_results.

// $subtract on two dates yields milliseconds; dividing by this gives minutes.
var MS_PER_MINUTE = 1000 * 60;

// Flatten the embedded survey array and each nested array, one document per element.
var unwindStages = [
    "$survey",
    "$survey.questions",
    "$survey.participants",
    "$survey.results"
].map(function (path) {
    return { "$unwind": path };
});

// Keep the fields needed downstream and add the per-tracking duration in minutes.
var projectStage = {
    "$project": {
        "survey": 1,
        "surveyId": 1,
        "userId": 1,
        "starttime": 1,
        "endtime": 1,
        "duration": {
            "$divide": [
                { "$subtract": [ "$endtime", "$starttime" ] },
                MS_PER_MINUTE
            ]
        }
    }
};

// One output document per survey: timing statistics plus the distinct
// questions and results collected from the unwound documents.
var groupStage = {
    "$group": {
        "_id": "$surveyId",
        "survey": { "$first": "$survey.name" },
        "totalAverageTime": { "$avg": "$duration" },
        "fastestTime": { "$min": "$duration" },
        "slowestTime": { "$max": "$duration" },
        "questions": { "$addToSet": "$survey.questions" },
        "answers": { "$addToSet": "$survey.results" }
    }
};

// Persist the aggregation result into its own collection.
var outStage = { "$out": "survey_results" };

db.output_collection.aggregate(
    unwindStages.concat([ projectStage, groupStage, outStage ])
)

db.survey_results.find()輸出

/* 0 */ 
{ 
    "result" : [ 
     { 
      "_id" : 1111, 
      "survey" : "name", 
      "totalAverageTime" : 23.18333333333334, 
      "fastestTime" : 13, 
      "slowestTime" : 33.36666666666667, 
      "questions" : [ 
       { 
        "_id" : 2, 
        "text" : "what do you think", 
        "type" : "freeform" 
       }, 
       { 
        "_id" : 1, 
        "text" : "a,b, or c?", 
        "type" : "multipleChoice", 
        "options" : [ 
         "a", 
         "b", 
         "c" 
        ] 
       } 
      ], 
      "answers" : [ 
       { 
        "_id" : 126, 
        "userId" : 2, 
        "questionId" : 2, 
        "answer" : "this is another answer" 
       }, 
       { 
        "_id" : 124, 
        "userId" : 2, 
        "questionId" : 1, 
        "answer" : "b" 
       }, 
       { 
        "_id" : 125, 
        "userId" : 1, 
        "questionId" : 2, 
        "answer" : "this is some answer" 
       }, 
       { 
        "_id" : 123, 
        "userId" : 1, 
        "questionId" : 1, 
        "answer" : "a" 
       } 
      ] 
     } 
    ], 
    "ok" : 1 
} 

UPDATE

一旦通過$out管道階段把聚合結果輸出到另一個集合(例如survey_results),就可以對find()遊標使用forEach()方法,並運用一些原生JavaScript函數來得到最終的目標結果:

// Post-process each aggregated survey: move every answer under the question it
// belongs to, drop the now-redundant top-level "answers" array, and save the
// reshaped document back into survey_results.
db.survey_results.find().forEach(function (result) {
    result.questions = result.questions.map(function (question) {
        // Answers are matched to a question by their questionId.
        question.answers = result.answers.filter(function (answer) {
            return answer.questionId === question._id;
        });

        // The nesting already records the question, so the id is removed.
        question.answers.forEach(function (answer) {
            delete answer.questionId;
        });

        return question;
    });

    delete result.answers;
    db.survey_results.save(result);
});

輸出

/* 0 */ 
{ 
    "_id" : 1111, 
    "survey" : "name", 
    "totalAverageTime" : 23.18333333333334, 
    "fastestTime" : 13, 
    "slowestTime" : 33.36666666666667, 
    "questions" : [ 
     { 
      "_id" : 2, 
      "text" : "what do you think", 
      "type" : "freeform", 
      "answers" : [ 
       { 
        "_id" : 126, 
        "userId" : 2, 
        "answer" : "this is another answer" 
       }, 
       { 
        "_id" : 125, 
        "userId" : 1, 
        "answer" : "this is some answer" 
       } 
      ] 
     }, 
     { 
      "_id" : 1, 
      "text" : "a,b, or c?", 
      "type" : "multipleChoice", 
      "options" : [ 
       "a", 
       "b", 
       "c" 
      ], 
      "answers" : [ 
       { 
        "_id" : 124, 
        "userId" : 2, 
        "answer" : "b" 
       }, 
       { 
        "_id" : 123, 
        "userId" : 1, 
        "answer" : "a" 
       } 
      ] 
     } 
    ] 
} 
+0

這是一個夢幻般的,我真的很感謝細節。一個問題。如果我想在問題subdoc中提供每個問題的所有答案,那麼在上面的查詢中,這是否可能會在輸出上進行另一個查詢? – jonnie

+1

@jonnie是的,確實可以做到。只是我還沒有足夠的時間來完成能產生準確期望輸出的聚合管道,也就是像你希望的那樣,把答案作爲子文檔放在各個問題之中。一旦有足夠的時間,我會嘗試更新答案。不過最主要的思路是再添加一個`$group`管道階段,並使用`$addToSet`運算符把答案加入數組。 – chridam

+1

@jonnie我已經更新了答案,包括一個額外的步驟,可以引導你到最終的期望的結果。 – chridam