我有以下兩個模式（schema），需要依兩個字段做內部聯接（INNER JOIN）：

// User account schema. `email` and `name` are declared with identical
// options: trimmed strings carrying a unique, sparse index.
var User = mongoose.Schema({
  email: {
    type: String,
    trim: true,
    index: true,
    unique: true,
    sparse: true
  },
  password: String,
  name: {
    type: String,
    trim: true,
    index: true,
    unique: true,
    sparse: true
  },
  gender: String
});

// Song schema. Reference fields store ObjectIds; `ref` names the target
// model ('track' or 'user'). `views`, `likes` and `collaborators` are arrays
// of user references, while `author` is a single user reference.
var Song = Schema({
  // Track can be deleted
  track: {
    type: Schema.Types.ObjectId,
    ref: 'track'
  },
  author: {
    type: Schema.Types.ObjectId,
    ref: 'user'
  },
  url: String,
  title: String,
  photo: String,
  publishDate: Date,
  views: [
    { type: Schema.Types.ObjectId, ref: 'user' }
  ],
  likes: [
    { type: Schema.Types.ObjectId, ref: 'user' }
  ],
  collaborators: [
    { type: Schema.Types.ObjectId, ref: 'user' }
  ]
});

我想查詢所有用戶（不含密碼欄位），並且希望每個用戶都附帶他作爲作者或合作者之一、且在過去兩週內發佈的所有歌曲。

執行此操作（user.id與song.collaborators之間的關聯）的最佳策略是什麼？可以在單一查詢中完成嗎？

來源

2017-10-21 Tal Humy

這完全可以在單一請求中完成，而在MongoDB中實現它的基本工具就是$lookup。

我認爲從Song集合發起查詢其實更合理，因爲您的條件是用戶必須出現在該集合的兩個屬性之一中。

最優的INNER JOIN（反向查詢）

假定實際的「模型」名稱與上面列出的相同：

// Time window shared by the queries below: "now" minus 14 days.
// Date.now() is a static function returning epoch milliseconds. It is not a
// constructor, so `new Date.now()` throws a TypeError.
var today = Date.now();
var oneDay = 1000 * 60 * 60 * 24; // milliseconds in one day
var twoWeeksAgo = new Date(today - (oneDay * 14));

// Ids of the users to report on. Should be assigned as an `Array`, even if
// only one.
// NOTE(review): Mongoose does not cast aggregation pipeline values, so these
// presumably need to be ObjectId instances rather than strings. Confirm
// against the caller.
var userIds;

// INNER JOIN driven from the Song collection: find recent songs that involve
// any of the requested users, then regroup the result so that each user
// carries an array of their songs. Only email/name/gender are copied from the
// user document, so `password` never reaches the output.
Song.aggregate([
  // Recent songs where a requested user is the author or a collaborator.
  // The date field is `publishDate`, as declared in the Song schema; matching
  // on any other name would never select a document.
  { "$match": {
    "$or": [
      { "author": { "$in": userIds } },
      { "collaborators": { "$in": userIds } }
    ],
    "publishDate": { "$gt": twoWeeksAgo }
  }},
  // Per song: the requested users that appear as author or collaborator.
  // $setUnion de-duplicates, so an author who is also a collaborator is
  // listed once.
  { "$addFields": {
    "users": {
      "$setIntersection": [
        userIds,
        { "$setUnion": [ ["$author"], "$collaborators" ] }
      ]
    }
  }},
  // Replace the user ids with the matching user documents.
  { "$lookup": {
    "from": User.collection.name,
    "localField": "users",
    "foreignField": "_id",
    "as": "users"
  }},
  // One output row per (song, user) pair.
  { "$unwind": "$users" },
  // Pivot: one document per user, collecting that user's songs.
  { "$group": {
    "_id": "$users._id",
    "email": { "$first": "$users.email" },
    "name": { "$first": "$users.name" },
    "gender": { "$first": "$users.gender" },
    "songs": {
      "$push": {
        "_id": "$_id",
        "track": "$track",
        "author": "$author",
        "url": "$url",
        "title": "$title",
        "photo": "$photo",
        "publishDate": "$publishDate",
        "views": "$views",
        "likes": "$likes",
        "collaborators": "$collaborators"
      }
    }
  }}
]);

在我看來，只要您想從結果中得到的是「INNER JOIN」，這就是最合乎邏輯的做法；也就是說，「所有用戶都必須在至少一首歌曲中被提及」，即出現在所涉及的兩個屬性之一中。

$setUnion將這兩個字段合併爲一個「唯一列表」（ObjectId本來就是唯一的）。因此，如果某位「作者」同時也是「合作者」，他在該歌曲中只會被列出一次。

$setIntersection將該合併後的列表「過濾」爲僅包含查詢條件中指定的用戶，從而移除所有不在選擇範圍內的其他「合作者」條目。

$lookup對合併後的數據進行「聯接」以取得用戶資料，接着執行$unwind，因爲您希望以User作爲主要明細。這樣，我們基本上就把每首歌曲的「用戶數組」反轉爲結果中每個用戶的「歌曲數組」。

此外，由於主要標準來自Song，因此從該集合中查詢作爲方向是有意義的。

可選LEFT JOIN

另一種做法適用於需要「LEFT JOIN」的情況，也就是返回「所有用戶」，無論他們是否有任何相關的歌曲：

// LEFT JOIN driven from the User collection: every user is returned, with a
// `songs` array that may be empty. Two $lookup stages are used, one per Song
// field that can reference the user.
User.aggregate([
  // Songs where this user is the author.
  {
    "$lookup": {
      "from": Song.collection.name,
      "localField": "_id",
      "foreignField": "author",
      "as": "authors"
    }
  },
  // Songs where this user is listed among the collaborators.
  {
    "$lookup": {
      "from": Song.collection.name,
      "localField": "_id",
      "foreignField": "collaborators",
      "as": "collaborators"
    }
  },
  // Keep only the listed user fields and merge both song lists into one set.
  {
    "$project": {
      "email": 1,
      "name": 1,
      "gender": 1,
      "songs": {
        "$setUnion": [ "$authors", "$collaborators" ]
      }
    }
  }
]);

因此，這段語句「看起來」更短，但爲了取得可能的「作者」和「合作者」兩種結果，它必須使用「兩個」$lookup階段而不是一個。所以實際的「聯接」操作在執行時間上可能代價較高。

其餘的很簡單，應用相同的$setUnion，但這次是「結果數組」，而不是數據的原始來源。

如果您希望對「歌曲」（而不是實際返回的User文檔）套用與上面類似的「查詢」條件和「過濾」，那麼在LEFT JOIN的情況下，您實際上要在$lookup「之後」對數組內容進行$filter：

// LEFT JOIN with post-$lookup filtering: every user is returned, but the
// `songs` array only keeps songs that pass the userIds and date conditions.
User.aggregate([
  // Songs where this user is the author.
  { "$lookup": {
    "from": Song.collection.name,
    "localField": "_id",
    "foreignField": "author",
    "as": "authors"
  }},
  // Songs where this user is listed among the collaborators.
  { "$lookup": {
    "from": Song.collection.name,
    "localField": "_id",
    "foreignField": "collaborators",
    "as": "collaborators"
  }},
  { "$project": {
    "email": 1,
    "name": 1,
    "gender": 1,
    "songs": {
      "$filter": {
        "input": { "$setUnion": [ "$authors", "$collaborators" ] },
        "as": "s",
        "cond": {
          "$and": [
            // NOTE(review): $setIsSubset is true only when EVERY id in
            // userIds appears among the song's author/collaborators. If
            // "any of userIds" is intended (as with the $in conditions used
            // elsewhere), a non-empty $setIntersection would be the
            // equivalent test. Confirm the intended semantics.
            { "$setIsSubset": [
              userIds,
              { "$setUnion": [ ["$$s.author"], "$$s.collaborators" ] }
            ]},
            // Same two-week window as the other queries; the field is
            // `publishDate`, as declared in the Song schema.
            { "$gte": [ "$$s.publishDate", twoWeeksAgo ] }
          ]
        }
      }
    }
  }}
]);

這意味着在LEFT JOIN的條件下，會返回所有User文檔，但只有那些符合「過濾」條件（即屬於所提供的userIds）的文檔才會包含「歌曲」。即使是列表中的用戶，也只會顯示發佈日期落在所需範圍內的「歌曲」。

$filter中主要新增的是$setIsSubset運算符，它是將userIds中提供的列表與文檔中兩個字段的「合併」列表進行比較的簡便方法。請注意，由於前面每個$lookup的條件，「當前用戶」必然已經與這些歌曲「相關」。

MongoDB的3.6預覽

從MongoDB 3.6版本起，$lookup提供了新的「子流水線」語法。這意味着不必像上面的LEFT JOIN變體那樣使用「兩個」$lookup階段，而是可以將其組織爲一個「子流水線」，並且能在返回結果之前就以最佳方式過濾內容：

// MongoDB 3.6+ variant: a single $lookup with a sub-pipeline replaces the two
// $lookup stages of the LEFT JOIN version, and filters the songs before they
// are returned. Users without matching songs get an empty `songs` array.
User.aggregate([
  { "$lookup": {
    "from": Song.collection.name,
    // Expose the current user's _id to the sub-pipeline as $$user.
    "let": {
      "user": "$_id"
    },
    "pipeline": [
      { "$match": {
        // Song involves at least one of the requested users...
        "$or": [
          { "author": { "$in": userIds } },
          { "collaborators": { "$in": userIds } }
        ],
        // ...was published within the window (field name as declared in
        // the Song schema)...
        "publishDate": { "$gt": twoWeeksAgo },
        // ...and belongs to the current user, as author or collaborator.
        "$expr": {
          "$or": [
            { "$eq": [ "$$user", "$author" ] },
            { "$setIsSubset": [ ["$$user"], "$collaborators" ] }
          ]
        }
      }}
    ],
    "as": "songs"
  }},
  // The requirement is to return users without their password value; unlike
  // the other variants, nothing here projects fields, so exclude it explicitly.
  { "$project": { "password": 0 } }
]);

在這種情況下，這樣就足夠了：$expr允許將"let"中聲明的$$user變量與歌曲集合中的每個條目進行比較，從而在其他查詢條件之外，只選出相匹配的條目。結果就是每個用戶只得到匹配的歌曲，或者一個空數組。因此整個「子流水線」只是一個$match表達式，這基本上相當於用附加邏輯取代了固定的本地鍵和外鍵。

所以您甚至可以在$lookup之後的流水線中再添加一個階段，過濾掉任何「空」數組結果，從而使整個結果成爲INNER JOIN。

因此，我個人會在可行時採用第一種方法，只有在確實需要時才使用第二種方法。

注：這裏有幾個選項其實並不那麼適用。第一個是$lookup + $unwind + $match合併（coalescence）這一特例：它雖然適用於最初的INNER JOIN示例，卻無法用於LEFT JOIN的情況。

這是因爲，要得到LEFT JOIN，$unwind就必須搭配preserveNullAndEmptyArrays: true使用，而這會破壞該優化的適用規則：展開（unwinding）與匹配（matching）無法被「合併」進$lookup之中，也就無法在「返回結果之前」應用於外部集合。

這就是爲什麼示例中沒有採用它，而是改爲對返回的數組使用$filter：因爲沒有任何最佳化操作可以在返回結果「之前」應用於外部集合，也沒有任何機制能阻止僅憑外鍵匹配的所有歌曲結果被返回。INNER JOIN的情況當然不同。另一種情況是Mongoose的.populate()。最重要的區別在於.populate()並不是單一請求，而只是「實際發出多個查詢」的程式設計簡寫。所以無論如何，實際上都會發出多個查詢，並且總是需要取得所有結果之後才能套用任何過濾。

這使得過濾實際能夠套用的位置受到限制，通常也意味着：當您使用需要對外部集合套用條件的「客戶端聯接」時，無法真正實現「分頁」的概念。

更多細節請參閱「Querying after populate in Mongoose」，其中還實際演示瞭如何將這一基本功能作爲自定義方法接入Mongoose的schema，而底層實際使用的是$lookup流水線處理。

來源

2017-10-21 10:45:31

非常感謝，非常感謝。 –

內部聯接兩個字段

回答

最優內部加入反

可選LEFT JOIN

MongoDB的3.6預覽

相關問題