2015-04-29 40 views
0

我是一個初學者,我爲我的管道寫了一段代碼,但我希望在輸出中加入其他信息,例如屏幕名稱(screen name)或推文數量(tweets)。我嘗試把這些字段加在 $group 下面,但每次都會得到語法錯誤。MongoDB 如何在聚合時添加附加信息?

這裏是我的管道:

def make_pipeline():
    """Build the aggregation pipeline that finds the most-followed user.

    The stages, in order:

    1. ``$match``  - keep documents whose ``user.statuses_count`` is greater
       than 99 and whose ``user.time_zone`` is ``"Brasilia"``.
    2. ``$group``  - group by ``user.id`` and record the maximum
       ``user.followers_count`` seen for each user as ``followers``.
    3. ``$sort``   - order the groups by ``followers``, descending.
    4. ``$limit``  - keep only the first group.

    Returns:
        list: The pipeline as a list of stage dictionaries.
    """
    pipeline = [
        {
            "$match": {
                "user.statuses_count": {"$gt": 99},
                "user.time_zone": "Brasilia",
            }
        },
        {
            "$group": {
                "_id": "$user.id",
                "followers": {"$max": "$user.followers_count"},
            }
        },
        {"$sort": {"followers": -1}},
        {"$limit": 1},
    ]
    # Hand the pipeline back to the caller; without this the function
    # would implicitly return None.
    return pipeline

我在下面這個示例文檔上使用它:

{ 
    "_id" : ObjectId("5304e2e3cc9e684aa98bef97"), 
    "text" : "First week of school is over :P", 
    "in_reply_to_status_id" : null, 
    "retweet_count" : null, 
    "contributors" : null, 
    "created_at" : "Thu Sep 02 18:11:25 +0000 2010", 
    "geo" : null, 
    "source" : "web", 
    "coordinates" : null, 
    "in_reply_to_screen_name" : null, 
    "truncated" : false, 
    "entities" : { 
     "user_mentions" : [ ], 
     "urls" : [ ], 
     "hashtags" : [ ] 
    }, 
    "retweeted" : false, 
    "place" : null, 
    "user" : { 
     "friends_count" : 145, 
     "profile_sidebar_fill_color" : "E5507E", 
     "location" : "Ireland :)", 
     "verified" : false, 
     "follow_request_sent" : null, 
     "favourites_count" : 1, 
     "profile_sidebar_border_color" : "CC3366", 
     "profile_image_url" : "http://a1.twimg.com/profile_images/1107778717/phpkHoxzmAM_normal.jpg", 
     "geo_enabled" : false, 
     "created_at" : "Sun May 03 19:51:04 +0000 2009", 
     "description" : "", 
     "time_zone" : null, 
     "url" : null, 
     "screen_name" : "Catherinemull", 
     "notifications" : null, 
     "profile_background_color" : "FF6699", 
     "listed_count" : 77, 
     "lang" : "en", 
     "profile_background_image_url" : "http://a3.twimg.com/profile_background_images/138228501/149174881-8cd806890274b828ed56598091c84e71_4c6fd4d8-full.jpg", 
     "statuses_count" : 2475, 
     "following" : null, 
     "profile_text_color" : "362720", 
     "protected" : false, 
     "show_all_inline_media" : false, 
     "profile_background_tile" : true, 
     "name" : "Catherine Mullane", 
     "contributors_enabled" : false, 
     "profile_link_color" : "B40B43", 
     "followers_count" : 169, 
     "id" : 37486277, 
     "profile_use_background_image" : true, 
     "utc_offset" : null 
    }, 
    "favorited" : false, 
    "in_reply_to_user_id" : null, 
    "id" : NumberLong("22819398300") 
} 

回答

0

使用 $first,並將您的聚合管道查詢改寫如下:

// Return the single most-followed matching user together with extra fields.
db.collectionName.aggregate([
    // Keep only tweets whose author has more than 99 statuses and whose
    // time zone is "Brasilia".
    {
        "$match": {
            "user.statuses_count": { "$gt": 99 },
            "user.time_zone": "Brasilia"
        }
    },
    // Order the tweets by follower count (descending) so that the $first
    // accumulators below take their values from each user's tweet with the
    // highest followers_count.
    { "$sort": { "user.followers_count": -1 } },
    {
        "$group": {
            "_id": "$user.id",
            "followers": { "$first": "$user.followers_count" },
            "screen_name": { "$first": "$user.screen_name" },
            "retweet_count": { "$first": "$retweet_count" }
        }
    },
    // $group emits one document per user and does not order its output, so
    // sort the groups and keep only the top one, as the pipeline in the
    // question does.
    { "$sort": { "followers": -1 } },
    { "$limit": 1 }
])

或者使用 $limit 和 $project,如下所示:

// Find the tweet whose author has the most followers, then reshape it.
db.collectionName.aggregate([
    // Only users with more than 99 statuses in the "Brasilia" time zone.
    { "$match": { "user.statuses_count": { "$gt": 99 }, "user.time_zone": "Brasilia" } },

    // Highest followers_count first ...
    { "$sort": { "user.followers_count": -1 } },

    // ... so the one document kept here is the one with the largest followers_count.
    { "$limit": 1 },

    // Expose only the fields of interest under the desired names.
    {
        "$project": {
            "userId": "$user.id",
            "screen_name": "$user.screen_name",
            "retweet_count": "$retweet_count"
        }
    }
]).pretty()
0

以下聚合管道使用 $$ROOT 系統變量,它引用根文檔,即當前在 $group 聚合管道階段中處理的頂級文檔。該文檔通過 $addToSet 運算符被添加到一個數組中。在隨後的管道階段中,您可以對該數組執行 $unwind,再通過 $project 運算符取出所需字段,從而調整輸出文檔的形式:

// Carry the full source documents through $group so that extra fields can be
// projected afterwards.
db.tweet.aggregate([
    // Users with at least 100 statuses in the "Brasilia" time zone.
    { "$match": { "user.statuses_count": { "$gte": 100 }, "user.time_zone": "Brasilia" } },

    // One group per user: remember the largest followers_count and collect
    // the distinct root documents ($$ROOT) of that user into "data".
    {
        "$group": {
            "_id": "$user.id",
            "max_followers": { "$max": "$user.followers_count" },
            "data": { "$addToSet": "$$ROOT" }
        }
    },

    // Emit one document per collected root document.
    { "$unwind": "$data" },

    // Pull the wanted fields out of the collected document.
    {
        "$project": {
            "_id": "$data._id",
            "followers": "$max_followers",
            "screen_name": "$data.user.screen_name",
            "tweets": "$data.user.statuses_count"
        }
    },

    // Largest follower count first; keep a single result.
    { "$sort": { "followers": -1 } },
    { "$limit": 1 }
])

下面的管道也能達到同樣的效果,但不使用 $group 運算符:

# Aggregation pipeline that avoids $group: filter, rename the fields of
# interest, then keep the document with the largest follower count.
pipeline = [
    # Users with at least 100 statuses in the "Brasilia" time zone.
    {"$match": {"user.statuses_count": {"$gte": 100},
                "user.time_zone": "Brasilia"}},
    # Expose the nested user fields under short top-level names.
    {"$project": {"followers": "$user.followers_count",
                  "screen_name": "$user.screen_name",
                  "tweets": "$user.statuses_count"}},
    # Largest follower count first ...
    {"$sort": {"followers": -1}},
    # ... and keep only that one document.
    {"$limit": 1},
]

Pymongo輸出

{u'ok': 1.0, 
u'result': [{u'_id': ObjectId('5304e2d34149692bc5172729'), 
       u'followers': 17209, 
       u'screen_name': u'AndreHenning', 
       u'tweets': 8219}]} 
相關問題