2011-08-10 35 views
0

我遇到了一個關於 MongoDB map reduce 的問題:我的 map reduce 可以在 mongo shell 中成功執行,但是當我嘗試用 pymongo 執行時,卻出現下面的錯誤。(標題:map reduce 使用 pymongo 失敗,但在 mongo shell 中成功)

以下是我的集合(collection)中文檔的 JSON 格式樣本:

{ "_id" : ObjectId("4e41661ecacbd10e00012600"), 
    "timestamp" : "20110809", 
    "variants" : { "407" : { "number_of_ad_clicks" : 101, 
     "number_of_search_keywords" : 20, 
     "total_duration" : 4, 
     "os" : { "os_2" : 2, 
     "os_1" : 1, 
     "os_0" : 0 }, 
     "countries" : { "ge" : 2, 
     "ca" : 7, 
     "fr" : 2, 
     "uk" : 5, 
     "us" : 2 }, 
     "screen_resolutions" : { "(320, 240)" : 5, 
     "(640, 480)" : 7, 
     "(1024, 960)" : 9, 
     "(1280, 768)" : 6 }, 
     "widgets" : { "widget_1" : 1, 
     "widget_0" : 0 }, 
     "languages" : { "ua_uk" : 8, 
     "ca_en" : 6, 
     "ca_fr" : 8, 
     "us_en" : 1 }, 
     "search_keywords" : { "search_keyword_15" : 15, 
     "search_keyword_14" : 14, 
     "search_keyword_17" : 17, 
     "search_keyword_16" : 16, 
     "search_keyword_11" : 11, 
     "search_keyword_10" : 10, 
     "search_keyword_13" : 13, 
     "search_keyword_12" : 12, 
     "search_keyword_19" : 19, 
     "search_keyword_18" : 18, 
     "search_keyword_9" : 9, 
     "search_keyword_8" : 8, 
     "search_keyword_5" : 5, 
     "search_keyword_4" : 4, 
     "search_keyword_7" : 7, 
     "search_keyword_6" : 6, 
     "search_keyword_1" : 1, 
     "search_keyword_3" : 3, 
     "search_keyword_2" : 2 }, 
     "number_of_pageviews" : 38, 
     "browsers" : { "browser_4" : 4, 
     "browser_0" : 0, 
     "browser_1" : 1, 
     "browser_2" : 2, 
     "browser_3" : 3 }, 
     "keywords" : { "keyword_5" : 5, 
     "keyword_4" : 4, 
     "keyword_1" : 1, 
     "keyword_0" : 0, 
     "keyword_3" : 3, 
     "keyword_2" : 2 }, 
     "number_of_keyword_clicks" : 205, 
     "number_of_visits" : 91 }, 
    "306" : { "number_of_ad_clicks" : 29, 
     "number_of_search_keywords" : 4, 
     "total_duration" : 4, 
     "os" : { "os_2" : 2, 
     "os_1" : 1, 
     "os_0" : 0 }, 
     "countries" : { "ge" : 7, 
     "ca" : 7, 
     "fr" : 6, 
     "uk" : 1, 
     "us" : 3 }, 
     "screen_resolutions" : { "(320, 240)" : 2, 
     "(640, 480)" : 1, 
     "(1024, 960)" : 9, 
     "(1280, 768)" : 5 }, 
     "widgets" : { "widget_1" : 1, 
     "widget_0" : 0 }, 
     "languages" : { "ua_uk" : 2, 
     "ca_en" : 8, 
     "ca_fr" : 5, 
     "us_en" : 4 }, 
     "search_keywords" : { "search_keyword_1" : 1, 
     "search_keyword_3" : 3, 
     "search_keyword_2" : 2 }, 
     "number_of_pageviews" : 35, 
     "browsers" : { "browser_4" : 4, 
     "browser_0" : 0, 
     "browser_1" : 1, 
     "browser_2" : 2, 
     "browser_3" : 3 }, 
     "keywords" : { "keyword_5" : 5, 
     "keyword_4" : 4, 
     "keyword_1" : 1, 
     "keyword_0" : 0, 
     "keyword_3" : 3, 
     "keyword_2" : 2 }, 
     "number_of_keyword_clicks" : 18, 
     "number_of_visits" : 57 }, 
    "408" : { "number_of_ad_clicks" : 180, 
     "number_of_search_keywords" : 41, 
     "total_duration" : 7, 
     "os" : { "os_2" : 2, 
     "os_1" : 1, 
     "os_0" : 0 }, 
     "countries" : { "ge" : 3, 
     "ca" : 6, 
     "fr" : 3, 
     "uk" : 9, 
     "us" : 9 }, 
     "screen_resolutions" : { "(320, 240)" : 9, 
     "(640, 480)" : 9, 
     "(1024, 960)" : 5, 
     "(1280, 768)" : 10 }, 
     "widgets" : { "widget_1" : 1, 
     "widget_0" : 0 }, 
     "languages" : { "ua_uk" : 3, 
     "ca_en" : 2, 
     "ca_fr" : 10, 
     "us_en" : 7 }, 
     "search_keywords" : { "search_keyword_37" : 37, 
     "search_keyword_36" : 36, 
     "search_keyword_28" : 28, 
     "search_keyword_29" : 29, 
     "search_keyword_24" : 24, 
     "search_keyword_25" : 25, 
     "search_keyword_26" : 26, 
     "search_keyword_27" : 27, 
     "search_keyword_20" : 20, 
     "search_keyword_21" : 21, 
     "search_keyword_22" : 22, 
     "search_keyword_23" : 23, 
     "search_keyword_39" : 39, 
     "search_keyword_38" : 38, 
     "search_keyword_40" : 40, 
     "search_keyword_15" : 15, 
     "search_keyword_14" : 14, 
     "search_keyword_17" : 17, 
     "search_keyword_16" : 16, 
     "search_keyword_11" : 11, 
     "search_keyword_10" : 10, 
     "search_keyword_13" : 13, 
     "search_keyword_12" : 12, 
     "search_keyword_33" : 33, 
     "search_keyword_32" : 32, 
     "search_keyword_31" : 31, 
     "search_keyword_30" : 30, 
     "search_keyword_19" : 19, 
     "search_keyword_18" : 18, 
     "search_keyword_35" : 35, 
     "search_keyword_34" : 34, 
     "search_keyword_9" : 9, 
     "search_keyword_8" : 8, 
     "search_keyword_5" : 5, 
     "search_keyword_4" : 4, 
     "search_keyword_7" : 7, 
     "search_keyword_6" : 6, 
     "search_keyword_1" : 1, 
     "search_keyword_3" : 3, 
     "search_keyword_2" : 2 }, 
     "number_of_pageviews" : 25, 
     "browsers" : { "browser_4" : 4, 
     "browser_0" : 0, 
     "browser_1" : 1, 
     "browser_2" : 2, 
     "browser_3" : 3 }, 
     "keywords" : { "keyword_5" : 5, 
     "keyword_4" : 4, 
     "keyword_1" : 1, 
     "keyword_0" : 0, 
     "keyword_3" : 3, 
     "keyword_2" : 2 }, 
     "number_of_keyword_clicks" : 15, 
     "number_of_visits" : 19 } }, 
    "site_name" : "radiotiempo.com", 
    "number_of_variants" : 3 } 

這是我的 map reduce:

// Map step for the mapReduce call: runs once per matching document,
// with `this` bound to the document being processed.
map = function(){ 
    // Every document is emitted under the same constant key (1), so all
    // emitted values fall into a single group. The value wraps the
    // document's "variants" sub-document as {variants: ...}.
    emit(1, {variants:this.variants}); 
} 

// Reduce step: sums number_of_ad_clicks over the requested variant ids
// for all values emitted under `key`.
//
// key  - the emitted key (unused in the body).
// vals - array of emitted values, each expected to look like {variants: {...}}.
//
// `variant_ids` is not declared in this function; the mapReduce call in
// this file supplies it through the "scope" option.
//
// NOTE(review): the returned object ({clicks: n}) does not have the same
// shape as the emitted values ({variants: ...}); if reduce is ever fed its
// own output, `vals[j].variants` will be undefined for that entry.
reduce = function(key, vals) { 
    var returnValue = { 
     clicks: 0, 
    }; 
    for(var j = 0 ; j < vals.length; j++){ 
     for(var i = 0 ; i < variant_ids.length; i++){ 
      // A value that lacks the requested variant id makes the property
      // access throw; the empty catch silently skips that entry.
      try{ 
      returnValue.clicks += vals[j].variants[variant_ids[i]].number_of_ad_clicks; 
      }catch(err) 
      {} 
     } 
    } 
    return returnValue; 
} 


// Shell-side replacement for emit that only prints the key and the
// JSON form of the value instead of collecting them.
// NOTE(review): presumably a debugging aid for calling map by hand in the
// shell; confirm it is not meant to be active during the real mapReduce run.
function emit(k, v) { 
    print("emit"); 
    print(" k:" + k + " v:" + tojson(v)); 
} 

// Run the map-reduce from the mongo shell. The options object carries:
//   scope - exposes variant_ids as a global to the JS functions,
//   out   - name of the output collection,
//   query - filter on site_name and the timestamp string range.
// The second statement lists the contents of the output collection.
res = db.variant_daily_collection.mapReduce(map, reduce, {"scope": {'variant_ids': ['4519','4518']}, "out" : "myoutput", "query":{"site_name": {'$in':['julie2.com','julie3.com']}, 'timestamp': {'$gte':'20110601','$lte':'20110603'}}}) 
db.myoutput.find() 

以下是我使用 pymongo 函式庫的 Python 代碼。

# JavaScript map function, wrapped for pymongo. Each document is emitted
# under the constant key 1 with its "variants" sub-document as the value.
# NOTE(review): `Code` is presumably bson.code.Code -- the import is not
# shown in this snippet. The names `map` and `reduce` shadow Python
# builtins but are kept because the map_reduce call below uses them.
map = Code("function() {"
           " emit(1, {variants:this.variants});"
           "}")

# JavaScript reduce function: sums number_of_ad_clicks for every id in
# `variant_ids`, which the JS body expects to find as a global (it is not
# declared inside the function). There is no try/catch around the property
# access, so a value that lacks one of the ids makes
# `values[i].variants[variant_ids[j]]` undefined and the access throws.
reduce = Code("function (key, values) {"
              " var result = {"
              "  clicks: 0"
              " };"
              " for (var i = 0; i < values.length; i++) {"
              "  for(var j = 0 ; j < variant_ids.length; j++){"
              "   result.clicks += values[i].variants[variant_ids[j]].number_of_ad_clicks;"
              "  }"
              " }"
              " return result;"
              "}")

以下是 pymongo 的執行結果與錯誤回溯:

In [103]: db.variant_daily_collection.map_reduce(map, reduce, out = 'output',full_response = True, fields = {"scope": {'variant_ids': ['398']}, "query":{"site_name": 'routeplanner.net', 'timestamp': '20110809'}})
---------------------------------------------------------------------------
OperationFailure       Traceback (most recent call last)
/workspace/construction/<ipython console> in <module>()

/workspace/construction/.ve/lib/python2.7/site-packages/pymongo-2.0-py2.7-macosx-10.7-intel.egg/pymongo/collection.py in map_reduce(self, map, reduce, out, merge_output, reduce_output, full_response, **kwargs) 
    1031   response = self.__database.command("mapreduce", self.__name, 
    1032           map=map, reduce=reduce, 
-> 1033           out=out_conf, **kwargs) 
    1034 
    1035   if full_response or not response.get('result'): 

/workspace/construction/.ve/lib/python2.7/site-packages/pymongo-2.0-py2.7-macosx-10.7-intel.egg/pymongo/database.py in command(self, command, value, check, allowable_errors, **kwargs) 
    338    msg = "command %r failed: %%s" % command 
    339    helpers._check_command_response(result, self.connection.disconnect, 
--> 340            msg, allowable_errors) 
    341 
    342   return result 

/workspace/construction/.ve/lib/python2.7/site-packages/pymongo-2.0-py2.7-macosx-10.7-intel.egg/pymongo/helpers.py in _check_command_response(response, reset, msg, allowable_errors) 
    123      ex_msg += (", assertionCode: %d" % 
    124        (response["assertionCode"],)) 
--> 125     raise OperationFailure(ex_msg, response.get("assertionCode")) 
    126    raise OperationFailure(msg % response["errmsg"]) 
    127 

OperationFailure: db assertion failure, assertion: 'invoke failed: JS Error: TypeError: values[i].variants[variant_ids[j]] has no properties nofile_b:0', assertionCode: 9004 

In [104]: 
+0

爲什麼你在 JS 和 Python 中使用了不同的 map 和 reduce 代碼?乍看之下它們很相似,但要確保兩者行爲完全相同,最好的辦法就是讓代碼完全相同。 – dcrosta

回答

0

除了我在上面評論中指出的之外,我還看到這裏有幾個潛在的問題:

  1. 你在 shell 例子中覆蓋了 emit,這意味着你的 reduce 函數實際上不會運行(沒有 emit 任何東西,就沒有東西可以 reduce)
  2. 你 emit 的值與 reduce 函數返回的值格式不同。reduce 的返回值必須與 emit 的第二個參數採用相同的格式,因爲 reduce 可能被多次調用,其輸入可能包含它自己先前的結果(見 the Map-Reduce docs)
  3. 你總是以鍵 1 進行 emit,這是否構成問題取決於你打算讓它如何工作
  4. 如上所述,你的兩份代碼至少在所用的循環變量上有所不同;我沒有逐行比較,所以不確定是否還有其他差別,但最好的做法是把你的 JavaScript 函數定義直接複製到 pymongo 的 Code 中

此外,如果你能展示你是如何調用 map_reduce 的,以及你的 map 和 reduce 函數,會更有幫助。

3

問題是你正在將'scope'作爲'fields'傳遞。

這樣可以正常工作:

# Working invocation: `query` and `scope` are passed as separate keyword
# arguments to map_reduce rather than being nested inside a `fields` dict.
db.variant_daily_collection.map_reduce(m, r, out="myoutput", query={"site_name": {'$in':['julie2.com','julie3.com','radiotiempo.com']}, 'timestamp': {'$gte':'20110601','$lte':'20110809'}}, scope={'variant_ids': ['4519','4518']}) 

其中 m 是你上面的 map 函數,r 是你上面的 reduce 函數。我只是把你的示例文檔插入了三次來進行測試。