2017-01-24 73 views
2

如何在 R 中使用 H2O 的 ddply?考慮下面的代碼:

library(h2o) 
library(plyr) 

# Initialise the H2O session before any H2O call; the error output further
# down shows the R client sending requests to http://localhost:54321.
h2o.init() 
# Example data: nine whitespace-separated rows with a grouping column `x`
# and a per-group counter `row1`.
#
# The table is parsed through read.table()'s `text` argument rather than an
# explicit textConnection(): read.table() only closes connections it opened
# itself, so a hand-made textConnection would stay open until garbage
# collection ("closing unused connection" warning).
data1x <- read.table(
  text = "x row1
1 1
1 2
1 3
1 4
2 1
2 2
2 3
3 1
4 2",
  header = TRUE
)
# Hand the local data frame to H2O via as.h2o(); the result is what
# h2o.ddply() receives below.
data1xH2O <- as.h2o(data1x) 

# Per-group callback used by both ddply variants below.
# It ignores the group's data frame `df` and always returns the integer
# vector 1:2.
fun <- function(df) {
  1:2
}

# Grouped apply on the H2O frame, grouping by column "x". This is the call
# that the text below reports as failing with "Missing a number".
h2o.ddply(data1xH2O, "x", fun) 

# Same grouping column and callback, run through plyr on the plain data frame
# for comparison.
ddply(data1x, "x", fun) 

H2O 版本的 ddply 給出下面的錯誤。

ERROR: Unexpected HTTP Status code: 400 Bad Request (url = http://localhost:54321/99/Rapids) 

water.rapids.Rapids.IllegalASTException 
[1] "water.rapids.Rapids$IllegalASTException: Missing a number"             
[2] " water.rapids.Rapids.number(Rapids.java:312)"                
[3] " water.rapids.Rapids.parseNumList(Rapids.java:243)"              
[4] " water.rapids.Rapids.parseList(Rapids.java:208)"               
[5] " water.rapids.Rapids.parseNext(Rapids.java:140)"               
[6] " water.rapids.Rapids.parseFunctionDefinition(Rapids.java:193)"            
[7] " water.rapids.Rapids.parseNext(Rapids.java:139)"               
[8] " water.rapids.Rapids.parseFunctionApplication(Rapids.java:158)"           
[9] " water.rapids.Rapids.parseNext(Rapids.java:138)"               
[10] " water.rapids.Rapids.parseFunctionApplication(Rapids.java:158)"           
[11] " water.rapids.Rapids.parseNext(Rapids.java:138)"               
[12] " water.rapids.Rapids.parse(Rapids.java:48)"                
[13] " water.rapids.Rapids.exec(Rapids.java:81)"                 
[14] " water.api.RapidsHandler.exec(RapidsHandler.java:39)"              
[15] " sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)"            
[16] " sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)"       
[17] " sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)"     
[18] " java.lang.reflect.Method.invoke(Method.java:483)"               
[19] " water.api.Handler.handle(Handler.java:61)"                
[20] " water.api.RequestServer.serve(RequestServer.java:412)"             
[21] " water.api.RequestServer.doGeneric(RequestServer.java:263)"            
[22] " water.api.RequestServer.doPost(RequestServer.java:200)"             
[23] " javax.servlet.http.HttpServlet.service(HttpServlet.java:755)"            
[24] " javax.servlet.http.HttpServlet.service(HttpServlet.java:848)"            
[25] " org.eclipse.jetty.servlet.ServletHolder.handle(ServletHolder.java:684)"         
[26] " org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:503)"        
[27] " org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:137)"       
[28] " org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:557)"        
[29] " org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:231)"      
[30] " org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1086)"      
[31] " org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:429)"        
[32] " org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:193)"       
[33] " org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1020)"      
[34] " org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:135)"       
[35] " org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:154)"     
[36] " org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:116)"       
[37] " org.eclipse.jetty.server.Server.handle(Server.java:370)"             
[38] " org.eclipse.jetty.server.AbstractHttpConnection.handleRequest(AbstractHttpConnection.java:494)"   
[39] " org.eclipse.jetty.server.BlockingHttpConnection.handleRequest(BlockingHttpConnection.java:53)"   
[40] " org.eclipse.jetty.server.AbstractHttpConnection.content(AbstractHttpConnection.java:982)"     
[41] " org.eclipse.jetty.server.AbstractHttpConnection$RequestHandler.content(AbstractHttpConnection.java:1043)" 
[42] " org.eclipse.jetty.http.HttpParser.parseNext(HttpParser.java:865)"           
[43] " org.eclipse.jetty.http.HttpParser.parseAvailable(HttpParser.java:240)"         
[44] " org.eclipse.jetty.server.BlockingHttpConnection.handle(BlockingHttpConnection.java:72)"     
[45] " org.eclipse.jetty.server.bio.SocketConnector$ConnectorEndPoint.run(SocketConnector.java:264)"    
[46] " org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:608)"       
[47] " org.eclipse.jetty.util.thread.QueuedThreadPool$3.run(QueuedThreadPool.java:543)"       
[48] " java.lang.Thread.run(Thread.java:745)"                 

Error in .h2o.doSafeREST(h2oRestApiVersion = h2oRestApiVersion, urlSuffix = page, : 

ERROR MESSAGE: 

Missing a number 

而來自 plyr 包的 ddply 則可以正常運作。請給出任何建議。

+0

您的函數似乎不正確,需要修改後再試。我把函數體改成 `sum(df[, 1], na.rm = TRUE) / nrow(df)` 之後,就不再出現錯誤了。 –

回答

1

問題在於,你的函數沒有使用傳入的數據框,而且從函數定義也看不出你的目標是什麼。

fun = function(df) { 1:2 }

如果你查看 h2o.ddply 函數的文檔,你會看到這個函數用於按照一定的條件,把一個函數應用到整個數據集上。

我把你上面的例子修改如下,以便更好地說明:

> data1xH2O 
    x row1 
1 1 1 
2 1 2 
3 1 3 
4 1 4 
5 2 1 
6 2 2 
> fun1 = function(df) { df[,1]} 
> h2o.ddply(data1xH2O, 1:2, fun1) 
    x row1 ddply_C1 
    1 1 1  1 
    2 1 2  1 
    3 1 3  1 
    4 1 4  1 
    5 2 1  2 
    6 2 2  2 

上面的 fun1 從傳入的數據框中提取第一列。同樣,如果我把函數改成如下的 fun2:

> fun2 = function(df) { df[,2]} 
> h2o.ddply(data1xH2O, 1:2, fun2) 
    x row1 ddply_C1 
    1 1 1  1 
    2 1 2  2 
    3 1 3  3 
    4 1 4  4 
    5 2 1  1 
    6 2 2  2