
Apache Beam - org.apache.beam.sdk.util.UserCodeException: java.sql.SQLException: Cannot create PoolableConnectionFactory (Method not supported)

I am trying to use Apache Beam on Dataflow to connect to a Hive instance installed on a cloud compute instance. When I run the pipeline, I get the exception below; it happens whenever I access the database through Apache Beam. I have seen many similar questions, but none of them involve Apache Beam or Google Dataflow.

(c9ec8fdbe9d1719a): java.lang.RuntimeException: org.apache.beam.sdk.util.UserCodeException: java.sql.SQLException: Cannot create PoolableConnectionFactory (Method not supported) 
at com.google.cloud.dataflow.worker.runners.worker.MapTaskExecutorFactory$3.typedApply(MapTaskExecutorFactory.java:289) 
at com.google.cloud.dataflow.worker.runners.worker.MapTaskExecutorFactory$3.typedApply(MapTaskExecutorFactory.java:261) 
at com.google.cloud.dataflow.worker.graph.Networks$TypeSafeNodeFunction.apply(Networks.java:55) 
at com.google.cloud.dataflow.worker.graph.Networks$TypeSafeNodeFunction.apply(Networks.java:43) 
at com.google.cloud.dataflow.worker.graph.Networks.replaceDirectedNetworkNodes(Networks.java:78) 
at com.google.cloud.dataflow.worker.runners.worker.MapTaskExecutorFactory.create(MapTaskExecutorFactory.java:152) 
at com.google.cloud.dataflow.worker.runners.worker.DataflowWorker.doWork(DataflowWorker.java:272) 
at com.google.cloud.dataflow.worker.runners.worker.DataflowWorker.getAndPerformWork(DataflowWorker.java:244) 
at com.google.cloud.dataflow.worker.runners.worker.DataflowBatchWorkerHarness$WorkerThread.doWork(DataflowBatchWorkerHarness.java:125) 
at com.google.cloud.dataflow.worker.runners.worker.DataflowBatchWorkerHarness$WorkerThread.call(DataflowBatchWorkerHarness.java:105) 
at com.google.cloud.dataflow.worker.runners.worker.DataflowBatchWorkerHarness$WorkerThread.call(DataflowBatchWorkerHarness.java:92) 
at java.util.concurrent.FutureTask.run(FutureTask.java:266) 
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) 
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) 
at java.lang.Thread.run(Thread.java:745) 

Caused by: org.apache.beam.sdk.util.UserCodeException: java.sql.SQLException: Cannot create PoolableConnectionFactory (Method not supported) 
at org.apache.beam.sdk.util.UserCodeException.wrap(UserCodeException.java:36) 
at org.apache.beam.sdk.io.jdbc.JdbcIO$Read$ReadFn$auxiliary$8CR0LcYI.invokeSetup(Unknown Source) 
at com.google.cloud.dataflow.worker.runners.worker.DoFnInstanceManagers$ConcurrentQueueInstanceManager.deserializeCopy(DoFnInstanceManagers.java:65) 
at com.google.cloud.dataflow.worker.runners.worker.DoFnInstanceManagers$ConcurrentQueueInstanceManager.peek(DoFnInstanceManagers.java:47) 
at com.google.cloud.dataflow.worker.runners.worker.UserParDoFnFactory.create(UserParDoFnFactory.java:100) 
at com.google.cloud.dataflow.worker.runners.worker.DefaultParDoFnFactory.create(DefaultParDoFnFactory.java:70) 
at com.google.cloud.dataflow.worker.runners.worker.MapTaskExecutorFactory.createParDoOperation(MapTaskExecutorFactory.java:365) 
at com.google.cloud.dataflow.worker.runners.worker.MapTaskExecutorFactory$3.typedApply(MapTaskExecutorFactory.java:278) 
... 14 more 
Caused by: java.sql.SQLException: Cannot create PoolableConnectionFactory (Method not supported) 
at org.apache.commons.dbcp2.BasicDataSource.createPoolableConnectionFactory(BasicDataSource.java:2294) 
at org.apache.commons.dbcp2.BasicDataSource.createDataSource(BasicDataSource.java:2039) 
at org.apache.commons.dbcp2.BasicDataSource.getConnection(BasicDataSource.java:1533) 
at org.apache.beam.sdk.io.jdbc.JdbcIO$Read$ReadFn.setup(JdbcIO.java:377) 
Caused by: java.sql.SQLException: Method not supported 
at org.apache.hive.jdbc.HiveConnection.isValid(HiveConnection.java:898) 
at org.apache.commons.dbcp2.DelegatingConnection.isValid(DelegatingConnection.java:918) 
at org.apache.commons.dbcp2.PoolableConnection.validate(PoolableConnection.java:283) 
at org.apache.commons.dbcp2.PoolableConnectionFactory.validateConnection(PoolableConnectionFactory.java:357) 
at org.apache.commons.dbcp2.BasicDataSource.validateConnectionFactory(BasicDataSource.java:2307) 
at org.apache.commons.dbcp2.BasicDataSource.createPoolableConnectionFactory(BasicDataSource.java:2290) 
at org.apache.commons.dbcp2.BasicDataSource.createDataSource(BasicDataSource.java:2039) 
at org.apache.commons.dbcp2.BasicDataSource.getConnection(BasicDataSource.java:1533) 
at org.apache.beam.sdk.io.jdbc.JdbcIO$Read$ReadFn.setup(JdbcIO.java:377) 
at org.apache.beam.sdk.io.jdbc.JdbcIO$Read$ReadFn$auxiliary$8CR0LcYI.invokeSetup(Unknown Source) 
at com.google.cloud.dataflow.worker.runners.worker.DoFnInstanceManagers$ConcurrentQueueInstanceManager.deserializeCopy(DoFnInstanceManagers.java:65) 
at com.google.cloud.dataflow.worker.runners.worker.DoFnInstanceManagers$ConcurrentQueueInstanceManager.peek(DoFnInstanceManagers.java:47) 
at com.google.cloud.dataflow.worker.runners.worker.UserParDoFnFactory.create(UserParDoFnFactory.java:100) 
at com.google.cloud.dataflow.worker.runners.worker.DefaultParDoFnFactory.create(DefaultParDoFnFactory.java:70) 
at com.google.cloud.dataflow.worker.runners.worker.MapTaskExecutorFactory.createParDoOperation(MapTaskExecutorFactory.java:365) 
at com.google.cloud.dataflow.worker.runners.worker.MapTaskExecutorFactory$3.typedApply(MapTaskExecutorFactory.java:278) 
at com.google.cloud.dataflow.worker.runners.worker.MapTaskExecutorFactory$3.typedApply(MapTaskExecutorFactory.java:261) 
at com.google.cloud.dataflow.worker.graph.Networks$TypeSafeNodeFunction.apply(Networks.java:55) 
at com.google.cloud.dataflow.worker.graph.Networks$TypeSafeNodeFunction.apply(Networks.java:43) 
at com.google.cloud.dataflow.worker.graph.Networks.replaceDirectedNetworkNodes(Networks.java:78) 
at com.google.cloud.dataflow.worker.runners.worker.MapTaskExecutorFactory.create(MapTaskExecutorFactory.java:152) 
at com.google.cloud.dataflow.worker.runners.worker.DataflowWorker.doWork(DataflowWorker.java:272) 
at com.google.cloud.dataflow.worker.runners.worker.DataflowWorker.getAndPerformWork(DataflowWorker.java:244) 
at com.google.cloud.dataflow.worker.runners.worker.DataflowBatchWorkerHarness$WorkerThread.doWork(DataflowBatchWorkerHarness.java:125) 
at com.google.cloud.dataflow.worker.runners.worker.DataflowBatchWorkerHarness$WorkerThread.call(DataflowBatchWorkerHarness.java:105) 
at com.google.cloud.dataflow.worker.runners.worker.DataflowBatchWorkerHarness$WorkerThread.call(DataflowBatchWorkerHarness.java:92) 
at java.util.concurrent.FutureTask.run(FutureTask.java:266) 
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) 
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) 
at java.lang.Thread.run(Thread.java:745) 

Using the same connection string and driver files, I can connect to this instance from a plain Java JDBC program.
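For reference, this is a minimal sketch of that standalone check (host, database, and credentials are placeholders). DriverManager hands out raw connections and bypasses commons-dbcp2 entirely, so the pooled-connection validation that fails in the stack trace above is never exercised:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

public class PlainHiveJdbcCheck {
  public static void main(String[] args) throws Exception {
    // Same driver and connection string as in the pipeline below.
    Class.forName("org.apache.hive.jdbc.HiveDriver");
    try (Connection conn = DriverManager.getConnection(
             "jdbc:hive2://<external IP of compute instance>:10000/dbtest",
             "username", "password");
         Statement stmt = conn.createStatement();
         ResultSet rs = stmt.executeQuery(
             "select c_customer_id from dbtest.customer limit 5")) {
      while (rs.next()) {
        System.out.println(rs.getString(1)); // connection works end to end
      }
    }
  }
}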

This has been an issue for a while now and I have not been able to find a solution. Can anyone suggest any ideas?

Please see the code snippet below for connecting to Hive:

PCollection<Customer> collection = dataflowPipeline.apply(JdbcIO.<Customer>read() 
    .withDataSourceConfiguration(JdbcIO.DataSourceConfiguration 
        .create("org.apache.hive.jdbc.HiveDriver", "jdbc:hive2://<external IP of compute instance>:10000/dbtest") 
        .withUsername("username").withPassword("password")) 
    .withQuery( 
        "select c_customer_id, c_first_name, c_last_name, c_preferred_cust_flag, c_birth_day from dbtest.customer") 
    .withRowMapper(new JdbcIO.RowMapper<Customer>() { 
      @Override 
      public Customer mapRow(ResultSet resultSet) throws Exception { 
        Customer customer = new Customer(); 
        customer.setC_customer_id(resultSet.getString("c_customer_id")); 
        customer.setC_first_name(resultSet.getString("c_first_name")); 
        customer.setC_last_name(resultSet.getString("c_last_name")); 
        customer.setC_preferred_cust_flag(resultSet.getString("c_preferred_cust_flag")); 
        customer.setC_birth_day(resultSet.getInt("c_birth_day")); 
        return customer; 
      } 
    }).withCoder(AvroCoder.of(Customer.class))); 

Answer


Apache Commons DBCP's BasicDataSource validates connections using the isValid() method, which is not implemented by older versions of the Hive JDBC driver - see JDBC to hive connection fails on invalid operation isValid().

However, the method is implemented in versions after Hive 2.1.0: https://github.com/apache/hive/commit/2d2ab0942482a6ce1523dd9dd0f4094865e93b28

Could you use a newer version of the Hive JDBC driver?
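The failure can also be reproduced outside of Dataflow. Below is a minimal sketch (assuming commons-dbcp2 and the Hive JDBC driver on the classpath; host and credentials are placeholders) that goes through BasicDataSource the same way JdbcIO's ReadFn.setup() does in the stack trace: with an old driver, getConnection() fails with "Cannot create PoolableConnectionFactory (Method not supported)", while a driver that implements isValid() passes this validation step:

import java.sql.Connection;
import org.apache.commons.dbcp2.BasicDataSource;

public class HivePoolValidationCheck {
  public static void main(String[] args) throws Exception {
    BasicDataSource ds = new BasicDataSource();
    ds.setDriverClassName("org.apache.hive.jdbc.HiveDriver");
    ds.setUrl("jdbc:hive2://<external IP of compute instance>:10000/dbtest");
    ds.setUsername("username");
    ds.setPassword("password");
    // DBCP validates the first pooled connection via Connection.isValid(),
    // which old Hive drivers reject with "Method not supported".
    try (Connection conn = ds.getConnection()) {
      System.out.println("Pooled connection validated: " + conn.isValid(5));
    }
  }
}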


hi @jkff. I forgot to update this question here. After seeing this, I did some analysis and tried hive-jdbc version 2.1.1. Now I am running into this problem - util.UserCodeException: java.sql.SQLException: Cannot create PoolableConnectionFactory (Could not open client transport with JDBC Uri: jdbc:hive2:// :3306/db: java.net.ConnectException: Connection timed out) at dataflow.worker.runners.worker.MapTaskExecutorFactory$3.typedApply(MapTaskExecutorFactory.java:289). It looks like it is not getting a connection. – Balu


This looks like a generic networking issue rather than a Dataflow issue. Can you connect to the same database with the same parameters from a regular Java program, rather than from a Dataflow pipeline? – jkff


Yes. I can pull the data successfully with a plain Java program. I am wondering how many threads Dataflow might create when it starts reading from the db. Connection pooling was not enabled initially, since Derby was configured for Hive; later we changed it to MySQL. However, the low-spec VM where Cloudera is installed is very slow and cannot accept shared connections due to insufficient resources. Do you think my assumption is correct? – Balu
