2016-04-22 109 views
0

我正在嘗試學習 Tom White 的《Hadoop: The Definitive Guide》，並卡在「從 Hadoop URL 讀取數據」這一節。我已經嘗試過各種調整，但針對我放在 HDFS 上的檔案路徑，始終收到「File does not exist（找不到檔案）」的錯誤。

下面是Java類:

package JavaInterfacePractice; 
    import java.io.InputStream; 
    import java.net.URI; 
    import java.net.URL; 
import org.apache.hadoop.conf.Configuration; 
import org.apache.hadoop.fs.FileSystem; 
import org.apache.hadoop.fs.FsUrlStreamHandlerFactory; 
import org.apache.hadoop.fs.Path; 
import org.apache.hadoop.io.IOUtils; 

public class UrlCat {

    // Register the HDFS URL stream handler so java.net.URL understands hdfs:// URLs.
    // This is JVM-wide state: setURLStreamHandlerFactory may be called at most once
    // per JVM, which is why the book puts it in a static initializer.
    static {
        URL.setURLStreamHandlerFactory(new FsUrlStreamHandlerFactory());
    }

    /**
     * Reads the file named by {@code args[0]} from HDFS and copies its bytes to stdout.
     *
     * <p>Note on paths: {@code /input/1901} is an <em>absolute</em> HDFS path, while
     * entries listed by {@code hdfs dfs -ls} without a leading slash (e.g.
     * {@code input/1901}) live under the user's HDFS home directory (typically
     * {@code /user/hduser/input/1901}). Pass the full absolute path or a complete
     * URI such as {@code hdfs://host:port/user/hduser/input/1901}.
     *
     * @param args args[0] = HDFS path or URI of the file to print
     * @throws Exception on configuration or I/O failure
     */
    public static void main(String[] args) throws Exception {
        // Fail fast with a usage message instead of an ArrayIndexOutOfBoundsException.
        if (args.length < 1) {
            System.err.println("Usage: UrlCat <hdfs-path-or-uri>");
            System.exit(2);
        }
        String uri = args[0];

        Configuration conf = new Configuration();
        // Explicitly load the cluster configuration so the default filesystem is the
        // real namenode rather than the local file system.
        conf.addResource(new Path("/usr/local/hadoop/etc/hadoop/core-site.xml"));
        conf.addResource(new Path("/usr/local/hadoop/etc/hadoop/hdfs-site.xml"));

        FileSystem fs = FileSystem.get(URI.create(uri), conf);
        // try-with-resources replaces the manual try/finally + IOUtils.closeStream,
        // guaranteeing the stream is closed even if copyBytes throws.
        try (InputStream in = fs.open(new Path(uri))) {
            IOUtils.copyBytes(in, System.out, 4096, false);
        }
        // Do not close fs: FileSystem.get returns a cached, shared instance.
    }

}

這裏是我來運行這些代碼的命令:

hadoop jar /home/hduser/workspace/MaxTemperature/target/MaxTemperature-0.0.1-SNAPSHOT.jar JavaInterfacePractice.UrlCat /input/1901 

下面是錯誤我得到:

Exception in thread "main" java.io.FileNotFoundException: File does not exist: /input/1901 
    at org.apache.hadoop.hdfs.server.namenode.INodeFile.valueOf(INodeFile.java:71) 
    at org.apache.hadoop.hdfs.server.namenode.INodeFile.valueOf(INodeFile.java:61) 
    at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getBlockLocationsInt(FSNamesystem.java:1828) 
    at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getBlockLocations(FSNamesystem.java:1799) 
    at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getBlockLocations(FSNamesystem.java:1712) 
    at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.getBlockLocations(NameNodeRpcServer.java:587) 
    at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.getBlockLocations(ClientNamenodeProtocolServerSideTranslatorPB.java:365) 
    at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java) 
    at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:616) 
    at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:969) 
    at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2049) 
    at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2045) 
    at java.security.AccessController.doPrivileged(Native Method) 
    at javax.security.auth.Subject.doAs(Subject.java:415) 
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1657) 
    at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2043) 

    at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method) 
    at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:57) 
    at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45) 
    at java.lang.reflect.Constructor.newInstance(Constructor.java:526) 
    at org.apache.hadoop.ipc.RemoteException.instantiateException(RemoteException.java:106) 
    at org.apache.hadoop.ipc.RemoteException.unwrapRemoteException(RemoteException.java:73) 
    at org.apache.hadoop.hdfs.DFSClient.callGetBlockLocations(DFSClient.java:1228) 
    at org.apache.hadoop.hdfs.DFSClient.getLocatedBlocks(DFSClient.java:1213) 
    at org.apache.hadoop.hdfs.DFSClient.getLocatedBlocks(DFSClient.java:1201) 
    at org.apache.hadoop.hdfs.DFSInputStream.fetchLocatedBlocksAndGetLastBlockLength(DFSInputStream.java:306) 
    at org.apache.hadoop.hdfs.DFSInputStream.openInfo(DFSInputStream.java:272) 
    at org.apache.hadoop.hdfs.DFSInputStream.<init>(DFSInputStream.java:264) 
    at org.apache.hadoop.hdfs.DFSClient.open(DFSClient.java:1526) 
    at org.apache.hadoop.hdfs.DistributedFileSystem$3.doCall(DistributedFileSystem.java:303) 
    at org.apache.hadoop.hdfs.DistributedFileSystem$3.doCall(DistributedFileSystem.java:299) 
    at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81) 
    at org.apache.hadoop.hdfs.DistributedFileSystem.open(DistributedFileSystem.java:299) 
    at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:767) 
    at JavaInterfacePractice.UrlCat.main(UrlCat.java:28) 
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) 
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) 
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) 
    at java.lang.reflect.Method.invoke(Method.java:606) 
    at org.apache.hadoop.util.RunJar.run(RunJar.java:221) 
    at org.apache.hadoop.util.RunJar.main(RunJar.java:136) 
Caused by: org.apache.hadoop.ipc.RemoteException(java.io.FileNotFoundException): File does not exist: /input/1901 
    at org.apache.hadoop.hdfs.server.namenode.INodeFile.valueOf(INodeFile.java:71) 
    at org.apache.hadoop.hdfs.server.namenode.INodeFile.valueOf(INodeFile.java:61) 
    at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getBlockLocationsInt(FSNamesystem.java:1828) 
    at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getBlockLocations(FSNamesystem.java:1799) 
    at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getBlockLocations(FSNamesystem.java:1712) 
    at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.getBlockLocations(NameNodeRpcServer.java:587) 
    at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.getBlockLocations(ClientNamenodeProtocolServerSideTranslatorPB.java:365) 
    at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java) 
    at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:616) 
    at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:969) 
    at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2049) 
    at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2045) 
    at java.security.AccessController.doPrivileged(Native Method) 
    at javax.security.auth.Subject.doAs(Subject.java:415) 
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1657) 
    at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2043) 

    at org.apache.hadoop.ipc.Client.call(Client.java:1475) 
    at org.apache.hadoop.ipc.Client.call(Client.java:1412) 
    at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:229) 
    at com.sun.proxy.$Proxy9.getBlockLocations(Unknown Source) 
    at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.getBlockLocations(ClientNamenodeProtocolTranslatorPB.java:255) 
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) 
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) 
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) 
    at java.lang.reflect.Method.invoke(Method.java:606) 
    at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:191) 
    at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:102) 
    at com.sun.proxy.$Proxy10.getBlockLocations(Unknown Source) 
    at org.apache.hadoop.hdfs.DFSClient.callGetBlockLocations(DFSClient.java:1226) 
    ... 18 more 

這裏是我的HDFS結構:

drwxr-xr-x - hduser supergroup   0 2016-04-20 06:50 input 
-rw-r--r-- 1 hduser supergroup  888190 2016-04-17 12:53 input/1901 
-rw-r--r-- 1 hduser supergroup  888978 2016-04-20 06:50 input/1902 
drwxr-xr-x - hduser supergroup   0 2016-04-19 22:20 output 
-rw-r--r-- 1 hduser supergroup   0 2016-04-19 22:20 output/_SUCCESS 
-rw-r--r-- 1 hduser supergroup   9 2016-04-19 22:20 output/part-r-00000 
drwxr-xr-x - hduser supergroup   0 2016-04-20 06:52 output2 
-rw-r--r-- 1 hduser supergroup   0 2016-04-20 06:52 output2/_SUCCESS 
-rw-r--r-- 1 hduser supergroup   18 2016-04-20 06:52 output2/part-r-00000 
drwxr-xr-x - hduser supergroup   0 2016-04-20 07:20 output3 
-rw-r--r-- 1 hduser supergroup   0 2016-04-20 07:20 output3/_SUCCESS 
-rw-r--r-- 1 hduser supergroup   18 2016-04-20 07:20 output3/part-r-00000 

如果文件存在於HDFS中,我爲什麼不能運行它?任何幫助將非常感激。

回答

0

你提問已經是 10 個月前的事了，想必問題早已解決；不過還是建議使用完整的 HDFS URI 來指定輸入檔案：

hdfs://&lt;namenode主機&gt;:&lt;端口&gt;/input/1901