Could you paste your Flink configuration and hdfs-site.xml so we can check whether 
there is a problem with the HDFS FileSystem-related configuration? Note that your 
stack trace shows org.apache.hadoop.fs.RawLocalFileSystem handling the read, which 
suggests the hdfs:// path is being resolved against the local file system — this 
usually means the Hadoop configuration (core-site.xml / hdfs-site.xml) is not on the 
classpath. You should also verify that the path really exists on HDFS with an HDFS 
shell command (e.g. hdfs dfs -ls /xxx, see 
https://hadoop.apache.org/docs/r2.7.5/hadoop-project-dist/hadoop-common/FileSystemShell.html)
At 2019-10-15 01:27:39, "Pritam Sadhukhan" <sadhukhan.pri...@gmail.com> wrote:
>Hi,
>
>I am trying to use OrcTableSource to fetch data stored in Hive tables on
>HDFS.
>I am able to use the OrcTableSource to fetch and deserialize the data on a
>local cluster.
>
>But when I try to use the HDFS path, it throws a file-not-found
>error.
>
>Any help will be appreciated on the topic.
>
>Versions:
>
>Flink: 1.7.1
>Hive: 2.3.4
>
>*Code snippet:*
>
>import org.apache.flink.api.java.DataSet;
>import org.apache.flink.api.java.ExecutionEnvironment;
>import org.apache.flink.configuration.Configuration;
>import org.apache.flink.core.fs.FileSystem;
>import org.apache.flink.orc.OrcTableSource;
>import org.apache.flink.table.api.java.BatchTableEnvironment;
>import org.apache.flink.table.api.Table;
>import org.apache.flink.table.api.TableEnvironment;
>import org.apache.flink.types.Row;
>
>final ExecutionEnvironment environment = ExecutionEnvironment
>                .getExecutionEnvironment();
>BatchTableEnvironment tableEnvironment =
>TableEnvironment.getTableEnvironment(environment);
>OrcTableSource orcTS = OrcTableSource.builder()
>                .path("hdfs://host:port/logs/sa_structured_events")
>                .forOrcSchema(new
>OrcSchemaProvider().getStructuredEventsSchema())
>                .build();
>
>tableEnvironment.registerTableSource("OrcTable", orcTS);
>Table result = tableEnvironment.sqlQuery("SELECT * FROM OrcTable");
>
>DataSet<Row> rowDataSet = tableEnvironment.toDataSet(result, Row.class);
>
>tableEnvironment.execEnv().execute();
>
>
>*Error:*
>2019-10-14 16:56:26,048 INFO
> org.apache.flink.runtime.executiongraph.ExecutionGraph        - DataSource
>(OrcFile[path=hdfs://host:port/logs/sa_structured_events,
>schema=struct<customerid:string,eventid:string,subtype:st) (1/1)
>(9e1ad40a0f0b80ef0ad8d3b2fc58816d) switched from RUNNING to FAILED.
>java.io.FileNotFoundException: File
>/logs/sa_structured_events/part-00000-b2562d39-1097-490c-99dd-672ed12bbb10-c000.snappy.orc
>does not exist
>at
>org.apache.hadoop.fs.RawLocalFileSystem.deprecatedGetFileStatus(RawLocalFileSystem.java:635)
>at
>org.apache.hadoop.fs.RawLocalFileSystem.getFileLinkStatusInternal(RawLocalFileSystem.java:861)
>at
>org.apache.hadoop.fs.RawLocalFileSystem.getFileStatus(RawLocalFileSystem.java:625)
>at
>org.apache.hadoop.fs.FilterFileSystem.getFileStatus(FilterFileSystem.java:442)
>at
>org.apache.hadoop.fs.ChecksumFileSystem$ChecksumFSInputChecker.<init>(ChecksumFileSystem.java:146)
>at org.apache.hadoop.fs.ChecksumFileSystem.open(ChecksumFileSystem.java:347)
>at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:787)
>at org.apache.orc.impl.ReaderImpl.extractFileTail(ReaderImpl.java:517)
>at org.apache.orc.impl.ReaderImpl.<init>(ReaderImpl.java:364)
>at org.apache.orc.OrcFile.createReader(OrcFile.java:251)
>at org.apache.flink.orc.OrcRowInputFormat.open(OrcRowInputFormat.java:225)
>at org.apache.flink.orc.OrcRowInputFormat.open(OrcRowInputFormat.java:63)
>at
>org.apache.flink.runtime.operators.DataSourceTask.invoke(DataSourceTask.java:170)
>at org.apache.flink.runtime.taskmanager.Task.run(Task.java:704)
>at java.lang.Thread.run(Unknown Source)
>2019-10-14 16:56:26,048 INFO
> org.apache.flink.runtime.executiongraph.ExecutionGraph        - Job Flink
>Java Job at Mon Oct 14 16:56:07 IST 2019 (26a54fbcbd46cd0c4796e7308a2ba3b0)
>switched from state RUNNING to FAILING.
>java.io.FileNotFoundException: File
>/logs/sa_structured_events/part-00000-b2562d39-1097-490c-99dd-672ed12bbb10-c000.snappy.orc
>does not exist
>at
>org.apache.hadoop.fs.RawLocalFileSystem.deprecatedGetFileStatus(RawLocalFileSystem.java:635)
>at
>org.apache.hadoop.fs.RawLocalFileSystem.getFileLinkStatusInternal(RawLocalFileSystem.java:861)
>at
>org.apache.hadoop.fs.RawLocalFileSystem.getFileStatus(RawLocalFileSystem.java:625)
>at
>org.apache.hadoop.fs.FilterFileSystem.getFileStatus(FilterFileSystem.java:442)
>at
>org.apache.hadoop.fs.ChecksumFileSystem$ChecksumFSInputChecker.<init>(ChecksumFileSystem.java:146)
>at org.apache.hadoop.fs.ChecksumFileSystem.open(ChecksumFileSystem.java:347)
>at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:787)
>at org.apache.orc.impl.ReaderImpl.extractFileTail(ReaderImpl.java:517)
>at org.apache.orc.impl.ReaderImpl.<init>(ReaderImpl.java:364)
>at org.apache.orc.OrcFile.createReader(OrcFile.java:251)
>at org.apache.flink.orc.OrcRowInputFormat.open(OrcRowInputFormat.java:225)
>at org.apache.flink.orc.OrcRowInputFormat.open(OrcRowInputFormat.java:63)
>at
>org.apache.flink.runtime.operators.DataSourceTask.invoke(DataSourceTask.java:170)
>at org.apache.flink.runtime.taskmanager.Task.run(Task.java:704)
>at java.lang.Thread.run(Unknown Source)
>
>
>Regards,
>Pritam.

Reply via email to