[ https://issues.apache.org/jira/browse/CARBONDATA-4022?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Kunal Kapoor resolved CARBONDATA-4022.
--------------------------------------
Fix Version/s: 2.1.1
Resolution: Fixed
> Getting the error "Pathname is not a valid DFS filename" with the index server
> running, after adding carbon SDK segments and then doing select/update/delete
> operations.
> ------------------------------------------------------------------------------------------------------------------------------------------------------------------
>
> Key: CARBONDATA-4022
> URL: https://issues.apache.org/jira/browse/CARBONDATA-4022
> Project: CarbonData
> Issue Type: Bug
> Affects Versions: 2.0.0
> Reporter: Prasanna Ravichandran
> Priority: Major
> Fix For: 2.1.1
>
> Time Spent: 3h 10m
> Remaining Estimate: 0h
>
> The error "Pathname ... is not a valid DFS filename" is thrown during
> update/delete/select queries on a table with an added SDK segment. The path
> reported in the error is malformed: the table location and the external SDK
> segment path appear to be joined into a single path, which is the cause of the
> failure. This is seen only when the index server is running and fallback is
> disabled (disable fallback is true).
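> A minimal sketch of the suspected path handling, purely illustrative and not the
> actual CarbonData fix: the malformed name above looks like the table location and
> the already-qualified external segment path were joined as if the latter were a
> relative name. A defensive resolver (hypothetical helper, shortened file name)
> would keep a path that already carries a scheme and only resolve scheme-less
> names against the table location.
>
> import org.apache.hadoop.fs.Path;
>
> public class SegmentPathSketch {
>   // Hypothetical helper: keep fully qualified paths as-is, resolve relative
>   // ones against the table location.
>   static String resolveDataFilePath(String tablePath, String filePath) {
>     if (new Path(filePath).toUri().getScheme() != null) {
>       return filePath; // e.g. an hdfs:// path coming from an added SDK segment
>     }
>     return new Path(tablePath, filePath).toString();
>   }
>
>   public static void main(String[] args) {
>     String tablePath = "hdfs://hacluster/user/hive/warehouse/carbon.store/rps/sdk_2level_1";
>     String external = "hdfs://hacluster/sdkfiles/twolevelnestedrecwitharray/part-0.carbondata";
>     // Naive concatenation reproduces the shape of the invalid name in the error:
>     System.out.println(tablePath + external);
>     // The guarded resolver keeps the external path untouched:
>     System.out.println(resolveDataFilePath(tablePath, external));
>   }
> }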
> Queries and errors:
> > create table sdk_2level_1(name string, rec1
> > struct<intVal:int,StringVal:array<string>>) stored as carbondata;
> +---------+
> | Result |
> +---------+
> +---------+
> No rows selected (0.425 seconds)
> > alter table sdk_2level_1 add segment
> > options('path'='hdfs://hacluster/sdkfiles/twolevelnestedrecwitharray','format'='carbondata');
> +---------+
> | Result |
> +---------+
> +---------+
> No rows selected (0.77 seconds)
> > select * from sdk_2level_1;
> INFO : Execution ID: 1855
> Error: org.apache.spark.SparkException: Job aborted due to stage failure:
> Task 0 in stage 600.0 failed 4 times, most recent failure: Lost task 0.3 in
> stage 600.0 (TID 21345, linux, executor 16):
> java.lang.IllegalArgumentException: Pathname
> /user/hive/warehouse/carbon.store/rps/sdk_2level_1hdfs:/hacluster/sdkfiles/twolevelnestedrecwitharray/part-0-188852617294480_batchno0-0-null-188852332673632.carbondata
> from
> hdfs://hacluster/user/hive/warehouse/carbon.store/rps/sdk_2level_1hdfs:/hacluster/sdkfiles/twolevelnestedrecwitharray/part-0-188852617294480_batchno0-0-null-188852332673632.carbondata
> is not a valid DFS filename.
> at
> org.apache.hadoop.hdfs.DistributedFileSystem.getPathName(DistributedFileSystem.java:249)
> at
> org.apache.hadoop.hdfs.DistributedFileSystem$4.doCall(DistributedFileSystem.java:332)
> at
> org.apache.hadoop.hdfs.DistributedFileSystem$4.doCall(DistributedFileSystem.java:328)
> at
> org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
> at
> org.apache.hadoop.hdfs.DistributedFileSystem.open(DistributedFileSystem.java:340)
> at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:955)
> at
> org.apache.carbondata.core.datastore.filesystem.AbstractDFSCarbonFile.getDataInputStream(AbstractDFSCarbonFile.java:316)
> at
> org.apache.carbondata.core.datastore.filesystem.AbstractDFSCarbonFile.getDataInputStream(AbstractDFSCarbonFile.java:293)
> at
> org.apache.carbondata.core.datastore.impl.FileFactory.getDataInputStream(FileFactory.java:198)
> at
> org.apache.carbondata.core.datastore.impl.FileFactory.getDataInputStream(FileFactory.java:188)
> at org.apache.carbondata.core.reader.ThriftReader.open(ThriftReader.java:100)
> at
> org.apache.carbondata.core.reader.CarbonHeaderReader.readHeader(CarbonHeaderReader.java:60)
> at
> org.apache.carbondata.core.util.DataFileFooterConverterV3.readDataFileFooter(DataFileFooterConverterV3.java:65)
> at
> org.apache.carbondata.core.util.CarbonUtil.getDataFileFooter(CarbonUtil.java:902)
> at
> org.apache.carbondata.core.util.CarbonUtil.readMetadataFile(CarbonUtil.java:874)
> at
> org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.getDataBlocks(AbstractQueryExecutor.java:216)
> at
> org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.initQuery(AbstractQueryExecutor.java:138)
> at
> org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.getBlockExecutionInfos(AbstractQueryExecutor.java:382)
> at
> org.apache.carbondata.core.scan.executor.impl.DetailQueryExecutor.execute(DetailQueryExecutor.java:47)
> at
> org.apache.carbondata.hadoop.CarbonRecordReader.initialize(CarbonRecordReader.java:117)
> at
> org.apache.carbondata.spark.rdd.CarbonScanRDD$$anon$1.hasNext(CarbonScanRDD.scala:540)
> at
> org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown
> Source)
> at
> org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
> at
> org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$13$$anon$1.hasNext(WholeStageCodegenExec.scala:584)
> at
> org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:301)
> at
> org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:293)
> at
> org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:857)
> at
> org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:857)
> at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
> at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
> at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
> at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> at org.apache.spark.scheduler.Task.run(Task.scala:123)
> at
> org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:410)
> at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1551)
> at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:416)
> at
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
> at
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
> at java.lang.Thread.run(Thread.java:748)
> > update sdk_2level_1 set (name)=("RAMU") where name="LILY";
> INFO : Execution ID: 1871
> Error: java.lang.RuntimeException: Update operation failed. Job aborted due
> to stage failure: Task 0 in stage 605.0 failed 4 times, most recent failure:
> Lost task 0.3 in stage 605.0 (TID 21357, linux, executor 27):
> java.lang.IllegalArgumentException: Pathname
> /user/hive/warehouse/carbon.store/rps/sdk_2level_1hdfs:/hacluster/sdkfiles/twolevelnestedrecwitharray/part-0-188852617294480_batchno0-0-null-188852332673632.carbondata
> from
> hdfs://hacluster/user/hive/warehouse/carbon.store/rps/sdk_2level_1hdfs:/hacluster/sdkfiles/twolevelnestedrecwitharray/part-0-188852617294480_batchno0-0-null-188852332673632.carbondata
> is not a valid DFS filename.
> at
> org.apache.hadoop.hdfs.DistributedFileSystem.getPathName(DistributedFileSystem.java:249)
> at
> org.apache.hadoop.hdfs.DistributedFileSystem$4.doCall(DistributedFileSystem.java:332)
> at
> org.apache.hadoop.hdfs.DistributedFileSystem$4.doCall(DistributedFileSystem.java:328)
> at
> org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
> at
> org.apache.hadoop.hdfs.DistributedFileSystem.open(DistributedFileSystem.java:340)
> at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:955)
> at
> org.apache.carbondata.core.datastore.filesystem.AbstractDFSCarbonFile.getDataInputStream(AbstractDFSCarbonFile.java:316)
> at
> org.apache.carbondata.core.datastore.filesystem.AbstractDFSCarbonFile.getDataInputStream(AbstractDFSCarbonFile.java:293)
> at
> org.apache.carbondata.core.datastore.impl.FileFactory.getDataInputStream(FileFactory.java:198)
> at
> org.apache.carbondata.core.datastore.impl.FileFactory.getDataInputStream(FileFactory.java:188)
> at org.apache.carbondata.core.reader.ThriftReader.open(ThriftReader.java:100)
> at
> org.apache.carbondata.core.reader.CarbonHeaderReader.readHeader(CarbonHeaderReader.java:60)
> at
> org.apache.carbondata.core.util.DataFileFooterConverterV3.readDataFileFooter(DataFileFooterConverterV3.java:65)
> at
> org.apache.carbondata.core.util.CarbonUtil.getDataFileFooter(CarbonUtil.java:902)
> at
> org.apache.carbondata.core.util.CarbonUtil.readMetadataFile(CarbonUtil.java:874)
> at
> org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.getDataBlocks(AbstractQueryExecutor.java:216)
> at
> org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.initQuery(AbstractQueryExecutor.java:138)
> at
> org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.getBlockExecutionInfos(AbstractQueryExecutor.java:382)
> at
> org.apache.carbondata.core.scan.executor.impl.DetailQueryExecutor.execute(DetailQueryExecutor.java:47)
> at
> org.apache.carbondata.hadoop.CarbonRecordReader.initialize(CarbonRecordReader.java:117)
> at
> org.apache.carbondata.spark.rdd.CarbonScanRDD$$anon$1.hasNext(CarbonScanRDD.scala:540)
> at
> org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown
> Source)
> at
> org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
> at
> org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$13$$anon$1.hasNext(WholeStageCodegenExec.scala:584)
> at
> org.apache.spark.sql.execution.columnar.CachedRDDBuilder$$anonfun$1$$anon$1.hasNext(InMemoryRelation.scala:125)
> at
> org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:221)
> at
> org.apache.spark.storage.memory.MemoryStore.putIteratorAsValues(MemoryStore.scala:299)
> at
> org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1325)
> at
> org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1316)
> at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1251)
> at
> org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1316)
> at
> org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1042)
> at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:357)
> at org.apache.spark.rdd.RDD.iterator(RDD.scala:308)
> at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
> at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
> at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
> at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
> at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
> at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
> at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
> at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
> at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
> at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
> at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
> at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
> at
> org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59)
> at
> org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:102)
> at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:55)
> at org.apache.spark.scheduler.Task.run(Task.scala:123)
> at
> org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:410)
> at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1551)
> at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:416)
> at
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
> at
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
> at java.lang.Thread.run(Thread.java:748)
> > create table external_primitive (id int, name string, rank smallint, salary
> > double, active boolean, dob date, doj timestamp, city string, dept string)
> > stored as carbondata;
> +---------+
> | Result |
> +---------+
> +---------+
> No rows selected (0.448 seconds)
> > insert into external_primitive select
> > 1,"Pr",1,10,true,"1992-12-09","1992-10-07 22:00:20.0","chennai","CSE";
> INFO : Execution ID: 1996
> +---------+
> | Result |
> +---------+
> +---------+
> No rows selected (6.365 seconds)
> >
> > select * from external_primitive;
> INFO : Execution ID: 2002
> +-----+-------+-------+---------+---------+-------------+------------------------+----------+-------+
> | id  | name  | rank  | salary  | active  | dob         | doj                    | city     | dept  |
> +-----+-------+-------+---------+---------+-------------+------------------------+----------+-------+
> | 1   | Pr    | 1     | 10.0    | true    | 1992-12-09  | 1992-10-07 22:00:20.0  | chennai  | CSE   |
> +-----+-------+-------+---------+---------+-------------+------------------------+----------+-------+
> 1 row selected (0.856 seconds)
> >
> > alter table external_primitive add segment
> > options('path'='hdfs://hacluster/sdkfiles/primitive','format'='carbon');
> +---------+
> | Result |
> +---------+
> +---------+
> No rows selected (0.529 seconds)
> > select count(*) from external_primitive;--14;
> INFO : Execution ID: 2008
> +-----------+
> | count(1) |
> +-----------+
> | 14 |
> +-----------+
> 1 row selected (0.845 seconds)
> >
> > delete from external_primitive where id =2;
> INFO : Execution ID: 2010
> Error: java.lang.RuntimeException: Delete data operation is failed. Job
> aborted due to stage failure: Task 0 in stage 685.0 failed 4 times, most
> recent failure: Lost task 0.3 in stage 685.0 (TID 21446, linux-13, executor
> 7): java.lang.IllegalArgumentException: Pathname
> /user/hive/warehouse/carbon.store/rps/external_primitivehdfs:/hacluster/sdkfiles/primitive/part-0-188354604253792_batchno0-0-null-188354313936272.carbondata
> from
> hdfs://hacluster/user/hive/warehouse/carbon.store/rps/external_primitivehdfs:/hacluster/sdkfiles/primitive/part-0-188354604253792_batchno0-0-null-188354313936272.carbondata
> is not a valid DFS filename.
> at
> org.apache.hadoop.hdfs.DistributedFileSystem.getPathName(DistributedFileSystem.java:249)
> at
> org.apache.hadoop.hdfs.DistributedFileSystem$4.doCall(DistributedFileSystem.java:332)
> at
> org.apache.hadoop.hdfs.DistributedFileSystem$4.doCall(DistributedFileSystem.java:328)
> at
> org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
> at
> org.apache.hadoop.hdfs.DistributedFileSystem.open(DistributedFileSystem.java:340)
> at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:955)
> at
> org.apache.carbondata.core.datastore.filesystem.AbstractDFSCarbonFile.getDataInputStream(AbstractDFSCarbonFile.java:316)
> at
> org.apache.carbondata.core.datastore.filesystem.AbstractDFSCarbonFile.getDataInputStream(AbstractDFSCarbonFile.java:293)
> at
> org.apache.carbondata.core.datastore.impl.FileFactory.getDataInputStream(FileFactory.java:198)
> at
> org.apache.carbondata.core.datastore.impl.FileFactory.getDataInputStream(FileFactory.java:188)
> at org.apache.carbondata.core.reader.ThriftReader.open(ThriftReader.java:100)
> at
> org.apache.carbondata.core.reader.CarbonHeaderReader.readHeader(CarbonHeaderReader.java:60)
> at
> org.apache.carbondata.core.util.DataFileFooterConverterV3.readDataFileFooter(DataFileFooterConverterV3.java:65)
> at
> org.apache.carbondata.core.util.CarbonUtil.getDataFileFooter(CarbonUtil.java:902)
> at
> org.apache.carbondata.core.util.CarbonUtil.readMetadataFile(CarbonUtil.java:874)
> at
> org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.getDataBlocks(AbstractQueryExecutor.java:216)
> at
> org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.initQuery(AbstractQueryExecutor.java:138)
> at
> org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.getBlockExecutionInfos(AbstractQueryExecutor.java:382)
> at
> org.apache.carbondata.core.scan.executor.impl.VectorDetailQueryExecutor.execute(VectorDetailQueryExecutor.java:43)
> at
> org.apache.carbondata.spark.vectorreader.VectorizedCarbonRecordReader.initialize(VectorizedCarbonRecordReader.java:141)
> at
> org.apache.carbondata.spark.rdd.CarbonScanRDD$$anon$1.hasNext(CarbonScanRDD.scala:540)
> at
> org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.scan_nextBatch_0$(Unknown
> Source)
> at
> org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown
> Source)
> at
> org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
> at
> org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$13$$anon$1.hasNext(WholeStageCodegenExec.scala:584)
> at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
> at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
> at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
> at
> org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:132)
> at
> org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:58)
> at
> org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:102)
> at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:55)
> at org.apache.spark.scheduler.Task.run(Task.scala:123)
> at
> org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:410)
> at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1551)
> at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:416)
> at
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
> at
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
> at java.lang.Thread.run(Thread.java:748)
> Driver stacktrace: (state=,code=0)
--
This message was sent by Atlassian Jira
(v8.3.4#803005)