[
https://issues.apache.org/jira/browse/CARBONDATA-623?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Payal closed CARBONDATA-623.
----------------------------
> If we drop the table after this sequence (first we load data into the table with
> single pass true and use kettle false, and then load data into the same table a
> second time with single pass true and use kettle false), it throws Error:
> java.lang.NullPointerException
> ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
>
> Key: CARBONDATA-623
> URL: https://issues.apache.org/jira/browse/CARBONDATA-623
> Project: CarbonData
> Issue Type: Bug
> Components: data-load
> Affects Versions: 1.0.0-incubating
> Environment: spark-1.6
> Reporter: Payal
> Priority: Minor
> Attachments: 7000_UniqData.csv
>
>
> 1. First we load data into the table with single pass true and use kettle false;
> the data loads successfully and the result set is returned properly.
> 2. Then we load data into the same table with single pass true and use kettle
> false; the data again loads successfully and the result set is returned properly.
> 3. But after that, if we drop the table, it throws a NullPointerException.
> Queries
> 0: jdbc:hive2://hadoop-master:10000> CREATE TABLE uniqdata_INCLUDEDICTIONARY
> (CUST_ID int,CUST_NAME String,ACTIVE_EMUI_VERSION string, DOB timestamp, DOJ
> timestamp, BIGINT_COLUMN1 bigint,BIGINT_COLUMN2 bigint,DECIMAL_COLUMN1
> decimal(30,10), DECIMAL_COLUMN2 decimal(36,10),Double_COLUMN1 double,
> Double_COLUMN2 double,INTEGER_COLUMN1 int) STORED BY
> 'org.apache.carbondata.format'
> TBLPROPERTIES('DICTIONARY_INCLUDE'='CUST_ID,CUST_NAME,ACTIVE_EMUI_VERSION,DOB,DOJ,BIGINT_COLUMN1,BIGINT_COLUMN2,DECIMAL_COLUMN1,DECIMAL_COLUMN2,Double_COLUMN1,Double_COLUMN2,INTEGER_COLUMN1');
> +---------+--+
> | Result |
> +---------+--+
> +---------+--+
> No rows selected (1.13 seconds)
> 0: jdbc:hive2://hadoop-master:10000> LOAD DATA INPATH
> 'hdfs://hadoop-master:54311/data/uniqdata/7000_UniqData.csv' into table
> uniqdata_INCLUDEDICTIONARY OPTIONS('DELIMITER'=',', 'QUOTECHAR'='"',
> 'BAD_RECORDS_LOGGER_ENABLE'='TRUE', 'BAD_RECORDS_ACTION'='FORCE',
> 'FILEHEADER'='CUST_ID,CUST_NAME,ACTIVE_EMUI_VERSION,DOB,DOJ,BIGINT_COLUMN1,BIGINT_COLUMN2,DECIMAL_COLUMN1,DECIMAL_COLUMN2,Double_COLUMN1,Double_COLUMN2,INTEGER_COLUMN1',
> 'SINGLE_PASS'='false', 'USE_KETTLE'='false');
> +---------+--+
> | Result |
> +---------+--+
> +---------+--+
> No rows selected (22.814 seconds)
> 0: jdbc:hive2://hadoop-master:10000>
> 0: jdbc:hive2://hadoop-master:10000> select count (distinct CUST_NAME) from
> uniqdata_INCLUDEDICTIONARY ;
> +-------+--+
> | _c0 |
> +-------+--+
> | 7002 |
> +-------+--+
> 1 row selected (3.055 seconds)
> 0: jdbc:hive2://hadoop-master:10000> select count(CUST_NAME) from
> uniqdata_INCLUDEDICTIONARY ;
> +-------+--+
> | _c0 |
> +-------+--+
> | 7013 |
> +-------+--+
> 1 row selected (0.366 seconds)
> 0: jdbc:hive2://hadoop-master:10000> LOAD DATA INPATH
> 'hdfs://hadoop-master:54311/data/uniqdata/7000_UniqData.csv' into table
> uniqdata_INCLUDEDICTIONARY OPTIONS('DELIMITER'=',', 'QUOTECHAR'='"',
> 'BAD_RECORDS_LOGGER_ENABLE'='TRUE', 'BAD_RECORDS_ACTION'='FORCE',
> 'FILEHEADER'='CUST_ID,CUST_NAME,ACTIVE_EMUI_VERSION,DOB,DOJ,BIGINT_COLUMN1,BIGINT_COLUMN2,DECIMAL_COLUMN1,DECIMAL_COLUMN2,Double_COLUMN1,Double_COLUMN2,INTEGER_COLUMN1',
> 'SINGLE_PASS'='true', 'USE_KETTLE'='false');
> +---------+--+
> | Result |
> +---------+--+
> +---------+--+
> No rows selected (4.837 seconds)
> 0: jdbc:hive2://hadoop-master:10000> select count(CUST_NAME) from
> uniqdata_INCLUDEDICTIONARY ;
> +--------+--+
> | _c0 |
> +--------+--+
> | 14026 |
> +--------+--+
> 1 row selected (0.458 seconds)
> 0: jdbc:hive2://hadoop-master:10000> select count (distinct CUST_NAME) from
> uniqdata_INCLUDEDICTIONARY ;
> +-------+--+
> | _c0 |
> +-------+--+
> | 7002 |
> +-------+--+
> 1 row selected (3.173 seconds)
> 0: jdbc:hive2://hadoop-master:10000> drop table uniqdata_includedictionary;
> Error: java.lang.NullPointerException (state=,code=0)
> Logs
> WARN 11-01 12:56:52,722 - Lost task 0.0 in stage 61.0 (TID 1740, hadoop-slave-2): FetchFailed(BlockManagerId(0, hadoop-slave-3, 45331), shuffleId=22, mapId=0, reduceId=0, message=
> org.apache.spark.shuffle.FetchFailedException: Failed to connect to hadoop-slave-3:45331
>   at org.apache.spark.storage.ShuffleBlockFetcherIterator.throwFetchFailedException(ShuffleBlockFetcherIterator.scala:323)
>   at org.apache.spark.storage.ShuffleBlockFetcherIterator.next(ShuffleBlockFetcherIterator.scala:300)
>   at org.apache.spark.storage.ShuffleBlockFetcherIterator.next(ShuffleBlockFetcherIterator.scala:51)
>   at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
>   at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371)
>   at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
>   at org.apache.spark.util.CompletionIterator.hasNext(CompletionIterator.scala:32)
>   at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:39)
>   at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
>   at org.apache.spark.sql.execution.aggregate.TungstenAggregationIterator.processInputs(TungstenAggregationIterator.scala:504)
>   at org.apache.spark.sql.execution.aggregate.TungstenAggregationIterator.<init>(TungstenAggregationIterator.scala:686)
>   at org.apache.spark.sql.execution.aggregate.TungstenAggregate$$anonfun$doExecute$1$$anonfun$2.apply(TungstenAggregate.scala:95)
>   at org.apache.spark.sql.execution.aggregate.TungstenAggregate$$anonfun$doExecute$1$$anonfun$2.apply(TungstenAggregate.scala:86)
>   at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$20.apply(RDD.scala:710)
>   at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$20.apply(RDD.scala:710)
>   at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
>   at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
>   at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
>   at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
>   at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
>   at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
>   at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66)
>   at org.apache.spark.scheduler.Task.run(Task.scala:89)
>   at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:227)
>   at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
>   at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
>   at java.lang.Thread.run(Thread.java:745)
> Caused by: java.io.IOException: Failed to connect to hadoop-slave-3:45331
>   at org.apache.spark.network.client.TransportClientFactory.createClient(TransportClientFactory.java:216)
>   at org.apache.spark.network.client.TransportClientFactory.createClient(TransportClientFactory.java:167)
>   at org.apache.spark.network.netty.NettyBlockTransferService$$anon$1.createAndStart(NettyBlockTransferService.scala:90)
>   at org.apache.spark.network.shuffle.RetryingBlockFetcher.fetchAllOutstanding(RetryingBlockFetcher.java:140)
>   at org.apache.spark.network.shuffle.RetryingBlockFetcher.access$200(RetryingBlockFetcher.java:43)
>   at org.apache.spark.network.shuffle.RetryingBlockFetcher$1.run(RetryingBlockFetcher.java:170)
>   at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
>   at java.util.concurrent.FutureTask.run(FutureTask.java:266)
>   ... 3 more
> Caused by: java.nio.channels.UnresolvedAddressException
>   at sun.nio.ch.Net.checkAddress(Net.java:101)
>   at sun.nio.ch.SocketChannelImpl.connect(SocketChannelImpl.java:622)
>   at io.netty.channel.socket.nio.NioSocketChannel.doConnect(NioSocketChannel.java:209)
>   at io.netty.channel.nio.AbstractNioChannel$AbstractNioUnsafe.connect(AbstractNioChannel.java:207)
>   at io.netty.channel.DefaultChannelPipeline$HeadContext.connect(DefaultChannelPipeline.java:1097)
>   at io.netty.channel.AbstractChannelHandlerContext.invokeConnect(AbstractChannelHandlerContext.java:471)
>   at io.netty.channel.AbstractChannelHandlerContext.connect(AbstractChannelHandlerContext.java:456)
>   at io.netty.channel.ChannelOutboundHandlerAdapter.connect(ChannelOutboundHandlerAdapter.java:47)
>   at io.netty.channel.AbstractChannelHandlerContext.invokeConnect(AbstractChannelHandlerContext.java:471)
>   at io.netty.channel.AbstractChannelHandlerContext.connect(AbstractChannelHandlerContext.java:456)
>   at io.netty.channel.ChannelDuplexHandler.connect(ChannelDuplexHandler.java:50)
>   at io.netty.channel.AbstractChannelHandlerContext.invokeConnect(AbstractChannelHandlerContext.java:471)
>   at io.netty.channel.AbstractChannelHandlerContext.connect(AbstractChannelHandlerContext.java:456)
>   at io.netty.channel.AbstractChannelHandlerContext.connect(AbstractChannelHandlerContext.java:438)
>   at io.netty.channel.DefaultChannelPipeline.connect(DefaultChannelPipeline.java:908)
>   at io.netty.channel.AbstractChannel.connect(AbstractChannel.java:203)
>   at io.netty.bootstrap.Bootstrap$2.run(Bootstrap.java:166)
>   at io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:357)
>   at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:357)
>   at io.netty.util.concurrent.SingleThreadEventExecutor$2.run(SingleThreadEventExecutor.java:111)
>   ... 1 more
> AUDIT 11-01 12:59:57,699 - [deepak-Vostro-3546][hduser][Thread-631]Deleted table [uniqdata_includedictionary] under database [default]
> ERROR 11-01 12:59:57,702 - pool-26-thread-55 Exception occured:File does not exist: hdfs://hadoop-master:54311/opt/carbonStore/default/uniqdata_includedictionary/droptable.lock
> ERROR 11-01 12:59:57,702 - Error executing query, currentState RUNNING,
> java.lang.NullPointerException
>   at org.apache.carbondata.core.datastorage.store.filesystem.AbstractDFSCarbonFile.delete(AbstractDFSCarbonFile.java:128)
>   at org.apache.carbondata.lcm.locks.HdfsFileLock.unlock(HdfsFileLock.java:110)
>   at org.apache.spark.sql.execution.command.DropTableCommand.run(carbonTableSchema.scala:613)
>   at org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult$lzycompute(commands.scala:58)
>   at org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult(commands.scala:56)
>   at org.apache.spark.sql.execution.ExecutedCommand.doExecute(commands.scala:70)
>   at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$5.apply(SparkPlan.scala:132)
>   at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$5.apply(SparkPlan.scala:130)
>   at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
>   at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:130)
>   at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:55)
>   at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:55)
>   at org.apache.spark.sql.DataFrame.<init>(DataFrame.scala:145)
>   at org.apache.spark.sql.DataFrame.<init>(DataFrame.scala:130)
>   at org.apache.spark.sql.CarbonContext.sql(CarbonContext.scala:137)
>   at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.org$apache$spark$sql$hive$thriftserver$SparkExecuteStatementOperation$$execute(SparkExecuteStatementOperation.scala:211)
>   at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$1$$anon$2.run(SparkExecuteStatementOperation.scala:154)
>   at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$1$$anon$2.run(SparkExecuteStatementOperation.scala:151)
>   at java.security.AccessController.doPrivileged(Native Method)
>   at javax.security.auth.Subject.doAs(Subject.java:422)
>   at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1628)
>   at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$1.run(SparkExecuteStatementOperation.scala:164)
>   at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
>   at java.util.concurrent.FutureTask.run(FutureTask.java:266)
>   at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
>   at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
>   at java.lang.Thread.run(Thread.java:745)
> ERROR 11-01 12:59:57,702 - Error running hive query:
> org.apache.hive.service.cli.HiveSQLException: java.lang.NullPointerException
>   at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.org$apache$spark$sql$hive$thriftserver$SparkExecuteStatementOperation$$execute(SparkExecuteStatementOperation.scala:246)
>   at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$1$$anon$2.run(SparkExecuteStatementOperation.scala:154)
>   at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$1$$anon$2.run(SparkExecuteStatementOperation.scala:151)
>   at java.security.AccessController.doPrivileged(Native Method)
>   at javax.security.auth.Subject.doAs(Subject.java:422)
>   at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1628)
>   at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$1.run(SparkExecuteStatementOperation.scala:164)
>   at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
>   at java.util.concurrent.FutureTask.run(FutureTask.java:266)
>   at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
>   at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
>   at java.lang.Thread.run(Thread.java:745)
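
Note: the sequence in the log above (AUDIT "Deleted table", then "File does not exist: .../droptable.lock", then the NullPointerException thrown from HdfsFileLock.unlock via AbstractDFSCarbonFile.delete) suggests the drop-table flow removes the table folder first and then tries to delete a lock file that no longer exists. Below is a minimal sketch of the kind of guard that would avoid the NPE; it is written against the plain Hadoop FileSystem API rather than CarbonData's own file abstraction, and the class and method names are illustrative only, not the project's actual code.

    import java.io.IOException;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    // Illustrative sketch only: release a drop-table lock without assuming the
    // lock file still exists on HDFS (it may already have been removed together
    // with the table folder).
    public final class LockFileCleanupSketch {

      public static boolean releaseLock(String lockFilePath, Configuration conf) throws IOException {
        Path path = new Path(lockFilePath);
        FileSystem fs = path.getFileSystem(conf);
        if (!fs.exists(path)) {
          // Nothing left to delete; treat the lock as already released instead
          // of failing on a missing file.
          return true;
        }
        return fs.delete(path, false); // non-recursive delete of the lock file
      }
    }

For the path in this report that would be releaseLock("hdfs://hadoop-master:54311/opt/carbonStore/default/uniqdata_includedictionary/droptable.lock", new Configuration()), returning true whether or not the lock file is still present.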
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)