[ https://issues.apache.org/jira/browse/CARBONDATA-664?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15894018#comment-15894018 ]
Mohammad Shahid Khan commented on CARBONDATA-664:
-------------------------------------------------
Closed with PR https://github.com/apache/incubator-carbondata/pull/584.
> Select queries fail when BAD_RECORDS_ACTION is set to FORCE in the load query.
> -------------------------------------------------------------------------------
>
> Key: CARBONDATA-664
> URL: https://issues.apache.org/jira/browse/CARBONDATA-664
> Project: CarbonData
> Issue Type: Bug
> Components: data-query
> Affects Versions: 1.0.0-incubating
> Environment: Spark 1.6
> Reporter: Harsh Sharma
> Assignee: Mohammad Shahid Khan
> Labels: bug
> Attachments: 100_olap_C20.csv, Driver Logs, Executor Logs
>
> Time Spent: 50m
> Remaining Estimate: 0h
>
> The scenario below works on Spark 2.1 but fails on Spark 1.6:
> create table VMALL_DICTIONARY_INCLUDE (
>   imei string, deviceInformationId int, MAC string, deviceColor string,
>   device_backColor string, modelId string, marketName string, AMSize string,
>   ROMSize string, CUPAudit string, CPIClocked string, series string,
>   productionDate timestamp, bomCode string, internalModels string, deliveryTime string,
>   channelsId string, channelsName string, deliveryAreaId string, deliveryCountry string,
>   deliveryProvince string, deliveryCity string, deliveryDistrict string, deliveryStreet string,
>   oxSingleNumber string, ActiveCheckTime string, ActiveAreaId string, ActiveCountry string,
>   ActiveProvince string, Activecity string, ActiveDistrict string, ActiveStreet string,
>   ActiveOperatorId string, Active_releaseId string, Active_EMUIVersion string,
>   Active_operaSysVersion string, Active_BacVerNumber string, Active_BacFlashVer string,
>   Active_webUIVersion string, Active_webUITypeCarrVer string, Active_webTypeDataVerNumber string,
>   Active_operatorsVersion string, Active_phonePADPartitionedVersions string,
>   Latest_YEAR int, Latest_MONTH int, Latest_DAY Decimal(30,10), Latest_HOUR string,
>   Latest_areaId string, Latest_country string, Latest_province string, Latest_city string,
>   Latest_district string, Latest_street string, Latest_releaseId string,
>   Latest_EMUIVersion string, Latest_operaSysVersion string, Latest_BacVerNumber string,
>   Latest_BacFlashVer string, Latest_webUIVersion string, Latest_webUITypeCarrVer string,
>   Latest_webTypeDataVerNumber string, Latest_operatorsVersion string,
>   Latest_phonePADPartitionedVersions string, Latest_operatorId string,
>   gamePointDescription string, gamePointId double, contractNumber BigInt
> ) STORED BY 'org.apache.carbondata.format'
> TBLPROPERTIES('DICTIONARY_INCLUDE'='imei,deviceInformationId,productionDate,gamePointId,Latest_DAY,contractNumber');
> LOAD DATA INPATH 'hdfs://hadoop-master:54311/data/100_olap_C20.csv'
> INTO table VMALL_DICTIONARY_INCLUDE
> options('DELIMITER'=',', 'QUOTECHAR'='"', 'BAD_RECORDS_ACTION'='FORCE',
> 'FILEHEADER'='imei,deviceInformationId,MAC,deviceColor,device_backColor,modelId,marketName,AMSize,ROMSize,CUPAudit,CPIClocked,series,productionDate,bomCode,internalModels,deliveryTime,channelsId,channelsName,deliveryAreaId,deliveryCountry,deliveryProvince,deliveryCity,deliveryDistrict,deliveryStreet,oxSingleNumber,contractNumber,ActiveCheckTime,ActiveAreaId,ActiveCountry,ActiveProvince,Activecity,ActiveDistrict,ActiveStreet,ActiveOperatorId,Active_releaseId,Active_EMUIVersion,Active_operaSysVersion,Active_BacVerNumber,Active_BacFlashVer,Active_webUIVersion,Active_webUITypeCarrVer,Active_webTypeDataVerNumber,Active_operatorsVersion,Active_phonePADPartitionedVersions,Latest_YEAR,Latest_MONTH,Latest_DAY,Latest_HOUR,Latest_areaId,Latest_country,Latest_province,Latest_city,Latest_district,Latest_street,Latest_releaseId,Latest_EMUIVersion,Latest_operaSysVersion,Latest_BacVerNumber,Latest_BacFlashVer,Latest_webUIVersion,Latest_webUITypeCarrVer,Latest_webTypeDataVerNumber,Latest_operatorsVersion,Latest_phonePADPartitionedVersions,Latest_operatorId,gamePointId,gamePointDescription');
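> For context, BAD_RECORDS_ACTION='FORCE' keeps every row at load time and converts values that cannot be parsed into the target column type (for example a malformed timestamp) to null instead of rejecting the record. A minimal sketch of that behaviour is shown here; the table, column names, and CSV path are illustrative assumptions, not taken from the attached 100_olap_C20.csv:
>
>   CREATE TABLE force_null_demo (id INT, eventDate TIMESTAMP)
>   STORED BY 'org.apache.carbondata.format'
>   TBLPROPERTIES('DICTIONARY_INCLUDE'='id,eventDate');
>
>   -- Rows whose eventDate does not parse are still loaded, with eventDate forced to null.
>   LOAD DATA INPATH 'hdfs://hadoop-master:54311/data/force_null_demo.csv'
>   INTO TABLE force_null_demo
>   OPTIONS('DELIMITER'=',', 'QUOTECHAR'='"', 'BAD_RECORDS_ACTION'='FORCE',
>           'FILEHEADER'='id,eventDate');
>
>   -- The forced nulls are then visible to queries.
>   SELECT COUNT(*) FROM force_null_demo WHERE eventDate IS NULL;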
> select sum(deviceinformationId) from VMALL_DICTIONARY_INCLUDE
> where deviceColor = '5Device Color' and modelId != '109'
>   or Latest_DAY > '1234567890123540.0000000000' and contractNumber == '92233720368547800'
>   or Active_operaSysVersion like 'Operating System Version'
>      and gamePointId <=> '8.1366141918611E39'
>      and deviceInformationId < '1000000'
>      and productionDate not like '2016-07-01'
>      and imei is null and Latest_HOUR is not null
>      and channelsId <= '7' and Latest_releaseId >= '1'
>      and Latest_MONTH between 6 and 8
>      and Latest_YEAR not between 2016 and 2017
>      and Latest_HOUR RLIKE '12'
>      and gamePointDescription REGEXP 'Site'
>      and imei in ('1AA1','1AA100','1AA10','1AA1000','1AA10000','1AA100000','1AA1000000','1AA100001','1AA100002','1AA100004','','NULL')
>      and Active_BacVerNumber not in ('Background version number1','','null');
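> Note that both the projected column (deviceInformationId) and several filter columns (imei, productionDate, gamePointId, Latest_DAY, contractNumber) are in DICTIONARY_INCLUDE, which is consistent with the dictionary-based result collector appearing in the stack trace below. A narrowed query along these lines (an illustrative debugging sketch, not part of the original report) should exercise the same path:
>
>   SELECT SUM(deviceInformationId) FROM VMALL_DICTIONARY_INCLUDE
>   WHERE imei IS NULL AND productionDate NOT LIKE '2016-07-01';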
> On Spark 1.6, the query fails with the following exception:
> Error: org.apache.spark.SparkException: Job aborted due to stage failure:
> Task 0 in stage 48.0 failed 4 times, most recent failure: Lost task 0.3 in
> stage 48.0 (TID 152, hadoop-master): java.lang.RuntimeException:
> java.util.concurrent.ExecutionException: java.lang.NullPointerException
>   at org.apache.carbondata.scan.result.iterator.DetailQueryResultIterator.next(DetailQueryResultIterator.java:65)
>   at org.apache.carbondata.scan.result.iterator.DetailQueryResultIterator.next(DetailQueryResultIterator.java:35)
>   at org.apache.carbondata.scan.result.iterator.ChunkRowIterator.<init>(ChunkRowIterator.java:43)
>   at org.apache.carbondata.hadoop.CarbonRecordReader.initialize(CarbonRecordReader.java:81)
>   at org.apache.carbondata.spark.rdd.CarbonScanRDD.compute(CarbonScanRDD.scala:194)
>   at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
>   at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
>   at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
>   at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
>   at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
>   at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
>   at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
>   at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
>   at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
>   at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
>   at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
>   at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
>   at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
>   at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
>   at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
>   at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
>   at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
>   at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
>   at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
>   at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
>   at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
>   at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
>   at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
>   at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
>   at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
>   at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
>   at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
>   at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
>   at org.apache.spark.scheduler.Task.run(Task.scala:89)
>   at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:227)
>   at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
>   at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
>   at java.lang.Thread.run(Thread.java:745)
> Caused by: java.util.concurrent.ExecutionException: java.lang.NullPointerException
>   at java.util.concurrent.FutureTask.report(FutureTask.java:122)
>   at java.util.concurrent.FutureTask.get(FutureTask.java:192)
>   at org.apache.carbondata.scan.result.iterator.DetailQueryResultIterator.next(DetailQueryResultIterator.java:52)
>   ... 34 more
> Caused by: java.lang.NullPointerException
>   at org.apache.carbondata.scan.result.AbstractScannedResult.getDictionaryKeyIntegerArray(AbstractScannedResult.java:187)
>   at org.apache.carbondata.scan.result.impl.FilterQueryScannedResult.getDictionaryKeyIntegerArray(FilterQueryScannedResult.java:53)
>   at org.apache.carbondata.scan.collector.impl.DictionaryBasedResultCollector.collectData(DictionaryBasedResultCollector.java:111)
>   at org.apache.carbondata.scan.processor.impl.DataBlockIteratorImpl.next(DataBlockIteratorImpl.java:52)
>   at org.apache.carbondata.scan.processor.impl.DataBlockIteratorImpl.next(DataBlockIteratorImpl.java:33)
>   at org.apache.carbondata.scan.result.iterator.DetailQueryResultIterator$1.call(DetailQueryResultIterator.java:78)
>   at org.apache.carbondata.scan.result.iterator.DetailQueryResultIterator$1.call(DetailQueryResultIterator.java:72)
>   at java.util.concurrent.FutureTask.run(FutureTask.java:266)
>   ... 3 more
> Driver stacktrace: (state=,code=0)
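> Until a build with the fix from PR 584 is available, one way to confirm that the failure is tied to the forced nulls is to drop and recreate the table, reload with a different bad-records action, and rerun the select. This is only a hedged suggestion: it assumes the REDIRECT action (which diverts unparseable rows to the configured bad-records location instead of forcing them to null) is supported by the deployed release, and it keeps the remaining load options, including FILEHEADER, the same as in the original load (omitted below for brevity):
>
>   LOAD DATA INPATH 'hdfs://hadoop-master:54311/data/100_olap_C20.csv'
>   INTO TABLE VMALL_DICTIONARY_INCLUDE
>   OPTIONS('DELIMITER'=',', 'QUOTECHAR'='"', 'BAD_RECORDS_ACTION'='REDIRECT');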