[ 
https://issues.apache.org/jira/browse/CARBONDATA-664?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Ravindra Pesala updated CARBONDATA-664:
---------------------------------------
    Assignee: Mohammad Shahid Khan

> Select queries fail when BAD_RECORDS_ACTION as FORCED is used in load query.
> ----------------------------------------------------------------------------
>
>                 Key: CARBONDATA-664
>                 URL: https://issues.apache.org/jira/browse/CARBONDATA-664
>             Project: CarbonData
>          Issue Type: Bug
>          Components: data-query
>    Affects Versions: 1.0.0-incubating
>         Environment: Spark 1.6
>            Reporter: Harsh Sharma
>            Assignee: Mohammad Shahid Khan
>              Labels: bug
>         Attachments: 100_olap_C20.csv, Driver Logs, Executor Logs
>
>
> Below scenario is working on Spark 2.1, but not on Spark 1.6
> create table VMALL_DICTIONARY_INCLUDE (imei string,deviceInformationId 
> int,MAC string,deviceColor string,device_backColor string,modelId 
> string,marketName string,AMSize string,ROMSize string,CUPAudit 
> string,CPIClocked string,series string,productionDate timestamp,bomCode 
> string,internalModels string, deliveryTime string, channelsId string, 
> channelsName string , deliveryAreaId string, deliveryCountry string, 
> deliveryProvince string, deliveryCity string,deliveryDistrict string, 
> deliveryStreet string, oxSingleNumber string, ActiveCheckTime string, 
> ActiveAreaId string, ActiveCountry string, ActiveProvince string, Activecity 
> string, ActiveDistrict string, ActiveStreet string, ActiveOperatorId string, 
> Active_releaseId string, Active_EMUIVersion string, Active_operaSysVersion 
> string, Active_BacVerNumber string, Active_BacFlashVer string, 
> Active_webUIVersion string, Active_webUITypeCarrVer 
> string,Active_webTypeDataVerNumber string, Active_operatorsVersion string, 
> Active_phonePADPartitionedVersions string, Latest_YEAR int, Latest_MONTH int, 
> Latest_DAY Decimal(30,10), Latest_HOUR string, Latest_areaId string, 
> Latest_country string, Latest_province string, Latest_city string, 
> Latest_district string, Latest_street string, Latest_releaseId string, 
> Latest_EMUIVersion string, Latest_operaSysVersion string, Latest_BacVerNumber 
> string, Latest_BacFlashVer string, Latest_webUIVersion string, 
> Latest_webUITypeCarrVer string, Latest_webTypeDataVerNumber string, 
> Latest_operatorsVersion string, Latest_phonePADPartitionedVersions string, 
> Latest_operatorId string, gamePointDescription string,gamePointId 
> double,contractNumber BigInt) STORED BY 'org.apache.carbondata.format' 
> TBLPROPERTIES('DICTIONARY_INCLUDE'='imei,deviceInformationId,productionDate,gamePointId,Latest_DAY,contractNumber');
> LOAD DATA INPATH 'hdfs://hadoop-master:54311/data/100_olap_C20.csv' INTO 
> table VMALL_DICTIONARY_INCLUDE 
> options('DELIMITER'=',','QUOTECHAR'='"','BAD_RECORDS_ACTION'='FORCE','FILEHEADER'='imei,deviceInformationId,MAC,deviceColor,device_backColor,modelId,marketName,AMSize,ROMSize,CUPAudit,CPIClocked,series,productionDate,bomCode,internalModels,deliveryTime,channelsId,channelsName,deliveryAreaId,deliveryCountry,deliveryProvince,deliveryCity,deliveryDistrict,deliveryStreet,oxSingleNumber,contractNumber,ActiveCheckTime,ActiveAreaId,ActiveCountry,ActiveProvince,Activecity,ActiveDistrict,ActiveStreet,ActiveOperatorId,Active_releaseId,Active_EMUIVersion,Active_operaSysVersion,Active_BacVerNumber,Active_BacFlashVer,Active_webUIVersion,Active_webUITypeCarrVer,Active_webTypeDataVerNumber,Active_operatorsVersion,Active_phonePADPartitionedVersions,Latest_YEAR,Latest_MONTH,Latest_DAY,Latest_HOUR,Latest_areaId,Latest_country,Latest_province,Latest_city,Latest_district,Latest_street,Latest_releaseId,Latest_EMUIVersion,Latest_operaSysVersion,Latest_BacVerNumber,Latest_BacFlashVer,Latest_webUIVersion,Latest_webUITypeCarrVer,Latest_webTypeDataVerNumber,Latest_operatorsVersion,Latest_phonePADPartitionedVersions,Latest_operatorId,gamePointId,gamePointDescription');
> select sum(deviceinformationId) from VMALL_DICTIONARY_INCLUDE where 
> deviceColor ='5Device Color' and modelId != '109' or Latest_DAY > 
> '1234567890123540.0000000000' and contractNumber == '92233720368547800' or 
> Active_operaSysVersion like 'Operating System Version' and gamePointId <=> 
> '8.1366141918611E39' and deviceInformationId < '1000000' and productionDate 
> not like '2016-07-01' and imei is null and Latest_HOUR is not null and 
> channelsId <= '7' and Latest_releaseId >= '1' and Latest_MONTH between 6 and 
> 8 and Latest_YEAR not between 2016 and 2017 and Latest_HOUR RLIKE '12' and 
> gamePointDescription REGEXP 'Site' and imei in 
> ('1AA1','1AA100','1AA10','1AA1000','1AA10000','1AA100000','1AA1000000','1AA100001','1AA100002','1AA100004','','NULL')
>  and Active_BacVerNumber not in ('Background version number1','','null');
> This scenario results in the following exception,
> Error: org.apache.spark.SparkException: Job aborted due to stage failure: 
> Task 0 in stage 48.0 failed 4 times, most recent failure: Lost task 0.3 in 
> stage 48.0 (TID 152, hadoop-master): java.lang.RuntimeException: 
> java.util.concurrent.ExecutionException: java.lang.NullPointerException
>       at 
> org.apache.carbondata.scan.result.iterator.DetailQueryResultIterator.next(DetailQueryResultIterator.java:65)
>       at 
> org.apache.carbondata.scan.result.iterator.DetailQueryResultIterator.next(DetailQueryResultIterator.java:35)
>       at 
> org.apache.carbondata.scan.result.iterator.ChunkRowIterator.<init>(ChunkRowIterator.java:43)
>       at 
> org.apache.carbondata.hadoop.CarbonRecordReader.initialize(CarbonRecordReader.java:81)
>       at 
> org.apache.carbondata.spark.rdd.CarbonScanRDD.compute(CarbonScanRDD.scala:194)
>       at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
>       at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
>       at 
> org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
>       at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
>       at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
>       at 
> org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
>       at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
>       at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
>       at 
> org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
>       at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
>       at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
>       at 
> org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
>       at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
>       at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
>       at 
> org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
>       at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
>       at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
>       at 
> org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
>       at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
>       at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
>       at 
> org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
>       at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
>       at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
>       at 
> org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
>       at 
> org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
>       at org.apache.spark.scheduler.Task.run(Task.scala:89)
>       at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:227)
>       at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
>       at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
>       at java.lang.Thread.run(Thread.java:745)
> Caused by: java.util.concurrent.ExecutionException: 
> java.lang.NullPointerException
>       at java.util.concurrent.FutureTask.report(FutureTask.java:122)
>       at java.util.concurrent.FutureTask.get(FutureTask.java:192)
>       at 
> org.apache.carbondata.scan.result.iterator.DetailQueryResultIterator.next(DetailQueryResultIterator.java:52)
>       ... 34 more
> Caused by: java.lang.NullPointerException
>       at 
> org.apache.carbondata.scan.result.AbstractScannedResult.getDictionaryKeyIntegerArray(AbstractScannedResult.java:187)
>       at 
> org.apache.carbondata.scan.result.impl.FilterQueryScannedResult.getDictionaryKeyIntegerArray(FilterQueryScannedResult.java:53)
>       at 
> org.apache.carbondata.scan.collector.impl.DictionaryBasedResultCollector.collectData(DictionaryBasedResultCollector.java:111)
>       at 
> org.apache.carbondata.scan.processor.impl.DataBlockIteratorImpl.next(DataBlockIteratorImpl.java:52)
>       at 
> org.apache.carbondata.scan.processor.impl.DataBlockIteratorImpl.next(DataBlockIteratorImpl.java:33)
>       at 
> org.apache.carbondata.scan.result.iterator.DetailQueryResultIterator$1.call(DetailQueryResultIterator.java:78)
>       at 
> org.apache.carbondata.scan.result.iterator.DetailQueryResultIterator$1.call(DetailQueryResultIterator.java:72)
>       at java.util.concurrent.FutureTask.run(FutureTask.java:266)
>       ... 3 more
> Driver stacktrace: (state=,code=0)



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to