Dev environment: Spark 2.1.1, CarbonData 1.1.1, Hadoop 2.7.2.

We ran a major compaction and then cleaned files:

ALTER TABLE e_carbon.prod_inst_all_c COMPACT 'MAJOR'
CLEAN FILES FOR TABLE e_carbon.prod_inst_all_c
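For context, the two statements were issued from spark-shell roughly as follows. This is a minimal sketch: the session builder options are assumptions, while the store path is the one that appears in the error log below.

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.CarbonSession._

// Assumed session setup; getOrCreateCarbonSession takes the Carbon store path.
val cc = SparkSession.builder()
  .appName("carbon-maintenance")
  .getOrCreateCarbonSession("hdfs://ns1/user/e_carbon/public/carbon.store")

cc.sql("ALTER TABLE e_carbon.prod_inst_all_c COMPACT 'MAJOR'")
cc.sql("CLEAN FILES FOR TABLE e_carbon.prod_inst_all_c")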
After that, the following error appears:

17/10/30 14:59:21 ERROR filesystem.AbstractDFSCarbonFile: main Exception occurred:File does not exist: hdfs://ns1/user/e_carbon/public/carbon.store/e_carbon/prod_inst_his_c/Fact/Part0/Segment_0/624_batchno0-0-1509233731459.carbonindex

624_batchno0-0-1509233731459.carbonindex is lost; 623_batchno0-0-1509233118616.carbonindex and 625_batchno0-0-1509233731459.carbonindex on either side of it are still present:

-rw-r--r-- 3 e_carbon e_carbon_group 6750 2017-10-29 07:17 /user/e_carbon/public/carbon.store/e_carbon/prod_inst_his_c/Fact/Part0/Segment_0/621_batchno0-0-1509231670521.carbonindex
-rw-r--r-- 3 e_carbon e_carbon_group 11320 2017-10-29 07:19 /user/e_carbon/public/carbon.store/e_carbon/prod_inst_his_c/Fact/Part0/Segment_0/622_batchno0-0-1509232641994.carbonindex
-rw-r--r-- 3 e_carbon e_carbon_group 6858 2017-10-29 07:35 /user/e_carbon/public/carbon.store/e_carbon/prod_inst_his_c/Fact/Part0/Segment_0/623_batchno0-0-1509233118616.carbonindex
-rw-r--r-- 3 e_carbon e_carbon_group 11423 2017-10-29 07:37 /user/e_carbon/public/carbon.store/e_carbon/prod_inst_his_c/Fact/Part0/Segment_0/625_batchno0-0-1509233731459.carbonindex

Any query on the table now fails with the same missing-index error:

scala> cc.sql("select his_id,count(*) from e_carbon.prod_inst_his_c group by his_id having count(*)>1").show
17/10/30 14:59:21 ERROR filesystem.AbstractDFSCarbonFile: main Exception occurred:File does not exist: hdfs://ns1/user/e_carbon/public/carbon.store/e_carbon/prod_inst_his_c/Fact/Part0/Segment_0/624_batchno0-0-1509233731459.carbonindex
org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
Exchange hashpartitioning(his_id#1818, 100)
+- *HashAggregate(keys=[his_id#1818], functions=[partial_count(1), partial_count(1)], output=[his_id#1818, count#1967L, count#1968L])
   +- *BatchedScan CarbonDatasourceHadoopRelation [ Database name :e_carbon, Table name :prod_inst_his_c, Schema :Some(StructType(StructField(his_id,StringType,true), StructField(ETL_date,StringType,true), StructField(prod_inst_id,StringType,true), StructField(owner_cust_id,StringType,true), StructField(acc_prod_inst_id,StringType,true), StructField(DVERSION,StringType,true), StructField(GTID,StringType,true), StructField(IND,StringType,true), StructField(ODS_STATE,StringType,true), StructField(SRC,StringType,true), StructField(kafka_date,StringType,true), StructField(product_id,StringType,true), StructField(address_id,StringType,true), StructField(payment_mode_cd,StringType,true), StructField(product_password,StringType,true), StructField(important_level,StringType,true), StructField(area_code,StringType,true), StructField(acc_nbr,StringType,true), StructField(exch_id,StringType,true), StructField(common_region_id,StringType,true), StructField(remark,StringType,true), StructField(pay_cycle,StringType,true), StructField(begin_rent_time,StringType,true), StructField(stop_rent_time,StringType,true), StructField(finish_time,StringType,true), StructField(stop_status,StringType,true), StructField(status_cd,StringType,true), StructField(create_date,StringType,true), StructField(status_date,StringType,true), StructField(update_date,StringType,true), StructField(proc_serial,StringType,true), StructField(use_cust_id,StringType,true), StructField(ext_prod_inst_id,StringType,true), StructField(address_desc,StringType,true), StructField(area_id,StringType,true), StructField(update_staff,StringType,true), StructField(create_staff,StringType,true), StructField(rec_update_date,StringType,true), StructField(account,StringType,true), StructField(version,StringType,true), StructField(community_id,StringType,true), StructField(ext_acc_prod_inst_id,StringType,true), StructField(distributor_id,StringType,true), StructField(sharding_id,StringType,true))) ] e_carbon.prod_inst_his_c[his_id#1818]
  at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56)
  at org.apache.spark.sql.execution.exchange.ShuffleExchange.doExecute(ShuffleExchange.scala:112)
  at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
  at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
  at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:135)
  at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
  at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:132)
  at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:113)
  at org.apache.spark.sql.execution.InputAdapter.inputRDDs(WholeStageCodegenExec.scala:235)
  at org.apache.spark.sql.execution.aggregate.HashAggregateExec.inputRDDs(HashAggregateExec.scala:141)
  at org.apache.spark.sql.execution.FilterExec.inputRDDs(basicPhysicalOperators.scala:124)
  at org.apache.spark.sql.execution.ProjectExec.inputRDDs(basicPhysicalOperators.scala:42)
  at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:368)
  at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
  at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
  at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:135)
  at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
  at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:132)
  at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:113)
  at org.apache.spark.sql.execution.SparkPlan.getByteArrayRdd(SparkPlan.scala:225)
  at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:308)
  at org.apache.spark.sql.execution.CollectLimitExec.executeCollect(limit.scala:38)
  at org.apache.spark.sql.Dataset$$anonfun$org$apache$spark$sql$Dataset$$execute$1$1.apply(Dataset.scala:2386)
  at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:57)
  at org.apache.spark.sql.Dataset.withNewExecutionId(Dataset.scala:2788)
  at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$execute$1(Dataset.scala:2385)
  at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collect(Dataset.scala:2392)
  at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2128)
  at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2127)
  at org.apache.spark.sql.Dataset.withTypedCallback(Dataset.scala:2818)
  at org.apache.spark.sql.Dataset.head(Dataset.scala:2127)
  at org.apache.spark.sql.Dataset.take(Dataset.scala:2342)
  at org.apache.spark.sql.Dataset.showString(Dataset.scala:248)
  at org.apache.spark.sql.Dataset.show(Dataset.scala:638)
  at org.apache.spark.sql.Dataset.show(Dataset.scala:597)
  at org.apache.spark.sql.Dataset.show(Dataset.scala:606)
  ... 50 elided
Caused by: java.io.IOException: Problem in loading segment block.
  at org.apache.carbondata.core.datastore.SegmentTaskIndexStore.get(SegmentTaskIndexStore.java:102)
  at org.apache.carbondata.core.datastore.SegmentTaskIndexStore.get(SegmentTaskIndexStore.java:54)
  at org.apache.carbondata.hadoop.CacheAccessClient.get(CacheAccessClient.java:67)
  at org.apache.carbondata.hadoop.CarbonInputFormat.getSegmentAbstractIndexs(CarbonInputFormat.java:543)
  at org.apache.carbondata.hadoop.CarbonInputFormat.getDataBlocksOfSegment(CarbonInputFormat.java:382)
  at org.apache.carbondata.hadoop.CarbonInputFormat.getSplits(CarbonInputFormat.java:321)
  at org.apache.carbondata.hadoop.CarbonInputFormat.getSplits(CarbonInputFormat.java:262)
  at org.apache.carbondata.spark.rdd.CarbonScanRDD.getPartitions(CarbonScanRDD.scala:81)
  at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252)
  at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250)
  at scala.Option.getOrElse(Option.scala:121)
  at org.apache.spark.rdd.RDD.partitions(RDD.scala:250)
  at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
  at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252)
  at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250)
  at scala.Option.getOrElse(Option.scala:121)
  at org.apache.spark.rdd.RDD.partitions(RDD.scala:250)
  at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
  at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252)
  at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250)
  at scala.Option.getOrElse(Option.scala:121)
  at org.apache.spark.rdd.RDD.partitions(RDD.scala:250)
  at org.apache.spark.ShuffleDependency.<init>(Dependency.scala:91)
  at org.apache.spark.sql.execution.exchange.ShuffleExchange$.prepareShuffleDependency(ShuffleExchange.scala:261)
  at org.apache.spark.sql.execution.exchange.ShuffleExchange.prepareShuffleDependency(ShuffleExchange.scala:84)
  at org.apache.spark.sql.execution.exchange.ShuffleExchange$$anonfun$doExecute$1.apply(ShuffleExchange.scala:121)
  at org.apache.spark.sql.execution.exchange.ShuffleExchange$$anonfun$doExecute$1.apply(ShuffleExchange.scala:112)
  at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52)
  ... 85 more
Caused by: java.lang.NullPointerException
  at org.apache.carbondata.core.datastore.filesystem.AbstractDFSCarbonFile.getSize(AbstractDFSCarbonFile.java:113)
  at org.apache.carbondata.core.util.CarbonUtil.calculateDriverBTreeSize(CarbonUtil.java:1009)
  at org.apache.carbondata.core.datastore.SegmentTaskIndexStore.calculateRequiredSize(SegmentTaskIndexStore.java:304)
  at org.apache.carbondata.core.datastore.SegmentTaskIndexStore.loadAndGetTaskIdToSegmentsMap(SegmentTaskIndexStore.java:236)
  at org.apache.carbondata.core.datastore.SegmentTaskIndexStore.get(SegmentTaskIndexStore.java:96)
  ... 112 more

The segment directory currently contains these index files (note the jump from 623 to 625):

[e_carbon@HETL032 testdata]$ hdfs dfs -ls /user/e_carbon/public/carbon.store/e_carbon/prod_inst_his_c/Fact/Part0/Segment_0/*.carbonindex
-rw-r--r-- 3 e_carbon e_carbon_group 44859 2017-10-25 14:53 /user/e_carbon/public/carbon.store/e_carbon/prod_inst_his_c/Fact/Part0/Segment_0/0_batchno0-0-1508912751699.carbonindex
-rw-r--r-- 3 e_carbon e_carbon_group 43893 2017-10-25 14:55 /user/e_carbon/public/carbon.store/e_carbon/prod_inst_his_c/Fact/Part0/Segment_0/1_batchno0-0-1508912751699.carbonindex
-rw-r--r-- 3 e_carbon e_carbon_group 42684 2017-10-25 14:55 /user/e_carbon/public/carbon.store/e_carbon/prod_inst_his_c/Fact/Part0/Segment_0/2_batchno0-0-1508912751699.carbonindex
-rw-r--r-- 3 e_carbon e_carbon_group 40751 2017-10-25 14:43 /user/e_carbon/public/carbon.store/e_carbon/prod_inst_his_c/Fact/Part0/Segment_0/3_batchno0-0-1508912751699.carbonindex
-rw-r--r-- 3 e_carbon e_carbon_group 40212 2017-10-25 14:54 /user/e_carbon/public/carbon.store/e_carbon/prod_inst_his_c/Fact/Part0/Segment_0/4_batchno0-0-1508912751699.carbonindex
-rw-r--r-- 3 e_carbon e_carbon_group 11249 2017-10-29 06:37 /user/e_carbon/public/carbon.store/e_carbon/prod_inst_his_c/Fact/Part0/Segment_0/618_batchno0-0-1509230160810.carbonindex
-rw-r--r-- 3 e_carbon e_carbon_group 7197 2017-10-29 06:53 /user/e_carbon/public/carbon.store/e_carbon/prod_inst_his_c/Fact/Part0/Segment_0/619_batchno0-0-1509230585040.carbonindex
-rw-r--r-- 3 e_carbon e_carbon_group 11516 2017-10-29 06:55 /user/e_carbon/public/carbon.store/e_carbon/prod_inst_his_c/Fact/Part0/Segment_0/620_batchno0-0-1509231215665.carbonindex
-rw-r--r-- 3 e_carbon e_carbon_group 6750 2017-10-29 07:17 /user/e_carbon/public/carbon.store/e_carbon/prod_inst_his_c/Fact/Part0/Segment_0/621_batchno0-0-1509231670521.carbonindex
-rw-r--r-- 3 e_carbon e_carbon_group 11320 2017-10-29 07:19 /user/e_carbon/public/carbon.store/e_carbon/prod_inst_his_c/Fact/Part0/Segment_0/622_batchno0-0-1509232641994.carbonindex
-rw-r--r-- 3 e_carbon e_carbon_group 6858 2017-10-29 07:35 /user/e_carbon/public/carbon.store/e_carbon/prod_inst_his_c/Fact/Part0/Segment_0/623_batchno0-0-1509233118616.carbonindex
-rw-r--r-- 3 e_carbon e_carbon_group 11423 2017-10-29 07:37 /user/e_carbon/public/carbon.store/e_carbon/prod_inst_his_c/Fact/Part0/Segment_0/625_batchno0-0-1509233731459.carbonindex
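As a sanity check, the gap can be found programmatically. Below is a minimal spark-shell sketch; the helper is ours, not part of CarbonData, and only assumes the Hadoop FileSystem API plus the segment path from the listing above.

import java.net.URI
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

val segDir = "hdfs://ns1/user/e_carbon/public/carbon.store/e_carbon/prod_inst_his_c/Fact/Part0/Segment_0"
val fs = FileSystem.get(new URI(segDir), new Configuration())

// The task id is the numeric prefix of each index file name,
// e.g. 624 in 624_batchno0-0-1509233731459.carbonindex.
val taskIds = fs.globStatus(new Path(segDir + "/*.carbonindex"))
  .map(_.getPath.getName.takeWhile(_ != '_').toInt)
  .sorted

// Report holes in the sequence. Some jumps (e.g. 4 -> 618 above) may be
// expected after compaction; the suspicious one here is 623 -> 625.
val gaps = taskIds.sliding(2).collect { case Array(a, b) if b > a + 1 => s"$a -> $b" }.toList
println(s"task ids: ${taskIds.mkString(", ")}")
println(s"gaps: ${gaps.mkString(", ")}")

It may also help to compare this against cc.sql("SHOW SEGMENTS FOR TABLE e_carbon.prod_inst_his_c").show(), which prints the segment status the driver believes in. The NullPointerException at AbstractDFSCarbonFile.getSize suggests the driver-side index still references 624_batchno0-0-1509233731459.carbonindex even though CLEAN FILES removed it from HDFS, i.e. the table metadata and the segment directory appear to have gone out of sync.

yixu2001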