[ https://issues.apache.org/jira/browse/CARBONDATA-4277?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
PURUJIT CHAUGULE updated CARBONDATA-4277:
-----------------------------------------

Description:

*Issue 1 : Load on a geospatial table created in CarbonData 2.1.0 fails in 2.2.0 (Spark 2.4.5 and 3.1.1)*

*STEPS:-*
# Create the table in CarbonData 2.1.0: create table source_index_2_1_0(TIMEVALUE BIGINT,LONGITUDE long,LATITUDE long) STORED AS carbondata TBLPROPERTIES ('SPATIAL_INDEX.mygeohash.type'='geohash','SPATIAL_INDEX.mygeohash.sourcecolumns'='longitude, latitude','SPATIAL_INDEX.mygeohash.originLatitude'='39.930753','SPATIAL_INDEX.mygeohash.gridSize'='50','SPATIAL_INDEX.mygeohash.minLongitude'='116.176090','SPATIAL_INDEX.mygeohash.maxLongitude'='116.736367','SPATIAL_INDEX.mygeohash.minLatitude'='39.930753','SPATIAL_INDEX.mygeohash.maxLatitude'='40.179415','SPATIAL_INDEX'='mygeohash','SPATIAL_INDEX.mygeohash.conversionRatio'='1000000');
# LOAD DATA INPATH 'hdfs://hacluster/chetan/f_lcov_50basic_data.csv' INTO TABLE source_index_2_1_0 OPTIONS('HEADER'='true','DELIMITER'='|', 'QUOTECHAR'='|');
# Copy the table's store into HDFS on the CarbonData 2.2.0 (Spark 2.4.5 and Spark 3.1.1) clusters.
# refresh table source_index_2_1_0;
# Run the load again in 2.2.0: 0: jdbc:hive2://10.21.19.14:23040/default> LOAD DATA INPATH 'hdfs://hacluster/chetan/f_lcov_50basic_data.csv' INTO TABLE source_index_2_1_0 OPTIONS('HEADER'='true','DELIMITER'='|', 'QUOTECHAR'='|');

Error: org.apache.hive.service.cli.HiveSQLException: Error running query: java.lang.Exception: DataLoad failure: Data Loading failed for table source_index_2_1_0
at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.org$apache$spark$sql$hive$thriftserver$SparkExecuteStatementOperation$$execute(SparkExecuteStatementOperation.scala:361)
at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2$$anon$3.$anonfun$run$2(SparkExecuteStatementOperation.scala:263)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at org.apache.spark.sql.hive.thriftserver.SparkOperation.withLocalProperties(SparkOperation.scala:78)
at org.apache.spark.sql.hive.thriftserver.SparkOperation.withLocalProperties$(SparkOperation.scala:62)
at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.withLocalProperties(SparkExecuteStatementOperation.scala:43)
at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2$$anon$3.run(SparkExecuteStatementOperation.scala:263)
at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2$$anon$3.run(SparkExecuteStatementOperation.scala:258)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1746)
at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2.run(SparkExecuteStatementOperation.scala:272)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.Exception: DataLoad failure: Data Loading failed for table source_index_2_1_0
at org.apache.carbondata.spark.rdd.CarbonDataRDDFactory$.loadCarbonData(CarbonDataRDDFactory.scala:460)
at org.apache.spark.sql.execution.command.management.CarbonLoadDataCommand.loadData(CarbonLoadDataCommand.scala:226)
at org.apache.spark.sql.execution.command.management.CarbonLoadDataCommand.processData(CarbonLoadDataCommand.scala:163)
at org.apache.spark.sql.execution.command.AtomicRunnableCommand.$anonfun$run$3(package.scala:162)
at org.apache.spark.sql.execution.command.Auditable.runWithAudit(package.scala:118)
at org.apache.spark.sql.execution.command.Auditable.runWithAudit$(package.scala:114)
at org.apache.spark.sql.execution.command.AtomicRunnableCommand.runWithAudit(package.scala:155)
at org.apache.spark.sql.execution.command.AtomicRunnableCommand.run(package.scala:168)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:79)
at org.apache.spark.sql.Dataset.$anonfun$logicalPlan$1(Dataset.scala:228)
at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3687)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:772)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3685)
at org.apache.spark.sql.Dataset.<init>(Dataset.scala:228)
at org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:99)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:772)
at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:96)
at org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:615)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:772)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:610)
at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:650)
at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.org$apache$spark$sql$hive$thriftserver$SparkExecuteStatementOperation$$execute(SparkExecuteStatementOperation.scala:325)
... 16 more (state=,code=0)

*Issue 2 : Update on a 2.1.0 geospatial table fails in 2.2.0 (Spark 2.4.5 and 3.1.1)*

*STEPS:-*
# Create the table in CarbonData 2.1.0: create table source_index_2_1_0(TIMEVALUE BIGINT,LONGITUDE long,LATITUDE long) STORED AS carbondata TBLPROPERTIES ('SPATIAL_INDEX.mygeohash.type'='geohash','SPATIAL_INDEX.mygeohash.sourcecolumns'='longitude, latitude','SPATIAL_INDEX.mygeohash.originLatitude'='39.930753','SPATIAL_INDEX.mygeohash.gridSize'='50','SPATIAL_INDEX.mygeohash.minLongitude'='116.176090','SPATIAL_INDEX.mygeohash.maxLongitude'='116.736367','SPATIAL_INDEX.mygeohash.minLatitude'='39.930753','SPATIAL_INDEX.mygeohash.maxLatitude'='40.179415','SPATIAL_INDEX'='mygeohash','SPATIAL_INDEX.mygeohash.conversionRatio'='1000000');
# LOAD DATA INPATH 'hdfs://hacluster/chetan/f_lcov_50basic_data.csv' INTO TABLE source_index_2_1_0 OPTIONS('HEADER'='true','DELIMITER'='|', 'QUOTECHAR'='|');
# Copy the table's store into HDFS on the CarbonData 2.2.0 (Spark 2.4.5 and Spark 3.1.1) clusters.
# refresh table source_index_2_1_0;
# Run the update in 2.2.0: 0: jdbc:hive2://10.21.19.14:23040/default> update source_index_2_1_0 set (timevalue)=(1);

Error: org.apache.hive.service.cli.HiveSQLException: Error running query: java.lang.RuntimeException: Update operation failed. DataLoad failure
at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.org$apache$spark$sql$hive$thriftserver$SparkExecuteStatementOperation$$execute(SparkExecuteStatementOperation.scala:361)
at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2$$anon$3.$anonfun$run$2(SparkExecuteStatementOperation.scala:263)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at org.apache.spark.sql.hive.thriftserver.SparkOperation.withLocalProperties(SparkOperation.scala:78)
at org.apache.spark.sql.hive.thriftserver.SparkOperation.withLocalProperties$(SparkOperation.scala:62)
at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.withLocalProperties(SparkExecuteStatementOperation.scala:43)
at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2$$anon$3.run(SparkExecuteStatementOperation.scala:263)
at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2$$anon$3.run(SparkExecuteStatementOperation.scala:258)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1746)
at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2.run(SparkExecuteStatementOperation.scala:272)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.RuntimeException: Update operation failed. DataLoad failure
at scala.sys.package$.error(package.scala:30)
at org.apache.spark.sql.execution.command.mutation.CarbonProjectForUpdateCommand.processData(CarbonProjectForUpdateCommand.scala:232)
at org.apache.spark.sql.execution.command.DataCommand.$anonfun$run$2(package.scala:146)
at org.apache.spark.sql.execution.command.Auditable.runWithAudit(package.scala:118)
at org.apache.spark.sql.execution.command.Auditable.runWithAudit$(package.scala:114)
at org.apache.spark.sql.execution.command.DataCommand.runWithAudit(package.scala:144)
at org.apache.spark.sql.execution.command.DataCommand.run(package.scala:146)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:79)
at org.apache.spark.sql.Dataset.$anonfun$logicalPlan$1(Dataset.scala:228)
at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3687)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:772)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3685)
at org.apache.spark.sql.Dataset.<init>(Dataset.scala:228)
at org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:99)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:772)
at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:96)
at org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:615)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:772)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:610)
at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:650)
at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.org$apache$spark$sql$hive$thriftserver$SparkExecuteStatementOperation$$execute(SparkExecuteStatementOperation.scala:325)
... 16 more (state=,code=0)

*Issue 3 : IN_POLYGON UDF query on a 2.1.0 geospatial table fails in 2.2.0 (Spark 2.4.5 and 3.1.1)*

*STEPS:-*
# Create the table in CarbonData 2.1.0: create table source_index_2_1_0(TIMEVALUE BIGINT,LONGITUDE long,LATITUDE long) STORED AS carbondata TBLPROPERTIES ('SPATIAL_INDEX.mygeohash.type'='geohash','SPATIAL_INDEX.mygeohash.sourcecolumns'='longitude, latitude','SPATIAL_INDEX.mygeohash.originLatitude'='39.930753','SPATIAL_INDEX.mygeohash.gridSize'='50','SPATIAL_INDEX.mygeohash.minLongitude'='116.176090','SPATIAL_INDEX.mygeohash.maxLongitude'='116.736367','SPATIAL_INDEX.mygeohash.minLatitude'='39.930753','SPATIAL_INDEX.mygeohash.maxLatitude'='40.179415','SPATIAL_INDEX'='mygeohash','SPATIAL_INDEX.mygeohash.conversionRatio'='1000000');
# LOAD DATA INPATH 'hdfs://hacluster/chetan/f_lcov_50basic_data.csv' INTO TABLE source_index_2_1_0 OPTIONS('HEADER'='true','DELIMITER'='|', 'QUOTECHAR'='|');
# Copy the table's store into HDFS on the CarbonData 2.2.0 (Spark 2.4.5 and Spark 3.1.1) clusters.
# refresh table source_index_2_1_0;
# Run the polygon query in 2.2.0: 0: jdbc:hive2://10.21.19.14:23040/default> select timevalue,longitude,latitude from source_index_2_1_0 where IN_POLYGON('116.187332 39.979316,116.325378 39.963129,116.285807 40.084087,116.187332 39.979316');

Error: org.apache.hive.service.cli.HiveSQLException: Error running query: java.io.InvalidClassException: org.apache.carbondata.core.util.CustomIndex; local class incompatible: stream classdesc serialVersionUID = -2993752718803359018, local class serialVersionUID = 4887003928853911686
at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.org$apache$spark$sql$hive$thriftserver$SparkExecuteStatementOperation$$execute(SparkExecuteStatementOperation.scala:361)
at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2$$anon$3.$anonfun$run$2(SparkExecuteStatementOperation.scala:263)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at org.apache.spark.sql.hive.thriftserver.SparkOperation.withLocalProperties(SparkOperation.scala:78)
at org.apache.spark.sql.hive.thriftserver.SparkOperation.withLocalProperties$(SparkOperation.scala:62)
at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.withLocalProperties(SparkExecuteStatementOperation.scala:43)
at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2$$anon$3.run(SparkExecuteStatementOperation.scala:263)
at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2$$anon$3.run(SparkExecuteStatementOperation.scala:258)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1746)
at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2.run(SparkExecuteStatementOperation.scala:272)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.io.InvalidClassException: org.apache.carbondata.core.util.CustomIndex; local class incompatible: stream classdesc serialVersionUID = -2993752718803359018, local class serialVersionUID = 4887003928853911686
at java.io.ObjectStreamClass.initNonProxy(ObjectStreamClass.java:699)
at java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:1884)
at java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1750)
at java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:1884)
at java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1750)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2041)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1572)
at java.io.ObjectInputStream.readObject(ObjectInputStream.java:430)
at org.apache.carbondata.core.util.ObjectSerializationUtil.convertStringToObject(ObjectSerializationUtil.java:99)
at org.apache.carbondata.core.util.CustomIndex.getCustomInstance(CustomIndex.java:78)
at org.apache.carbondata.geo.GeoUtils$.getGeoHashHandler(GeoUtils.scala:47)
at org.apache.spark.sql.optimizer.CarbonFilters$.getGeoHashHandler(CarbonFilters.scala:302)
at org.apache.spark.sql.optimizer.CarbonFilters$.translateUDF(CarbonFilters.scala:246)
at org.apache.spark.sql.optimizer.CarbonFilters$.translateExpression(CarbonFilters.scala:83)
at org.apache.spark.sql.execution.strategy.CarbonSourceStrategy$.$anonfun$selectFilters$3(CarbonSourceStrategy.scala:470)
at scala.collection.TraversableLike.$anonfun$flatMap$1(TraversableLike.scala:245)
at scala.collection.immutable.List.foreach(List.scala:392)
at scala.collection.TraversableLike.flatMap(TraversableLike.scala:245)
at scala.collection.TraversableLike.flatMap$(TraversableLike.scala:242)
at scala.collection.immutable.List.flatMap(List.scala:355)
at org.apache.spark.sql.execution.strategy.CarbonSourceStrategy$.selectFilters(CarbonSourceStrategy.scala:456)
at org.apache.spark.sql.execution.strategy.CarbonSourceStrategy$.pruneFilterProject(CarbonSourceStrategy.scala:166)
at org.apache.spark.sql.execution.strategy.CarbonSourceStrategy$.apply(CarbonSourceStrategy.scala:61)
at org.apache.spark.sql.catalyst.planning.QueryPlanner.$anonfun$plan$1(QueryPlanner.scala:63)
at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:484)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:490)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:489)
at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:93)
at org.apache.spark.sql.execution.SparkStrategies.plan(SparkStrategies.scala:67)
at org.apache.spark.sql.catalyst.planning.QueryPlanner.$anonfun$plan$3(QueryPlanner.scala:78)
at scala.collection.TraversableOnce.$anonfun$foldLeft$1(TraversableOnce.scala:162)
at scala.collection.TraversableOnce.$anonfun$foldLeft$1$adapted(TraversableOnce.scala:162)
at scala.collection.Iterator.foreach(Iterator.scala:941)
at scala.collection.Iterator.foreach$(Iterator.scala:941)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1429)
at scala.collection.TraversableOnce.foldLeft(TraversableOnce.scala:162)
at scala.collection.TraversableOnce.foldLeft$(TraversableOnce.scala:160)
at scala.collection.AbstractIterator.foldLeft(Iterator.scala:1429)
at org.apache.spark.sql.catalyst.planning.QueryPlanner.$anonfun$plan$2(QueryPlanner.scala:75)
at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:484)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:490)
at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:93)
at org.apache.spark.sql.execution.SparkStrategies.plan(SparkStrategies.scala:67)
at org.apache.spark.sql.execution.QueryExecution$.createSparkPlan(QueryExecution.scala:391)
at org.apache.spark.sql.execution.QueryExecution.$anonfun$sparkPlan$1(QueryExecution.scala:104)
at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:111)
at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:143)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:772)
at org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:143)
at org.apache.spark.sql.execution.QueryExecution.sparkPlan$lzycompute(QueryExecution.scala:104)
at org.apache.spark.sql.execution.QueryExecution.sparkPlan(QueryExecution.scala:97)
at org.apache.spark.sql.execution.QueryExecution.$anonfun$executedPlan$1(QueryExecution.scala:117)
at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:111)
at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:143)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:772)
at org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:143)
at org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:117)
at org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:110)
at org.apache.spark.sql.execution.QueryExecution.$anonfun$writePlans$5(QueryExecution.scala:225)
at org.apache.spark.sql.catalyst.plans.QueryPlan$.append(QueryPlan.scala:487)
at org.apache.spark.sql.execution.QueryExecution.writePlans(QueryExecution.scala:225)
at org.apache.spark.sql.execution.QueryExecution.toString(QueryExecution.scala:240)
at org.apache.spark.sql.execution.QueryExecution.toString(QueryExecution.scala:233)
at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.org$apache$spark$sql$hive$thriftserver$SparkExecuteStatementOperation$$execute(SparkExecuteStatementOperation.scala:328)
... 16 more (state=,code=0)
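The InvalidClassException in Issue 3 shows the likely mechanism: a geohash handler object was Java-serialized into the table's metadata by 2.1.0 (read back through ObjectSerializationUtil.convertStringToObject, per the trace), and 2.2.0 cannot deserialize it because org.apache.carbondata.core.util.CustomIndex changed shape between releases, so its implicitly derived serialVersionUID changed too. Below is a minimal, self-contained sketch of that failure mode and the conventional guard, using plain java.io serialization; GeoHandler and SerialVersionDemo are hypothetical stand-ins for illustration, not CarbonData classes.

{code:java}
import java.io.*;

// Hypothetical stand-in for a handler object serialized into table metadata.
// When a Serializable class does NOT declare serialVersionUID, the JVM
// derives one from the class structure, so any structural change between
// releases makes streams written by the old release unreadable and
// deserialization fails with java.io.InvalidClassException
// ("local class incompatible: stream classdesc serialVersionUID = ...").
class GeoHandler implements Serializable {
    // Pinning the UID keeps old streams readable across compatible changes.
    private static final long serialVersionUID = 1L;
    String indexName;
    GeoHandler(String indexName) { this.indexName = indexName; }
}

public class SerialVersionDemo {
    public static void main(String[] args) throws Exception {
        // Serialize the handler, roughly what the 2.1.0 writer side does.
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        try (ObjectOutputStream oos = new ObjectOutputStream(bos)) {
            oos.writeObject(new GeoHandler("mygeohash"));
        }
        // Deserialize it, roughly the 2.2.0 reader side. With the pinned UID
        // this succeeds even if GeoHandler later gains fields; without the
        // pin, a reshaped class throws InvalidClassException exactly as in
        // the Issue 3 stack trace.
        try (ObjectInputStream ois = new ObjectInputStream(
                new ByteArrayInputStream(bos.toByteArray()))) {
            GeoHandler restored = (GeoHandler) ois.readObject();
            System.out.println("Restored handler for: " + restored.indexName);
        }
    }
}
{code}

If this mismatch is indeed the root cause, declaring a fixed serialVersionUID on CustomIndex (or re-serializing the stored handler when the table is refreshed) would let 2.2.0 read handlers written by 2.1.0 stores.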
> Compatibility Issue of GeoSpatial table of CarbonData 2.1.0 in CarbonData 2.2.0 (Spark 2.4.5 and Spark 3.1.1)
> -------------------------------------------------------------------------------------------------------------
>
>                 Key: CARBONDATA-4277
>                 URL: https://issues.apache.org/jira/browse/CARBONDATA-4277
>             Project: CarbonData
>          Issue Type: Bug
>    Affects Versions: 2.2.0
>        Environment: Spark 2.4.5
> Spark 3.1.1
>            Reporter: PURUJIT CHAUGULE
>            Priority: Minor
>

-- This message was sent by Atlassian Jira (v8.3.4#803005)