[CARBONDATA-3077] Fixed query failure in fileformat due to stale cache issue

Problem
While using the FileFormat API, if a table is created, dropped, and then recreated with 
the same name, the query fails because of a schema mismatch issue

Analysis
In the case of carbondata used through the FileFormat API, once a table is dropped and 
recreated with the same name, then, because the dataMap contains the stale 
carbon table, a schema mismatch exception is thrown

Solution
To avoid such scenarios it is always better to update the carbon table object 
retrieved from the dataMap

This closes #2898


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/e4843d76
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/e4843d76
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/e4843d76

Branch: refs/heads/branch-1.5
Commit: e4843d76907c1f2a9062a696bdb45bb86c6637a8
Parents: 6793274
Author: m00258959 <manish.gu...@huawei.com>
Authored: Mon Nov 5 15:45:46 2018 +0530
Committer: ravipesala <ravi.pes...@gmail.com>
Committed: Wed Nov 21 22:43:46 2018 +0530

----------------------------------------------------------------------
 .../core/datamap/DataMapStoreManager.java       |  6 ++
 .../core/datamap/dev/DataMapFactory.java        |  4 +
 .../src/test/resources/vardhandaterestruct.csv  | 99 ++++++++++++++++++++
 .../datasource/SparkCarbonDataSourceTest.scala  | 18 ++++
 4 files changed, 127 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/e4843d76/core/src/main/java/org/apache/carbondata/core/datamap/DataMapStoreManager.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datamap/DataMapStoreManager.java
 
b/core/src/main/java/org/apache/carbondata/core/datamap/DataMapStoreManager.java
index 75290d3..baf4739 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datamap/DataMapStoreManager.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datamap/DataMapStoreManager.java
@@ -345,6 +345,12 @@ public final class DataMapStoreManager {
     if (dataMap == null) {
       throw new RuntimeException("Datamap does not exist");
     }
+    // This is done to handle the scenario of stale cache because of which 
schema mismatch
+    // exception can be thrown. Scenario: In case of carbondata used through 
FileFormat API,
+    // once a table is dropped and recreated with the same name again then 
because the dataMap
+    // contains the stale carbon table schema mismatch exception is thrown. To 
avoid such scenarios
+    // it is always better to update the carbon table object retrieved
+    dataMap.getDataMapFactory().setCarbonTable(table);
     return dataMap;
   }
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/e4843d76/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMapFactory.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMapFactory.java 
b/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMapFactory.java
index de8dc58..ee7914d 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMapFactory.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMapFactory.java
@@ -54,6 +54,10 @@ public abstract class DataMapFactory<T extends DataMap> {
     return carbonTable;
   }
 
+  public void setCarbonTable(CarbonTable carbonTable) {
+    this.carbonTable = carbonTable;
+  }
+
   public DataMapSchema getDataMapSchema() {
     return dataMapSchema;
   }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/e4843d76/integration/spark-datasource/src/test/resources/vardhandaterestruct.csv
----------------------------------------------------------------------
diff --git 
a/integration/spark-datasource/src/test/resources/vardhandaterestruct.csv 
b/integration/spark-datasource/src/test/resources/vardhandaterestruct.csv
new file mode 100644
index 0000000..daa7c38
--- /dev/null
+++ b/integration/spark-datasource/src/test/resources/vardhandaterestruct.csv
@@ -0,0 +1,99 @@
+1AA1,1,8RAM size,4,Chinese,guangzhou,2738,2014-07-01 12:07:28,2014-07-01 
12:07:28,25
+1AA10,10,4RAM size,4,Chinese,wuhan,1714,2014-07-02 12:07:28,2014-07-02 
12:07:28,26
+1AA100,100,7RAM size,6,Chinese,yichang,1271,2014-07-03 12:07:28,2014-07-03 
12:07:28,27
+1AA1000,1000,5RAM size,3,Chinese,changsha,692,2014-07-04 12:07:28,2014-07-04 
12:07:28,28
+1AA10000,10000,1RAM size,1,Chinese,xiangtan,2175,2015-07-05 
12:07:28,2015-07-05 12:07:28,29
+1AA100000,100000,4RAM size,6,Chinese,yichang,136,2015-07-06 
12:07:28,2015-07-06 12:07:28,30
+1AA1000000,1000000,4RAM size,6,Chinese,xiangtan,1600,2015-07-07 
12:07:28,2015-07-07 12:07:28,31
+1AA100001,100001,7RAM size,7,Chinese,wuhan,505,2014-07-08 12:07:28,2014-07-08 
12:07:28,32
+1AA100002,100002,0RAM size,3,Chinese,zhuzhou,1341,2015-07-09 
12:07:28,2015-07-09 12:07:28,33
+1AA100003,100003,7RAM size,3,Chinese,zhuzhou,2239,2015-07-10 
12:07:28,2015-07-10 12:07:28,34
+1AA100004,100004,8RAM size,2,Chinese,wuhan,2970,2014-07-11 12:07:28,2014-07-11 
12:07:28,35
+1AA100005,100005,0RAM size,1,Chinese,changsha,2593,2015-07-12 
12:07:28,2015-07-12 12:07:28,36
+1AA100006,100006,4RAM size,2,Chinese,changsha,2572,2015-07-13 
12:07:28,2015-07-13 12:07:28,37
+1AA100007,100007,4RAM size,3,Chinese,xiangtan,1991,2015-07-14 
12:07:28,2015-07-14 12:07:28,38
+1AA100008,100008,0RAM size,1,Chinese,wuhan,1442,2015-07-15 12:07:28,2015-07-15 
12:07:28,39
+1AA100009,100009,4RAM size,3,Chinese,xiangtan,1841,2014-07-16 
12:07:28,2014-07-16 12:07:28,40
+1AA10001,10001,6RAM size,5,Chinese,xiangtan,298,2015-07-17 12:07:28,2015-07-17 
12:07:28,41
+1AA100010,100010,0RAM size,6,Chinese,guangzhou,79,2015-07-18 
12:07:28,2015-07-18 12:07:28,42
+1AA100011,100011,1RAM size,1,Chinese,xiangtan,202,2015-07-19 
12:07:28,2015-07-19 12:07:28,43
+1AA100012,100012,6RAM size,2,Chinese,xiangtan,568,2015-07-20 
12:07:28,2015-07-20 12:07:28,44
+1AA100013,100013,8RAM size,6,Chinese,shenzhen,355,2015-07-21 
12:07:28,2015-07-21 12:07:28,45
+1AA100014,100014,7RAM size,3,Chinese,changsha,151,2015-07-22 
12:07:28,2015-07-22 12:07:28,46
+1AA100015,100015,3RAM size,1,Chinese,changsha,2863,2015-07-23 
12:07:28,2015-07-23 12:07:28,47
+1AA100016,100016,8RAM size,3,Chinese,xiangtan,1873,2015-07-24 
12:07:28,2015-07-24 12:07:28,48
+1AA100017,100017,5RAM size,3,Chinese,guangzhou,2205,2015-07-25 
12:07:28,2015-07-25 12:07:28,49
+1AA100018,100018,4RAM size,4,Chinese,yichang,441,2015-07-26 
12:07:28,2015-07-26 12:07:28,50
+1AA100019,100019,0RAM size,3,Chinese,changsha,2194,2015-07-27 
12:07:28,2015-07-27 12:07:28,51
+1AA10002,10002,8RAM size,5,Chinese,yichang,2972,2015-07-28 12:07:28,2015-07-28 
12:07:28,52
+1AA100020,100020,1RAM size,7,Chinese,shenzhen,256,2015-07-29 
12:07:28,2015-07-29 12:07:28,53
+1AA100021,100021,0RAM size,6,Chinese,changsha,1778,2015-07-30 
12:07:28,2015-07-30 12:07:28,54
+1AA100022,100022,3RAM size,5,Chinese,guangzhou,1999,2015-07-31 
12:07:28,2015-07-31 12:07:28,55
+1AA100023,100023,8RAM size,3,Chinese,yichang,2194,2015-08-01 
12:07:28,2015-08-01 12:07:28,56
+1AA100024,100024,0RAM size,2,Chinese,wuhan,2483,2015-08-02 12:07:28,2015-08-02 
12:07:28,57
+1AA100025,100025,7RAM size,1,Chinese,wuhan,1724,2015-08-03 12:07:28,2015-08-03 
12:07:28,58
+1AA100026,100026,6RAM size,7,Chinese,guangzhou,1768,2015-08-04 
12:07:28,2015-08-04 12:07:28,59
+1AA100027,100027,3RAM size,4,Chinese,zhuzhou,2436,2015-08-05 
12:07:28,2015-08-05 12:07:28,60
+1AA100028,100028,0RAM size,5,Chinese,shenzhen,2849,2015-08-06 
12:07:28,2015-08-06 12:07:28,61
+1AA100029,100029,4RAM size,5,Chinese,changsha,1691,2015-08-07 
12:07:28,2015-08-07 12:07:28,62
+1AA10003,10003,9RAM size,4,Chinese,xiangtan,2071,2015-08-08 
12:07:28,2015-08-08 12:07:28,63
+1AA100030,100030,1RAM size,3,Chinese,guangzhou,1333,2015-08-09 
12:07:28,2015-08-09 12:07:28,64
+1AA100031,100031,3RAM size,1,Chinese,xiangtan,1080,2015-08-10 
12:07:28,2015-08-10 12:07:28,65
+1AA100032,100032,3RAM size,7,Chinese,shenzhen,1053,2015-08-11 
12:07:28,2015-08-11 12:07:28,66
+1AA100033,100033,7RAM size,6,Chinese,yichang,760,2015-08-12 
12:07:28,2015-08-12 12:07:28,67
+1AA100034,100034,6RAM size,6,Chinese,changsha,2061,2015-08-13 
12:07:28,2015-08-13 12:07:28,68
+1AA100035,100035,6RAM size,7,Chinese,shenzhen,2142,2015-08-14 
12:07:28,2015-08-14 12:07:28,69
+1AA100036,100036,9RAM size,5,Chinese,changsha,2224,2015-08-15 
12:07:28,2015-08-15 12:07:28,70
+1AA100037,100037,9RAM size,6,Chinese,changsha,1015,2015-08-16 
12:07:28,2015-08-16 12:07:28,71
+1AA100038,100038,8RAM size,3,Chinese,xiangtan,1229,2015-08-17 
12:07:28,2015-08-17 12:07:28,72
+1AA100039,100039,7RAM size,1,Chinese,wuhan,1750,2015-08-18 12:07:28,2015-08-18 
12:07:28,73
+1AA10004,10004,4RAM size,4,Chinese,shenzhen,1717,2015-08-19 
12:07:28,2015-08-19 12:07:28,74
+1AA100040,100040,1RAM size,7,Chinese,yichang,2078,2015-08-20 
12:07:28,2015-08-20 12:07:28,75
+1AA100041,100041,1RAM size,1,Chinese,xiangtan,2734,2015-08-21 
12:07:28,2015-08-21 12:07:28,76
+1AA100042,100042,1RAM size,5,Chinese,zhuzhou,2745,2015-08-22 
12:07:28,2015-08-22 12:07:28,77
+1AA100043,100043,9RAM size,6,Chinese,yichang,571,2015-08-23 
12:07:28,2015-08-23 12:07:28,78
+1AA100044,100044,9RAM size,2,Chinese,shenzhen,1697,2015-08-24 
12:07:28,2015-08-24 12:07:28,79
+1AA100045,100045,4RAM size,6,Chinese,wuhan,2553,2015-08-25 12:07:28,2015-08-25 
12:07:28,80
+1AA100046,100046,4RAM size,4,Chinese,wuhan,1077,2015-08-26 12:07:28,2015-08-26 
12:07:28,81
+1AA100047,100047,6RAM size,1,Chinese,wuhan,1823,2015-08-27 12:07:28,2015-08-27 
12:07:28,82
+1AA100048,100048,1RAM size,1,Chinese,xiangtan,2399,2015-08-28 
12:07:28,2015-08-28 12:07:28,83
+1AA100049,100049,4RAM size,6,Chinese,xiangtan,2890,2015-08-29 
12:07:28,2015-08-29 12:07:28,84
+1AA10005,10005,3RAM size,2,Chinese,zhuzhou,1608,2015-08-30 12:07:28,2015-08-30 
12:07:28,85
+1AA100050,100050,4RAM size,1,Chinese,yichang,29,2015-08-31 12:07:28,2015-08-31 
12:07:28,86
+1AA100051,100051,3RAM size,7,Chinese,xiangtan,1407,2015-09-01 
12:07:28,2015-09-01 12:07:28,87
+1AA100052,100052,8RAM size,7,Chinese,zhuzhou,845,2015-09-02 
12:07:28,2015-09-02 12:07:28,88
+1AA100053,100053,3RAM size,3,Chinese,zhuzhou,1655,2015-09-03 
12:07:28,2015-09-03 12:07:28,89
+1AA100054,100054,9RAM size,2,Chinese,shenzhen,1368,2015-09-04 
12:07:28,2015-09-04 12:07:28,90
+1AA100055,100055,4RAM size,7,Chinese,guangzhou,1728,2015-09-05 
12:07:28,2015-09-05 12:07:28,91
+1AA100056,100056,0RAM size,5,Chinese,wuhan,750,2015-09-06 12:07:28,2015-09-06 
12:07:28,92
+1AA100057,100057,4RAM size,6,Chinese,changsha,2288,2015-09-07 
12:07:28,2015-09-07 12:07:28,93
+1AA100058,100058,3RAM size,4,Chinese,wuhan,2635,2015-09-08 12:07:28,2015-09-08 
12:07:28,94
+1AA100059,100059,3RAM size,7,Chinese,xiangtan,1337,2015-09-09 
12:07:28,2015-09-09 12:07:28,95
+1AA10006,10006,5RAM size,5,Chinese,wuhan,2478,2015-09-10 12:07:28,2015-09-10 
12:07:28,96
+1AA100060,100060,4RAM size,4,Chinese,shenzhen,538,2015-09-11 
12:07:28,2015-09-11 12:07:28,97
+1AA100061,100061,0RAM size,6,Chinese,wuhan,1407,2015-09-12 12:07:28,2015-09-12 
12:07:28,98
+1AA100062,100062,6RAM size,6,Chinese,zhuzhou,2952,2015-09-13 
12:07:28,2015-09-13 12:07:28,99
+1AA100063,100063,8RAM size,3,Chinese,zhuzhou,1226,2015-09-14 
12:07:28,2015-09-14 12:07:28,100
+1AA100064,100064,4RAM size,7,Chinese,changsha,865,2015-09-15 
12:07:28,2015-09-15 12:07:28,101
+1AA100065,100065,4RAM size,7,Chinese,changsha,901,2015-09-16 
12:07:28,2015-09-16 12:07:28,102
+1AA100066,100066,1RAM size,4,Chinese,yichang,1864,2015-09-17 
12:07:28,2015-09-17 12:07:28,103
+1AA100067,100067,6RAM size,7,Chinese,changsha,572,2015-09-18 
12:07:28,2015-09-18 12:07:28,104
+1AA100068,100068,4RAM size,7,Chinese,xiangtan,412,2015-09-19 
12:07:28,2015-09-19 12:07:28,105
+1AA100069,100069,3RAM size,5,Chinese,yichang,1491,2015-09-20 
12:07:28,2015-09-20 12:07:28,106
+1AA10007,10007,2RAM size,3,Chinese,xiangtan,1350,2015-09-21 
12:07:28,2015-09-21 12:07:28,107
+1AA100070,100070,9RAM size,3,Chinese,wuhan,1567,2015-09-22 12:07:28,2015-09-22 
12:07:28,108
+1AA100071,100071,2RAM size,5,Chinese,changsha,1973,2015-09-23 
12:07:28,2015-09-23 12:07:28,109
+1AA100072,100072,9RAM size,7,Chinese,xiangtan,448,2015-09-24 
12:07:28,2015-09-24 12:07:28,110
+1AA100073,100073,3RAM size,2,Chinese,shenzhen,2488,2015-09-25 
12:07:28,2015-09-25 12:07:28,111
+1AA100074,100074,3RAM size,7,Chinese,shenzhen,907,2015-09-26 
12:07:28,2015-09-26 12:07:28,112
+1AA100075,100075,5RAM size,6,Chinese,guangzhou,2507,2015-09-27 
12:07:28,2015-09-27 12:07:28,113
+1AA100076,100076,4RAM size,7,Chinese,yichang,732,2015-09-28 
12:07:28,2015-09-28 12:07:28,114
+1AA100077,100077,5RAM size,6,Chinese,changsha,2077,2015-09-29 
12:07:28,2015-09-29 12:07:28,115
+1AA100078,100078,6RAM size,5,Chinese,wuhan,1434,2015-09-30 12:07:28,2015-09-30 
12:07:28,116
+1AA100079,100079,0RAM size,6,Chinese,changsha,1098,2015-10-01 
12:07:28,2015-10-01 12:07:28,117
+1AA10008,10008,4RAM size,4,Chinese,changsha,813,2015-10-02 12:07:28,2015-10-02 
12:07:28,118
+1AA100080,100080,9RAM size,6,Chinese,xiangtan,954,2015-10-03 
12:07:28,2015-10-03 12:07:28,119
+1AA100081,100081,8RAM size,2,Chinese,zhuzhou,613,2015-10-04 
12:07:28,2015-10-04 12:07:28,120
+1AA100082,100082,9RAM size,7,Chinese,xiangtan,2348,2015-10-05 
12:07:28,2015-10-05 12:07:28,121
+1AA100083,100083,3RAM size,2,Chinese,shenzhen,2192,2015-10-06 
12:07:28,2015-10-06 12:07:28,122
+1AA100084,100084,4RAM size,4,Chinese,xiangtan,2826,2015-10-07 
12:07:28,2015-10-07 12:07:28,123

http://git-wip-us.apache.org/repos/asf/carbondata/blob/e4843d76/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceTest.scala
----------------------------------------------------------------------
diff --git 
a/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceTest.scala
 
b/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceTest.scala
index 937f0d9..1e58a9e 100644
--- 
a/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceTest.scala
+++ 
b/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceTest.scala
@@ -1312,6 +1312,24 @@ class SparkCarbonDataSourceTest extends FunSuite with 
BeforeAndAfterAll {
     spark.sql(s"drop table if exists t_carbn01b")
   }
 
+  test("test fileformat flow with drop and query on same table") {
+    spark.sql("drop table if exists fileformat_drop")
+    spark.sql("drop table if exists fileformat_drop_hive")
+    spark.sql("create table fileformat_drop (imei string,AMSize 
string,channelsId string,ActiveCountry string, Activecity string,gamePointId 
double,deviceInformationId double,productionDate Timestamp,deliveryDate 
timestamp,deliverycharge double) using carbon 
options('table_blocksize'='1','LOCAL_DICTIONARY_ENABLE'='TRUE','LOCAL_DICTIONARY_THRESHOLD'='1000')")
+    spark.sql("create table fileformat_drop_hive(imei 
string,deviceInformationId double,AMSize string,channelsId string,ActiveCountry 
string,Activecity string,gamePointId double,productionDate 
Timestamp,deliveryDate timestamp,deliverycharge double)row format delimited 
FIELDS terminated by ',' LINES terminated by '\n' stored as textfile")
+    val sourceFile = 
FileFactory.getPath(s"$resource/vardhandaterestruct.csv").toString
+    spark.sql(s"load data local inpath '$sourceFile' into table 
fileformat_drop_hive")
+    spark.sql("insert into fileformat_drop select imei ,deviceInformationId 
,AMSize ,channelsId ,ActiveCountry ,Activecity ,gamePointId ,productionDate 
,deliveryDate ,deliverycharge from fileformat_drop_hive")
+    assert(spark.sql("select count(*) from fileformat_drop where 
imei='1AA10000'").collect().length == 1)
+
+    spark.sql("drop table if exists fileformat_drop")
+    spark.sql("create table fileformat_drop (imei string,deviceInformationId 
double,AMSize string,channelsId string,ActiveCountry string,Activecity 
string,gamePointId float,productionDate timestamp,deliveryDate 
timestamp,deliverycharge decimal(10,2)) using carbon 
options('table_blocksize'='1','LOCAL_DICTIONARY_ENABLE'='true','local_dictionary_threshold'='1000')")
+    spark.sql("insert into fileformat_drop select imei ,deviceInformationId 
,AMSize ,channelsId ,ActiveCountry ,Activecity ,gamePointId ,productionDate 
,deliveryDate ,deliverycharge from fileformat_drop_hive")
+    assert(spark.sql("select count(*) from fileformat_drop where 
imei='1AA10000'").collect().length == 1)
+    spark.sql("drop table if exists fileformat_drop")
+    spark.sql("drop table if exists fileformat_drop_hive")
+  }
+
   override protected def beforeAll(): Unit = {
     drop
     createParquetTable

Reply via email to