Github user xubo245 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2482#discussion_r237740134
--- Diff:
integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/datacompaction/CarbonIndexFileMergeTestCase.scala
---
@@ -215,43 +249,215 @@ class CarbonIndexFileMergeTestCase
Assert
.assertEquals(getIndexOrMergeIndexFileSize(table, "0",
CarbonTablePath.INDEX_FILE_EXT),
segment0.head.getIndexSize.toLong)
- new CarbonIndexFileMergeWriter(table)
- .mergeCarbonIndexFilesOfSegment("0", table.getTablePath, false,
String.valueOf(System.currentTimeMillis()))
+ sql("Alter table fileSize compact 'segment_index'")
loadMetadataDetails = SegmentStatusManager
.readTableStatusFile(CarbonTablePath.getTableStatusFilePath(table.getTablePath))
segment0 = loadMetadataDetails.filter(x=>
x.getLoadName.equalsIgnoreCase("0"))
Assert
.assertEquals(getIndexOrMergeIndexFileSize(table, "0",
CarbonTablePath.MERGE_INDEX_FILE_EXT),
segment0.head.getIndexSize.toLong)
+
CarbonProperties.getInstance().addProperty(CarbonCommonConstants.CARBON_MERGE_INDEX_IN_SEGMENT,
"true")
sql("DROP TABLE IF EXISTS fileSize")
}
- private def getIndexFileCount(tableName: String, segmentNo: String): Int
= {
- val carbonTable =
CarbonMetadata.getInstance().getCarbonTable(tableName)
- val segmentDir =
CarbonTablePath.getSegmentPath(carbonTable.getTablePath, segmentNo)
- if (FileFactory.isFileExist(segmentDir)) {
- val indexFiles = new
SegmentIndexFileStore().getIndexFilesFromSegment(segmentDir)
- indexFiles.asScala.map { f =>
- if (f._2 == null) {
- 1
- } else {
- 0
- }
- }.sum
- } else {
- val segment = Segment.getSegment(segmentNo, carbonTable.getTablePath)
- if (segment != null) {
- val store = new SegmentFileStore(carbonTable.getTablePath,
segment.getSegmentFileName)
- store.getSegmentFile.getLocationMap.values().asScala.map { f =>
- if (f.getMergeFileName == null) {
- f.getFiles.size()
- } else {
- 0
- }
- }.sum
- } else {
- 0
+ test("Verify index merge for compacted segments MINOR - level 2") {
+ CarbonProperties.getInstance()
+
.addProperty(CarbonCommonConstants.COMPACTION_SEGMENT_LEVEL_THRESHOLD, "2,2")
+ CarbonProperties.getInstance()
+ .addProperty(CarbonCommonConstants.CARBON_MERGE_INDEX_IN_SEGMENT,
"false")
+ sql("DROP TABLE IF EXISTS nonindexmerge")
+ sql(
+ """
+ | CREATE TABLE nonindexmerge(id INT, name STRING, city STRING, age
INT)
+ | STORED BY 'org.apache.carbondata.format'
+ | TBLPROPERTIES('SORT_COLUMNS'='city,name',
'SORT_SCOPE'='GLOBAL_SORT')
+ """.stripMargin)
+ sql(s"LOAD DATA LOCAL INPATH '$file2' INTO TABLE nonindexmerge
OPTIONS('header'='false', " +
+ s"'GLOBAL_SORT_PARTITIONS'='100')")
+ sql(s"LOAD DATA LOCAL INPATH '$file2' INTO TABLE nonindexmerge
OPTIONS('header'='false', " +
+ s"'GLOBAL_SORT_PARTITIONS'='100')")
+ sql(s"LOAD DATA LOCAL INPATH '$file2' INTO TABLE nonindexmerge
OPTIONS('header'='false', " +
+ s"'GLOBAL_SORT_PARTITIONS'='100')")
+ sql(s"LOAD DATA LOCAL INPATH '$file2' INTO TABLE nonindexmerge
OPTIONS('header'='false', " +
+ s"'GLOBAL_SORT_PARTITIONS'='100')")
+ val rows = sql("""Select count(*) from nonindexmerge""").collect()
+ assert(getIndexFileCount("default_nonindexmerge", "0") == 100)
+ assert(getIndexFileCount("default_nonindexmerge", "1") == 100)
+ assert(getIndexFileCount("default_nonindexmerge", "2") == 100)
+ assert(getIndexFileCount("default_nonindexmerge", "3") == 100)
+ CarbonProperties.getInstance()
+ .addProperty(CarbonCommonConstants.CARBON_MERGE_INDEX_IN_SEGMENT,
"true")
+ sql("ALTER TABLE nonindexmerge COMPACT 'minor'").collect()
+ assert(getIndexFileCount("default_nonindexmerge", "0") == 100)
+ assert(getIndexFileCount("default_nonindexmerge", "1") == 100)
+ assert(getIndexFileCount("default_nonindexmerge", "2") == 100)
+ assert(getIndexFileCount("default_nonindexmerge", "3") == 100)
+ assert(getIndexFileCount("default_nonindexmerge", "0.1") == 100)
+ assert(getIndexFileCount("default_nonindexmerge", "2.1") == 100)
+ assert(getIndexFileCount("default_nonindexmerge", "0.2") == 0)
+ checkAnswer(sql("""Select count(*) from nonindexmerge"""), rows)
+ }
+
+ test("Verify index merge for compacted segments Auto Compaction") {
+ CarbonProperties.getInstance()
+
.addProperty(CarbonCommonConstants.COMPACTION_SEGMENT_LEVEL_THRESHOLD, "2,3")
+ CarbonProperties.getInstance()
+ .addProperty(CarbonCommonConstants.CARBON_MERGE_INDEX_IN_SEGMENT,
"false")
+ sql("DROP TABLE IF EXISTS nonindexmerge")
+ sql(
+ """
+ | CREATE TABLE nonindexmerge(id INT, name STRING, city STRING, age
INT)
+ | STORED BY 'org.apache.carbondata.format'
+ | TBLPROPERTIES('SORT_COLUMNS'='city,name',
'SORT_SCOPE'='GLOBAL_SORT')
+ """.stripMargin)
+ sql(s"LOAD DATA LOCAL INPATH '$file2' INTO TABLE nonindexmerge
OPTIONS('header'='false', " +
+ s"'GLOBAL_SORT_PARTITIONS'='100')")
+ sql(s"LOAD DATA LOCAL INPATH '$file2' INTO TABLE nonindexmerge
OPTIONS('header'='false', " +
+ s"'GLOBAL_SORT_PARTITIONS'='100')")
+ sql(s"LOAD DATA LOCAL INPATH '$file2' INTO TABLE nonindexmerge
OPTIONS('header'='false', " +
+ s"'GLOBAL_SORT_PARTITIONS'='100')")
+ sql(s"LOAD DATA LOCAL INPATH '$file2' INTO TABLE nonindexmerge
OPTIONS('header'='false', " +
+ s"'GLOBAL_SORT_PARTITIONS'='100')")
+ val rows = sql("""Select count(*) from nonindexmerge""").collect()
+ assert(getIndexFileCount("default_nonindexmerge", "0") == 100)
+ assert(getIndexFileCount("default_nonindexmerge", "1") == 100)
+ assert(getIndexFileCount("default_nonindexmerge", "2") == 100)
+ assert(getIndexFileCount("default_nonindexmerge", "3") == 100)
+ CarbonProperties.getInstance()
+ .addProperty(CarbonCommonConstants.CARBON_MERGE_INDEX_IN_SEGMENT,
"true")
+ CarbonProperties.getInstance()
+ .addProperty(CarbonCommonConstants.DEFAULT_ENABLE_AUTO_LOAD_MERGE,
"true")
+ sql(s"LOAD DATA LOCAL INPATH '$file2' INTO TABLE nonindexmerge
OPTIONS('header'='false', " +
+ s"'GLOBAL_SORT_PARTITIONS'='100')"
+ )
+ assert(getIndexFileCount("default_nonindexmerge", "0") == 100)
+ assert(getIndexFileCount("default_nonindexmerge", "1") == 100)
+ assert(getIndexFileCount("default_nonindexmerge", "2") == 100)
+ assert(getIndexFileCount("default_nonindexmerge", "3") == 100)
+ assert(getIndexFileCount("default_nonindexmerge", "4") == 0)
+ assert(getIndexFileCount("default_nonindexmerge", "0.1") == 0)
+ assert(getIndexFileCount("default_nonindexmerge", "2.1") == 0)
+ checkAnswer(sql("""Select count(*) from nonindexmerge"""),
Seq(Row(3000000)))
+ CarbonProperties.getInstance()
+ .addProperty(CarbonCommonConstants.DEFAULT_ENABLE_AUTO_LOAD_MERGE,
"false")
--- End diff --
Why is the key
CarbonCommonConstants.DEFAULT_ENABLE_AUTO_LOAD_MERGE? DEFAULT_ENABLE_AUTO_LOAD_MERGE
should be the value, not the key.
---