Github user qiuchenjian commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/3047#discussion_r244895354
--- Diff: integration/spark-common/src/main/scala/org/apache/carbondata/api/CarbonStore.scala ---
@@ -101,14 +102,21 @@ object CarbonStore {
         val (dataSize, indexSize) = if (load.getFileFormat == FileFormat.ROW_V1) {
           // for streaming segment, we should get the actual size from the index file
           // since it is continuously inserting data
-          val segmentDir = CarbonTablePath.getSegmentPath(tablePath, load.getLoadName)
+          val segmentDir = CarbonTablePath
+            .getSegmentPath(carbonTable.getTablePath, load.getLoadName)
           val indexPath = CarbonTablePath.getCarbonStreamIndexFilePath(segmentDir)
           val indices = StreamSegment.readIndexFile(indexPath, FileFactory.getFileType(indexPath))
           (indices.asScala.map(_.getFile_size).sum, FileFactory.getCarbonFile(indexPath).getSize)
         } else {
           // for batch segment, we can get the data size from table status file directly
-          (if (load.getDataSize == null) 0L else load.getDataSize.toLong,
-            if (load.getIndexSize == null) 0L else load.getIndexSize.toLong)
+          if (null == load.getDataSize && null == load.getIndexSize) {
+            val dataIndexSize = CarbonUtil.calculateDataIndexSize(carbonTable, false)
+            (dataIndexSize.get(CarbonCommonConstants.CARBON_TOTAL_DATA_SIZE).toLong,
+              dataIndexSize.get(CarbonCommonConstants.CARBON_TOTAL_INDEX_SIZE).toLong)
+          } else {
+            (load.getDataSize.toLong,
--- End diff --
If only one of load.getDataSize and load.getIndexSize is null, the else branch will throw an exception when calling toLong on the null value; I think this scenario should be handled as well.
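One way to cover it would be to fall back to recomputing the sizes whenever either field is missing. A minimal sketch of the batch-segment else branch, reusing only names that already appear in the diff above (load, carbonTable, CarbonUtil.calculateDataIndexSize); this is just an illustration of the idea, not the actual change in the PR:

    } else {
      // batch segment: if either size is missing from the table status file,
      // recompute both instead of calling toLong on a possibly-null value
      if (null == load.getDataSize || null == load.getIndexSize) {
        val dataIndexSize = CarbonUtil.calculateDataIndexSize(carbonTable, false)
        (dataIndexSize.get(CarbonCommonConstants.CARBON_TOTAL_DATA_SIZE).toLong,
          dataIndexSize.get(CarbonCommonConstants.CARBON_TOTAL_INDEX_SIZE).toLong)
      } else {
        (load.getDataSize.toLong, load.getIndexSize.toLong)
      }
    }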
---