Github user xuchuanyin commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2490#discussion_r202230960
--- Diff:
integration/spark2/src/main/scala/org/apache/carbondata/datamap/IndexDataMapRebuildRDD.scala
---
@@ -154,89 +160,112 @@ class OriginalReadSupport(dataTypes:
Array[DataType]) extends CarbonReadSupport[
*/
class RawBytesReadSupport(segmentProperties: SegmentProperties,
indexColumns: Array[CarbonColumn])
extends CarbonReadSupport[Array[Object]] {
- var columnarSplitter: ColumnarSplitter = _
+ var dimensionKeyGenerator: KeyGenerator = _
+ // for the dictionary dimensions
+ var indexCol2IdxInDictArray: Map[String, Int] = Map()
// for the non dictionary dimensions
var indexCol2IdxInNoDictArray: Map[String, Int] = Map()
// for the measures
var indexCol2IdxInMeasureArray: Map[String, Int] = Map()
- // for the dictionary/date dimensions
- var dictIndexCol2MdkIndex: Map[String, Int] = Map()
- var mdkIndex2DictIndexCol: Map[Int, String] = Map()
- var existDim = false
+
+ /**
+ * use same way as restructure based result collector to prepare
+ * key generator to get surrogate value of dict column result.
+ * Refer to `RestructureBasedRowIdRawResultCollector`
+ */
--- End diff --
Please add a description explaining why this modification is needed compared
with the previous version. What is the scenario or intention?
---