kunal642 commented on a change in pull request #3778:
URL: https://github.com/apache/carbondata/pull/3778#discussion_r480922354
##########
File path:
integration/spark/src/main/java/org/apache/spark/sql/secondaryindex/query/SecondaryIndexQueryResultProcessor.java
##########
@@ -243,46 +258,115 @@ private void
processResult(List<CarbonIterator<RowBatch>> detailQueryResultItera
/**
* This method will prepare the data from raw object that will take part in
sorting
*/
- private Object[] prepareRowObjectForSorting(Object[] row) {
+ private Object[] prepareRowObjectForSorting(Object[] row,
+ Map<Integer, GenericQueryType> complexDimensionInfoMap, int[]
complexColumnParentBlockIndexes)
+ throws SecondaryIndexException {
ByteArrayWrapper wrapper = (ByteArrayWrapper) row[0];
- // ByteBuffer[] noDictionaryBuffer = new ByteBuffer[noDictionaryCount];
+ byte[] implicitColumnByteArray = wrapper.getImplicitColumnByteArray();
List<CarbonDimension> dimensions = segmentProperties.getDimensions();
Object[] preparedRow = new Object[dimensions.size() + measureCount];
+ Map<Integer, Object[]> complexDataMap = new HashMap<>();
int noDictionaryIndex = 0;
int dictionaryIndex = 0;
+ int complexIndex = 0;
int i = 0;
// loop excluding last dimension as last one is implicit column.
for (; i < dimensions.size() - 1; i++) {
CarbonDimension dims = dimensions.get(i);
- if (dims.hasEncoding(Encoding.DICTIONARY)) {
+ boolean isComplexColumn = false;
+ // check if dimension is a complex data type
+ if (!complexDimensionInfoMap.isEmpty() &&
complexColumnParentBlockIndexes.length > 0) {
+ for (GenericQueryType queryType : complexDimensionInfoMap.values()) {
+ if (queryType.getName().equalsIgnoreCase(dims.getColName())) {
+ isComplexColumn = true;
+ break;
+ }
+ }
+ }
+ if (dims.hasEncoding(Encoding.DICTIONARY) && !isComplexColumn) {
// dictionary
preparedRow[i] = wrapper.getDictionaryKeyByIndex(dictionaryIndex++);
} else {
- // no dictionary dims
- byte[] noDictionaryKeyByIndex =
wrapper.getNoDictionaryKeyByIndex(noDictionaryIndex++);
- // no dictionary primitive columns are expected to be in original data
while loading,
- // so convert it to original data
- if (DataTypeUtil.isPrimitiveColumn(dims.getDataType())) {
- Object dataFromBytes =
DataTypeUtil.getDataBasedOnDataTypeForNoDictionaryColumn(
- noDictionaryKeyByIndex, dims.getDataType());
- if (null != dataFromBytes && dims.getDataType() ==
DataTypes.TIMESTAMP) {
- dataFromBytes = (long) dataFromBytes / 1000L;
+ if (isComplexColumn) {
+ byte[] complexKeyByIndex =
wrapper.getComplexKeyByIndex(complexIndex);
+ ByteBuffer byteArrayInput = ByteBuffer.wrap(complexKeyByIndex);
+ GenericQueryType genericQueryType =
+
complexDimensionInfoMap.get(complexColumnParentBlockIndexes[complexIndex++]);
+ int complexDataLength = byteArrayInput.getShort(2);
+ // In case, if array is empty
+ if (complexDataLength == 0) {
+ complexDataLength = complexDataLength + 1;
+ }
+ // get flattened array data
+ Object[] complexFlattenedData = new Object[complexDataLength];
+ Object[] data =
genericQueryType.getObjectArrayDataBasedOnDataType(byteArrayInput);
+ for (int index = 0; index < complexDataLength; index++) {
+ complexFlattenedData[index] =
+ getData(data, index, dims.getColumnSchema().getDataType());
}
- preparedRow[i] = dataFromBytes;
+ complexDataMap.put(i, complexFlattenedData);
} else {
- preparedRow[i] = noDictionaryKeyByIndex;
+ // no dictionary dims
+ byte[] noDictionaryKeyByIndex =
wrapper.getNoDictionaryKeyByIndex(noDictionaryIndex++);
+ // no dictionary primitive columns are expected to be in original
data while loading,
+ // so convert it to original data
+ if (DataTypeUtil.isPrimitiveColumn(dims.getDataType())) {
+ Object dataFromBytes = DataTypeUtil
+
.getDataBasedOnDataTypeForNoDictionaryColumn(noDictionaryKeyByIndex,
+ dims.getDataType());
+ if (null != dataFromBytes && dims.getDataType() ==
DataTypes.TIMESTAMP) {
+ dataFromBytes = (long) dataFromBytes / 1000L;
+ }
+ preparedRow[i] = dataFromBytes;
+ } else {
+ preparedRow[i] = noDictionaryKeyByIndex;
+ }
}
}
}
// at last add implicit column position reference(PID)
+ preparedRow[i] = implicitColumnByteArray;
- preparedRow[i] = wrapper.getImplicitColumnByteArray();
+ // In case of complex array type, flatten the data and add for sorting
+ // TODO: Handle for nested array and other complex types
Review comment:
Please mention in some doc, that nested complex is not supported
##########
File path:
integration/spark/src/main/java/org/apache/spark/sql/secondaryindex/query/SecondaryIndexQueryResultProcessor.java
##########
@@ -243,46 +258,115 @@ private void
processResult(List<CarbonIterator<RowBatch>> detailQueryResultItera
/**
* This method will prepare the data from raw object that will take part in
sorting
*/
- private Object[] prepareRowObjectForSorting(Object[] row) {
+ private Object[] prepareRowObjectForSorting(Object[] row,
+ Map<Integer, GenericQueryType> complexDimensionInfoMap, int[]
complexColumnParentBlockIndexes)
+ throws SecondaryIndexException {
ByteArrayWrapper wrapper = (ByteArrayWrapper) row[0];
- // ByteBuffer[] noDictionaryBuffer = new ByteBuffer[noDictionaryCount];
+ byte[] implicitColumnByteArray = wrapper.getImplicitColumnByteArray();
List<CarbonDimension> dimensions = segmentProperties.getDimensions();
Object[] preparedRow = new Object[dimensions.size() + measureCount];
+ Map<Integer, Object[]> complexDataMap = new HashMap<>();
int noDictionaryIndex = 0;
int dictionaryIndex = 0;
+ int complexIndex = 0;
int i = 0;
// loop excluding last dimension as last one is implicit column.
for (; i < dimensions.size() - 1; i++) {
CarbonDimension dims = dimensions.get(i);
- if (dims.hasEncoding(Encoding.DICTIONARY)) {
+ boolean isComplexColumn = false;
+ // check if dimension is a complex data type
+ if (!complexDimensionInfoMap.isEmpty() &&
complexColumnParentBlockIndexes.length > 0) {
+ for (GenericQueryType queryType : complexDimensionInfoMap.values()) {
+ if (queryType.getName().equalsIgnoreCase(dims.getColName())) {
+ isComplexColumn = true;
+ break;
+ }
+ }
+ }
+ if (dims.hasEncoding(Encoding.DICTIONARY) && !isComplexColumn) {
// dictionary
preparedRow[i] = wrapper.getDictionaryKeyByIndex(dictionaryIndex++);
} else {
- // no dictionary dims
- byte[] noDictionaryKeyByIndex =
wrapper.getNoDictionaryKeyByIndex(noDictionaryIndex++);
- // no dictionary primitive columns are expected to be in original data
while loading,
- // so convert it to original data
- if (DataTypeUtil.isPrimitiveColumn(dims.getDataType())) {
- Object dataFromBytes =
DataTypeUtil.getDataBasedOnDataTypeForNoDictionaryColumn(
- noDictionaryKeyByIndex, dims.getDataType());
- if (null != dataFromBytes && dims.getDataType() ==
DataTypes.TIMESTAMP) {
- dataFromBytes = (long) dataFromBytes / 1000L;
+ if (isComplexColumn) {
+ byte[] complexKeyByIndex =
wrapper.getComplexKeyByIndex(complexIndex);
+ ByteBuffer byteArrayInput = ByteBuffer.wrap(complexKeyByIndex);
+ GenericQueryType genericQueryType =
+
complexDimensionInfoMap.get(complexColumnParentBlockIndexes[complexIndex++]);
+ int complexDataLength = byteArrayInput.getShort(2);
+ // In case, if array is empty
+ if (complexDataLength == 0) {
+ complexDataLength = complexDataLength + 1;
+ }
+ // get flattened array data
+ Object[] complexFlattenedData = new Object[complexDataLength];
+ Object[] data =
genericQueryType.getObjectArrayDataBasedOnDataType(byteArrayInput);
+ for (int index = 0; index < complexDataLength; index++) {
+ complexFlattenedData[index] =
+ getData(data, index, dims.getColumnSchema().getDataType());
}
- preparedRow[i] = dataFromBytes;
+ complexDataMap.put(i, complexFlattenedData);
} else {
- preparedRow[i] = noDictionaryKeyByIndex;
+ // no dictionary dims
+ byte[] noDictionaryKeyByIndex =
wrapper.getNoDictionaryKeyByIndex(noDictionaryIndex++);
+ // no dictionary primitive columns are expected to be in original
data while loading,
+ // so convert it to original data
+ if (DataTypeUtil.isPrimitiveColumn(dims.getDataType())) {
+ Object dataFromBytes = DataTypeUtil
+
.getDataBasedOnDataTypeForNoDictionaryColumn(noDictionaryKeyByIndex,
+ dims.getDataType());
+ if (null != dataFromBytes && dims.getDataType() ==
DataTypes.TIMESTAMP) {
+ dataFromBytes = (long) dataFromBytes / 1000L;
+ }
+ preparedRow[i] = dataFromBytes;
+ } else {
+ preparedRow[i] = noDictionaryKeyByIndex;
+ }
}
}
}
// at last add implicit column position reference(PID)
+ preparedRow[i] = implicitColumnByteArray;
- preparedRow[i] = wrapper.getImplicitColumnByteArray();
+ // In case of complex array type, flatten the data and add for sorting
+ // TODO: Handle for nested array and other complex types
Review comment:
Please mention in some doc, that nested complex is not supported
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]