Jackie-Jiang commented on code in PR #11453:
URL: https://github.com/apache/pinot/pull/11453#discussion_r1309335661
##########
pinot-common/src/main/java/org/apache/pinot/common/datablock/DataBlockUtils.java:
##########
@@ -275,43 +264,91 @@ private static Object[] extractRowFromDataBlock(DataBlock
dataBlock, int rowId,
* TODO: Add support for COLUMNAR format.
* @return int array of values in the column
*/
- public static int[] extractIntValuesForColumn(DataBlock dataBlock, int
columnIndex) {
+ public static int[] extractIntValuesForColumn(DataBlock dataBlock, int
colId) {
DataSchema dataSchema = dataBlock.getDataSchema();
- DataSchema.ColumnDataType[] columnDataTypes =
dataSchema.getColumnDataTypes();
-
- // Get null bitmap for the column.
- RoaringBitmap nullBitmap = extractNullBitmaps(dataBlock)[columnIndex];
+ ColumnDataType storedType =
dataSchema.getColumnDataType(colId).getStoredType();
+ RoaringBitmap nullBitmap = dataBlock.getNullRowIds(colId);
int numRows = dataBlock.getNumberOfRows();
-
- int[] rows = new int[numRows];
- for (int rowId = 0; rowId < numRows; rowId++) {
- if (nullBitmap != null && nullBitmap.contains(rowId)) {
- continue;
+ int[] values = new int[numRows];
+ if (nullBitmap == null) {
+ switch (storedType) {
+ case INT:
+ for (int rowId = 0; rowId < numRows; rowId++) {
+ values[rowId] = dataBlock.getInt(rowId, colId);
+ }
+ break;
+ case LONG:
+ for (int rowId = 0; rowId < numRows; rowId++) {
+ values[rowId] = (int) dataBlock.getLong(rowId, colId);
+ }
+ break;
+ case FLOAT:
+ for (int rowId = 0; rowId < numRows; rowId++) {
+ values[rowId] = (int) dataBlock.getFloat(rowId, colId);
+ }
+ break;
+ case DOUBLE:
+ for (int rowId = 0; rowId < numRows; rowId++) {
+ values[rowId] = (int) dataBlock.getDouble(rowId, colId);
+ }
+ break;
+ case BIG_DECIMAL:
+ for (int rowId = 0; rowId < numRows; rowId++) {
+ values[rowId] = dataBlock.getBigDecimal(rowId, colId).intValue();
+ }
+ break;
+ default:
+ throw new IllegalStateException(String.format("Cannot extract int
values for column: %s with stored type: %s",
+ dataSchema.getColumnName(colId), storedType));
}
-
- switch (columnDataTypes[columnIndex]) {
+ } else {
+ switch (storedType) {
case INT:
- case BOOLEAN:
- rows[rowId] = dataBlock.getInt(rowId, columnIndex);
+ for (int rowId = 0; rowId < numRows; rowId++) {
+ if (nullBitmap.contains(rowId)) {
+ continue;
+ }
+ values[rowId] = dataBlock.getInt(rowId, colId);
Review Comment:
I actually considered that, but decided not to do it for now because it
might introduce much higher overhead for larger fields (e.g. STRING and BYTES).
The changes made in this PR (most of the boilerplate code is auto-generated by AI
:-P) are guaranteed to be better than the old code because they use tighter loops
and reduce branching. We may open a separate thread to discuss further
performance improvements.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]