siddharthteotia commented on code in PR #9427:
URL: https://github.com/apache/pinot/pull/9427#discussion_r974757198


##########
pinot-core/src/main/java/org/apache/pinot/core/common/datablock/DataBlockBuilder.java:
##########
@@ -173,81 +183,133 @@ public static RowDataBlock buildFromRows(List<Object[]> 
rows, @Nullable RoaringB
             break;
           default:
             throw new IllegalStateException(
-                String.format("Unsupported data type: %s for column: %s", 
rowBuilder._columnDataType[i],
-                    rowBuilder._dataSchema.getColumnName(i)));
+                String.format("Unsupported data type: %s for column: %s", 
rowBuilder._columnDataTypes[colId],
+                    rowBuilder._dataSchema.getColumnName(colId)));
         }
       }
       rowBuilder._fixedSizeDataByteArrayOutputStream.write(byteBuffer.array(), 
0, byteBuffer.position());
     }
     // Write null bitmaps after writing data.
-    if (colNullBitmaps != null) {
-      for (RoaringBitmap nullBitmap : colNullBitmaps) {
-        rowBuilder.setNullRowIds(nullBitmap);
-      }
+    for (RoaringBitmap nullBitmap : nullBitmaps) {
+      rowBuilder.setNullRowIds(nullBitmap);
     }
     return buildRowBlock(rowBuilder);
   }
 
-  public static ColumnarDataBlock buildFromColumns(List<Object[]> columns, 
@Nullable RoaringBitmap[] colNullBitmaps,
-      DataSchema dataSchema)
+  public static ColumnarDataBlock buildFromColumns(List<Object[]> columns, 
DataSchema dataSchema)
       throws IOException {
     DataBlockBuilder columnarBuilder = new DataBlockBuilder(dataSchema, 
BaseDataBlock.Type.COLUMNAR);
-    for (int i = 0; i < columns.size(); i++) {
-      Object[] column = columns.get(i);
+    RoaringBitmap[] nullBitmaps = new 
RoaringBitmap[columnarBuilder._numColumns];
+    for (int i = 0; i < columnarBuilder._numColumns; i++) {
+      nullBitmaps[i] = new RoaringBitmap();
+    }
+    for (int colId = 0; colId < columns.size(); colId++) {
+      Object[] column = columns.get(colId);
       columnarBuilder._numRows = column.length;
-      ByteBuffer byteBuffer = ByteBuffer.allocate(columnarBuilder._numRows * 
columnarBuilder._columnSizeInBytes[i]);
-      switch (columnarBuilder._columnDataType[i]) {
+      ByteBuffer byteBuffer = ByteBuffer.allocate(columnarBuilder._numRows * 
columnarBuilder._columnSizeInBytes[colId]);
+      Object value;
+      switch (columnarBuilder._columnDataTypes[colId]) {
         // Single-value column
         case INT:
-          for (Object value : column) {
+          for (int rowId = 0; rowId < columnarBuilder._numRows; rowId++) {
+            value = column[rowId];
+            if (value == null) {
+              nullBitmaps[colId].add(rowId);
+              value = 
NullValueUtils.getDefaultNullValue(columnarBuilder._columnDataTypes[colId]);
+            }
             byteBuffer.putInt(((Number) value).intValue());
           }
           break;
         case LONG:
-          for (Object value : column) {
+          for (int rowId = 0; rowId < columnarBuilder._numRows; rowId++) {
+            value = column[rowId];
+            if (value == null) {
+              nullBitmaps[colId].add(rowId);
+              value = 
NullValueUtils.getDefaultNullValue(columnarBuilder._columnDataTypes[colId]);
+            }
             byteBuffer.putLong(((Number) value).longValue());
           }
           break;
         case FLOAT:
-          for (Object value : column) {
+          for (int rowId = 0; rowId < columnarBuilder._numRows; rowId++) {
+            value = column[rowId];
+            if (value == null) {
+              nullBitmaps[colId].add(rowId);
+              value = 
NullValueUtils.getDefaultNullValue(columnarBuilder._columnDataTypes[colId]);

Review Comment:
   Ideally if the null bitmap is working and bit 1 / 0 indicates whether values 
is non-NULL or NULL, then we should not be encoding the default null value in 
the data block. 
   
   The point of using null bitmaps to denote NULLs is to not necessarily have 
to encode placeholder values as long as the reader does `isNull(int index)` 
check for every row read in the column. 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to