parthchandra commented on code in PR #1034:
URL: https://github.com/apache/datafusion-comet/pull/1034#discussion_r1813538729


##########
common/src/main/scala/org/apache/comet/CometConf.scala:
##########
@@ -196,6 +196,12 @@ object CometConf extends ShimCometConf {
       .booleanConf
       .createWithDefault(false)
 
+  val COMET_EXEC_NATIVE_COLUMNAR_TO_ROW_ENABLED: ConfigEntry[Boolean] =
+    conf("spark.comet.exec.nativeColumnarToRow.enabled")
+      .doc("Experimental support for native columnar to row for fixed width 
types")
+      .booleanConf
+      .createWithDefault(true)

Review Comment:
   This is not the final version. The default is set to true only to make sure
   all tests exercise this code path.



##########
common/src/main/scala/org/apache/spark/sql/comet/util/Utils.scala:
##########
@@ -261,4 +262,32 @@ object Utils {
         throw new SparkException(s"Unsupported Arrow Vector for $reason: 
${valueVector.getClass}")
     }
   }
+
+  // Given an array of Comet vectors calculate the number of bytes required 
for a batch of
+  // UnsafeRow rows to hold the values of the vectors
+  def getUnsafeRowBatchSize(vectors: Array[CometVector]): Long = {
+    val bitSetWidth = UnsafeRow.calculateBitSetWidthInBytes(vectors.length)
+    val num_rows = vectors.apply(0).getValueVector.getValueCount
+    val dataBytes: Long = vectors
+      .map(v => {
+        val dt = fromArrowField(v.getValueVector.getField)
+        assert(
+          UnsafeRow.isMutable(dt) || dt.isInstanceOf[BinaryType] || 
dt.isInstanceOf[StringType])
+        // For variable length types, assuming that the vector has not been 
read from, the
+        // readable bytes are the number of bytes of data in the vector.
+        val fixedBytes = num_rows * 8L // offset (4 bytes) and length (4 bytes)
+        val varBytes = dt match {
+          case datatype if UnsafeRow.isFixedLength(datatype) => 0L
+          case DecimalType.Fixed(_, _) => num_rows * 16L
+          case BinaryType | StringType =>

Review Comment:
   This case is currently unused, since only the fixed-length types are
   implemented so far. This will probably change completely.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to