This is an automated email from the ASF dual-hosted git repository.

yashmayya pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 8f97b26b6b9 Skip per-row convertTypes() in FunctionOperand when types 
already match (#17730)
8f97b26b6b9 is described below

commit 8f97b26b6b98fd62382b765be2146aae14858273
Author: Yash Mayya <[email protected]>
AuthorDate: Fri Mar 6 14:30:49 2026 -0800

    Skip per-row convertTypes() in FunctionOperand when types already match 
(#17730)
---
 .../pinot/perf/BenchmarkFunctionOperand.java       | 183 +++++++++++++++++++++
 .../runtime/operator/operands/FunctionOperand.java |  20 ++-
 2 files changed, 201 insertions(+), 2 deletions(-)

diff --git 
a/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkFunctionOperand.java 
b/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkFunctionOperand.java
new file mode 100644
index 00000000000..dade48b1a6f
--- /dev/null
+++ 
b/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkFunctionOperand.java
@@ -0,0 +1,183 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.perf;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.concurrent.TimeUnit;
+import org.apache.pinot.common.function.FunctionInfo;
+import org.apache.pinot.common.function.FunctionRegistry;
+import org.apache.pinot.common.function.QueryFunctionInvoker;
+import org.apache.pinot.common.utils.DataSchema;
+import org.apache.pinot.common.utils.DataSchema.ColumnDataType;
+import org.apache.pinot.query.planner.logical.RexExpression;
+import org.apache.pinot.query.runtime.operator.operands.FunctionOperand;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.infra.Blackhole;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.options.ChainedOptionsBuilder;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+
+/**
+ * Benchmark for {@link FunctionOperand#apply} to measure the impact of 
skipping
+ * {@code convertTypes()} when operand types already match function parameter 
types.
+ *
+ * <p>The benchmark has two parts:
+ * <ul>
+ *   <li><b>End-to-end apply()</b>: Compares FunctionOperand.apply() when 
types match (convertTypes
+ *       skipped) vs when types mismatch (convertTypes called per row).</li>
+ *   <li><b>Isolated convertTypes()</b>: Directly measures the per-row cost of
+ *       convertTypes() on type-matching arguments — the exact overhead 
eliminated by the
+ *       optimization — against a no-op baseline.</li>
+ * </ul>
+ */
+@BenchmarkMode(Mode.Throughput)
+@OutputTimeUnit(TimeUnit.MICROSECONDS)
+@Fork(1)
+@Warmup(iterations = 3, time = 1)
+@Measurement(iterations = 5, time = 1)
+@State(Scope.Benchmark)
+public class BenchmarkFunctionOperand {
+
+  private static final int NUM_ROWS = 1000;
+
+  // End-to-end state
+  private FunctionOperand _matchingTypesOperand;
+  private FunctionOperand _mismatchedTypesOperand;
+  private List<List<Object>> _longRows;
+  private List<List<Object>> _intRows;
+
+  // Isolated convertTypes state
+  private QueryFunctionInvoker _invoker;
+  private Object[][] _matchingArgs;
+
+  @Setup(Level.Trial)
+  public void setUp() {
+    FunctionRegistry.init();
+
+    // --- End-to-end setup ---
+
+    // Scenario 1: plus(LONG, LONG) with LONG columns → types match, no 
conversion needed
+    DataSchema longSchema = new DataSchema(
+        new String[]{"col0", "col1"},
+        new ColumnDataType[]{ColumnDataType.LONG, ColumnDataType.LONG}
+    );
+    RexExpression.FunctionCall plusLong = new RexExpression.FunctionCall(
+        ColumnDataType.LONG, "plus",
+        Arrays.asList(new RexExpression.InputRef(0), new 
RexExpression.InputRef(1))
+    );
+    _matchingTypesOperand = new FunctionOperand(plusLong, longSchema);
+
+    // Scenario 2: plus(INT, INT) → resolves to longPlus(long, long), but 
argument types are INT
+    //             → types mismatch, convertTypes() called per row
+    DataSchema intSchema = new DataSchema(
+        new String[]{"col0", "col1"},
+        new ColumnDataType[]{ColumnDataType.INT, ColumnDataType.INT}
+    );
+    RexExpression.FunctionCall plusInt = new RexExpression.FunctionCall(
+        ColumnDataType.LONG, "plus",
+        Arrays.asList(new RexExpression.InputRef(0), new 
RexExpression.InputRef(1))
+    );
+    _mismatchedTypesOperand = new FunctionOperand(plusInt, intSchema);
+
+    _longRows = new ArrayList<>(NUM_ROWS);
+    _intRows = new ArrayList<>(NUM_ROWS);
+    for (int i = 0; i < NUM_ROWS; i++) {
+      _longRows.add(Arrays.asList((long) i, (long) (i + 1)));
+      _intRows.add(Arrays.asList(i, i + 1));
+    }
+
+    // --- Isolated convertTypes setup ---
+
+    // Get the longPlus(long, long) function invoker
+    ColumnDataType[] longArgTypes = {ColumnDataType.LONG, ColumnDataType.LONG};
+    FunctionInfo functionInfo = FunctionRegistry.lookupFunctionInfo(
+        FunctionRegistry.canonicalize("plus"), longArgTypes);
+    _invoker = new QueryFunctionInvoker(functionInfo);
+
+    // Pre-allocate argument arrays with Long values (matching type for long 
parameters).
+    // convertTypes() will still check isAssignableFrom + HashMap lookup per 
argument per row.
+    _matchingArgs = new Object[NUM_ROWS][];
+    for (int i = 0; i < NUM_ROWS; i++) {
+      _matchingArgs[i] = new Object[]{(long) i, (long) (i + 1)};
+    }
+  }
+
+  // --- End-to-end benchmarks ---
+
+  @Benchmark
+  public void applyTypesMatch(Blackhole bh) {
+    for (List<Object> row : _longRows) {
+      bh.consume(_matchingTypesOperand.apply(row));
+    }
+  }
+
+  @Benchmark
+  public void applyTypesNeedConversion(Blackhole bh) {
+    for (List<Object> row : _intRows) {
+      bh.consume(_mismatchedTypesOperand.apply(row));
+    }
+  }
+
+  // --- Isolated convertTypes benchmarks ---
+  // These directly measure the per-block overhead of convertTypes() on 
already-matching types,
+  // which is the exact cost eliminated by the _needsConversion optimization.
+
+  /**
+   * Baseline: invoke the function on each row without calling convertTypes().
+   * This represents the optimized path (types match, convertTypes skipped).
+   */
+  @Benchmark
+  public void invokeWithoutConvertTypes(Blackhole bh) {
+    for (Object[] args : _matchingArgs) {
+      bh.consume(_invoker.invoke(args));
+    }
+  }
+
+  /**
+   * Old path: call convertTypes() then invoke the function on each row.
+   * This represents the pre-optimization path (convertTypes always called).
+   */
+  @Benchmark
+  public void invokeWithConvertTypes(Blackhole bh) {
+    for (Object[] args : _matchingArgs) {
+      _invoker.convertTypes(args);
+      bh.consume(_invoker.invoke(args));
+    }
+  }
+
+  public static void main(String[] args)
+      throws Exception {
+    ChainedOptionsBuilder opt = new OptionsBuilder()
+        .include(BenchmarkFunctionOperand.class.getSimpleName());
+    new Runner(opt.build()).run();
+  }
+}
diff --git 
a/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/operands/FunctionOperand.java
 
b/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/operands/FunctionOperand.java
index 5e58c0a3a6c..259ad4ef0f3 100644
--- 
a/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/operands/FunctionOperand.java
+++ 
b/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/operands/FunctionOperand.java
@@ -41,6 +41,7 @@ public class FunctionOperand implements TransformOperand {
   private final ColumnDataType _resultType;
   private final QueryFunctionInvoker _functionInvoker;
   private final ColumnDataType _functionInvokerResultType;
+  private final boolean _needsConversion;
   private final List<TransformOperand> _operands;
   private final Object[] _reusableOperandHolder;
 
@@ -78,10 +79,24 @@ public class FunctionOperand implements TransformOperand {
     if (!_functionInvoker.getMethod().isVarArgs()) {
       Class<?>[] parameterClasses = _functionInvoker.getParameterClasses();
       PinotDataType[] parameterTypes = _functionInvoker.getParameterTypes();
+      boolean needsConversion = false;
       for (int i = 0; i < numOperands; i++) {
         Preconditions.checkState(parameterTypes[i] != null, "Unsupported 
parameter class: %s for method: %s",
             parameterClasses[i], functionInfo.getMethod());
+        if (!needsConversion) {
+          // For array-typed parameters, always require conversion: the 
runtime Java class may
+          // differ from the canonical stored type (e.g. Double[] vs double[] 
after DataBlock
+          // deserialization in the multi-stage engine), and Method.invoke 
does not autobox arrays.
+          ColumnDataType parameterColumnType = 
FunctionUtils.getColumnDataType(parameterClasses[i]);
+          if (parameterColumnType == null || argumentTypes[i] != 
parameterColumnType
+              || parameterClasses[i].isArray()) {
+            needsConversion = true;
+          }
+        }
       }
+      _needsConversion = needsConversion;
+    } else {
+      _needsConversion = false;
     }
     ColumnDataType functionInvokerResultType = 
FunctionUtils.getColumnDataType(_functionInvoker.getResultClass());
     // Handle unrecognized result class with STRING
@@ -106,12 +121,13 @@ public class FunctionOperand implements TransformOperand {
       Object value = operand.apply(row);
       _reusableOperandHolder[i] = value != null ? 
operand.getResultType().toExternal(value) : null;
     }
-    // TODO: Optimize per record conversion
     Object result;
     if (_functionInvoker.getMethod().isVarArgs()) {
       result = _functionInvoker.invoke(new Object[]{_reusableOperandHolder});
     } else {
-      _functionInvoker.convertTypes(_reusableOperandHolder);
+      if (_needsConversion) {
+        _functionInvoker.convertTypes(_reusableOperandHolder);
+      }
       result = _functionInvoker.invoke(_reusableOperandHolder);
     }
     return result != null ? 
TypeUtils.convert(_functionInvokerResultType.toInternal(result),


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to