Jackie-Jiang commented on code in PR #13573:
URL: https://github.com/apache/pinot/pull/13573#discussion_r1674516623


##########
pinot-common/src/main/java/org/apache/pinot/common/function/FunctionRegistry.java:
##########
@@ -95,108 +153,152 @@ public static void init() {
   }
 
   /**
-   * Registers a method with the name of the method.
+   * Registers a {@link PinotScalarFunction} under the given canonical name.
    */
-  public static void registerFunction(Method method, boolean 
nullableParameters, boolean isPlaceholder,
-      boolean isVarArg) {
-    registerFunction(method.getName(), method, nullableParameters, 
isPlaceholder, isVarArg);
+  private static void register(String canonicalName, PinotScalarFunction 
function,
+      Map<String, PinotScalarFunction> functionMap) {
+    Preconditions.checkState(functionMap.put(canonicalName, function) == null, 
"Function: %s is already registered",
+        canonicalName);
   }
 
   /**
-   * Registers a method with the given function name.
+   * Registers a {@link FunctionInfo} under the given canonical name.
    */
-  public static void registerFunction(String functionName, Method method, 
boolean nullableParameters,
-      boolean isPlaceholder, boolean isVarArg) {
-    if (!isPlaceholder) {
-      registerFunctionInfoMap(functionName, method, nullableParameters, 
isVarArg);
-    }
-    registerCalciteNamedFunctionMap(functionName, method, nullableParameters, 
isVarArg);
+  private static void register(String canonicalName, FunctionInfo 
functionInfo, int numArguments,
+      Map<String, Map<Integer, FunctionInfo>> functionInfoMap) {
+    Preconditions.checkState(
+        functionInfoMap.computeIfAbsent(canonicalName, k -> new 
HashMap<>()).put(numArguments, functionInfo) == null,
+        "Function: %s with %s arguments is already registered", canonicalName,
+        numArguments == VAR_ARG_KEY ? "variable" : numArguments);
   }
 
-  private static void registerFunctionInfoMap(String functionName, Method 
method, boolean nullableParameters,
-      boolean isVarArg) {
-    FunctionInfo functionInfo = new FunctionInfo(method, 
method.getDeclaringClass(), nullableParameters);
-    String canonicalName = canonicalize(functionName);
-    Map<Integer, FunctionInfo> functionInfoMap = 
FUNCTION_INFO_MAP.computeIfAbsent(canonicalName, k -> new HashMap<>());
-    if (isVarArg) {
-      FunctionInfo existFunctionInfo = functionInfoMap.put(VAR_ARG_KEY, 
functionInfo);
-      Preconditions.checkState(existFunctionInfo == null || 
existFunctionInfo.getMethod() == functionInfo.getMethod(),
-          "Function: %s with variable number of parameters is already 
registered", functionName);
-    } else {
-      FunctionInfo existFunctionInfo = 
functionInfoMap.put(method.getParameterCount(), functionInfo);
-      Preconditions.checkState(existFunctionInfo == null || 
existFunctionInfo.getMethod() == functionInfo.getMethod(),
-          "Function: %s with %s parameters is already registered", 
functionName, method.getParameterCount());
-    }
-  }
-
-  private static void registerCalciteNamedFunctionMap(String functionName, 
Method method, boolean nullableParameters,
-      boolean isVarArg) {
-    if (method.getAnnotation(Deprecated.class) == null) {
-      FUNCTION_MAP.put(functionName, ScalarFunctionImpl.create(method));
-    }
-  }
-
-  public static Map<String, List<Function>> getRegisteredCalciteFunctionMap() {
-    return FUNCTION_MAP.map();
+  /**
+   * Returns {@code true} if the given canonical name is registered, {@code 
false} otherwise.
+   */
+  public static boolean contains(String canonicalName) {
+    return FUNCTION_MAP.containsKey(canonicalName);
   }
 
-  public static Set<String> getRegisteredCalciteFunctionNames() {
-    return FUNCTION_MAP.map().keySet();
+  @Deprecated
+  public static boolean containsFunction(String name) {
+    return contains(canonicalize(name));
   }
 
   /**
-   * Returns {@code true} if the given function name is registered, {@code 
false} otherwise.
+   * Returns the {@link FunctionInfo} associated with the given canonical name 
and argument types, or {@code null} if
+   * there is no matching method. This method should be called after the 
FunctionRegistry is initialized and all methods
+   * are already registered.
    */
-  public static boolean containsFunction(String functionName) {
-    return FUNCTION_INFO_MAP.containsKey(canonicalize(functionName));
+  @Nullable
+  public static FunctionInfo lookupFunctionInfo(String canonicalName, 
ColumnDataType[] argumentTypes) {
+    PinotScalarFunction function = FUNCTION_MAP.get(canonicalName);
+    return function != null ? function.getFunctionInfo(argumentTypes) : null;
   }
 
   /**
-   * Returns the {@link FunctionInfo} associated with the given function name 
and number of parameters, or {@code null}
+   * Returns the {@link FunctionInfo} associated with the given canonical name 
and number of arguments, or {@code null}
    * if there is no matching method. This method should be called after the 
FunctionRegistry is initialized and all
    * methods are already registered.
+   * TODO: Move all usages to {@link #lookupFunctionInfo(String, 
ColumnDataType[])}.
    */
   @Nullable
-  public static FunctionInfo getFunctionInfo(String functionName, int 
numParameters) {
-    Map<Integer, FunctionInfo> functionInfoMap = 
FUNCTION_INFO_MAP.get(canonicalize(functionName));
-    if (functionInfoMap != null) {
-      FunctionInfo functionInfo = functionInfoMap.get(numParameters);
-      if (functionInfo != null) {
-        return functionInfo;
-      }
-      return functionInfoMap.get(VAR_ARG_KEY);
-    }
-    return null;
+  public static FunctionInfo lookupFunctionInfo(String canonicalName, int 
numArguments) {
+    PinotScalarFunction function = FUNCTION_MAP.get(canonicalName);
+    return function != null ? function.getFunctionInfo(numArguments) : null;
   }
 
-  private static String canonicalize(String functionName) {
-    return StringUtils.remove(functionName, '_').toLowerCase();
+  @Deprecated
+  @Nullable
+  public static FunctionInfo getFunctionInfo(String name, int numArguments) {
+    return lookupFunctionInfo(canonicalize(name), numArguments);
   }
 
-  /**
-   * Placeholders for scalar function, they register and represents the 
signature for transform and filter predicate
-   * so that v2 engine can understand and plan them correctly.
-   */
-  private static class PlaceholderScalarFunctions {
+  public static String canonicalize(String name) {
+    return StringUtils.remove(name, '_').toLowerCase();
+  }
+
+  public static class ArgumentCountBasedScalarFunction implements 
PinotScalarFunction {
+    private final String _name;
+    private final Map<Integer, FunctionInfo> _functionInfoMap;
+
+    private ArgumentCountBasedScalarFunction(String name, Map<Integer, 
FunctionInfo> functionInfoMap) {
+      _name = name.toUpperCase();
+      _functionInfoMap = functionInfoMap;
+    }
 
-    @ScalarFunction(names = {"textContains", "text_contains"}, isPlaceholder = 
true)
-    public static boolean textContains(String text, String pattern) {
-      throw new UnsupportedOperationException("Placeholder scalar function, 
should not reach here");
+    @Override
+    public String getName() {
+      return _name;
     }
 
-    @ScalarFunction(names = {"textMatch", "text_match"}, isPlaceholder = true)
-    public static boolean textMatch(String text, String pattern) {
-      throw new UnsupportedOperationException("Placeholder scalar function, 
should not reach here");
+    @Override
+    public PinotSqlFunction toPinotSqlFunction() {
+      return new PinotSqlFunction(_name, getReturnTypeInference(), 
getOperandTypeChecker());
     }
 
-    @ScalarFunction(names = {"jsonMatch", "json_match"}, isPlaceholder = true)
-    public static boolean jsonMatch(String text, String pattern) {
-      throw new UnsupportedOperationException("Placeholder scalar function, 
should not reach here");
+    private SqlReturnTypeInference getReturnTypeInference() {
+      return opBinding -> {
+        int numArguments = opBinding.getOperandCount();
+        FunctionInfo functionInfo = getFunctionInfo(numArguments);
+        Preconditions.checkState(functionInfo != null, "Failed to find 
function: %s with %s arguments", _name,
+            numArguments);
+        Method method = functionInfo.getMethod();
+        Class<?> returnClass = method.getReturnType();
+        RelDataTypeFactory typeFactory = opBinding.getTypeFactory();
+        RelDataType returnType = returnClass == Object.class ? 
typeFactory.createSqlType(SqlTypeName.ANY)
+            : JavaTypeFactoryImpl.toSql(typeFactory, 
typeFactory.createJavaType(returnClass));
+
+        if (!functionInfo.hasNullableParameters()) {
+          // When any parameter is null, return is null
+          for (RelDataType type : opBinding.collectOperandTypes()) {
+            if (type.isNullable()) {
+              return typeFactory.createTypeWithNullability(returnType, true);
+            }
+          }
+        }
+
+        return method.isAnnotationPresent(Nullable.class) ? 
typeFactory.createTypeWithNullability(returnType, true)
+            : returnType;
+      };
+    }
+
+    private SqlOperandTypeChecker getOperandTypeChecker() {
+      if (_functionInfoMap.containsKey(VAR_ARG_KEY)) {
+        return OperandTypes.VARIADIC;
+      }
+      if (_functionInfoMap.size() == 1) {
+        return 
getOperandTypeChecker(_functionInfoMap.values().iterator().next().getMethod());
+      }
+      List<SqlOperandTypeChecker> operandTypeCheckers = new 
ArrayList<>(_functionInfoMap.size());
+      for (FunctionInfo functionInfo : _functionInfoMap.values()) {
+        
operandTypeCheckers.add(getOperandTypeChecker(functionInfo.getMethod()));
+      }
+      return OperandTypes.or(operandTypeCheckers.toArray(new 
SqlOperandTypeChecker[0]));
+    }
+
+    private static SqlTypeFamily getSqlTypeFamily(Class<?> clazz) {
+      // NOTE: Pinot allows some non-standard type conversions such as 
Timestamp <-> long, boolean <-> int etc. Do not
+      //       restrict the type family for now. We only restrict the type 
family for String so that cast can be added.
+      //       Explicit cast is required to correctly convert boolean and 
Timestamp to String.
+      // TODO: Revisit this.
+      return clazz == String.class ? SqlTypeFamily.CHARACTER : 
SqlTypeFamily.ANY;
+    }
+
+    private static SqlOperandTypeChecker getOperandTypeChecker(Method method) {
+      Class<?>[] parameterTypes = method.getParameterTypes();
+      int length = parameterTypes.length;
+      SqlTypeFamily[] typeFamilies = new SqlTypeFamily[length];
+      for (int i = 0; i < length; i++) {
+        typeFamilies[i] = getSqlTypeFamily(parameterTypes[i]);
+      }
+      return OperandTypes.family(typeFamilies);
     }
 
-    @ScalarFunction(names = {"vectorSimilarity", "vector_similarity"}, 
isPlaceholder = true)
-    public static boolean vectorSimilarity(float[] vector1, float[] vector2, 
int topk) {
-      throw new UnsupportedOperationException("Placeholder scalar function, 
should not reach here");
+    @Nullable
+    @Override
+    public FunctionInfo getFunctionInfo(int numArguments) {
+      FunctionInfo functionInfo = _functionInfoMap.get(numArguments);
+      return functionInfo != null ? functionInfo : 
_functionInfoMap.get(VAR_ARG_KEY);

Review Comment:
   There are 3 types of function implementations:
   - AggregationFunction
   - TransformFunction
   - Annotated ScalarFunction
   
   `FunctionRegistry` contains all the annotated scalar function 
implementations, and will register the converted function operator into 
`PinotOperatorTable` so that it is discoverable with multi-stage engine. We 
allow directly registering function into `PinotOperatorTable` to override the 
java class based inference. One example is `FROM_DATE_TIME` where we want to 
return TIMESTAMP in V2; it will return LONG in V1 per the function signature in 
`DateTimeFunctions.fromDateTime()`. Ideally we should unify them, but that is 
out of the scope of this PR, thus leaving a TODO.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to