cboumalh commented on code in PR #52900: URL: https://github.com/apache/spark/pull/52900#discussion_r2496016931
########## python/pyspark/sql/connect/functions/builtin.py: ########## @@ -4529,6 +4529,168 @@ def theta_intersection_agg( theta_intersection_agg.__doc__ = pysparkfuncs.theta_intersection_agg.__doc__ +def kll_sketch_agg_bigint( + col: "ColumnOrName", + k: Optional[Union[int, Column]] = None, +) -> Column: + fn = "kll_sketch_agg_bigint" + if k is None: + return _invoke_function_over_columns(fn, col) + else: + return _invoke_function_over_columns(fn, col, lit(k)) + + +kll_sketch_agg_bigint.__doc__ = pysparkfuncs.kll_sketch_agg_bigint.__doc__ + + +def kll_sketch_agg_float( + col: "ColumnOrName", + k: Optional[Union[int, Column]] = None, +) -> Column: + fn = "kll_sketch_agg_float" + if k is None: + return _invoke_function_over_columns(fn, col) + else: + return _invoke_function_over_columns(fn, col, lit(k)) + + +kll_sketch_agg_float.__doc__ = pysparkfuncs.kll_sketch_agg_float.__doc__ + + +def kll_sketch_agg_double( + col: "ColumnOrName", + k: Optional[Union[int, Column]] = None, +) -> Column: + fn = "kll_sketch_agg_double" + if k is None: + return _invoke_function_over_columns(fn, col) + else: + return _invoke_function_over_columns(fn, col, lit(k)) + + +kll_sketch_agg_double.__doc__ = pysparkfuncs.kll_sketch_agg_double.__doc__ + + +def kll_sketch_to_string_bigint(col: "ColumnOrName") -> Column: + fn = "kll_sketch_to_string_bigint" + return _invoke_function_over_columns(fn, col) + + +kll_sketch_to_string_bigint.__doc__ = pysparkfuncs.kll_sketch_to_string_bigint.__doc__ + + +def kll_sketch_to_string_float(col: "ColumnOrName") -> Column: + fn = "kll_sketch_to_string_float" + return _invoke_function_over_columns(fn, col) + + +kll_sketch_to_string_float.__doc__ = pysparkfuncs.kll_sketch_to_string_float.__doc__ + + +def kll_sketch_to_string_double(col: "ColumnOrName") -> Column: + fn = "kll_sketch_to_string_double" + return _invoke_function_over_columns(fn, col) + + +kll_sketch_to_string_double.__doc__ = pysparkfuncs.kll_sketch_to_string_double.__doc__ + + +def 
kll_sketch_get_n_bigint(col: "ColumnOrName") -> Column: + fn = "kll_sketch_get_n_bigint" + return _invoke_function_over_columns(fn, col) + + +kll_sketch_get_n_bigint.__doc__ = pysparkfuncs.kll_sketch_get_n_bigint.__doc__ + + +def kll_sketch_get_n_float(col: "ColumnOrName") -> Column: + fn = "kll_sketch_get_n_float" + return _invoke_function_over_columns(fn, col) + + +kll_sketch_get_n_float.__doc__ = pysparkfuncs.kll_sketch_get_n_float.__doc__ + + +def kll_sketch_get_n_double(col: "ColumnOrName") -> Column: + fn = "kll_sketch_get_n_double" + return _invoke_function_over_columns(fn, col) + + +kll_sketch_get_n_double.__doc__ = pysparkfuncs.kll_sketch_get_n_double.__doc__ + + +def kll_sketch_merge_bigint(left: "ColumnOrName", right: "ColumnOrName") -> Column: + fn = "kll_sketch_merge_bigint" + return _invoke_function_over_columns(fn, left, right) + + +kll_sketch_merge_bigint.__doc__ = pysparkfuncs.kll_sketch_merge_bigint.__doc__ + + +def kll_sketch_merge_float(left: "ColumnOrName", right: "ColumnOrName") -> Column: + fn = "kll_sketch_merge_float" + return _invoke_function_over_columns(fn, left, right) + + +kll_sketch_merge_float.__doc__ = pysparkfuncs.kll_sketch_merge_float.__doc__ + + +def kll_sketch_merge_double(left: "ColumnOrName", right: "ColumnOrName") -> Column: + fn = "kll_sketch_merge_double" + return _invoke_function_over_columns(fn, left, right) + + +kll_sketch_merge_double.__doc__ = pysparkfuncs.kll_sketch_merge_double.__doc__ + + +def kll_sketch_get_quantile_bigint(sketch: "ColumnOrName", rank: "ColumnOrName") -> Column: + fn = "kll_sketch_get_quantile_bigint" + return _invoke_function_over_columns(fn, sketch, rank) + + +kll_sketch_get_quantile_bigint.__doc__ = pysparkfuncs.kll_sketch_get_quantile_bigint.__doc__ + + +def kll_sketch_get_quantile_float(sketch: "ColumnOrName", rank: "ColumnOrName") -> Column: Review Comment: I noticed that at the line linked below, we don't check whether the type of the ranks column is supported; I'm not sure what would happen if it is not. 
https://github.com/apache/spark/blob/master/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/kllExpressions.scala#L488 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
