This is an automated email from the ASF dual-hosted git repository. lzljs3620320 pushed a commit to branch release-0.8 in repository https://gitbox.apache.org/repos/asf/paimon.git
commit d78658a187a303c569d6c4f45df2894fcb106dfd
Author: xuzifu666 <[email protected]>
AuthorDate: Wed May 29 10:28:19 2024 +0800

    [spark] SparkHilbertUDF hilbertCurvePosBytes classnotfound fix (#3415)
---
 .../org/apache/paimon/sort/hilbert/HilbertIndexer.java     |  2 +-
 paimon-spark/paimon-spark-common/pom.xml                   |  5 -----
 .../java/org/apache/paimon/spark/sort/SparkHilbertUDF.java | 14 ++++----------
 3 files changed, 5 insertions(+), 16 deletions(-)

diff --git a/paimon-common/src/main/java/org/apache/paimon/sort/hilbert/HilbertIndexer.java b/paimon-common/src/main/java/org/apache/paimon/sort/hilbert/HilbertIndexer.java
index e0e55795f..b75227309 100644
--- a/paimon-common/src/main/java/org/apache/paimon/sort/hilbert/HilbertIndexer.java
+++ b/paimon-common/src/main/java/org/apache/paimon/sort/hilbert/HilbertIndexer.java
@@ -295,7 +295,7 @@ public class HilbertIndexer implements Serializable {
         }
     }
 
-    private byte[] hilbertCurvePosBytes(Long[] points) {
+    public static byte[] hilbertCurvePosBytes(Long[] points) {
         long[] data = Arrays.stream(points).mapToLong(Long::longValue).toArray();
         HilbertCurve hilbertCurve = HilbertCurve.bits(BITS_NUM).dimensions(points.length);
         BigInteger index = hilbertCurve.index(data);
diff --git a/paimon-spark/paimon-spark-common/pom.xml b/paimon-spark/paimon-spark-common/pom.xml
index e435174f8..29deec4af 100644
--- a/paimon-spark/paimon-spark-common/pom.xml
+++ b/paimon-spark/paimon-spark-common/pom.xml
@@ -53,11 +53,6 @@ under the License.
             <artifactId>scala-compiler</artifactId>
             <version>${scala.version}</version>
         </dependency>
-        <dependency>
-            <groupId>com.github.davidmoten</groupId>
-            <artifactId>hilbert-curve</artifactId>
-            <version>0.2.2</version>
-        </dependency>
 
         <dependency>
             <groupId>org.apache.spark</groupId>
diff --git a/paimon-spark/paimon-spark-common/src/main/java/org/apache/paimon/spark/sort/SparkHilbertUDF.java b/paimon-spark/paimon-spark-common/src/main/java/org/apache/paimon/spark/sort/SparkHilbertUDF.java
index ed2a1bab0..f9334c097 100644
--- a/paimon-spark/paimon-spark-common/src/main/java/org/apache/paimon/spark/sort/SparkHilbertUDF.java
+++ b/paimon-spark/paimon-spark-common/src/main/java/org/apache/paimon/spark/sort/SparkHilbertUDF.java
@@ -18,6 +18,7 @@
 
 package org.apache.paimon.spark.sort;
 
+import org.apache.paimon.sort.hilbert.HilbertIndexer;
 import org.apache.paimon.utils.ConvertBinaryUtil;
 
 import org.apache.spark.sql.Column;
@@ -37,14 +38,11 @@ import org.apache.spark.sql.types.LongType;
 import org.apache.spark.sql.types.ShortType;
 import org.apache.spark.sql.types.StringType;
 import org.apache.spark.sql.types.TimestampType;
-import org.davidmoten.hilbert.HilbertCurve;
 
 import java.io.IOException;
 import java.io.ObjectInputStream;
 import java.io.Serializable;
 import java.math.BigDecimal;
-import java.math.BigInteger;
-import java.util.List;
 
 import scala.collection.JavaConverters;
 import scala.collection.Seq;
@@ -53,8 +51,6 @@ import scala.collection.Seq;
 public class SparkHilbertUDF implements Serializable {
     private static final long PRIMITIVE_EMPTY = Long.MAX_VALUE;
 
-    private static final int BITS_NUM = 63;
-
     SparkHilbertUDF() {}
 
     private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
@@ -62,11 +58,9 @@ public class SparkHilbertUDF implements Serializable {
     }
 
     byte[] hilbertCurvePosBytes(Seq<Long> points) {
-        List<Long> longs = JavaConverters.seqAsJavaList(points);
-        long[] data = longs.stream().mapToLong(Long::longValue).toArray();
-        HilbertCurve hilbertCurve = HilbertCurve.bits(BITS_NUM).dimensions(points.size());
-        BigInteger index = hilbertCurve.index(data);
-        return ConvertBinaryUtil.paddingToNByte(index.toByteArray(), BITS_NUM);
+        Long[] longs = JavaConverters.seqAsJavaList(points).stream().toArray(Long[]::new);
+        byte[] bytes = HilbertIndexer.hilbertCurvePosBytes(longs);
+        return bytes;
     }
 
     private UserDefinedFunction tinyToOrderedLongUDF() {
