This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 46cfa66a1 [spark] SparkHilbertUDF hilbertCurvePosBytes classnotfound fix (#3415)
46cfa66a1 is described below
commit 46cfa66a1ae41fe008087f11be16d466d1d16d66
Author: xuzifu666 <[email protected]>
AuthorDate: Wed May 29 10:28:19 2024 +0800
[spark] SparkHilbertUDF hilbertCurvePosBytes classnotfound fix (#3415)
---
.../org/apache/paimon/sort/hilbert/HilbertIndexer.java | 2 +-
paimon-spark/paimon-spark-common/pom.xml | 5 -----
.../java/org/apache/paimon/spark/sort/SparkHilbertUDF.java | 14 ++++----------
3 files changed, 5 insertions(+), 16 deletions(-)
diff --git a/paimon-common/src/main/java/org/apache/paimon/sort/hilbert/HilbertIndexer.java b/paimon-common/src/main/java/org/apache/paimon/sort/hilbert/HilbertIndexer.java
index e0e55795f..b75227309 100644
--- a/paimon-common/src/main/java/org/apache/paimon/sort/hilbert/HilbertIndexer.java
+++ b/paimon-common/src/main/java/org/apache/paimon/sort/hilbert/HilbertIndexer.java
@@ -295,7 +295,7 @@ public class HilbertIndexer implements Serializable {
}
}
- private byte[] hilbertCurvePosBytes(Long[] points) {
+ public static byte[] hilbertCurvePosBytes(Long[] points) {
long[] data = Arrays.stream(points).mapToLong(Long::longValue).toArray();
HilbertCurve hilbertCurve = HilbertCurve.bits(BITS_NUM).dimensions(points.length);
BigInteger index = hilbertCurve.index(data);
diff --git a/paimon-spark/paimon-spark-common/pom.xml b/paimon-spark/paimon-spark-common/pom.xml
index a3c924821..7a6f11a21 100644
--- a/paimon-spark/paimon-spark-common/pom.xml
+++ b/paimon-spark/paimon-spark-common/pom.xml
@@ -53,11 +53,6 @@ under the License.
<artifactId>scala-compiler</artifactId>
<version>${scala.version}</version>
</dependency>
- <dependency>
- <groupId>com.github.davidmoten</groupId>
- <artifactId>hilbert-curve</artifactId>
- <version>0.2.2</version>
- </dependency>
<dependency>
<groupId>org.apache.spark</groupId>
diff --git a/paimon-spark/paimon-spark-common/src/main/java/org/apache/paimon/spark/sort/SparkHilbertUDF.java b/paimon-spark/paimon-spark-common/src/main/java/org/apache/paimon/spark/sort/SparkHilbertUDF.java
index ed2a1bab0..f9334c097 100644
--- a/paimon-spark/paimon-spark-common/src/main/java/org/apache/paimon/spark/sort/SparkHilbertUDF.java
+++ b/paimon-spark/paimon-spark-common/src/main/java/org/apache/paimon/spark/sort/SparkHilbertUDF.java
@@ -18,6 +18,7 @@
package org.apache.paimon.spark.sort;
+import org.apache.paimon.sort.hilbert.HilbertIndexer;
import org.apache.paimon.utils.ConvertBinaryUtil;
import org.apache.spark.sql.Column;
@@ -37,14 +38,11 @@ import org.apache.spark.sql.types.LongType;
import org.apache.spark.sql.types.ShortType;
import org.apache.spark.sql.types.StringType;
import org.apache.spark.sql.types.TimestampType;
-import org.davidmoten.hilbert.HilbertCurve;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.Serializable;
import java.math.BigDecimal;
-import java.math.BigInteger;
-import java.util.List;
import scala.collection.JavaConverters;
import scala.collection.Seq;
@@ -53,8 +51,6 @@ import scala.collection.Seq;
public class SparkHilbertUDF implements Serializable {
private static final long PRIMITIVE_EMPTY = Long.MAX_VALUE;
- private static final int BITS_NUM = 63;
-
SparkHilbertUDF() {}
private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
@@ -62,11 +58,9 @@ public class SparkHilbertUDF implements Serializable {
}
byte[] hilbertCurvePosBytes(Seq<Long> points) {
- List<Long> longs = JavaConverters.seqAsJavaList(points);
- long[] data = longs.stream().mapToLong(Long::longValue).toArray();
- HilbertCurve hilbertCurve = HilbertCurve.bits(BITS_NUM).dimensions(points.size());
- BigInteger index = hilbertCurve.index(data);
- return ConvertBinaryUtil.paddingToNByte(index.toByteArray(), BITS_NUM);
+ Long[] longs = JavaConverters.seqAsJavaList(points).stream().toArray(Long[]::new);
+ byte[] bytes = HilbertIndexer.hilbertCurvePosBytes(longs);
+ return bytes;
}
private UserDefinedFunction tinyToOrderedLongUDF() {