This is an automated email from the ASF dual-hosted git repository.
vbalaji pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new 2dc1c60ba47 [HUDI-6369] Fix spacial curve with sample strategy fails
when 0 or 1 rows only is incoming (#9053)
2dc1c60ba47 is described below
commit 2dc1c60ba475b9596290e227fe33dc009d516b21
Author: Nicolas Paris <[email protected]>
AuthorDate: Wed Oct 18 19:10:22 2023 +0200
[HUDI-6369] Fix spacial curve with sample strategy fails when 0 or 1 rows
only is incoming (#9053)
* [HUDI-6369] Fix spacial when empty or 1 row df
* Rename unit test to follow conventions
---------
Co-authored-by: Balaji Varadarajan <[email protected]>
---
.../spark/sql/hudi/execution/RangeSample.scala | 5 +-
.../sql/hudi/execution/TestRangeSampleSort.java | 58 ++++++++++++++++++++++
2 files changed, 62 insertions(+), 1 deletion(-)
diff --git
a/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/hudi/execution/RangeSample.scala
b/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/hudi/execution/RangeSample.scala
index f00bb90a441..898c8dc8209 100644
---
a/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/hudi/execution/RangeSample.scala
+++
b/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/hudi/execution/RangeSample.scala
@@ -316,6 +316,8 @@ object RangeSampleSort {
HoodieClusteringConfig.LAYOUT_OPTIMIZE_BUILD_CURVE_SAMPLE_SIZE.defaultValue.toString).toInt
val sample = new RangeSample(zOrderBounds, sampleRdd)
val rangeBounds = sample.getRangeBounds()
+ if (rangeBounds.size <= 1)
+ return df
val sampleBounds = {
val candidateColNumber = rangeBounds.head._1.length
(0 to candidateColNumber - 1).map { i =>
@@ -479,6 +481,8 @@ object RangeSampleSort {
val sample = new RangeSample(zOrderBounds, sampleRdd)
val rangeBounds = sample.getRangeBounds()
+ if(rangeBounds.size <= 1)
+ return df
implicit val ordering1 = lazyGeneratedOrderings(0)
@@ -536,4 +540,3 @@ object RangeSampleSort {
}
}
}
-
diff --git
a/hudi-client/hudi-spark-client/src/test/java/org/apache/spark/sql/hudi/execution/TestRangeSampleSort.java
b/hudi-client/hudi-spark-client/src/test/java/org/apache/spark/sql/hudi/execution/TestRangeSampleSort.java
new file mode 100644
index 00000000000..cedf21d3c35
--- /dev/null
+++
b/hudi-client/hudi-spark-client/src/test/java/org/apache/spark/sql/hudi/execution/TestRangeSampleSort.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.spark.sql.hudi.execution;
+
+import org.apache.hudi.config.HoodieClusteringConfig;
+import org.apache.hudi.testutils.HoodieClientTestBase;
+
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import java.util.Arrays;
+
+import scala.collection.JavaConversions;
+
+class TestRangeSampleSort extends HoodieClientTestBase { // checks that both RangeSampleSort entry points accept 0-row and 1-row DataFrames
+
+ @Test // sortDataFrameBySampleSupportAllTypes must not throw on an int column plus an array column
+ void sortDataFrameBySampleSupportAllTypes() {
+ Dataset<Row> df = this.context.getSqlContext().sql("select 1 as id,
array(2) as content");
+ for (int i = 0; i < 2; i++) { // i = 0 then 1: df.limit(i) gives the empty input, then the single-row input
+ final int limit = i; // final copy of the loop counter so the lambda below may capture it
+ Assertions.assertDoesNotThrow(() -> // RangeSampleSort$.MODULE$ is the Java-side handle on the Scala object RangeSampleSort
+
RangeSampleSort$.MODULE$.sortDataFrameBySampleSupportAllTypes(df.limit(limit),
+ JavaConversions.asScalaBuffer(Arrays.asList("id", "content")),
1), "range sort shall not fail when 0 or 1 record incoming");
+ }
+ }
+
+ @Test // sortDataFrameBySample with the HILBERT layout strategy must not throw on two int columns
+ void sortDataFrameBySample() {
+ HoodieClusteringConfig.LayoutOptimizationStrategy layoutOptStrategy =
HoodieClusteringConfig.LayoutOptimizationStrategy.HILBERT;
+ Dataset<Row> df = this.context.getSqlContext().sql("select 1 as id, 2 as
content");
+ for (int i = 0; i < 2; i++) { // same two cases as above: limit(0) and limit(1)
+ final int limit = i; // final copy of the loop counter so the lambda below may capture it
+ Assertions.assertDoesNotThrow(() ->
+ RangeSampleSort$.MODULE$.sortDataFrameBySample(df.limit(limit),
layoutOptStrategy,
+ JavaConversions.asScalaBuffer(Arrays.asList("id", "content")),
1), "range sort shall not fail when 0 or 1 record incoming");
+ }
+ }
+}