This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 25db8b487 [spark] Fix docs, fix bug that spark sort for order should
be non-global (#2328)
25db8b487 is described below
commit 25db8b48729684c4f0933dfb04f33620bb0e93f6
Author: YeJunHao <[email protected]>
AuthorDate: Fri Nov 17 10:49:32 2023 +0800
[spark] Fix docs, fix bug that spark sort for order should be non-global
(#2328)
---
docs/content/engines/spark3.md | 4 ++--
.../src/main/java/org/apache/paimon/spark/sort/OrderSorter.java | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/docs/content/engines/spark3.md b/docs/content/engines/spark3.md
index fd0d994c0..21af9d565 100644
--- a/docs/content/engines/spark3.md
+++ b/docs/content/engines/spark3.md
@@ -441,9 +441,9 @@ This section introduce all available spark procedures about
paimon.
<tbody style="font-size: 12px; ">
<tr>
<td>compact</td>
- <td><nobr>CALL
[paimon.]sys.compact('<identifier>','<partitions>','<sort_type>','<columns>')</nobr><br>CALL
[paimon.]sys.compact(table => '<identifier>' [,partitions =>
'<partitions>'] [, order_strategy =>'<sort_type>'] [,order_by =>
'<columns>'])</td>
+ <td><nobr>CALL [paimon.]sys.compact(table => '<identifier>'
[,partitions => '<partitions>'] </nobr><br>[, order_strategy
=>'<sort_type>'] [,order_by => '<columns>'])</td>
<td>identifier: the target table identifier. Cannot be
empty.<br><br><nobr>partitions: partition filter. Left empty for all
partitions.<br> "," means "AND"<br>";" means "OR"</nobr><br><br>order_strategy:
'order' or 'zorder' or 'none'. Left empty for 'none'.
<br><br><nobr>order_by: the columns that need to be sorted. Left empty if
'order_strategy' is 'none'. </nobr><br><br>If you want to sort-compact two
partitions date=01 and date=02, you need to write 'date=01;date=02'<br><br>If
you wa [...]
- <td><nobr>SET spark.sql.shuffle.partitions=10; --set the sort
parallelism</nobr> <nobr>CALL
paimon.sys.compact('my_db.Orders1','f0=0,f1=1;f0=1,f1=1', 'zorder',
'f1,f2');</nobr><br><nobr>CALL paimon.sys.compact(table => 'T', partitions =>
'p=0', order_strategy => 'zorder', order_by => 'a,b')</nobr></td>
+ <td><nobr>SET spark.sql.shuffle.partitions=10; --set the compact
parallelism</nobr><br><nobr>CALL sys.compact(table => 'T', partitions => 'p=0',
order_strategy => 'zorder', order_by => 'a,b')</nobr></td>
</tr>
</tbody>
</table>
diff --git
a/paimon-spark/paimon-spark-common/src/main/java/org/apache/paimon/spark/sort/OrderSorter.java
b/paimon-spark/paimon-spark-common/src/main/java/org/apache/paimon/spark/sort/OrderSorter.java
index bdbf7e6d1..e2fc18f69 100644
---
a/paimon-spark/paimon-spark-common/src/main/java/org/apache/paimon/spark/sort/OrderSorter.java
+++
b/paimon-spark/paimon-spark-common/src/main/java/org/apache/paimon/spark/sort/OrderSorter.java
@@ -37,6 +37,6 @@ public class OrderSorter extends TableSorter {
@Override
public Dataset<Row> sort(Dataset<Row> input) {
Column[] sortColumns =
orderColNames.stream().map(input::col).toArray(Column[]::new);
- return input.repartitionByRange(sortColumns).sort(sortColumns);
+ return
input.repartitionByRange(sortColumns).sortWithinPartitions(sortColumns);
}
}