This is an automated email from the ASF dual-hosted git repository.
emkornfield pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 3f767ce ARROW-5719: [Java] Support in-place vector sorting
3f767ce is described below
commit 3f767ce38866b87e29aa5a964176e978ddf4899e
Author: liyafan82 <[email protected]>
AuthorDate: Sun Jul 7 21:11:31 2019 -0700
ARROW-5719: [Java] Support in-place vector sorting
Support in-place sorting for vectors. An in-place sorter sorts the vector
by directly modifying the vector data, so the input and output vectors are the
same one.
Author: liyafan82 <[email protected]>
Closes #4699 from liyafan82/fly_0625_isort and squashes the following
commits:
2b4fc8233 <liyafan82> Remove type width
1f210d9d3 <liyafan82> Remove data copier
e3400a9cb <liyafan82> Support in-place vector sorting
---
.../sort/FixedWidthInPlaceVectorSorter.java | 83 +++++++++++++++++++++
.../arrow/algorithm/sort/InPlaceVectorSorter.java | 37 ++++++++++
.../sort/TestFixedWidthInPlaceVectorSorter.java | 86 ++++++++++++++++++++++
3 files changed, 206 insertions(+)
diff --git
a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthInPlaceVectorSorter.java
b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthInPlaceVectorSorter.java
new file mode 100644
index 0000000..a8d2b70
--- /dev/null
+++
b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthInPlaceVectorSorter.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.sort;
+
+import org.apache.arrow.vector.BaseFixedWidthVector;
+
+/**
+ * Default in-place sorter for fixed-width vectors.
+ * It is based on quick-sort, with average time complexity O(n*log(n)).
+ * @param <V> vector type.
+ */
+public class FixedWidthInPlaceVectorSorter<V extends BaseFixedWidthVector>
implements InPlaceVectorSorter<V> {
+
+ private VectorValueComparator<V> comparator;
+
+ /**
+ * The vector to sort.
+ */
+ private V vec;
+
+ /**
+ * The buffer to hold the pivot.
+ * It always has length 1.
+ */
+ private V pivotBuffer;
+
+ @Override
+ public void sortInPlace(V vec, VectorValueComparator<V> comparator) {
+ try {
+ this.vec = vec;
+ this.comparator = comparator;
+ this.pivotBuffer = (V) vec.getField().createVector(vec.getAllocator());
+ this.pivotBuffer.allocateNew(1);
+
+ comparator.attachVectors(vec, pivotBuffer);
+ quickSort(0, vec.getValueCount() - 1);
+ } finally {
+ this.pivotBuffer.close();
+ }
+ }
+
+ private void quickSort(int low, int high) {
+ if (low < high) {
+ int mid = partition(low, high);
+ quickSort(low, mid - 1);
+ quickSort(mid + 1, high);
+ }
+ }
+
+ private int partition(int low, int high) {
+ pivotBuffer.copyFrom(low, 0, vec);
+
+ while (low < high) {
+ while (low < high && comparator.compare(high, 0) >= 0) {
+ high -= 1;
+ }
+ vec.copyFrom(high, low, vec);
+
+ while (low < high && comparator.compare(low, 0) <= 0) {
+ low += 1;
+ }
+ vec.copyFrom(low, high, vec);
+ }
+
+ vec.copyFrom(0, low, pivotBuffer);
+ return low;
+ }
+}
diff --git
a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InPlaceVectorSorter.java
b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InPlaceVectorSorter.java
new file mode 100644
index 0000000..19817fe
--- /dev/null
+++
b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InPlaceVectorSorter.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.sort;
+
+import org.apache.arrow.vector.ValueVector;
+
+/**
+ * Basic interface for sorting a vector in-place.
+ * That is, the sorting is performed by modifying the input vector,
+ * without creating a new sorted vector.
+ *
+ * @param <V> the vector type.
+ */
+public interface InPlaceVectorSorter<V extends ValueVector> {
+
+ /**
+ * Sort a vector in-place.
+ * @param vec the vector to sort.
+ * @param comparator the criteria for sort.
+ */
+ void sortInPlace(V vec, VectorValueComparator<V> comparator);
+}
diff --git
a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthInPlaceVectorSorter.java
b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthInPlaceVectorSorter.java
new file mode 100644
index 0000000..1a71a53
--- /dev/null
+++
b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthInPlaceVectorSorter.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.sort;
+
+import static org.junit.Assert.assertTrue;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.IntVector;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Test cases for {@link FixedWidthInPlaceVectorSorter}.
+ */
+public class TestFixedWidthInPlaceVectorSorter {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void prepare() {
+ allocator = new RootAllocator(1024 * 1024);
+ }
+
+ @After
+ public void shutdown() {
+ allocator.close();
+ }
+
+ @Test
+ public void testSortInt() {
+ try (IntVector vec = new IntVector("", allocator)) {
+ vec.allocateNew(10);
+ vec.setValueCount(10);
+
+ // fill data to sort
+ vec.set(0, 10);
+ vec.set(1, 8);
+ vec.setNull(2);
+ vec.set(3, 10);
+ vec.set(4, 12);
+ vec.set(5, 17);
+ vec.setNull(6);
+ vec.set(7, 23);
+ vec.set(8, 35);
+ vec.set(9, 2);
+
+ // sort the vector
+ FixedWidthInPlaceVectorSorter sorter = new
FixedWidthInPlaceVectorSorter();
+ DefaultVectorComparators.IntComparator comparator = new
DefaultVectorComparators.IntComparator();
+
+ sorter.sortInPlace(vec, comparator);
+
+ // verify results
+ Assert.assertEquals(10, vec.getValueCount());
+
+ assertTrue(vec.isNull(0));
+ assertTrue(vec.isNull(1));
+ Assert.assertEquals(2, vec.get(2));
+ Assert.assertEquals(8, vec.get(3));
+ Assert.assertEquals(10, vec.get(4));
+ Assert.assertEquals(10, vec.get(5));
+ Assert.assertEquals(12, vec.get(6));
+ Assert.assertEquals(17, vec.get(7));
+ Assert.assertEquals(23, vec.get(8));
+ Assert.assertEquals(35, vec.get(9));
+ }
+ }
+}