This is an automated email from the ASF dual-hosted git repository.
emkornfield pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new e78ea91 ARROW-5844: [Java] Support comparison & sort for more numeric
types
e78ea91 is described below
commit e78ea91312db1310e00b9aaf6ec4cc913e447670
Author: liyafan82 <[email protected]>
AuthorDate: Thu Jul 11 23:47:49 2019 -0700
ARROW-5844: [Java] Support comparison & sort for more numeric types
Currently, we only support comparison & sort for 32-bit integers. In this
issue, we provide support for more numeric data types:
byte
short
long
float
double
Author: liyafan82 <[email protected]>
Closes #4799 from liyafan82/fly_0704_cmp and squashes the following commits:
a921cddaf <liyafan82> Remove if conditions in default float & double
comparators
a4b409908 <liyafan82> Replace if condition with signum function
30f946bda <liyafan82> Merge branch 'master' into fly_0704_cmp
bc880f163 <liyafan82> Merge branch 'master' into fly_0704_cmp
7cbe55601 <liyafan82> Support NaN for float and double
3860c11d1 <liyafan82> Support comparison & sort for more numeric types
---
.../algorithm/sort/DefaultVectorComparators.java | 124 +++++++++++
.../sort/TestFixedWidthOutOfPlaceVectorSorter.java | 239 ++++++++++++++++++++-
2 files changed, 362 insertions(+), 1 deletion(-)
diff --git
a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java
b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java
index e16b9ec..2dfa0aa 100644
---
a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java
+++
b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java
@@ -17,7 +17,12 @@
package org.apache.arrow.algorithm.sort;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.arrow.vector.Float8Vector;
import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.SmallIntVector;
+import org.apache.arrow.vector.TinyIntVector;
import org.apache.arrow.vector.VarCharVector;
import org.apache.arrow.vector.holders.NullableVarCharHolder;
@@ -27,6 +32,42 @@ import org.apache.arrow.vector.holders.NullableVarCharHolder;
public class DefaultVectorComparators {
/**
+ * Default comparator for bytes.
+ * The comparison is based on values, with null comes first.
+ */
+ public static class ByteComparator extends VectorValueComparator<TinyIntVector> {
+
+   /** Passes the size of a byte, expressed in bytes, to the base class constructor. */
+   public ByteComparator() {
+     super(Byte.SIZE / 8);
+   }
+
+   /**
+    * Compares the value at {@code index1} of {@code vector1} with the value at
+    * {@code index2} of {@code vector2}.
+    *
+    * @param index1 position of the value read from {@code vector1}
+    * @param index2 position of the value read from {@code vector2}
+    * @return the difference of the two values: negative, zero or positive when the
+    *     first value is smaller than, equal to or larger than the second one
+    */
+   @Override
+   public int compareNotNull(int index1, int index2) {
+     // Both byte operands are promoted to int before subtracting, so the difference cannot overflow.
+     return vector1.get(index1) - vector2.get(index2);
+   }
+ }
+
+ /**
+ * Default comparator for short integers.
+ * The comparison is based on values, with null comes first.
+ */
+ public static class ShortComparator extends VectorValueComparator<SmallIntVector> {
+
+   /** Passes the size of a short, expressed in bytes, to the base class constructor. */
+   public ShortComparator() {
+     super(Short.SIZE / 8);
+   }
+
+   /**
+    * Compares the value at {@code index1} of {@code vector1} with the value at
+    * {@code index2} of {@code vector2}.
+    *
+    * @param index1 position of the value read from {@code vector1}
+    * @param index2 position of the value read from {@code vector2}
+    * @return the difference of the two values: negative, zero or positive when the
+    *     first value is smaller than, equal to or larger than the second one
+    */
+   @Override
+   public int compareNotNull(int index1, int index2) {
+     // Both short operands are promoted to int before subtracting, so the difference cannot overflow.
+     return vector1.get(index1) - vector2.get(index2);
+   }
+ }
+
+ /**
* Default comparator for 32-bit integers.
* The comparison is based on int values, with null comes first.
*/
@@ -45,6 +86,89 @@ public class DefaultVectorComparators {
}
/**
+ * Default comparator for long integers.
+ * The comparison is based on values, with null comes first.
+ */
+ public static class LongComparator extends VectorValueComparator<BigIntVector> {
+
+   /** Passes the size of a long, expressed in bytes, to the base class constructor. */
+   public LongComparator() {
+     super(Long.SIZE / 8);
+   }
+
+   /**
+    * Compares the value at {@code index1} of {@code vector1} with the value at
+    * {@code index2} of {@code vector2}.
+    *
+    * @param index1 position of the value read from {@code vector1}
+    * @param index2 position of the value read from {@code vector2}
+    * @return a negative number, zero or a positive number when the first value is
+    *     smaller than, equal to or larger than the second one
+    */
+   @Override
+   public int compareNotNull(int index1, int index2) {
+     long value1 = vector1.get(index1);
+     long value2 = vector2.get(index2);
+
+     // Do not subtract: (value1 - value2) wraps around for operands that are far apart
+     // (e.g. Long.MIN_VALUE and 1), which made the old signum-based result report the wrong order.
+     return Long.compare(value1, value2);
+   }
+ }
+
+ /**
+ * Default comparator for float type.
+ * The comparison is based on values, with null comes first.
+ */
+ public static class Float4Comparator extends VectorValueComparator<Float4Vector> {
+
+   /** Passes the size of a float, expressed in bytes, to the base class constructor. */
+   public Float4Comparator() {
+     super(Float.SIZE / 8);
+   }
+
+   /**
+    * Compares the value at {@code index1} of {@code vector1} with the value at
+    * {@code index2} of {@code vector2}. Two NaN values compare as equal, and a NaN
+    * compares as greater than every non-NaN value.
+    *
+    * @param index1 position of the value read from {@code vector1}
+    * @param index2 position of the value read from {@code vector2}
+    * @return -1, 0 or 1 when the first value is smaller than, equal to or larger
+    *     than the second one
+    */
+   @Override
+   public int compareNotNull(int index1, int index2) {
+     final float left = vector1.get(index1);
+     final float right = vector2.get(index2);
+
+     if (Float.isNaN(left)) {
+       // two NaNs tie; otherwise nan is greater than any normal value
+       return Float.isNaN(right) ? 0 : 1;
+     }
+     if (Float.isNaN(right)) {
+       // only the second operand is NaN, so the first one sorts before it
+       return -1;
+     }
+
+     return (int) Math.signum(left - right);
+   }
+ }
+
+ /**
+ * Default comparator for double type.
+ * The comparison is based on values, with null comes first.
+ */
+ public static class Float8Comparator extends VectorValueComparator<Float8Vector> {
+
+   /** Passes the size of a double, expressed in bytes, to the base class constructor. */
+   public Float8Comparator() {
+     super(Double.SIZE / 8);
+   }
+
+   /**
+    * Compares the value at {@code index1} of {@code vector1} with the value at
+    * {@code index2} of {@code vector2}. Two NaN values compare as equal, and a NaN
+    * compares as greater than every non-NaN value.
+    *
+    * @param index1 position of the value read from {@code vector1}
+    * @param index2 position of the value read from {@code vector2}
+    * @return -1, 0 or 1 when the first value is smaller than, equal to or larger
+    *     than the second one
+    */
+   @Override
+   public int compareNotNull(int index1, int index2) {
+     final double left = vector1.get(index1);
+     final double right = vector2.get(index2);
+
+     if (Double.isNaN(left)) {
+       // two NaNs tie; otherwise nan is greater than any normal value
+       return Double.isNaN(right) ? 0 : 1;
+     }
+     if (Double.isNaN(right)) {
+       // only the second operand is NaN, so the first one sorts before it
+       return -1;
+     }
+
+     return (int) Math.signum(left - right);
+   }
+ }
+
+ /**
* Default comparator for varchars.
* The comparison is in lexicographic order, with null comes first.
*/
diff --git
a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthOutOfPlaceVectorSorter.java
b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthOutOfPlaceVectorSorter.java
index 9133ab6..4fc4a7a 100644
---
a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthOutOfPlaceVectorSorter.java
+++
b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthOutOfPlaceVectorSorter.java
@@ -21,8 +21,13 @@ import static org.junit.Assert.assertTrue;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
-import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.arrow.vector.Float8Vector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.SmallIntVector;
+import org.apache.arrow.vector.TinyIntVector;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
@@ -46,6 +51,100 @@ public class TestFixedWidthOutOfPlaceVectorSorter {
}
@Test
+ public void testSortByte() {
+   // Sorts a 10-slot TinyIntVector holding two nulls and checks that the nulls come
+   // first, followed by the values in ascending order.
+   try (TinyIntVector vec = new TinyIntVector("", allocator)) {
+     vec.allocateNew(10);
+     vec.setValueCount(10);
+
+     // fill data to sort
+     vec.set(0, 10);
+     vec.set(1, 8);
+     vec.setNull(2);
+     vec.set(3, 10);
+     vec.set(4, 12);
+     vec.set(5, 17);
+     vec.setNull(6);
+     vec.set(7, 23);
+     vec.set(8, 35);
+     vec.set(9, 2);
+
+     // sort the vector
+     FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter();
+     DefaultVectorComparators.ByteComparator comparator = new DefaultVectorComparators.ByteComparator();
+
+     // try-with-resources closes the output vector even when an assertion below fails;
+     // the former trailing close() call was skipped in that case.
+     try (TinyIntVector sortedVec =
+         (TinyIntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null)) {
+       sortedVec.allocateNew(vec.getValueCount());
+       sortedVec.setValueCount(vec.getValueCount());
+
+       sorter.sortOutOfPlace(vec, sortedVec, comparator);
+
+       // verify results
+       Assert.assertEquals(vec.getValueCount(), sortedVec.getValueCount());
+
+       assertTrue(sortedVec.isNull(0));
+       assertTrue(sortedVec.isNull(1));
+       Assert.assertEquals((byte) 2, sortedVec.get(2));
+       Assert.assertEquals((byte) 8, sortedVec.get(3));
+       Assert.assertEquals((byte) 10, sortedVec.get(4));
+       Assert.assertEquals((byte) 10, sortedVec.get(5));
+       Assert.assertEquals((byte) 12, sortedVec.get(6));
+       Assert.assertEquals((byte) 17, sortedVec.get(7));
+       Assert.assertEquals((byte) 23, sortedVec.get(8));
+       Assert.assertEquals((byte) 35, sortedVec.get(9));
+     }
+   }
+ }
+
+ @Test
+ public void testSortShort() {
+   // Sorts a 10-slot SmallIntVector holding two nulls and checks that the nulls come
+   // first, followed by the values in ascending order.
+   try (SmallIntVector vec = new SmallIntVector("", allocator)) {
+     vec.allocateNew(10);
+     vec.setValueCount(10);
+
+     // fill data to sort
+     vec.set(0, 10);
+     vec.set(1, 8);
+     vec.setNull(2);
+     vec.set(3, 10);
+     vec.set(4, 12);
+     vec.set(5, 17);
+     vec.setNull(6);
+     vec.set(7, 23);
+     vec.set(8, 35);
+     vec.set(9, 2);
+
+     // sort the vector
+     FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter();
+     DefaultVectorComparators.ShortComparator comparator = new DefaultVectorComparators.ShortComparator();
+
+     // try-with-resources closes the output vector even when an assertion below fails;
+     // the former trailing close() call was skipped in that case.
+     try (SmallIntVector sortedVec =
+         (SmallIntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null)) {
+       sortedVec.allocateNew(vec.getValueCount());
+       sortedVec.setValueCount(vec.getValueCount());
+
+       sorter.sortOutOfPlace(vec, sortedVec, comparator);
+
+       // verify results
+       Assert.assertEquals(vec.getValueCount(), sortedVec.getValueCount());
+
+       assertTrue(sortedVec.isNull(0));
+       assertTrue(sortedVec.isNull(1));
+       Assert.assertEquals((short) 2, sortedVec.get(2));
+       Assert.assertEquals((short) 8, sortedVec.get(3));
+       Assert.assertEquals((short) 10, sortedVec.get(4));
+       Assert.assertEquals((short) 10, sortedVec.get(5));
+       Assert.assertEquals((short) 12, sortedVec.get(6));
+       Assert.assertEquals((short) 17, sortedVec.get(7));
+       Assert.assertEquals((short) 23, sortedVec.get(8));
+       Assert.assertEquals((short) 35, sortedVec.get(9));
+     }
+   }
+ }
+
+ @Test
public void testSortInt() {
try (IntVector vec = new IntVector("", allocator)) {
vec.allocateNew(10);
@@ -90,4 +189,142 @@ public class TestFixedWidthOutOfPlaceVectorSorter {
sortedVec.close();
}
}
+
+ @Test
+ public void testSortLong() {
+   // Sorts a 10-slot BigIntVector holding two nulls and one value beyond the int range,
+   // and checks that the nulls come first, followed by the values in ascending order.
+   try (BigIntVector vec = new BigIntVector("", allocator)) {
+     vec.allocateNew(10);
+     vec.setValueCount(10);
+
+     // fill data to sort
+     vec.set(0, 10L);
+     vec.set(1, 8L);
+     vec.setNull(2);
+     vec.set(3, 10L);
+     vec.set(4, 12L);
+     vec.set(5, 17L);
+     vec.setNull(6);
+     vec.set(7, 23L);
+     vec.set(8, 1L << 35L);
+     vec.set(9, 2L);
+
+     // sort the vector
+     FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter();
+     DefaultVectorComparators.LongComparator comparator = new DefaultVectorComparators.LongComparator();
+
+     // try-with-resources closes the output vector even when an assertion below fails;
+     // the former trailing close() call was skipped in that case.
+     try (BigIntVector sortedVec =
+         (BigIntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null)) {
+       sortedVec.allocateNew(vec.getValueCount());
+       sortedVec.setValueCount(vec.getValueCount());
+
+       sorter.sortOutOfPlace(vec, sortedVec, comparator);
+
+       // verify results
+       Assert.assertEquals(vec.getValueCount(), sortedVec.getValueCount());
+
+       assertTrue(sortedVec.isNull(0));
+       assertTrue(sortedVec.isNull(1));
+       Assert.assertEquals(2L, sortedVec.get(2));
+       Assert.assertEquals(8L, sortedVec.get(3));
+       Assert.assertEquals(10L, sortedVec.get(4));
+       Assert.assertEquals(10L, sortedVec.get(5));
+       Assert.assertEquals(12L, sortedVec.get(6));
+       Assert.assertEquals(17L, sortedVec.get(7));
+       Assert.assertEquals(23L, sortedVec.get(8));
+       Assert.assertEquals(1L << 35L, sortedVec.get(9));
+     }
+   }
+ }
+
+ @Test
+ public void testSortFloat() {
+   // Sorts a 10-slot Float4Vector holding two nulls and one NaN, and checks that the
+   // nulls come first, the ordinary values follow in ascending order and NaN is last.
+   try (Float4Vector vec = new Float4Vector("", allocator)) {
+     vec.allocateNew(10);
+     vec.setValueCount(10);
+
+     // fill data to sort
+     vec.set(0, 10f);
+     vec.set(1, 8f);
+     vec.setNull(2);
+     vec.set(3, 10f);
+     vec.set(4, 12f);
+     vec.set(5, 17f);
+     vec.setNull(6);
+     vec.set(7, 23f);
+     vec.set(8, Float.NaN);
+     vec.set(9, 2f);
+
+     // sort the vector
+     FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter();
+     DefaultVectorComparators.Float4Comparator comparator = new DefaultVectorComparators.Float4Comparator();
+
+     // try-with-resources closes the output vector even when an assertion below fails;
+     // the former trailing close() call was skipped in that case.
+     try (Float4Vector sortedVec =
+         (Float4Vector) vec.getField().getFieldType().createNewSingleVector("", allocator, null)) {
+       sortedVec.allocateNew(vec.getValueCount());
+       sortedVec.setValueCount(vec.getValueCount());
+
+       sorter.sortOutOfPlace(vec, sortedVec, comparator);
+
+       // verify results
+       Assert.assertEquals(vec.getValueCount(), sortedVec.getValueCount());
+
+       assertTrue(sortedVec.isNull(0));
+       assertTrue(sortedVec.isNull(1));
+       Assert.assertEquals(2f, sortedVec.get(2), 0f);
+       Assert.assertEquals(8f, sortedVec.get(3), 0f);
+       Assert.assertEquals(10f, sortedVec.get(4), 0f);
+       Assert.assertEquals(10f, sortedVec.get(5), 0f);
+       Assert.assertEquals(12f, sortedVec.get(6), 0f);
+       Assert.assertEquals(17f, sortedVec.get(7), 0f);
+       Assert.assertEquals(23f, sortedVec.get(8), 0f);
+       Assert.assertEquals(Float.NaN, sortedVec.get(9), 0f);
+     }
+   }
+ }
+
+ @Test
+ public void testSortDobule() {
+   // Sorts a 10-slot Float8Vector holding two nulls and one NaN, and checks that the
+   // nulls come first, the ordinary values follow in ascending order and NaN is last.
+   // NOTE(review): the method name misspells "Double"; it is kept as-is here so the
+   // test identifier does not change.
+   try (Float8Vector vec = new Float8Vector("", allocator)) {
+     vec.allocateNew(10);
+     vec.setValueCount(10);
+
+     // fill data to sort
+     vec.set(0, 10);
+     vec.set(1, 8);
+     vec.setNull(2);
+     vec.set(3, 10);
+     vec.set(4, 12);
+     vec.set(5, 17);
+     vec.setNull(6);
+     vec.set(7, Double.NaN);
+     vec.set(8, 35);
+     vec.set(9, 2);
+
+     // sort the vector
+     FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter();
+     DefaultVectorComparators.Float8Comparator comparator = new DefaultVectorComparators.Float8Comparator();
+
+     // try-with-resources closes the output vector even when an assertion below fails;
+     // the former trailing close() call was skipped in that case.
+     try (Float8Vector sortedVec =
+         (Float8Vector) vec.getField().getFieldType().createNewSingleVector("", allocator, null)) {
+       sortedVec.allocateNew(vec.getValueCount());
+       sortedVec.setValueCount(vec.getValueCount());
+
+       sorter.sortOutOfPlace(vec, sortedVec, comparator);
+
+       // verify results
+       Assert.assertEquals(vec.getValueCount(), sortedVec.getValueCount());
+
+       assertTrue(sortedVec.isNull(0));
+       assertTrue(sortedVec.isNull(1));
+       Assert.assertEquals(2, sortedVec.get(2), 0);
+       Assert.assertEquals(8, sortedVec.get(3), 0);
+       Assert.assertEquals(10, sortedVec.get(4), 0);
+       Assert.assertEquals(10, sortedVec.get(5), 0);
+       Assert.assertEquals(12, sortedVec.get(6), 0);
+       Assert.assertEquals(17, sortedVec.get(7), 0);
+       Assert.assertEquals(35, sortedVec.get(8), 0);
+       Assert.assertEquals(Double.NaN, sortedVec.get(9), 0);
+     }
+   }
+ }
}