Repository: carbondata Updated Branches: refs/heads/master 759cb31f6 -> 23bd6e9af
[CARBONDATA-2963] added support for byte as sort column Added support for byte to be included in sort column This closes #2750 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/23bd6e9a Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/23bd6e9a Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/23bd6e9a Branch: refs/heads/master Commit: 23bd6e9afa537b84823b5f39941f558fb2be159d Parents: 759cb31 Author: kunal642 <[email protected]> Authored: Sun Sep 23 13:19:18 2018 +0530 Committer: ravipesala <[email protected]> Committed: Mon Sep 24 16:45:50 2018 +0530 ---------------------------------------------------------------------- .../core/scan/executor/util/QueryUtil.java | 2 + .../core/util/comparator/Comparator.java | 3 ++ .../datasource/SparkCarbonDataSourceTest.scala | 56 ++++++++++++++++++++ 3 files changed, 61 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/carbondata/blob/23bd6e9a/core/src/main/java/org/apache/carbondata/core/scan/executor/util/QueryUtil.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/executor/util/QueryUtil.java b/core/src/main/java/org/apache/carbondata/core/scan/executor/util/QueryUtil.java index 219e7a2..efe3e55 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/executor/util/QueryUtil.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/executor/util/QueryUtil.java @@ -769,6 +769,8 @@ public class QueryUtil { vector.putBytes(vectorRow, 0, length, value); } else if (dt == DataTypes.BOOLEAN) { vector.putBoolean(vectorRow, ByteUtil.toBoolean(value[0])); + } else if (dt == DataTypes.BYTE) { + vector.putByte(vectorRow,value[0]); } else if (dt == DataTypes.SHORT) { vector.putShort(vectorRow, ByteUtil.toXorShort(value, 0, length)); } else if (dt == DataTypes.INT) { http://git-wip-us.apache.org/repos/asf/carbondata/blob/23bd6e9a/core/src/main/java/org/apache/carbondata/core/util/comparator/Comparator.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/util/comparator/Comparator.java b/core/src/main/java/org/apache/carbondata/core/util/comparator/Comparator.java index f4e0adb..d9bc94b 100644 --- a/core/src/main/java/org/apache/carbondata/core/util/comparator/Comparator.java +++ b/core/src/main/java/org/apache/carbondata/core/util/comparator/Comparator.java @@ -77,6 +77,9 @@ public final class Comparator { class ByteArraySerializableComparator implements SerializableComparator { @Override public int compare(Object key1, Object key2) { + if (key1 instanceof Byte) { + return ((Byte) key1).compareTo((Byte) key2); + } return ByteUtil.compare((byte[]) key1, (byte[]) key2); } } http://git-wip-us.apache.org/repos/asf/carbondata/blob/23bd6e9a/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceTest.scala ---------------------------------------------------------------------- diff --git a/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceTest.scala b/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceTest.scala index 18423f2..016968c 100644 --- a/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceTest.scala +++ b/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceTest.scala @@ -1015,6 +1015,62 @@ class SparkCarbonDataSourceTest extends FunSuite with BeforeAndAfterAll { "structfield.bytefield < 11"), Seq(Row("name10", Row(10.asInstanceOf[Byte], 10.1012.asInstanceOf[Float])))) } + test("test bytefield as sort column") { + import scala.collection.JavaConverters._ + val path = new File(warehouse1+"/sdk1").getAbsolutePath + FileFactory.deleteAllFilesOfDir(new File(warehouse1+"/sdk1")) + var fields: Array[Field] = new Array[Field](8) + // same column name, but name as boolean type + fields(0) = new Field("age", DataTypes.INT) + fields(1) = new Field("height", DataTypes.DOUBLE) + fields(2) = new Field("name", DataTypes.STRING) + fields(3) = new Field("address", DataTypes.STRING) + fields(4) = new Field("salary", DataTypes.LONG) + fields(5) = new Field("bytefield", DataTypes.BYTE) + + try { + val builder = CarbonWriter.builder() + val writer = + builder.outputPath(path) + .isTransactionalTable(false) + .uniqueIdentifier(System.nanoTime()).withBlockSize(2).sortBy(Array("bytefield")) + .buildWriterForCSVInput(new Schema(fields), spark.sparkContext.hadoopConfiguration) + + var i = 0 + while (i < 11) { + val array = Array[String]( + String.valueOf(i), + String.valueOf(i.toDouble / 2), + "name" + i, + "address" + i, + (i * 100).toString, + s"${10 - i}") + writer.write(array) + i += 1 + } + writer.close() + spark.sql("drop table if exists sorted_par") + spark.sql("drop table if exists sort_table") + spark.sql(s"create table sort_table (age int, height double, name string, address string," + + s" salary long, bytefield byte) using carbon location '$path'") + FileFactory.deleteAllCarbonFilesOfDir(FileFactory.getCarbonFile(s"$warehouse1/../warehouse2")) + spark.sql(s"create table sorted_par(age int, height double, name string, address " + + s"string," + + s"salary long, bytefield byte) using parquet location " + + s"'$warehouse1/../warehouse2'") + (0 to 10).foreach { + i => + spark.sql(s"insert into sorted_par select '$i', ${ i.toDouble / 2 }, 'name$i', " + + s"'address$i', ${ i * 100 }, '${ 10 - i }'") + } + checkAnswer(spark.sql("select * from sorted_par order by bytefield"), + spark.sql("select * from sort_table")) + } catch { + case ex: Exception => throw new RuntimeException(ex) + case _ => None + } + } + test("test array of float type and byte type") { import scala.collection.JavaConverters._ val path = new File(warehouse1+"/sdk1").getAbsolutePath
