This is an automated email from the ASF dual-hosted git repository.
kunalkapoor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git
The following commit(s) were added to refs/heads/master by this push:
new 491d50f [CARBONDATA-3968]Added test cases for hive read complex types
and handled other issues
491d50f is described below
commit 491d50f2710373b74fa2ac6bce5c7db8562ec91d
Author: akkio-97 <[email protected]>
AuthorDate: Fri Aug 28 01:59:02 2020 +0530
[CARBONDATA-3968]Added test cases for hive read complex types and handled
other issues
Why is this PR needed?
Handled issues related to reading of byte, varchar and decimal types.
Map of primitive type with only one row inserted had issues.
Added test cases for hive read complex types.
What changes were proposed in this PR?
Byte datatype check was not present.
Varchar and decimal types were causing a problem during conversion from hive
datatypes to carbon (due to the presence of parentheses).
Handled map parser issue.
Added test cases for all primitive types excluding - binary datatype.
This closes #3906
---
.../carbondata/hive/WritableReadSupport.java | 2 +
.../apache/carbondata/hive/util/DataTypeUtil.java | 21 +++++-
.../hive/src/main/resources/array/complexArray.csv | 5 ++
.../hive/src/main/resources/map/complexMap.csv | 2 +
.../hive/src/main/resources/struct/struct.csv | 5 ++
.../org/apache/carbondata/hive/HiveCarbonTest.java | 88 ++++++++++++++++++++++
.../org/apache/carbondata/hive/HiveTestUtils.java | 16 +++-
.../loading/parser/impl/MapParserImpl.java | 9 ++-
8 files changed, 140 insertions(+), 8 deletions(-)
diff --git
a/integration/hive/src/main/java/org/apache/carbondata/hive/WritableReadSupport.java
b/integration/hive/src/main/java/org/apache/carbondata/hive/WritableReadSupport.java
index a404b78..db1aedf 100644
---
a/integration/hive/src/main/java/org/apache/carbondata/hive/WritableReadSupport.java
+++
b/integration/hive/src/main/java/org/apache/carbondata/hive/WritableReadSupport.java
@@ -222,6 +222,8 @@ public class WritableReadSupport<T> implements
CarbonReadSupport<T> {
return new LongWritable((long) obj);
} else if (dataType == DataTypes.SHORT) {
return new ShortWritable((short) obj);
+ } else if (dataType == DataTypes.BYTE) {
+ return new ByteWritable((byte) obj);
} else if (dataType == DataTypes.BOOLEAN) {
return new BooleanWritable((boolean) obj);
} else if (dataType == DataTypes.VARCHAR) {
diff --git
a/integration/hive/src/main/java/org/apache/carbondata/hive/util/DataTypeUtil.java
b/integration/hive/src/main/java/org/apache/carbondata/hive/util/DataTypeUtil.java
index d7fa80c..64a4376 100644
---
a/integration/hive/src/main/java/org/apache/carbondata/hive/util/DataTypeUtil.java
+++
b/integration/hive/src/main/java/org/apache/carbondata/hive/util/DataTypeUtil.java
@@ -21,16 +21,19 @@ import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
+import org.apache.carbondata.core.constants.CarbonCommonConstants;
import org.apache.carbondata.core.metadata.datatype.DataType;
import org.apache.carbondata.core.metadata.datatype.DataTypes;
import org.apache.carbondata.core.metadata.datatype.StructField;
+import org.apache.commons.lang.ArrayUtils;
+
public class DataTypeUtil {
public static DataType convertHiveTypeToCarbon(String type) throws
SQLException {
if ("string".equalsIgnoreCase(type) || type.startsWith("char")) {
return DataTypes.STRING;
- } else if ("varchar".equalsIgnoreCase(type)) {
+ } else if (type.startsWith("varchar")) {
return DataTypes.VARCHAR;
} else if ("float".equalsIgnoreCase(type)) {
return DataTypes.FLOAT;
@@ -38,8 +41,10 @@ public class DataTypeUtil {
return DataTypes.DOUBLE;
} else if ("boolean".equalsIgnoreCase(type)) {
return DataTypes.BOOLEAN;
- } else if ("tinyint".equalsIgnoreCase(type) ||
"smallint".equalsIgnoreCase(type)) {
+ } else if ("smallint".equalsIgnoreCase(type)) {
return DataTypes.SHORT;
+ } else if ("tinyint".equalsIgnoreCase(type)) {
+ return DataTypes.BYTE;
} else if ("int".equalsIgnoreCase(type)) {
return DataTypes.INT;
} else if ("bigint".equalsIgnoreCase(type)) {
@@ -64,13 +69,23 @@ public class DataTypeUtil {
return DataTypes.createArrayType(convertHiveTypeToCarbon(subType));
} else if (type.startsWith("map<")) {
String[] subType = (type.substring(type.indexOf("<") + 1,
type.indexOf(">"))).split(",");
+ for (int i = 0; i < subType.length; i++) {
+ if (subType[i].startsWith("decimal")) {
+ subType[i] += CarbonCommonConstants.COMMA + subType[++i];
+ subType = (String[]) ArrayUtils.removeElement(subType, subType[i]);
+ }
+ }
return DataTypes
.createMapType(convertHiveTypeToCarbon(subType[0]),
convertHiveTypeToCarbon(subType[1]));
} else if (type.startsWith("struct<")) {
String[] subTypes =
(type.substring(type.indexOf("<") + 1,
type.indexOf(">"))).split(",");
List<StructField> structFieldList = new ArrayList<>();
- for (String subType : subTypes) {
+ for (int i = 0; i < subTypes.length; i++) {
+ String subType = subTypes[i];
+ if (subType.startsWith("decimal")) {
+ subType += CarbonCommonConstants.COMMA + subTypes[++i];
+ }
String[] nameAndType = subType.split(":");
structFieldList
.add(new StructField(nameAndType[0],
convertHiveTypeToCarbon(nameAndType[1])));
diff --git a/integration/hive/src/main/resources/array/complexArray.csv
b/integration/hive/src/main/resources/array/complexArray.csv
new file mode 100644
index 0000000..a5841e0
--- /dev/null
+++ b/integration/hive/src/main/resources/array/complexArray.csv
@@ -0,0 +1,5 @@
+abc$def$ghijkl,1,1$2,123456$2555$9999999,1.1$2.2,1.2323$2.3$9.98,1.2323$2.3$9.89,hello$world$china,true$false$true,k$a,122$123$124,2014-01-11$2014-02-20
+abc$pqrst,1$2,1$2,123456$2555,1.1$2.2,2.2929$2.3,2.2929$2.3$6.789,hello$world,true$false,k$a,122$123$124,2014-01-11$2014-02-20
+abc$def,1$2$3,1$2,123456$26262,1.1$2.2,2.2929$2.39,2.2929$2.39,hello$world,true$false,k$a,122$123$124,2014-01-11$2014-02-20
+abc$def,1$2$3$4,1$2,56$1555,1.1$2.2,1.2$2.999,1.2$2.999,hello$world,true$false,k$a,122$123$124,2014-01-11$2014-02-20
+abc$def,1$2$3$4$5,1$2,123456$2555,1.1$2.2,1.2$2.3$0.09,1.2$2.3,hello$world,true$false,k$a,122$123$124,2014-01-11$2014-02-20
diff --git a/integration/hive/src/main/resources/map/complexMap.csv
b/integration/hive/src/main/resources/map/complexMap.csv
new file mode 100644
index 0000000..00e8d09
--- /dev/null
+++ b/integration/hive/src/main/resources/map/complexMap.csv
@@ -0,0 +1,2 @@
+Key1@Val1$Key2@Val2,1@key1$2@key2,[email protected][email protected],1@2$3@4,1.23@varchar1$2.34@varchar2,1234@2014-01-10$124@2014-01-10$1@2014-01-10,hello@1$world@2$china@3,1234567@true$123456@false$123456789@true
+Key21@Va2l1$Key22@Va2l2,1@key1$2@key2,[email protected][email protected],1@2$3@4,1.99@varchar21$2.99@varchar22,1234@2014-01-10$124@2014-01-10$1@2014-01-10,hello@1$world@2$china@3,1234567@true$123456@false$123456789@true
diff --git a/integration/hive/src/main/resources/struct/struct.csv
b/integration/hive/src/main/resources/struct/struct.csv
new file mode 100644
index 0000000..768ddb9
--- /dev/null
+++ b/integration/hive/src/main/resources/struct/struct.csv
@@ -0,0 +1,5 @@
+Egypt$123$560066$123456789$1.23$2.34567$char$true$varchar1$123$2014-02-20$1.234
+China$123$560066$123996789$1.23$2.34567$char$true$varchar2$123$2014-02-20$1.234
+India$123$561066$123456789$1.23$2.34567$char$true$varchar3$123$2014-02-20$1.234
+Egypt$123$5066$123456989$1.23$2.34567$char$true$varchar4$123$2014-02-20$1.234
+Sri
Lanka$123$56006$123456789$1.23$2.34567$char$true$varchar5$123$2014-02-20$1.234
diff --git
a/integration/hive/src/test/java/org/apache/carbondata/hive/HiveCarbonTest.java
b/integration/hive/src/test/java/org/apache/carbondata/hive/HiveCarbonTest.java
index 35a364a..1eac80a 100644
---
a/integration/hive/src/test/java/org/apache/carbondata/hive/HiveCarbonTest.java
+++
b/integration/hive/src/test/java/org/apache/carbondata/hive/HiveCarbonTest.java
@@ -110,6 +110,94 @@ public class HiveCarbonTest extends HiveTestUtils {
checkAnswer(carbonResult, hiveResult);
}
+ @Test
+ public void testArrayType() throws Exception {
+ String complexArrayPath = (resourceDirectoryPath + "array").replace("\\",
"/");
+ statement.execute("drop table if exists hive_table_complexArray");
+ statement.execute(String.format("CREATE external TABLE
hive_table_complexArray(arrayString ARRAY<STRING>,"
+ + " arrayShort ARRAY<SMALLINT>, arrayInt ARRAY<INT>, arrayLong
ARRAY<BIGINT>, arrayFloat ARRAY<FLOAT>,"
+ + " arrayDouble ARRAY<DOUBLE>, arrayDecimal ARRAY<DECIMAL(8,2)>,
arrayChar ARRAY<CHAR(5)>, "
+ + "arrayBoolean ARRAY<BOOLEAN>, arrayVarchar ARRAY<VARCHAR(50)>,
arrayByte ARRAY<TINYINT>, arrayDate ARRAY<DATE>)"
+ + " ROW FORMAT SERDE
'org.apache.hadoop.hive.contrib.serde2.MultiDelimitSerDe' WITH SERDEPROPERTIES
('field.delim'=',', 'collection.delim'='$', 'mapkey.delim'='@') location '%s'
TBLPROPERTIES('external.table.purge'='false')", complexArrayPath));
+
+ statement.execute("drop table if exists hive_carbon_table7");
+ statement.execute(
+ "CREATE TABLE hive_carbon_table7(arrayString ARRAY<STRING>,
arrayShort ARRAY<SMALLINT>, arrayInt ARRAY<INT>, "
+ + "arrayLong ARRAY<BIGINT>, arrayFloat ARRAY<FLOAT>, arrayDouble
ARRAY<DOUBLE>, "
+ + "arrayDecimal ARRAY<DECIMAL(8,2)>, arrayChar ARRAY<CHAR(5)>,
arrayBoolean ARRAY<BOOLEAN>, arrayVarchar ARRAY<VARCHAR(50)>, "
+ + "arrayByte ARRAY<TINYINT>, arrayDate ARRAY<DATE>) "
+ + "stored by 'org.apache.carbondata.hive.CarbonStorageHandler'
TBLPROPERTIES ('complex_delimiter'='$,@')");
+
+ statement.execute(
+ "insert into hive_carbon_table7 select * from
hive_table_complexArray");
+
+ ResultSet hiveResult = connection.createStatement().executeQuery("select *
from hive_table_complexArray");
+ ResultSet carbonResult = connection.createStatement().executeQuery("select
* from hive_carbon_table7");
+ checkAnswer(carbonResult, hiveResult);
+ }
+
+
+ @Test
+ public void arrayOfTimestamp() throws Exception {
+ statement.execute("drop table if exists hivee");
+ statement.execute("CREATE external TABLE hivee(arrayInt ARRAY<timestamp>)"
+ + " ROW FORMAT SERDE
'org.apache.hadoop.hive.contrib.serde2.MultiDelimitSerDe' WITH SERDEPROPERTIES
('field.delim'=',', 'collection.delim'='$', 'mapkey.delim'='@') location '%s'
TBLPROPERTIES('external.table.purge'='false')");
+ statement.execute("insert into table hivee values
(array(Timestamp('2000-03-12 15:00:00'),Timestamp('2001-04-15
15:58:00'),Timestamp('2002-05-27 15:20:00')))");
+ statement.execute("drop table if exists carbonn");
+ statement.execute(
+ "CREATE TABLE carbonn(timestampField array<timestamp>) "
+ + "stored by 'org.apache.carbondata.hive.CarbonStorageHandler'
TBLPROPERTIES ('complex_delimiter'='$,@', 'BAD_RECORDS_LOGGER_ENABLE' =
'TRUE')");
+ statement.execute("insert into carbonn select * from hivee");
+ ResultSet resultSet = connection.createStatement()
+ .executeQuery("select * from carbonn");
+ ResultSet hiveResults = connection.createStatement()
+ .executeQuery("select * from hivee");
+ checkAnswer(resultSet, hiveResults);
+ }
+
+ @Test
+ public void testMapType() throws Exception {
+ String complexMapPath = (resourceDirectoryPath + "map").replace("\\", "/");
+ statement.execute("drop table if exists hive_table_complexMap");
+ statement.execute(String.format("CREATE external TABLE
hive_table_complexMap(mapField1 MAP<STRING, STRING>, mapField2 MAP<INT,
STRING>, mapField3 MAP<DOUBLE, "
+ + "FLOAT>,mapField4 MAP<TINYINT, SMALLINT>, mapField5
MAP<DECIMAL(10,2), VARCHAR(50)>, mapField6 MAP<BIGINT,DATE>,mapField7
MAP<CHAR(5),INT>, "
+ + "mapField8 MAP<BIGINT,BOOLEAN>)"
+ + " ROW FORMAT SERDE
'org.apache.hadoop.hive.contrib.serde2.MultiDelimitSerDe' WITH SERDEPROPERTIES
('field.delim'=',', 'collection.delim'='$', 'mapkey.delim'='@') location '%s'
TBLPROPERTIES('external.table.purge'='false')", complexMapPath));
+
+ statement.execute("drop table if exists hive_carbon_table8");
+ statement.execute(
+ "CREATE TABLE hive_carbon_table8(mapField1 MAP<STRING, STRING>,
mapField2 MAP<INT, STRING>, mapField3 MAP<DOUBLE, FLOAT>,mapField4 MAP<TINYINT,
SMALLINT>, "
+ + "mapField5 MAP<DECIMAL(10,2), VARCHAR(50)>, mapField6
MAP<BIGINT, DATE>,mapField7 MAP<CHAR(5), INT>, mapField8 MAP<BIGINT,BOOLEAN>) "
+ + "stored by 'org.apache.carbondata.hive.CarbonStorageHandler'
TBLPROPERTIES ('complex_delimiter'='$,@')");
+ statement.execute(
+ "insert into hive_carbon_table8 select * from hive_table_complexMap");
+ ResultSet hiveResult = connection.createStatement().executeQuery("select *
from hive_table_complexMap");
+ ResultSet carbonResult = connection.createStatement().executeQuery("select
* from hive_carbon_table8");
+ checkAnswer(carbonResult, hiveResult);
+ }
+
+ @Test
+ public void testStructType() throws Exception {
+ String complexStructPath = (resourceDirectoryPath +
"struct").replace("\\", "/");
+ statement.execute("drop table if exists hive_table_complexSTRUCT");
+ statement.execute(String.format("CREATE external TABLE
hive_table_complexSTRUCT(structField STRUCT<stringfield: STRING, shortfield:
SMALLINT, intfield: INT, "
+ + "longfield: BIGINT, floatfield: FLOAT, doublefield: DOUBLE,
charfield: CHAR(4), boolfield: BOOLEAN, varcharfield: VARCHAR(50), bytefield:
TINYINT, "
+ + "datefield: DATE, decimalfield: DECIMAL(8,2)>) "
+ + "ROW FORMAT SERDE
'org.apache.hadoop.hive.contrib.serde2.MultiDelimitSerDe' WITH SERDEPROPERTIES "
+ + "('field.delim'=',', 'collection.delim'='$', 'mapkey.delim'='@')
location '%s' TBLPROPERTIES('external.table.purge'='false')",
complexStructPath));
+
+ statement.execute("drop table if exists hive_carbon_table9");
+ statement.execute(
+ "CREATE TABLE hive_carbon_table9(structField STRUCT<stringfield:
STRING, shortfield: SMALLINT, intfield: INT, longfield: BIGINT, floatfield:
FLOAT, "
+ + "doublefield: DOUBLE, charfield: CHAR(4), boolfield: BOOLEAN,
varcharfield: VARCHAR(50), bytefield: TINYINT, datefield: DATE, decimalfield:
DECIMAL(8,2)>) "
+ + "stored by 'org.apache.carbondata.hive.CarbonStorageHandler'
TBLPROPERTIES ('complex_delimiter'='$,@')");
+ statement.execute(
+ "insert into hive_carbon_table9 select * from
hive_table_complexSTRUCT");
+ ResultSet hiveResult = connection.createStatement().executeQuery("select *
from hive_table_complexSTRUCT");
+ ResultSet carbonResult = connection.createStatement().executeQuery("select
* from hive_carbon_table9");
+ checkAnswer(carbonResult, hiveResult);
+ }
+
@AfterClass
public static void tearDown() {
try {
diff --git
a/integration/hive/src/test/java/org/apache/carbondata/hive/HiveTestUtils.java
b/integration/hive/src/test/java/org/apache/carbondata/hive/HiveTestUtils.java
index 44c19b7..44ec330 100644
---
a/integration/hive/src/test/java/org/apache/carbondata/hive/HiveTestUtils.java
+++
b/integration/hive/src/test/java/org/apache/carbondata/hive/HiveTestUtils.java
@@ -24,6 +24,7 @@ import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
+import java.util.Arrays;
import org.apache.carbondata.hive.test.server.HiveEmbeddedServer2;
@@ -56,6 +57,13 @@ public abstract class HiveTestUtils {
}
}
+ public boolean checkMapKeyPairsAfterSorting(String a, String b) {
+ String[] actual = a.substring(1, a.length() - 1).split(",");
+ String[] expected = b.substring(1, a.length() - 1).split(",");
+ Arrays.sort(actual);Arrays.sort(expected);
+ return Arrays.equals(actual, expected);
+ }
+
public boolean checkAnswer(ResultSet actual, ResultSet expected) throws
SQLException {
Assert.assertEquals("Row Count Mismatch: ", expected.getFetchSize(),
actual.getFetchSize());
int rowCountExpected = 0;
@@ -70,8 +78,12 @@ public abstract class HiveTestUtils {
Assert.assertTrue(numOfColumnsExpected > 0);
Assert.assertEquals(actual.getMetaData().getColumnCount(),
numOfColumnsExpected);
for (int i = 1; i <= numOfColumnsExpected; i++) {
- expectedValuesList.add(expected.getString(i));
- actualValuesList.add(actual.getString(i));
+ if (actual.getString(i).contains(":")) {
+ Assert.assertTrue(checkMapKeyPairsAfterSorting(actual.getString(i),
expected.getString(i)));
+ } else {
+ expectedValuesList.add(expected.getString(i));
+ actualValuesList.add(actual.getString(i));
+ }
}
}
Collections.sort(expectedValuesList);Collections.sort(actualValuesList);
diff --git
a/processing/src/main/java/org/apache/carbondata/processing/loading/parser/impl/MapParserImpl.java
b/processing/src/main/java/org/apache/carbondata/processing/loading/parser/impl/MapParserImpl.java
index d2207d2..c5bc7a5 100644
---
a/processing/src/main/java/org/apache/carbondata/processing/loading/parser/impl/MapParserImpl.java
+++
b/processing/src/main/java/org/apache/carbondata/processing/loading/parser/impl/MapParserImpl.java
@@ -73,9 +73,12 @@ public class MapParserImpl extends ArrayParserImpl {
@Override
public ArrayObject parseRaw(Object data) {
- Object keyArray = ((Object[]) data)[0];
- Object valueArray = ((Object[]) data)[1];
- return new ArrayObject(new Object[]{child.parseRaw(keyArray),
child.parseRaw(valueArray)});
+ Object[] keyValuePairs = ((Object[]) data);
+ Object[] objectArray = new Object[keyValuePairs.length];
+ for (int i = 0; i < ((Object[]) data).length; i++) {
+ objectArray[i] = child.parseRaw(keyValuePairs[i]);
+ }
+ return new ArrayObject(objectArray);
}
}