Repository: incubator-hawq Updated Branches: refs/heads/HAWQ-992 [created] 24f5e363d
HAWQ-992. PXF Hive data type check in Fragmenter too restrictive. Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/24f5e363 Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/24f5e363 Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/24f5e363 Branch: refs/heads/HAWQ-992 Commit: 24f5e363d48f13108cb86750a5a5f2e76844a2f3 Parents: c2280de Author: Oleksandr Diachenko <[email protected]> Authored: Wed Aug 24 16:54:55 2016 -0700 Committer: Oleksandr Diachenko <[email protected]> Committed: Wed Aug 24 16:54:55 2016 -0700 ---------------------------------------------------------------------- .../pxf/api/utilities/ColumnDescriptor.java | 15 ++++- .../hawq/pxf/api/utilities/EnumHawqType.java | 49 ++++++++------ .../plugins/hive/HiveColumnarSerdeResolver.java | 3 +- .../plugins/hive/HiveInputFormatFragmenter.java | 70 ++------------------ .../hive/utilities/EnumHiveToHawqType.java | 12 ++++ .../plugins/hive/utilities/HiveUtilities.java | 43 ++++++++++++ .../pxf/service/utilities/ProtocolData.java | 16 ++++- 7 files changed, 116 insertions(+), 92 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/24f5e363/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/utilities/ColumnDescriptor.java ---------------------------------------------------------------------- diff --git a/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/utilities/ColumnDescriptor.java b/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/utilities/ColumnDescriptor.java index baaca1d..ff85672 100644 --- a/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/utilities/ColumnDescriptor.java +++ b/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/utilities/ColumnDescriptor.java @@ -30,6 +30,7 @@ public class ColumnDescriptor { String gpdbColumnName; String gpdbColumnTypeName; int gpdbColumnIndex; + String[] gpdbColumnTypeModifiers; /** * Reserved word for a table record key. @@ -44,12 +45,14 @@ public class ColumnDescriptor { * @param typecode OID * @param index column index * @param typename type name + * @param typemods type modifiers */ - public ColumnDescriptor(String name, int typecode, int index, String typename) { + public ColumnDescriptor(String name, int typecode, int index, String typename, String[] typemods) { gpdbColumnTypeCode = typecode; gpdbColumnTypeName = typename; gpdbColumnName = name; gpdbColumnIndex = index; + gpdbColumnTypeModifiers = typemods; } /** @@ -62,6 +65,9 @@ public class ColumnDescriptor { this.gpdbColumnName = copy.gpdbColumnName; this.gpdbColumnIndex = copy.gpdbColumnIndex; this.gpdbColumnTypeName = copy.gpdbColumnTypeName; + System.arraycopy(this.gpdbColumnTypeModifiers, 0, + copy.gpdbColumnTypeModifiers, 0, + this.gpdbColumnTypeModifiers.length); } public String columnName() { @@ -80,6 +86,10 @@ public class ColumnDescriptor { return gpdbColumnTypeName; } + public String[] columnTypeModifiers() { + return gpdbColumnTypeModifiers; + } + /** * Returns <tt>true</tt> if {@link #gpdbColumnName} is a {@link #RECORD_KEY_NAME}. * @@ -94,6 +104,7 @@ public class ColumnDescriptor { return "ColumnDescriptor [gpdbColumnTypeCode=" + gpdbColumnTypeCode + ", gpdbColumnName=" + gpdbColumnName + ", gpdbColumnTypeName=" + gpdbColumnTypeName - + ", gpdbColumnIndex=" + gpdbColumnIndex + "]"; + + ", gpdbColumnIndex=" + gpdbColumnIndex + + ", gpdbColumnTypeModifiers=" + gpdbColumnTypeModifiers + "]"; } } http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/24f5e363/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/utilities/EnumHawqType.java ---------------------------------------------------------------------- diff --git a/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/utilities/EnumHawqType.java b/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/utilities/EnumHawqType.java index b5a94c6..01d40f0 100644 --- a/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/utilities/EnumHawqType.java +++ b/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/utilities/EnumHawqType.java @@ -20,6 +20,8 @@ package org.apache.hawq.pxf.api.utilities; import java.io.IOException; + +import org.apache.hawq.pxf.api.io.DataType; import org.codehaus.jackson.JsonGenerator; import org.codehaus.jackson.map.JsonSerializer; import org.codehaus.jackson.map.annotate.JsonSerialize; @@ -43,35 +45,32 @@ class EnumHawqTypeSerializer extends JsonSerializer<EnumHawqType> { */ @JsonSerialize(using = EnumHawqTypeSerializer.class) public enum EnumHawqType { - Int2Type("int2"), - Int4Type("int4"), - Int8Type("int8"), - Float4Type("float4"), - Float8Type("float8"), - TextType("text"), - VarcharType("varchar", (byte) 1, true), - ByteaType("bytea"), - DateType("date"), - TimestampType("timestamp"), - BoolType("bool"), - NumericType("numeric", (byte) 2, true), - BpcharType("bpchar", (byte) 1, true); + Int2Type("int2", DataType.SMALLINT), + Int4Type("int4", DataType.INTEGER), + Int8Type("int8", DataType.BIGINT), + Float4Type("float4", DataType.REAL), + Float8Type("float8", DataType.FLOAT8), + TextType("text", DataType.TEXT), + VarcharType("varchar", DataType.VARCHAR, (byte) 1, true), + ByteaType("bytea", DataType.BYTEA), + DateType("date", DataType.DATE), + TimestampType("timestamp", DataType.TIMESTAMP), + BoolType("bool", DataType.BOOLEAN), + NumericType("numeric", DataType.NUMERIC, (byte) 2, true), + BpcharType("bpchar", DataType.BPCHAR, (byte) 1, true); + private DataType dataType; private String typeName; private byte modifiersNum; private boolean validateIntegerModifiers; - EnumHawqType(String typeName) { + EnumHawqType(String typeName, DataType dataType) { this.typeName = typeName; + this.dataType = dataType; } - EnumHawqType(String typeName, byte modifiersNum) { - this(typeName); - this.modifiersNum = modifiersNum; - } - - EnumHawqType(String typeName, byte modifiersNum, boolean validateIntegerModifiers) { - this(typeName); + EnumHawqType(String typeName, DataType dataType, byte modifiersNum, boolean validateIntegerModifiers) { + this(typeName, dataType); this.modifiersNum = modifiersNum; this.validateIntegerModifiers = validateIntegerModifiers; } @@ -99,6 +98,14 @@ public enum EnumHawqType { public boolean getValidateIntegerModifiers() { return this.validateIntegerModifiers; } + + /** + * + * @return data type + */ + public DataType getDataType() { + return this.dataType; + } } http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/24f5e363/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveColumnarSerdeResolver.java ---------------------------------------------------------------------- diff --git a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveColumnarSerdeResolver.java b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveColumnarSerdeResolver.java index d298bac..43e3b65 100644 --- a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveColumnarSerdeResolver.java +++ b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveColumnarSerdeResolver.java @@ -28,6 +28,7 @@ import org.apache.hawq.pxf.api.io.DataType; import org.apache.hawq.pxf.api.utilities.ColumnDescriptor; import org.apache.hawq.pxf.api.utilities.InputData; import org.apache.hawq.pxf.api.utilities.Utilities; +import org.apache.hawq.pxf.plugins.hive.utilities.HiveUtilities; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; @@ -126,7 +127,7 @@ public class HiveColumnarSerdeResolver extends HiveResolver { for (int i = 0; i < numberOfDataColumns; i++) { ColumnDescriptor column = input.getColumn(i); String columnName = column.columnName(); - String columnType = HiveInputFormatFragmenter.toHiveType(DataType.get(column.columnTypeCode()), columnName); + String columnType = HiveUtilities.toHiveType(DataType.get(column.columnTypeCode())); columnNames.append(delim).append(columnName); columnTypes.append(delim).append(columnType); delim = ","; http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/24f5e363/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveInputFormatFragmenter.java ---------------------------------------------------------------------- diff --git a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveInputFormatFragmenter.java b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveInputFormatFragmenter.java index a666b8b..b944206 100644 --- a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveInputFormatFragmenter.java +++ b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveInputFormatFragmenter.java @@ -26,6 +26,8 @@ import org.apache.hawq.pxf.api.UserDataException; import org.apache.hawq.pxf.api.io.DataType; import org.apache.hawq.pxf.api.utilities.ColumnDescriptor; import org.apache.hawq.pxf.api.utilities.InputData; +import org.apache.hawq.pxf.plugins.hive.utilities.EnumHiveToHawqType; +import org.apache.hawq.pxf.plugins.hive.utilities.HiveUtilities; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.metastore.api.FieldSchema; @@ -146,82 +148,18 @@ public class HiveInputFormatFragmenter extends HiveDataFragmenter { for (FieldSchema hiveCol : hiveColumns) { ColumnDescriptor colDesc = inputData.getColumn(index++); DataType colType = DataType.get(colDesc.columnTypeCode()); - compareTypes(colType, hiveCol.getType(), colDesc.columnName()); + HiveUtilities.compareTypes(colType, hiveCol.getType(), colDesc.columnName()); } // check partition fields List<FieldSchema> hivePartitions = tbl.getPartitionKeys(); for (FieldSchema hivePart : hivePartitions) { ColumnDescriptor colDesc = inputData.getColumn(index++); DataType colType = DataType.get(colDesc.columnTypeCode()); - compareTypes(colType, hivePart.getType(), colDesc.columnName()); + HiveUtilities.compareTypes(colType, hivePart.getType(), colDesc.columnName()); } } - private void compareTypes(DataType type, String hiveType, String fieldName) { - String convertedHive = toHiveType(type, fieldName); - if (!convertedHive.equals(hiveType) - && !(convertedHive.equals("smallint") && hiveType.equals("tinyint"))) { - throw new UnsupportedTypeException( - "Schema mismatch definition: Field " + fieldName - + " (Hive type " + hiveType + ", HAWQ type " - + type.toString() + ")"); - } - if (LOG.isDebugEnabled()) { - LOG.debug("Field " + fieldName + ": Hive type " + hiveType - + ", HAWQ type " + type.toString()); - } - } - - /** - * Converts HAWQ type to hive type. The supported mappings are:<ul> - * <li>{@code BOOLEAN -> boolean}</li> - * <li>{@code SMALLINT -> smallint (tinyint is converted to smallint)}</li> - * <li>{@code BIGINT -> bigint}</li> - * <li>{@code TIMESTAMP, TIME -> timestamp}</li> - * <li>{@code NUMERIC -> decimal}</li> - * <li>{@code BYTEA -> binary}</li> - * <li>{@code INTERGER -> int}</li> - * <li>{@code TEXT -> string}</li> - * <li>{@code REAL -> float}</li> - * <li>{@code FLOAT8 -> double}</li> - * </ul> - * All other types (both in HAWQ and in HIVE) are not supported. - * - * @param type HAWQ data type - * @param name field name - * @return Hive type - * @throws UnsupportedTypeException if type is not supported - */ - public static String toHiveType(DataType type, String name) { - switch (type) { - case BOOLEAN: - case SMALLINT: - case BIGINT: - case TIMESTAMP: - return type.toString().toLowerCase(); - case NUMERIC: - return "decimal"; - case BYTEA: - return "binary"; - case INTEGER: - return "int"; - case TEXT: - return "string"; - case REAL: - return "float"; - case FLOAT8: - return "double"; - case TIME: - return "timestamp"; - default: - throw new UnsupportedTypeException( - type.toString() - + " conversion is not supported by HiveInputFormatFragmenter (Field " - + name + ")"); - } - } - /* * Validates that partition format corresponds to PXF supported formats and * transforms the class name to an enumeration for writing it to the http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/24f5e363/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/utilities/EnumHiveToHawqType.java ---------------------------------------------------------------------- diff --git a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/utilities/EnumHiveToHawqType.java b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/utilities/EnumHiveToHawqType.java index a747bd5..1cedaa8 100644 --- a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/utilities/EnumHiveToHawqType.java +++ b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/utilities/EnumHiveToHawqType.java @@ -19,6 +19,7 @@ package org.apache.hawq.pxf.plugins.hive.utilities; +import org.apache.hawq.pxf.api.io.DataType; import org.apache.hawq.pxf.api.utilities.EnumHawqType; import org.apache.hawq.pxf.api.UnsupportedTypeException; @@ -110,4 +111,15 @@ public enum EnumHiveToHawqType { + hiveType + " to HAWQ's type"); } + public static EnumHiveToHawqType getHawqToHiveType(DataType dataType) { + + for (EnumHiveToHawqType t : values()) { + + if (t.getHawqType().getDataType().equals(dataType)) { + return t; + } + } + throw new UnsupportedTypeException("Unable to map HAWQ's type: " + + dataType + " to Hive's type"); + } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/24f5e363/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/utilities/HiveUtilities.java ---------------------------------------------------------------------- diff --git a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/utilities/HiveUtilities.java b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/utilities/HiveUtilities.java index 096c0ff..579ab0b 100644 --- a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/utilities/HiveUtilities.java +++ b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/utilities/HiveUtilities.java @@ -36,6 +36,7 @@ import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hawq.pxf.api.Metadata; import org.apache.hawq.pxf.api.UnsupportedTypeException; import org.apache.hawq.pxf.api.utilities.EnumHawqType; +import org.apache.hawq.pxf.api.io.DataType; import org.apache.hawq.pxf.plugins.hive.utilities.EnumHiveToHawqType; /** @@ -256,4 +257,46 @@ public class HiveUtilities { throw new RuntimeException("Failed connecting to Hive MetaStore service: " + cause.getMessage(), cause); } } + + + /** + * Converts HAWQ type to hive type. The supported mappings are:<ul> + * <li>{@code BOOLEAN -> boolean}</li> + * <li>{@code SMALLINT -> smallint (tinyint is converted to smallint)}</li> + * <li>{@code BIGINT -> bigint}</li> + * <li>{@code TIMESTAMP, TIME -> timestamp}</li> + * <li>{@code NUMERIC -> decimal}</li> + * <li>{@code BYTEA -> binary}</li> + * <li>{@code INTERGER -> int}</li> + * <li>{@code TEXT -> string}</li> + * <li>{@code REAL -> float}</li> + * <li>{@code FLOAT8 -> double}</li> + * </ul> + * All other types (both in HAWQ and in HIVE) are not supported. + * + * @param type HAWQ data type + * @param name field name + * @return Hive type + * @throws UnsupportedTypeException if type is not supported + */ + public static String toHiveType(DataType type) { + + EnumHiveToHawqType hiveToHawqType = EnumHiveToHawqType.getHawqToHiveType(type); + return hiveToHawqType.getTypeName(); + } + + public static void compareTypes(DataType type, String hiveType, String columnName) { + String convertedHive = toHiveType(type); + if (!convertedHive.equals(hiveType) + && !(convertedHive.equals("smallint") && hiveType.equals("tinyint"))) { + throw new UnsupportedTypeException( + "Schema mismatch definition:" + + " (Hive type " + hiveType + ", HAWQ type " + + type.toString() + ")"); + } + if (LOG.isDebugEnabled()) { + LOG.debug(" Hive type " + hiveType + + ", HAWQ type " + type.toString()); + } + } } http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/24f5e363/pxf/pxf-service/src/main/java/org/apache/hawq/pxf/service/utilities/ProtocolData.java ---------------------------------------------------------------------- diff --git a/pxf/pxf-service/src/main/java/org/apache/hawq/pxf/service/utilities/ProtocolData.java b/pxf/pxf-service/src/main/java/org/apache/hawq/pxf/service/utilities/ProtocolData.java index 0337937..5e6f6c4 100644 --- a/pxf/pxf-service/src/main/java/org/apache/hawq/pxf/service/utilities/ProtocolData.java +++ b/pxf/pxf-service/src/main/java/org/apache/hawq/pxf/service/utilities/ProtocolData.java @@ -29,7 +29,6 @@ import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.security.UserGroupInformation; - import org.apache.hawq.pxf.api.OutputFormat; import org.apache.hawq.pxf.api.utilities.ColumnDescriptor; import org.apache.hawq.pxf.api.utilities.InputData; @@ -390,9 +389,10 @@ public class ProtocolData extends InputData { String columnName = getProperty("ATTR-NAME" + i); int columnTypeCode = getIntProperty("ATTR-TYPECODE" + i); String columnTypeName = getProperty("ATTR-TYPENAME" + i); + String[] columnTypeMods = parseTypeMods(i); ColumnDescriptor column = new ColumnDescriptor(columnName, - columnTypeCode, i, columnTypeName); + columnTypeCode, i, columnTypeName, columnTypeMods); tupleDescription.add(column); if (columnName.equalsIgnoreCase(ColumnDescriptor.RECORD_KEY_NAME)) { @@ -401,6 +401,18 @@ public class ProtocolData extends InputData { } } + private String[] parseTypeMods(int columnIndex) { + Integer typeModeCount = Integer.parseInt(getOptionalProperty("ATTR-TYPEMOD" + columnIndex + "COUNT")); + String[] result = null; + if (typeModeCount > 0) { + result = new String[typeModeCount]; + for (int i = 0; i < typeModeCount; i++) { + result[i] = getProperty("ATTR-TYPEMOD" + columnIndex + "-" + i); + } + } + return result; + } + /** * Sets the index of the allocated data fragment *
