HAWQ-1228. Support a custom delimiter and flat serialization of complex types in the HiveText profile.
Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/8ba2e161 Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/8ba2e161 Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/8ba2e161 Branch: refs/heads/HAWQ-1228 Commit: 8ba2e16180064b92d7b1d8a987a63a0ba6aff704 Parents: 8fe2703 Author: Oleksandr Diachenko <[email protected]> Authored: Thu Jan 19 17:20:20 2017 -0800 Committer: Oleksandr Diachenko <[email protected]> Committed: Thu Jan 19 17:20:20 2017 -0800 ---------------------------------------------------------------------- .../java/org/apache/hawq/pxf/api/Metadata.java | 14 ++++++-- .../hawq/pxf/api/utilities/InputData.java | 2 ++ .../plugins/hive/HiveColumnarSerdeResolver.java | 1 - .../pxf/plugins/hive/HiveMetadataFetcher.java | 20 +++++++++++- .../hawq/pxf/plugins/hive/HiveResolver.java | 6 ++-- .../plugins/hive/HiveStringPassResolver.java | 18 ++++++++--- .../hawq/pxf/plugins/hive/HiveUserData.java | 34 +++++++++++++++++--- .../plugins/hive/utilities/HiveUtilities.java | 9 ++++-- .../pxf/plugins/hive/HiveORCAccessorTest.java | 2 +- .../apache/hawq/pxf/service/ProfileFactory.java | 1 + src/backend/catalog/external/externalmd.c | 26 +++++++++++---- src/include/catalog/external/itemmd.h | 2 ++ 12 files changed, 106 insertions(+), 29 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/8ba2e161/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/Metadata.java ---------------------------------------------------------------------- diff --git a/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/Metadata.java b/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/Metadata.java index 7e3b92e..a920e4f 100644 --- a/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/Metadata.java +++ b/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/Metadata.java @@ -22,6 +22,7 @@ package 
org.apache.hawq.pxf.api; import java.util.ArrayList; import java.util.List; +import java.util.Map; import java.util.Set; import org.apache.hawq.pxf.api.utilities.EnumHawqType; @@ -124,18 +125,25 @@ public class Metadata { * Item's fields */ private List<Metadata.Field> fields; - - private Set<OutputFormat> outputFormats; + private Map<String, String> outputParameters; public Set<OutputFormat> getOutputFormats() { return outputFormats; } - public void setFormats(Set<OutputFormat> outputFormats) { + public void setOutputFormats(Set<OutputFormat> outputFormats) { this.outputFormats = outputFormats; } + public Map<String, String> getOutputParameters() { + return outputParameters; + } + + public void setOutputParameters(Map<String, String> outputParameters) { + this.outputParameters = outputParameters; + } + /** * Constructs an item's Metadata. * http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/8ba2e161/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/utilities/InputData.java ---------------------------------------------------------------------- diff --git a/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/utilities/InputData.java b/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/utilities/InputData.java index 5afedca..0ecb9eb 100644 --- a/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/utilities/InputData.java +++ b/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/utilities/InputData.java @@ -31,6 +31,8 @@ import java.util.*; */ public class InputData { + public static final String DELIMITER_KEY = "DELIMITER"; + public static final int INVALID_SPLIT_IDX = -1; private static final Log LOG = LogFactory.getLog(InputData.class); http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/8ba2e161/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveColumnarSerdeResolver.java ---------------------------------------------------------------------- diff --git a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveColumnarSerdeResolver.java 
b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveColumnarSerdeResolver.java index 8fd37e0..157f723 100644 --- a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveColumnarSerdeResolver.java +++ b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveColumnarSerdeResolver.java @@ -59,7 +59,6 @@ import static org.apache.hawq.pxf.api.io.DataType.VARCHAR; */ public class HiveColumnarSerdeResolver extends HiveResolver { private static final Log LOG = LogFactory.getLog(HiveColumnarSerdeResolver.class); - //private ColumnarSerDeBase deserializer; private boolean firstColumn; private StringBuilder builder; private StringBuilder parts; http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/8ba2e161/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveMetadataFetcher.java ---------------------------------------------------------------------- diff --git a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveMetadataFetcher.java b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveMetadataFetcher.java index 76e8e18..90943fc 100644 --- a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveMetadataFetcher.java +++ b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveMetadataFetcher.java @@ -21,8 +21,11 @@ package org.apache.hawq.pxf.plugins.hive; import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Set; import org.apache.commons.logging.Log; @@ -32,6 +35,7 @@ import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.Partition; import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.InputFormat; import org.apache.hawq.pxf.api.Metadata; 
@@ -48,6 +52,11 @@ import org.apache.hawq.pxf.service.ProfileFactory; */ public class HiveMetadataFetcher extends MetadataFetcher { + private static final String DELIM_COLLECTION = "DELIM.COLLECTION"; + private static final String DELIM_MAPKEY = "DELIM.MAPKEY"; + private static final String DELIM_LINE = "DELIM.LINE"; + private static final String DELIM_FIELD = InputData.DELIMITER_KEY; + private static final Log LOG = LogFactory.getLog(HiveMetadataFetcher.class); private HiveMetaStoreClient client; private JobConf jobConf; @@ -108,7 +117,16 @@ public class HiveMetadataFetcher extends MetadataFetcher { OutputFormat outputFormat = getOutputFormat(inputFormat); formats.add(outputFormat); } - metadata.setFormats(formats); + metadata.setOutputFormats(formats); + if (tbl != null && tbl.getSd() != null && tbl.getSd().getSerdeInfo() != null) { + Map<String, String> outputParameters = new HashMap<String, String>(); + Map<String, String> serdeParameters = tbl.getSd().getSerdeInfo().getParameters(); + //outputParameters.put(DELIM_COLLECTION, String.valueOf((int) serdeParameters.get(serdeConstants.COLLECTION_DELIM).charAt(0))); + //outputParameters.put(DELIM_MAPKEY, String.valueOf((int) serdeParameters.get(serdeConstants.MAPKEY_DELIM).charAt(0))); + //outputParameters.put(DELIM_LINE, String.valueOf((int) serdeParameters.get(serdeConstants.LINE_DELIM).charAt(0))); + outputParameters.put(DELIM_FIELD, String.valueOf((int) serdeParameters.get(serdeConstants.FIELD_DELIM).charAt(0))); + metadata.setOutputParameters(outputParameters); + } } catch (UnsupportedTypeException | UnsupportedOperationException e) { if(ignoreErrors) { LOG.warn("Metadata fetch for " + tblDesc.toString() + " failed. 
" + e.getMessage()); http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/8ba2e161/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveResolver.java ---------------------------------------------------------------------- diff --git a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveResolver.java b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveResolver.java index b0dc2fb..16b08d7 100644 --- a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveResolver.java +++ b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveResolver.java @@ -134,8 +134,6 @@ public class HiveResolver extends Plugin implements ReadResolver { /* Parses user data string (arrived from fragmenter). */ void parseUserData(InputData input) throws Exception { - final int EXPECTED_NUM_OF_TOKS = 5; - HiveUserData hiveUserData = HiveUtilities.parseHiveUserData(input); serdeClassName = hiveUserData.getSerdeClassName(); @@ -620,10 +618,10 @@ public class HiveResolver extends Plugin implements ReadResolver { */ void parseDelimiterChar(InputData input) { - String userDelim = input.getUserProperty("DELIMITER"); + String userDelim = input.getUserProperty(InputData.DELIMITER_KEY); if (userDelim == null) { - throw new IllegalArgumentException("DELIMITER is a required option"); + throw new IllegalArgumentException(InputData.DELIMITER_KEY + " is a required option"); } final int VALID_LENGTH = 1; http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/8ba2e161/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveStringPassResolver.java ---------------------------------------------------------------------- diff --git a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveStringPassResolver.java b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveStringPassResolver.java index 8c91d47..65bce98 100644 --- a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveStringPassResolver.java +++ 
b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveStringPassResolver.java @@ -23,6 +23,7 @@ package org.apache.hawq.pxf.plugins.hive; import org.apache.hawq.pxf.api.OneField; import org.apache.hawq.pxf.api.OneRow; import org.apache.hawq.pxf.api.OutputFormat; +import org.apache.hawq.pxf.api.UserDataException; import org.apache.hawq.pxf.api.utilities.InputData; import org.apache.hawq.pxf.plugins.hive.utilities.HiveUtilities; import org.apache.hawq.pxf.service.utilities.ProtocolData; @@ -50,6 +51,8 @@ public class HiveStringPassResolver extends HiveResolver { parts = new StringBuilder(); partitionKeys = hiveUserData.getPartitionKeys(); serdeClassName = hiveUserData.getSerdeClassName(); + collectionDelim = input.getUserProperty("COLLECTION_DELIM") == null ? COLLECTION_DELIM : input.getUserProperty("COLLECTION_DELIM"); + mapkeyDelim = input.getUserProperty("MAPKEY_DELIM") == null ? MAPKEY_DELIM : input.getUserProperty("MAPKEY_DELIM"); /* Needed only for BINARY format*/ if (((ProtocolData) inputData).outputFormat() == OutputFormat.BINARY) { @@ -84,15 +87,22 @@ public class HiveStringPassResolver extends HiveResolver { public List<OneField> getFields(OneRow onerow) throws Exception { if (((ProtocolData) inputData).outputFormat() == OutputFormat.TEXT) { String line = (onerow.getData()).toString(); - + String replacedLine = replaceComplexSpecCharacters(line); /* We follow Hive convention. 
Partition fields are always added at the end of the record */ - return Collections.singletonList(new OneField(VARCHAR.getOID(), line + parts)); + return Collections.singletonList(new OneField(VARCHAR.getOID(), replacedLine + parts)); } else { return super.getFields(onerow); } } - void parseDelimiterChar(InputData input) { - delimiter = 44; //, + private String replaceComplexSpecCharacters(String line) throws UserDataException { + HiveUserData hiveUserData = HiveUtilities.parseHiveUserData(inputData); + char collectionDelimChar = (char)Integer.valueOf(hiveUserData.getCollectionDelim()).intValue(); + char mapKeyDelimChar = (char)Integer.valueOf(hiveUserData.getMapKeyDelim()).intValue(); + String replacedLine = line; + replacedLine = line.replace(Character.toString(collectionDelimChar), collectionDelim); + replacedLine = replacedLine.replace(Character.toString(mapKeyDelimChar), mapkeyDelim); + return replacedLine; } + } http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/8ba2e161/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveUserData.java ---------------------------------------------------------------------- diff --git a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveUserData.java b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveUserData.java index 710700a..07159ca 100644 --- a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveUserData.java +++ b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveUserData.java @@ -25,12 +25,17 @@ public class HiveUserData { public HiveUserData(String inputFormatName, String serdeClassName, String propertiesString, String partitionKeys, - boolean filterInFragmenter) { + boolean filterInFragmenter, + String collectionDelim, + String mapKeyDelim) { + this.inputFormatName = inputFormatName; this.serdeClassName = serdeClassName; this.propertiesString = propertiesString; this.partitionKeys = partitionKeys; this.filterInFragmenter = filterInFragmenter; + 
this.collectionDelim = collectionDelim; + this.mapKeyDelim = mapKeyDelim; } public String getInputFormatName() { @@ -53,19 +58,38 @@ public class HiveUserData { return filterInFragmenter; } + public String getCollectionDelim() { + return collectionDelim; + } + + public void setCollectionDelim(String collectionDelim) { + this.collectionDelim = collectionDelim; + } + + public String getMapKeyDelim() { + return mapKeyDelim; + } + + public void setMapKeyDelim(String mapKeyDelim) { + this.mapKeyDelim = mapKeyDelim; + } + private String inputFormatName; private String serdeClassName; private String propertiesString; private String partitionKeys; private boolean filterInFragmenter; + private String collectionDelim; + private String mapKeyDelim; @Override public String toString() { - return inputFormatName + HiveUserData.HIVE_UD_DELIM + return inputFormatName + HiveUserData.HIVE_UD_DELIM + serdeClassName + HiveUserData.HIVE_UD_DELIM + propertiesString + HiveUserData.HIVE_UD_DELIM - + partitionKeys + HiveUserData.HIVE_UD_DELIM - + filterInFragmenter; + + partitionKeys + HiveUserData.HIVE_UD_DELIM + + filterInFragmenter + HiveUserData.HIVE_UD_DELIM + + collectionDelim + HiveUserData.HIVE_UD_DELIM + + mapKeyDelim; } - } http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/8ba2e161/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/utilities/HiveUtilities.java ---------------------------------------------------------------------- diff --git a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/utilities/HiveUtilities.java b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/utilities/HiveUtilities.java index b78c379..f8d12ab 100644 --- a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/utilities/HiveUtilities.java +++ b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/utilities/HiveUtilities.java @@ -36,6 +36,7 @@ import org.apache.hadoop.hive.metastore.TableType; import org.apache.hadoop.hive.metastore.api.FieldSchema; 
import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hawq.pxf.api.Fragmenter; import org.apache.hawq.pxf.api.Metadata; import org.apache.hawq.pxf.api.UnsupportedTypeException; @@ -99,7 +100,7 @@ public class HiveUtilities { static final String STR_RC_FILE_INPUT_FORMAT = "org.apache.hadoop.hive.ql.io.RCFileInputFormat"; static final String STR_TEXT_FILE_INPUT_FORMAT = "org.apache.hadoop.mapred.TextInputFormat"; static final String STR_ORC_FILE_INPUT_FORMAT = "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"; - private static final int EXPECTED_NUM_OF_TOKS = 5; + private static final int EXPECTED_NUM_OF_TOKS = 7; /** * Initializes the HiveMetaStoreClient @@ -451,12 +452,14 @@ public class HiveUtilities { String serdeClassName = partData.storageDesc.getSerdeInfo().getSerializationLib(); String propertiesString = serializeProperties(partData.properties); String partitionKeys = serializePartitionKeys(partData); + String collectionDelim = String.valueOf((int) partData.storageDesc.getSerdeInfo().getParameters().get(serdeConstants.COLLECTION_DELIM).charAt(0)); + String mapKeyDelim = String.valueOf((int) partData.storageDesc.getSerdeInfo().getParameters().get(serdeConstants.MAPKEY_DELIM).charAt(0)); if (HiveInputFormatFragmenter.class.isAssignableFrom(fragmenterClass)) { assertFileType(inputFormatName, partData); } - hiveUserData = new HiveUserData(inputFormatName, serdeClassName, propertiesString, partitionKeys, filterInFragmenter); + hiveUserData = new HiveUserData(inputFormatName, serdeClassName, propertiesString, partitionKeys, filterInFragmenter, collectionDelim, mapKeyDelim); return hiveUserData.toString().getBytes(); } @@ -470,7 +473,7 @@ public class HiveUtilities { + EXPECTED_NUM_OF_TOKS + " tokens, but got " + toks.length); } - HiveUserData hiveUserData = new HiveUserData(toks[0], toks[1], toks[2], toks[3], Boolean.valueOf(toks[4])); + 
HiveUserData hiveUserData = new HiveUserData(toks[0], toks[1], toks[2], toks[3], Boolean.valueOf(toks[4]), toks[5], toks[6]); if (supportedSerdes.length > 0) { /* Make sure this serde is supported */ http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/8ba2e161/pxf/pxf-hive/src/test/java/org/apache/hawq/pxf/plugins/hive/HiveORCAccessorTest.java ---------------------------------------------------------------------- diff --git a/pxf/pxf-hive/src/test/java/org/apache/hawq/pxf/plugins/hive/HiveORCAccessorTest.java b/pxf/pxf-hive/src/test/java/org/apache/hawq/pxf/plugins/hive/HiveORCAccessorTest.java index 1d90d01..30233a4 100644 --- a/pxf/pxf-hive/src/test/java/org/apache/hawq/pxf/plugins/hive/HiveORCAccessorTest.java +++ b/pxf/pxf-hive/src/test/java/org/apache/hawq/pxf/plugins/hive/HiveORCAccessorTest.java @@ -64,7 +64,7 @@ public class HiveORCAccessorTest { PowerMockito.whenNew(JobConf.class).withAnyArguments().thenReturn(jobConf); PowerMockito.mockStatic(HiveUtilities.class); - PowerMockito.when(HiveUtilities.parseHiveUserData(any(InputData.class), any(PXF_HIVE_SERDES[].class))).thenReturn(new HiveUserData("", "", null, HiveDataFragmenter.HIVE_NO_PART_TBL, true)); + PowerMockito.when(HiveUtilities.parseHiveUserData(any(InputData.class), any(PXF_HIVE_SERDES[].class))).thenReturn(new HiveUserData("", "", null, HiveDataFragmenter.HIVE_NO_PART_TBL, true, "2", "3")); PowerMockito.mockStatic(HdfsUtilities.class); http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/8ba2e161/pxf/pxf-service/src/main/java/org/apache/hawq/pxf/service/ProfileFactory.java ---------------------------------------------------------------------- diff --git a/pxf/pxf-service/src/main/java/org/apache/hawq/pxf/service/ProfileFactory.java b/pxf/pxf-service/src/main/java/org/apache/hawq/pxf/service/ProfileFactory.java index d053760..092f89e 100644 --- a/pxf/pxf-service/src/main/java/org/apache/hawq/pxf/service/ProfileFactory.java +++ 
b/pxf/pxf-service/src/main/java/org/apache/hawq/pxf/service/ProfileFactory.java @@ -23,6 +23,7 @@ import org.apache.hadoop.hive.ql.io.RCFileInputFormat; import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.mapred.TextInputFormat; +import org.apache.hawq.pxf.api.Metadata; public class ProfileFactory { http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/8ba2e161/src/backend/catalog/external/externalmd.c ---------------------------------------------------------------------- diff --git a/src/backend/catalog/external/externalmd.c b/src/backend/catalog/external/externalmd.c index c6cfb69..4150f42 100644 --- a/src/backend/catalog/external/externalmd.c +++ b/src/backend/catalog/external/externalmd.c @@ -58,7 +58,7 @@ static void LoadExtTable(Oid relid, PxfItem *pxfItem); static void LoadColumns(Oid relid, List *columns); static int ComputeTypeMod(Oid typeOid, const char *colname, int *typemod, int nTypeMod); static Datum GetFormatTypeForProfile(const List *outputFormats); -static Datum GetFormatOptionsForProfile(const List *outputFormats); +static Datum GetFormatOptionsForProfile(const List *outputFormats, int delimiter); const int maxNumTypeModifiers = 2; /* @@ -126,7 +126,6 @@ static PxfItem *ParsePxfItem(struct json_object *pxfMD, char* profile) ereport(ERROR, (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), errmsg("Could not parse PXF item, expected not null value for attribute \"name\""))); - pxfItem->profile = profile; pxfItem->path = pstrdup(json_object_get_string(itemPath)); pxfItem->name = pstrdup(json_object_get_string(itemName)); @@ -146,6 +145,14 @@ static PxfItem *ParsePxfItem(struct json_object *pxfMD, char* profile) } } + /* parse delimiter */ + struct json_object *jsonOutputParameters = json_object_object_get(pxfMD, "outputParameters"); + if (NULL != jsonOutputParameters) + { + struct json_object *outputParameterDelimiter = json_object_object_get(jsonOutputParameters, "DELIMITER"); + 
pxfItem->delimiter = atoi(pstrdup(json_object_get_string(outputParameterDelimiter))); + } + elog(DEBUG1, "Parsed item %s, namespace %s", itemName, itemPath); /* parse columns */ @@ -466,8 +473,8 @@ static void LoadExtTable(Oid relid, PxfItem *pxfItem) * pxf://<ip:port/namaservice>/<hive db>.<hive table>?Profile=Hive */ StringInfoData locationStr; initStringInfo(&locationStr); - appendStringInfo(&locationStr, "pxf://%s/%s.%s?Profile=%s", - pxf_service_address, pxfItem->path, pxfItem->name, pxfItem->profile); + appendStringInfo(&locationStr, "pxf://%s/%s.%s?Profile=%s&delimiter=%cx%02x", + pxf_service_address, pxfItem->path, pxfItem->name, pxfItem->profile, '\\', pxfItem->delimiter); Size len = VARHDRSZ + locationStr.len; /* +1 leaves room for sprintf's trailing null */ text *t = (text *) palloc(len + 1); @@ -484,7 +491,7 @@ static void LoadExtTable(Oid relid, PxfItem *pxfItem) values[Anum_pg_exttable_reloid - 1] = ObjectIdGetDatum(relid); values[Anum_pg_exttable_location - 1] = location; values[Anum_pg_exttable_fmttype - 1] = GetFormatTypeForProfile(pxfItem->outputFormats); - values[Anum_pg_exttable_fmtopts - 1] = GetFormatOptionsForProfile(pxfItem->outputFormats); + values[Anum_pg_exttable_fmtopts - 1] = GetFormatOptionsForProfile(pxfItem->outputFormats, pxfItem->delimiter); nulls[Anum_pg_exttable_command - 1] = true; nulls[Anum_pg_exttable_rejectlimit - 1] = true; nulls[Anum_pg_exttable_rejectlimittype - 1] = true; @@ -653,13 +660,18 @@ static Datum GetFormatTypeForProfile(const List *outputFormats) } } -static Datum GetFormatOptionsForProfile(const List *outputFormats) +static Datum GetFormatOptionsForProfile(const List *outputFormats, int delimiter) { StringInfoData formatStr; initStringInfo(&formatStr); + /* "delimiter 'delimiter' null '\\N' escape '\\'"*/ + char formatArr[35] = { 0x64, 0x65, 0x6c, 0x69, 0x6d, 0x69, 0x74, 0x65, + 0x72, 0x20, 0x27, delimiter, 0x27, 0x20, 0x6e, 0x75, 0x6c, 0x6c, + 0x20, 0x27, 0x5c, 0x4e, 0x27, 0x20, 0x65, 0x73, 0x63, 0x61, 0x70, 
+ 0x65, 0x20, 0x27, 0x5c, 0x27, 0x00 }; if (list_length(outputFormats) == 1 && strcmp(lfirst(list_head(outputFormats)),"TEXT") == 0) { - appendStringInfo(&formatStr, "delimiter '\x2C' null '\\N' escape '\\'"); + appendStringInfo(&formatStr, "%s", formatArr); } else { appendStringInfo(&formatStr, "formatter 'pxfwritable_import'"); } http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/8ba2e161/src/include/catalog/external/itemmd.h ---------------------------------------------------------------------- diff --git a/src/include/catalog/external/itemmd.h b/src/include/catalog/external/itemmd.h index 5717a53..d9f8721 100644 --- a/src/include/catalog/external/itemmd.h +++ b/src/include/catalog/external/itemmd.h @@ -70,6 +70,8 @@ typedef struct PxfItem /* output formats*/ List *outputFormats; + + int delimiter; } PxfItem;
