Author: travis
Date: Wed Jul 18 21:21:24 2012
New Revision: 1363122
URL: http://svn.apache.org/viewvc?rev=1363122&view=rev
Log:
HCATALOG-440 : pig field names for arrays should be configurable
Added:
incubator/hcatalog/trunk/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestPigHCatUtil.java
Modified:
incubator/hcatalog/trunk/CHANGES.txt
incubator/hcatalog/trunk/hcatalog-pig-adapter/ivy.xml
incubator/hcatalog/trunk/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/PigHCatUtil.java
incubator/hcatalog/trunk/src/java/org/apache/hcatalog/common/HCatConstants.java
Modified: incubator/hcatalog/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/incubator/hcatalog/trunk/CHANGES.txt?rev=1363122&r1=1363121&r2=1363122&view=diff
==============================================================================
--- incubator/hcatalog/trunk/CHANGES.txt (original)
+++ incubator/hcatalog/trunk/CHANGES.txt Wed Jul 18 21:21:24 2012
@@ -26,6 +26,8 @@ Trunk (unreleased changes)
HCAT-328 HCatLoader should report its input size so pig can estimate the
number of reducers (traviscrawford via gates)
IMPROVEMENTS
+ HCAT-440 pig field names for arrays should be configurable (traviscrawford)
+
HCAT-434 Package HCatalog pig support as a separate jar (traviscrawford)
HCAT-341 InitializeInput improvements (traviscrawford)
Modified: incubator/hcatalog/trunk/hcatalog-pig-adapter/ivy.xml
URL:
http://svn.apache.org/viewvc/incubator/hcatalog/trunk/hcatalog-pig-adapter/ivy.xml?rev=1363122&r1=1363121&r2=1363122&view=diff
==============================================================================
--- incubator/hcatalog/trunk/hcatalog-pig-adapter/ivy.xml (original)
+++ incubator/hcatalog/trunk/hcatalog-pig-adapter/ivy.xml Wed Jul 18 21:21:24
2012
@@ -38,7 +38,11 @@
<dependency org="org.slf4j" name="slf4j-log4j12" rev="${slf4j.version}"/>
<!-- Test dependencies -->
+ <dependency org="org.apache.hive" name="hive-builtins"
+ rev="${hive.version}" conf="test->default"/>
<dependency org="org.apache.hive" name="hive-cli"
rev="${hive.version}" conf="test->default"/>
+ <dependency org="org.apache.commons" name="commons-compress"
+ rev="${commons-compress.version}" conf="test->default"/>
</dependencies>
</ivy-module>
Modified:
incubator/hcatalog/trunk/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/PigHCatUtil.java
URL:
http://svn.apache.org/viewvc/incubator/hcatalog/trunk/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/PigHCatUtil.java?rev=1363122&r1=1363121&r2=1363122&view=diff
==============================================================================
---
incubator/hcatalog/trunk/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/PigHCatUtil.java
(original)
+++
incubator/hcatalog/trunk/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/PigHCatUtil.java
Wed Jul 18 21:21:24 2012
@@ -194,14 +194,27 @@ public class PigHCatUtil {
return rfSchema;
}
- private static ResourceSchema getBagSubSchema(HCatFieldSchema hfs) throws
IOException {
+ protected static ResourceSchema getBagSubSchema(HCatFieldSchema hfs) throws
IOException {
// there are two cases - array<Type> and array<struct<...>>
// in either case the element type of the array is represented in a
// tuple field schema in the bag's field schema - the second case (struct)
// more naturally translates to the tuple - in the first case (array<Type>)
// we simulate the tuple by putting the single field in a tuple
+
+ Properties props = UDFContext.getUDFContext().getClientSystemProps();
+ String innerTupleName = HCatConstants.HCAT_PIG_INNER_TUPLE_NAME_DEFAULT;
+ if (props != null &&
props.containsKey(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME)) {
+ innerTupleName =
props.getProperty(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME)
+ .replaceAll("FIELDNAME", hfs.getName());
+ }
+ String innerFieldName = HCatConstants.HCAT_PIG_INNER_FIELD_NAME_DEFAULT;
+ if (props != null &&
props.containsKey(HCatConstants.HCAT_PIG_INNER_FIELD_NAME)) {
+ innerFieldName =
props.getProperty(HCatConstants.HCAT_PIG_INNER_FIELD_NAME)
+ .replaceAll("FIELDNAME", hfs.getName());
+ }
+
ResourceFieldSchema[] bagSubFieldSchemas = new ResourceFieldSchema[1];
- bagSubFieldSchemas[0] = new ResourceFieldSchema().setName("innertuple")
+ bagSubFieldSchemas[0] = new ResourceFieldSchema().setName(innerTupleName)
.setDescription("The tuple in the bag")
.setType(DataType.TUPLE);
HCatFieldSchema arrayElementFieldSchema =
hfs.getArrayElementSchema().get(0);
@@ -214,7 +227,7 @@ public class PigHCatUtil {
bagSubFieldSchemas[0].setSchema(s);
} else {
ResourceFieldSchema[] innerTupleFieldSchemas = new
ResourceFieldSchema[1];
- innerTupleFieldSchemas[0] = new
ResourceFieldSchema().setName("innerfield")
+ innerTupleFieldSchemas[0] = new
ResourceFieldSchema().setName(innerFieldName)
.setDescription("The inner field in the tuple in the bag")
.setType(getPigType(arrayElementFieldSchema))
.setSchema(null); // the element type is not a tuple - so no subschema
Added:
incubator/hcatalog/trunk/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestPigHCatUtil.java
URL:
http://svn.apache.org/viewvc/incubator/hcatalog/trunk/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestPigHCatUtil.java?rev=1363122&view=auto
==============================================================================
---
incubator/hcatalog/trunk/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestPigHCatUtil.java
(added)
+++
incubator/hcatalog/trunk/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestPigHCatUtil.java
Wed Jul 18 21:21:24 2012
@@ -0,0 +1,72 @@
+package org.apache.hcatalog.pig;
+
+import com.google.common.collect.Lists;
+import junit.framework.Assert;
+import org.apache.hcatalog.common.HCatConstants;
+import org.apache.hcatalog.data.schema.HCatFieldSchema;
+import org.apache.hcatalog.data.schema.HCatSchema;
+import org.apache.pig.ResourceSchema;
+import org.apache.pig.ResourceSchema.ResourceFieldSchema;
+import org.apache.pig.data.DataType;
+import org.apache.pig.impl.util.UDFContext;
+import org.junit.Test;
+
+public class TestPigHCatUtil {
+
+ @Test
+ public void testGetBagSubSchema() throws Exception {
+
+ // Define the expected schema.
+ ResourceFieldSchema[] bagSubFieldSchemas = new ResourceFieldSchema[1];
+ bagSubFieldSchemas[0] = new ResourceFieldSchema().setName("innertuple")
+ .setDescription("The tuple in the bag").setType(DataType.TUPLE);
+
+ ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1];
+ innerTupleFieldSchemas[0] =
+ new
ResourceFieldSchema().setName("innerfield").setType(DataType.CHARARRAY);
+
+ bagSubFieldSchemas[0].setSchema(new
ResourceSchema().setFields(innerTupleFieldSchemas));
+ ResourceSchema expected = new
ResourceSchema().setFields(bagSubFieldSchemas);
+
+ // Get the actual converted schema.
+ HCatSchema hCatSchema = new HCatSchema(Lists.newArrayList(
+ new HCatFieldSchema("innerLlama", HCatFieldSchema.Type.STRING, null)));
+ HCatFieldSchema hCatFieldSchema =
+ new HCatFieldSchema("llama", HCatFieldSchema.Type.ARRAY, hCatSchema,
null);
+ ResourceSchema actual = PigHCatUtil.getBagSubSchema(hCatFieldSchema);
+
+ Assert.assertEquals(expected.toString(), actual.toString());
+ }
+
+ @Test
+ public void testGetBagSubSchemaConfigured() throws Exception {
+
+ // NOTE: pig-0.8 sets client system properties by actually getting the
client
+ // system properties. Starting in pig-0.9 you must pass the properties in.
+ // When updating our pig dependency this will need to be updated.
+ System.setProperty(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME, "t");
+ System.setProperty(HCatConstants.HCAT_PIG_INNER_FIELD_NAME,
"FIELDNAME_tuple");
+ UDFContext.getUDFContext().setClientSystemProps();
+
+ // Define the expected schema.
+ ResourceFieldSchema[] bagSubFieldSchemas = new ResourceFieldSchema[1];
+ bagSubFieldSchemas[0] = new ResourceFieldSchema().setName("t")
+ .setDescription("The tuple in the bag").setType(DataType.TUPLE);
+
+ ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1];
+ innerTupleFieldSchemas[0] =
+ new
ResourceFieldSchema().setName("llama_tuple").setType(DataType.CHARARRAY);
+
+ bagSubFieldSchemas[0].setSchema(new
ResourceSchema().setFields(innerTupleFieldSchemas));
+ ResourceSchema expected = new
ResourceSchema().setFields(bagSubFieldSchemas);
+
+ // Get the actual converted schema.
+ HCatSchema actualHCatSchema = new HCatSchema(Lists.newArrayList(
+ new HCatFieldSchema("innerLlama", HCatFieldSchema.Type.STRING, null)));
+ HCatFieldSchema actualHCatFieldSchema =
+ new HCatFieldSchema("llama", HCatFieldSchema.Type.ARRAY,
actualHCatSchema, null);
+ ResourceSchema actual = PigHCatUtil.getBagSubSchema(actualHCatFieldSchema);
+
+ Assert.assertEquals(expected.toString(), actual.toString());
+ }
+}
Modified:
incubator/hcatalog/trunk/src/java/org/apache/hcatalog/common/HCatConstants.java
URL:
http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/java/org/apache/hcatalog/common/HCatConstants.java?rev=1363122&r1=1363121&r2=1363122&view=diff
==============================================================================
---
incubator/hcatalog/trunk/src/java/org/apache/hcatalog/common/HCatConstants.java
(original)
+++
incubator/hcatalog/trunk/src/java/org/apache/hcatalog/common/HCatConstants.java
Wed Jul 18 21:21:24 2012
@@ -38,6 +38,10 @@ public final class HCatConstants {
public static final String HCAT_PIG_ARGS_DELIMIT = "hcat.pig.args.delimiter";
public static final String HCAT_PIG_ARGS_DELIMIT_DEFAULT = ",";
public static final String HCAT_PIG_STORER_LOCATION_SET = HCAT_PIG_STORER +
".location.set" ;
+ public static final String HCAT_PIG_INNER_TUPLE_NAME =
"hcat.pig.inner.tuple.name";
+ public static final String HCAT_PIG_INNER_TUPLE_NAME_DEFAULT = "innertuple";
+ public static final String HCAT_PIG_INNER_FIELD_NAME =
"hcat.pig.inner.field.name";
+ public static final String HCAT_PIG_INNER_FIELD_NAME_DEFAULT = "innerfield";
//The keys used to store info into the job Configuration
public static final String HCAT_KEY_BASE = "mapreduce.lib.hcat";