Added: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/dynamic_type/thrift_grammar.jjt URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/dynamic_type/thrift_grammar.jjt?rev=706704&view=auto ============================================================================== --- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/dynamic_type/thrift_grammar.jjt (added) +++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/dynamic_type/thrift_grammar.jjt Tue Oct 21 11:11:05 2008 @@ -0,0 +1,866 @@ +options { + MULTI=true; + STATIC = false; + NODE_PREFIX = "DynamicSerDe"; +} + + +PARSER_BEGIN(thrift_grammar) + +package org.apache.hadoop.hive.serde2.dynamic_type; + +import java.util.*; +import java.io.*; +import java.net.*; +import com.facebook.thrift.protocol.*; +import com.facebook.thrift.transport.*; +import org.apache.hadoop.hive.serde2.dynamic_type.*; + +public class thrift_grammar { + + private List<String> include_path = null; + + // for computing the autogenerated field ids in thrift + private int field_val; + + // store types and tables + // separately because one cannot use a table (ie service.method) as a Struct like type. + protected Map<String,DynamicSerDeSimpleNode> types; + protected Map<String,DynamicSerDeSimpleNode> tables; + + // system include path + final private static String default_include_path[] = { "/usr/local/include","/usr/include","/usr/local/include/thrift/if","/usr/local/include/fb303/if" }; + + // need three params to differentiate between this and 2 param method auto generated since + // some calls in the autogenerated code use null param for 2nd param and thus ambiguous. 
+ protected thrift_grammar(InputStream is, List<String> include_path, boolean junk) { + this(is,null); + this.types = new HashMap<String,DynamicSerDeSimpleNode> () ; + this.tables = new HashMap<String,DynamicSerDeSimpleNode> () ; + this.include_path = include_path; + this.field_val = -1; + } + + // find the file on the include path + private static File findFile(String fname, List<String> include_path) { + for(String path: include_path) { + final String full = path + "/" + fname; + File f = new File(full); + if(f.exists()) { + return f; + } + } + return null; + } + + public static void main(String args[]) { + String filename = null; + List<String> include_path = new ArrayList<String>(); + + for(String path: default_include_path) { + include_path.add(path); + } + for(int i = 0; i < args.length; i++) { + String arg = args[i]; + if(arg.equals("--include") && i + 1 < args.length) { + include_path.add(args[++i]); + } + if(arg.equals("--file") && i + 1 < args.length) { + filename = args[++i]; + } + } + + InputStream is = System.in; + if(filename != null) { + try { + is = new FileInputStream(findFile(filename, include_path)); + } catch(IOException e) { + } + } + thrift_grammar t = new thrift_grammar(is,include_path,false); + + try { + t.Start(); + } catch (Exception e) { + System.out.println("Parse error."); + System.out.println(e.getMessage()); + e.printStackTrace(); + } + } +} + +PARSER_END(thrift_grammar) + + + +SKIP : +{ + " " +| "\t" +| "\n" +| "\r" +| <"#"(~["\n"])* ("\n"|"\r"|"\r\n")> +| <"//" (~["\n","\r"])* ("\n"|"\r"|"\r\n")> +| <"/*" (~["*"])* "*" (~["/"] (~["*"])* "*")* "/"> +} + + +/** + * HELPER DEFINITIONS, COMMENTS, CONSTANTS, AND WHATNOT + */ + +TOKEN: +{ +<tok_const: "const">| +<tok_namespace : "namespace"> | +<tok_cpp_namespace: "cpp_namespace">| +<tok_cpp_include : "cpp_include">| +<tok_cpp_type: "cpp_type">| +<tok_java_package : "java_package">| +<tok_cocoa_prefix: "cocoa_prefix">| +<tok_csharp_namespace: "csharp_namespace">| +<tok_php_namespace: 
"php_namespace">| +<tok_py_module: "py_module">| +<tok_perl_package: "perl_package">| +<tok_ruby_namespace: "ruby_namespace">| +<tok_smalltalk_category: "smalltalk_category">| +<tok_smalltalk_prefix: "smalltalk_prefix">| +<tok_xsd_all: "xsd_all">| +<tok_xsd_optional: "xsd_optional">| +<tok_xsd_nillable: "xsd_nillable">| +<tok_xsd_namespace: "xsd_namespace">| +<tok_xsd_attrs: "xsd_attrs">| +<tok_include : "include">| +<tok_void : "void">| +<tok_bool : "bool">| +<tok_byte: "byte">| +<tok_i16: "i16">| +<tok_i32: "i32">| +<tok_i64: "i64">| +<tok_double: "double">| +<tok_string: "string">| +<tok_slist : "slist">| +<tok_senum: "senum">| +<tok_map: "map"> | +<tok_list: "list"> | +<tok_set: "set"> | +<tok_async: "async"> | +<tok_typedef: "typedef"> | +<tok_struct: "struct"> | +<tok_exception: "exception"> | +<tok_extends: "extends"> | +<tok_throws: "throws"> | +<tok_service: "service"> | +<tok_enum: "enum"> | +<tok_required: "required"> | +<tok_optional: "optional"> +} + +TOKEN: { + +<tok_int_constant : (["+","-"])?(["0"-"9"])+> +| +<tok_double_constant: ["+","-"](<DIGIT>)*"."(<DIGIT>)+(["e","E"](["+","-"])?(<DIGIT>)+)?> +| +<IDENTIFIER: <LETTER>(<LETTER>|<DIGIT>|"."|"_")*> +| +<#LETTER: (["a"-"z", "A"-"Z" ]) > +| +<#DIGIT: ["0"-"9"] > +| +<tok_literal: "\""(~["\""])*"\""|"'"(~["'"])*"'"> +| +<tok_st_identifier: ["a"-"z","A"-"Z","-"]([".","a"-"z","A"-"Z","_","0"-"9","-"])*> +} + + +SimpleNode Start() : {} +{ + HeaderList() (Definition())+ + { + return jjtThis; + } +} + +SimpleNode HeaderList() : {} +{ + (Header())* + { + return jjtThis; + } + +} + +SimpleNode Header() : {} +{ + Include() + { + return jjtThis; + } +| Namespace() + { + return jjtThis; + } +} + +SimpleNode Namespace() : {} +{ + <tok_namespace> <IDENTIFIER> <IDENTIFIER> +{ + return jjtThis; +} +| +<tok_cpp_namespace> <IDENTIFIER> +{ + return jjtThis; +} +| +<tok_cpp_include> <tok_literal> +{ + return jjtThis; +} +| +<tok_php_namespace> <IDENTIFIER> +{ + return jjtThis; +} +| +<tok_py_module> <IDENTIFIER> +{ + 
return jjtThis; +} +| +<tok_perl_package> <IDENTIFIER> +{ + return jjtThis; +} +| +<tok_ruby_namespace> <IDENTIFIER> +{ + return jjtThis; +} +| +<tok_smalltalk_category> <tok_st_identifier> +{ + return jjtThis; +} +| +<tok_smalltalk_prefix> <IDENTIFIER> +{ + return jjtThis; +} +| +<tok_java_package> <IDENTIFIER> +{ + return jjtThis; +} +| +<tok_cocoa_prefix> <IDENTIFIER> +{ + return jjtThis; +} +| +<tok_xsd_namespace> <tok_literal> +{ + return jjtThis; +} +| +<tok_csharp_namespace> <IDENTIFIER> +{ + return jjtThis; +} +} + + +SimpleNode Include() : { + String fname; + boolean found = false; +} +{ + <tok_include> + fname=<tok_literal>.image +{ + // bugbug somewhat fragile below substring expression + fname = fname.substring(1,fname.length() - 1); + + // try to find the file on the include path + File f = thrift_grammar.findFile(fname, this.include_path); + if(f != null) { + found = true; + try { + FileInputStream fis = new FileInputStream(f); + thrift_grammar t = new thrift_grammar(fis,this.include_path, false); + t.Start(); + fis.close(); + found = true; + // add in what we found to our type and table tables. 
+ this.tables.putAll(t.tables); + this.types.putAll(t.types); + } catch (Exception e) { + System.out.println("File: " + fname + " - Oops."); + System.out.println(e.getMessage()); + e.printStackTrace(); + } + } + if(!found) { + throw new RuntimeException("include file not found: " + fname); + } + return jjtThis; +} +} + + +SimpleNode Definition() : {} +{ + Const() + { + return jjtThis; + } +| Service() + { + return jjtThis; + } +| TypeDefinition() + { + return jjtThis; + } +} + +SimpleNode TypeDefinition() : {} +{ + Typedef() + { + return jjtThis; + } +| Enum() + { + return jjtThis; + } +| Senum() + { + return jjtThis; + } +| Struct() + { + return jjtThis; + } +| Xception() + { + return jjtThis; + } + +} + +DynamicSerDeTypedef Typedef() : {} +{ + <tok_typedef> + DefinitionType() + jjtThis.name = <IDENTIFIER>.image + { + // store the type for later retrieval + this.types.put(jjtThis.name, jjtThis); + return jjtThis; + } +} + + +// returning void because we ignore this production. +void CommaOrSemicolon() : {} +{ + "," +| + ";" +{ +} +} + +SimpleNode Enum() : {} +{ + <tok_enum> <IDENTIFIER> "{" EnumDefList() "}" + { + return jjtThis; + } +} + +SimpleNode EnumDefList() : {} +{ + (EnumDef())+ + { + return jjtThis; + } +} + +SimpleNode EnumDef() : {} +{ + <IDENTIFIER> ["=" <tok_int_constant>] [CommaOrSemicolon()] + { + return jjtThis; + } +} + +SimpleNode Senum() : {} +{ + <tok_senum> <IDENTIFIER> "{" SenumDefList() "}" + { + return jjtThis; + } +} + +SimpleNode SenumDefList() : {} +{ + (SenumDef())+ + { + return jjtThis; + } +} + +SimpleNode SenumDef() : {} +{ + <tok_literal> [CommaOrSemicolon()] + { + return jjtThis; + } +} + + +SimpleNode Const() : {} +{ + <tok_const> FieldType() <IDENTIFIER> "=" ConstValue() [CommaOrSemicolon()] + { + return jjtThis; + } +} + +SimpleNode ConstValue() : {} +{ + <tok_int_constant> + { + } +| <tok_double_constant> + { + } +| <tok_literal> + { + } +| <IDENTIFIER> + { + } +| ConstList() + { + } +| ConstMap() + { + return jjtThis; + } +} + 
+SimpleNode ConstList() : {} +{ + "[" ConstListContents() "]" + { + return jjtThis; + } +} + +SimpleNode ConstListContents() : {} +{ + (ConstValue() [CommaOrSemicolon()])+ + { + return jjtThis; + } +} + +SimpleNode ConstMap() : {} +{ + "{" ConstMapContents() "}" + { + return jjtThis; + } +} + +SimpleNode ConstMapContents() : {} +{ + (ConstValue() ":" ConstValue() [CommaOrSemicolon()])+ + { + } +| + { + return jjtThis; + } +} + +DynamicSerDeStruct Struct() : { + +} +{ + <tok_struct> + jjtThis.name = <IDENTIFIER>.image + "{" + FieldList() + "}" + { + this.types.put(jjtThis.name,jjtThis); + return jjtThis; + } +} + + +SimpleNode Xception() : {} +{ + <tok_exception> <IDENTIFIER> "{" FieldList() "}" + { + return jjtThis; + } +} + + +SimpleNode Service() : {} +{ + <tok_service> + <IDENTIFIER> + Extends() + "{" + FlagArgs() + (Function())+ + UnflagArgs() + "}" + { + // at some point, these should be inserted as a "db" + return jjtThis; + } +} + +SimpleNode FlagArgs() : {} +{ + { + return jjtThis; + } +} + +SimpleNode UnflagArgs() : {} +{ + { + return jjtThis; + } +} + +SimpleNode Extends() : {} +{ + <tok_extends> <IDENTIFIER> + { + return jjtThis; + } +| + { + return jjtThis; + } +} + + +DynamicSerDeFunction Function() : {} +{ + // metastore ignores async and type + Async() + FunctionType() + + // the name of the function/table + jjtThis.name = <IDENTIFIER>.image + "(" + FieldList() + ")" + Throws() + [CommaOrSemicolon()] + + { + this.tables.put(jjtThis.name, jjtThis); + return jjtThis; + } +} + +void Async() : {} +{ + <tok_async> +| +{} +} + +void Throws() : {} +{ + <tok_throws> "(" FieldList() ")" +| +{} +} + + +// nothing special - just use the DynamicSerDeFieldList's children methods to access the fields +DynamicSerDeFieldList FieldList() : { + this.field_val = -1; +} +{ + (Field())* { + return jjtThis; + } +} + + +DynamicSerDeField Field() : { + + String fidnum = ""; + String fid; +} +{ + + // parse the field id which is optional + [fidnum=<tok_int_constant>.image 
":"] + + // is this field required or optional? default is optional + FieldRequiredness() + + // field type - obviously not optional + FieldType() + + // the name of the field - not optional + jjtThis.name = <IDENTIFIER>.image + + // does it have = some value? + FieldValue() + + // take it or leave it + [CommaOrSemicolon()] + + { + if(fidnum.length() > 0) { + int fidInt = Integer.valueOf(fidnum); + jjtThis.fieldid = fidInt; + } else { + jjtThis.fieldid = this.field_val--; + } + return jjtThis; + } +} + + + +SimpleNode FieldRequiredness() : {} +{ + <tok_required> + { + return jjtThis; + } +| <tok_optional> + { + return jjtThis; + } +| + { + return jjtThis; + } +} + +SimpleNode FieldValue() : {} +{ + "=" + ConstValue() + { + return jjtThis; + } +| +{ + return jjtThis; +} +} + +SimpleNode DefinitionType() : {} +{ +// BaseType() xxx + TypeString() + { + return jjtThis; + } +| TypeBool() + { + return jjtThis; + } +| Typei16() + { + return jjtThis; + } +| Typei32() + { + return jjtThis; + } +| Typei64() + { + return jjtThis; + } +| TypeDouble() + { + return jjtThis; + } +| TypeMap() + { + return jjtThis; + } +| TypeSet() + { + return jjtThis; + } +| TypeList() + { + return jjtThis; + } +} + +void FunctionType() : {} +{ + FieldType() +| <tok_void> +{} +} + +DynamicSerDeFieldType FieldType() : { +} + +{ + TypeString() + { + return jjtThis; + } +| TypeBool() + { + return jjtThis; + } +| Typei16() + { + return jjtThis; + } +| Typei32() + { + return jjtThis; + } +| Typei64() + { + return jjtThis; + } +| TypeDouble() + { + return jjtThis; + } +| + TypeMap() + { + return jjtThis; + } +| + TypeSet() + { + return jjtThis; + } +| + TypeList() + { + return jjtThis; + } +| + jjtThis.name = <IDENTIFIER>.image + { + return jjtThis; + } +} + +DynamicSerDeTypeString TypeString() : {} +{ + <tok_string> + { + return jjtThis; + } +} + +DynamicSerDeTypeByte TypeByte() : { +} +{ + <tok_byte> + { + return jjtThis; + } +} + +DynamicSerDeTypei16 Typei16() : { +} +{ + <tok_i16> + { + return 
jjtThis; + } +} + +DynamicSerDeTypei32 Typei32() : {} +{ + <tok_i32> + { + return jjtThis; + } +} + +DynamicSerDeTypei64 Typei64() : {} +{ + <tok_i64> + { + return jjtThis; + } +} + +DynamicSerDeTypeDouble TypeDouble() : {} +{ + <tok_double> + { + return jjtThis; + } +} + +DynamicSerDeTypeBool TypeBool() : {} +{ + <tok_bool> + { + return jjtThis; + } +} + +DynamicSerDeTypeMap TypeMap() : {} +{ + <tok_map> + "<" + FieldType() + "," + FieldType() + ">" + { + return jjtThis; + } +} + +DynamicSerDeTypeSet TypeSet() : {} +{ + <tok_set> + "<" + + FieldType() + + ">" + { + return jjtThis; + } +} + +DynamicSerDeTypeList TypeList() : {} +{ + <tok_list> + "<" + + FieldType() + + ">" + { + return jjtThis; + } +}
Modified: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ReflectionStructObjectInspector.java URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ReflectionStructObjectInspector.java?rev=706704&r1=706703&r2=706704&view=diff ============================================================================== --- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ReflectionStructObjectInspector.java (original) +++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ReflectionStructObjectInspector.java Tue Oct 21 11:11:05 2008 @@ -116,11 +116,16 @@ if (data == null) { return null; } + if (!(fieldRef instanceof MyField)) { + throw new RuntimeException("fieldRef has to be of MyField"); + } + MyField f = (MyField) fieldRef; try { - MyField f = (MyField) fieldRef; - return f.field.get(data); + Object r = f.field.get(data); + return r; } catch (Exception e) { - throw new RuntimeException(e); + throw new RuntimeException("cannot get field " + f.field + " from " + + data.getClass() + " " + data); } } public List<Object> getStructFieldsDataAsList(Object data) { Added: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/thrift/ConfigurableTProtocol.java URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/thrift/ConfigurableTProtocol.java?rev=706704&view=auto ============================================================================== --- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/thrift/ConfigurableTProtocol.java (added) +++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/thrift/ConfigurableTProtocol.java Tue Oct 21 11:11:05 2008 @@ -0,0 +1,40 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one 
+ * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.serde2.thrift; + +import org.apache.hadoop.conf.Configuration; +import java.util.Properties; +import com.facebook.thrift.TException; + +/** + * An interface for TProtocols that need to have properties passed in to + * initialize them. e.g., separators for TCTLSeparatedProtocol. + * If there was a regex like deserializer, the regex could be passed in + * in this manner. 
+ */ +public interface ConfigurableTProtocol { + /** + * Initialize the TProtocol + * @param conf System properties + * @param tbl table properties + * @throws TException + */ + public void initialize(Configuration conf, Properties tbl) throws TException; + +} Added: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TCTLSeparatedProtocol.java URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TCTLSeparatedProtocol.java?rev=706704&view=auto ============================================================================== --- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TCTLSeparatedProtocol.java (added) +++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TCTLSeparatedProtocol.java Tue Oct 21 11:11:05 2008 @@ -0,0 +1,576 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +package org.apache.hadoop.hive.serde2.thrift; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.serde.Constants; +import com.facebook.thrift.TException; +import com.facebook.thrift.transport.*; +import com.facebook.thrift.*; +import com.facebook.thrift.protocol.*; +import java.util.*; +import java.util.regex.Pattern; +import java.io.*; +import org.apache.hadoop.conf.Configuration; +import java.util.Properties; + +/** + * + * An implementation of the Thrift Protocol for ctl separated + * records. + * This is not thrift compliant in that it doesn't write out field ids + * so things cannot actually be versioned. + */ +public class TCTLSeparatedProtocol extends TProtocol implements ConfigurableTProtocol { + + final static Log LOG = LogFactory.getLog(TCTLSeparatedProtocol.class.getName()); + + /** + * Factory for JSON protocol objects + */ + public static class Factory implements TProtocolFactory { + + public TProtocol getProtocol(TTransport trans) { + return new TCTLSeparatedProtocol(trans); + } + + } + + /** + * These are defaults, but for now leaving them like this + */ + final static protected byte defaultPrimarySeparatorByte = 1; + final static protected byte defaultSecondarySeparatorByte = 2; + final static protected byte defaultRowSeparatorByte = (byte)'\n'; + final static protected byte defaultMapSeparatorByte = 3; + + /** + * The separators for this instance + */ + protected byte primarySeparatorByte; + protected byte secondarySeparatorByte; + protected byte rowSeparatorByte; + protected byte mapSeparatorByte; + protected Pattern primaryPattern; + protected Pattern secondaryPattern; + protected Pattern mapPattern; + + /** + * Inspect the separators this instance is configured with. 
+ */ + public byte getPrimarySeparator() { return primarySeparatorByte; } + public byte getSecondarySeparator() { return secondarySeparatorByte; } + public byte getRowSeparator() { return rowSeparatorByte; } + public byte getMapSeparator() { return mapSeparatorByte; } + + + /** + * The transport stream is tokenized on the row separator + */ + protected SimpleTransportTokenizer transportTokenizer; + + /** + * For a single row, the split on the primary separator + */ + protected String columns[]; + + /** + * An index into what column we're on + */ + + protected int index; + + /** + * For a single column, a split on the secondary separator + */ + protected String fields[]; + + /** + * An index into what field within a column we're on + */ + protected int innerIndex; + + + /** + * Is this the first field we're writing + */ + protected boolean firstField; + + /** + * Is this the first list/map/set field we're writing for the current element + */ + protected boolean firstInnerField; + + + /** + * Are we writing a map and need to worry about k/v separator? + */ + protected boolean isMap; + + + /** + * For writes, on what element are we on so we know when to use normal list separator or + * for a map know when to use the k/v separator + */ + protected long elemIndex; + + + /** + * Are we currently on the top-level columns or parsing a column itself + */ + protected boolean inner; + + + /** + * For places where the separators are back to back, should we return a null or an empty string since it is ambiguous. + * This also applies to extra columns that are read but aren't in the current record. + */ + protected boolean returnNulls; + + /** + * The transport being wrapped. + * + */ + final protected TTransport innerTransport; + + + /** + * Strings used to lookup the various configurable paramaters of this protocol. 
+ */ + public final static String ReturnNullsKey = "separators.return_nulls"; + public final static String BufferSizeKey = "separators.buffer_size"; + + /** + * The size of the internal buffer to use. + */ + protected int bufferSize; + + /** + * A convenience class for tokenizing a TTransport + */ + + class SimpleTransportTokenizer { + + TTransport trans; + StringTokenizer tokenizer; + final String separator; + byte buf[]; + + public SimpleTransportTokenizer(TTransport trans, byte separator, int buffer_length) { + this.trans = trans; + byte [] separators = new byte[1]; + separators[0] = separator; + this.separator = new String(separators); + buf = new byte[buffer_length]; + fillTokenizer(); + } + + private boolean fillTokenizer() { + try { + int length = trans.read(buf, 0, buf.length); + if(length <=0 ) { + tokenizer = new StringTokenizer("", separator, true); + return false; + } + String row = new String(buf, 0, length); + tokenizer = new StringTokenizer(row, new String(separator), true); + } catch(TTransportException e) { + e.printStackTrace(); + tokenizer = null; + return false; + } + return true; + } + + public String nextToken() throws EOFException { + StringBuffer ret = null; + boolean done = false; + + while(! done) { + + if(! tokenizer.hasMoreTokens()) { + if(! fillTokenizer()) { + break; + } + } + + try { + final String nextToken = tokenizer.nextToken(); + + if(nextToken.equals(separator)) { + done = true; + } else if(ret == null) { + ret = new StringBuffer(nextToken); + } else { + ret.append(nextToken); + } + } catch(NoSuchElementException e) { + if (ret == null) { + throw new EOFException(e.getMessage()); + } + done = true; + } + } // while ! done + return ret == null ? null : ret.toString(); + } + }; + + + /** + * The simple constructor which assumes ctl-a, ctl-b and '\n' separators and to return empty strings for empty fields. 
+ * + * @param trans - the ttransport to use as input or output + * + */ + + public TCTLSeparatedProtocol(TTransport trans) { + this(trans, defaultPrimarySeparatorByte, defaultSecondarySeparatorByte, defaultMapSeparatorByte, defaultRowSeparatorByte, false, 4096); + } + + public TCTLSeparatedProtocol(TTransport trans, int buffer_size) { + this(trans, defaultPrimarySeparatorByte, defaultSecondarySeparatorByte, defaultMapSeparatorByte, defaultRowSeparatorByte, false, buffer_size); + } + + /** + * @param trans - the ttransport to use as input or output + * @param primarySeparatorByte the separator between columns (aka fields) + * @param secondarySeparatorByte the separator within a field for things like sets and maps and lists + * @param mapSeparatorByte - the key/value separator + * @param rowSeparatorByte - the record separator + * @param returnNulls - whether to return a null or an empty string for fields that seem empty (ie two primary separators back to back) + */ + + public TCTLSeparatedProtocol(TTransport trans, byte primarySeparatorByte, byte secondarySeparatorByte, byte mapSeparatorByte, byte rowSeparatorByte, + boolean returnNulls, + int bufferSize) { + super(trans); + + returnNulls = returnNulls; + + + this.primarySeparatorByte = primarySeparatorByte; + this.secondarySeparatorByte = secondarySeparatorByte; + this.rowSeparatorByte = rowSeparatorByte; + this.mapSeparatorByte = mapSeparatorByte; + + this.innerTransport = trans; + this.bufferSize = bufferSize; + + internalInitialize(); + } + + + /** + * Sets the internal separator patterns and creates the internal tokenizer. 
+ */ + protected void internalInitialize() { + byte []primarySeparator = new byte[1]; + byte []secondarySeparator = new byte[1]; + primarySeparator[0] = primarySeparatorByte; + secondarySeparator[0] = secondarySeparatorByte; + + primaryPattern = Pattern.compile(new String(primarySeparator)); + secondaryPattern = Pattern.compile(new String(secondarySeparator)); + mapPattern = Pattern.compile("\\0" + secondarySeparatorByte + "|\\0" + mapSeparatorByte); + + transportTokenizer = new SimpleTransportTokenizer(innerTransport, rowSeparatorByte, bufferSize); + } + + /** + * Initialize the TProtocol + * @param conf System properties + * @param tbl table properties + * @throws TException + */ + public void initialize(Configuration conf, Properties tbl) throws TException { + primarySeparatorByte = Byte.valueOf(tbl.getProperty(Constants.FIELD_DELIM, String.valueOf(primarySeparatorByte))).byteValue(); + LOG.debug("collections delim=<" + tbl.getProperty(Constants.COLLECTION_DELIM) + ">" ); + secondarySeparatorByte = Byte.valueOf(tbl.getProperty(Constants.COLLECTION_DELIM, String.valueOf(secondarySeparatorByte))).byteValue(); + rowSeparatorByte = Byte.valueOf(tbl.getProperty(Constants.LINE_DELIM, String.valueOf(rowSeparatorByte))).byteValue(); + mapSeparatorByte = Byte.valueOf(tbl.getProperty(Constants.MAPKEY_DELIM, String.valueOf(mapSeparatorByte))).byteValue(); + returnNulls = Boolean.valueOf(tbl.getProperty(ReturnNullsKey, String.valueOf(returnNulls))).booleanValue(); + bufferSize = Integer.valueOf(tbl.getProperty(BufferSizeKey, String.valueOf(bufferSize))).intValue(); + + internalInitialize(); + + } + + public void writeMessageBegin(TMessage message) throws TException { + } + + public void writeMessageEnd() throws TException { + } + + public void writeStructBegin(TStruct struct) throws TException { + firstField = true; + } + + public void writeStructEnd() throws TException { + // We don't write rowSeparatorByte because that should be handled by file format. 
+ } + + public void writeFieldBegin(TField field) throws TException { + if(! firstField) { + writeByte(primarySeparatorByte); + } + firstField = false; + } + + public void writeFieldEnd() throws TException { + } + + public void writeFieldStop() { + } + + public void writeMapBegin(TMap map) throws TException { + // nesting not allowed! + if(map.keyType == TType.STRUCT || + map.keyType == TType.MAP || + map.keyType == TType.LIST || + map.keyType == TType.SET) { + throw new TException("Not implemented: nested structures"); + } + // nesting not allowed! + if(map.valueType == TType.STRUCT || + map.valueType == TType.MAP || + map.valueType == TType.LIST || + map.valueType == TType.SET) { + throw new TException("Not implemented: nested structures"); + } + + firstInnerField = true; + isMap = true; + inner = true; + elemIndex = 0; + } + + public void writeMapEnd() throws TException { + isMap = false; + inner = false; + } + + public void writeListBegin(TList list) throws TException { + if(list.elemType == TType.STRUCT || + list.elemType == TType.MAP || + list.elemType == TType.LIST || + list.elemType == TType.SET) { + throw new TException("Not implemented: nested structures"); + } + firstInnerField = true; + inner = true; + } + + public void writeListEnd() throws TException { + inner = false; + } + + public void writeSetBegin(TSet set) throws TException { + if(set.elemType == TType.STRUCT || + set.elemType == TType.MAP || + set.elemType == TType.LIST || + set.elemType == TType.SET) { + throw new TException("Not implemented: nested structures"); + } + firstInnerField = true; + inner = true; + } + + public void writeSetEnd() throws TException { + inner = false; + } + + public void writeBool(boolean b) throws TException { + writeString(String.valueOf(b)); + } + + // for writing out single byte + private byte buf[] = new byte[1]; + public void writeByte(byte b) throws TException { + buf[0] = b; + trans_.write(buf); + } + + public void writeI16(short i16) throws TException { + 
writeString(String.valueOf(i16)); + } + + public void writeI32(int i32) throws TException { + writeString(String.valueOf(i32)); + } + + public void writeI64(long i64) throws TException { + writeString(String.valueOf(i64)); + } + + public void writeDouble(double dub) throws TException { + writeString(String.valueOf(dub)); + } + + public void writeString(String str) throws TException { + if(inner) { + if(!firstInnerField) { + // super hack city notice the mod plus only happens after firstfield hit, so == 0 is right. + if(isMap && elemIndex++ % 2 == 0) { + writeByte(mapSeparatorByte); + } else { + writeByte(secondarySeparatorByte); + } + } else { + firstInnerField = false; + } + } + final byte buf[] = str.getBytes(); + trans_.write(buf, 0, buf.length); + } + + public void writeBinary(byte[] bin) throws TException { + throw new TException("Ctl separated protocol cannot support writing Binary data!"); + } + + public TMessage readMessageBegin() throws TException { + return new TMessage(); + } + + public void readMessageEnd() throws TException { + } + + public TStruct readStructBegin() throws TException { + assert(!inner); + try { + final String tmp = transportTokenizer.nextToken(); + columns = primaryPattern.split(tmp); + index = 0; + return new TStruct(); + } catch(EOFException e) { + return null; + } + } + + public void readStructEnd() throws TException { + columns = null; + } + + public TField readFieldBegin() throws TException { + assert( !inner); + TField f = new TField(); + // slight hack to communicate to DynamicSerDe that the field ids are not being set but things are ordered. 
+ f.type = -1; + return f; + } + + public void readFieldEnd() throws TException { + fields = null; + } + + public TMap readMapBegin() throws TException { + assert( !inner); + TMap map = new TMap(); + fields = mapPattern.split(columns[index++]); + if(fields != null) { + map.size = fields.length/2; + } else { + map.size = 0; + } + innerIndex = 0; + inner = true; + isMap = true; + return map; + } + + public void readMapEnd() throws TException { + inner = false; + isMap = false; + } + + public TList readListBegin() throws TException { + assert( !inner); + TList list = new TList(); + fields = secondaryPattern.split(columns[index++]); + if(fields != null) { + list.size = fields.length ; + } else { + list.size = 0; + } + innerIndex = 0; + inner = true; + return list; + } + + public void readListEnd() throws TException { + inner = false; + } + + public TSet readSetBegin() throws TException { + assert( !inner); + TSet set = new TSet(); + fields = secondaryPattern.split(columns[index++]); + if(fields != null) { + set.size = fields.length ; + } else { + set.size = 0; + } + inner = true; + innerIndex = 0; + return set; + } + + public void readSetEnd() throws TException { + inner = false; + } + public boolean readBool() throws TException { + return Boolean.valueOf(readString()).booleanValue(); + } + + public byte readByte() throws TException { + return Byte.valueOf(readString()).byteValue(); + } + + public short readI16() throws TException { + return Short.valueOf(readString()).shortValue(); + } + + public int readI32() throws TException { + return Integer.valueOf(readString()).intValue(); + } + + public long readI64() throws TException { + return Long.valueOf(readString()).longValue(); + } + + public double readDouble() throws TException { + return Double.valueOf(readString()).doubleValue(); + } + + protected String [] curMapPair; + public String readString() throws TException { + String ret; + if(!inner) { + ret = columns != null && index < columns.length ? 
columns[index++] : null; + } else { + ret = fields != null && innerIndex < fields.length ? fields[innerIndex++] : null; + } + return ret == null && ! returnNulls ? "" : ret; + } + + public byte[] readBinary() throws TException { + throw new TException("Not implemented for control separated data"); + } +} Modified: hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde/TestSerDeUtils.java URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde/TestSerDeUtils.java?rev=706704&r1=706703&r2=706704&view=diff ============================================================================== --- hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde/TestSerDeUtils.java (original) +++ hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde/TestSerDeUtils.java Tue Oct 21 11:11:05 2008 @@ -31,7 +31,7 @@ public void testLookupSerDe() throws Exception { try { - String name = ThriftSerDe.shortName(); + String name = ThriftSerDe.class.getName(); SerDe s = SerDeUtils.lookupSerDe(name); assertTrue(s.getClass().getName().equals(org.apache.hadoop.hive.serde.thrift.ThriftSerDe.class.getName())); } catch(Exception e) { Added: hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/TestTCTLSeparatedProtocol.java URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/TestTCTLSeparatedProtocol.java?rev=706704&view=auto ============================================================================== --- hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/TestTCTLSeparatedProtocol.java (added) +++ hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/TestTCTLSeparatedProtocol.java Tue Oct 21 11:11:05 2008 @@ -0,0 +1,245 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.serde2; + +import junit.framework.TestCase; +import java.io.*; +import org.apache.hadoop.hive.serde2.*; +import org.apache.hadoop.hive.serde2.thrift.*; +import java.util.*; +import com.facebook.thrift.TException; +import com.facebook.thrift.transport.*; +import com.facebook.thrift.*; +import com.facebook.thrift.protocol.*; + +public class TestTCTLSeparatedProtocol extends TestCase { + + public TestTCTLSeparatedProtocol() throws Exception { + } + + public void testReads() throws Exception { + try { + TMemoryBuffer trans = new TMemoryBuffer(1024); + String foo = "Hello"; + String bar = "World!"; + + String key = "22"; + String value = "TheValue"; + String key2 = "24"; + String value2 = "TheValueAgain"; + + byte columnSeparator [] = { 1 }; + byte elementSeparator [] = { 2 }; + byte kvSeparator [] = { 3 }; + + + trans.write(foo.getBytes(), 0, foo.getBytes().length); + trans.write(columnSeparator, 0, 1); + + trans.write(columnSeparator, 0, 1); + + trans.write(bar.getBytes(), 0, bar.getBytes().length); + trans.write(columnSeparator, 0, 1); + + trans.write(key.getBytes(), 0, key.getBytes().length); + trans.write(kvSeparator, 0, 1); + trans.write(value.getBytes(), 0, value.getBytes().length); + trans.write(elementSeparator, 0, 1); + + 
trans.write(key2.getBytes(), 0, key2.getBytes().length); + trans.write(kvSeparator, 0, 1); + trans.write(value2.getBytes(), 0, value2.getBytes().length); + + + trans.flush(); + + + // use 3 as the row buffer size to force lots of re-buffering. + TCTLSeparatedProtocol prot = new TCTLSeparatedProtocol(trans, 3); + + prot.readStructBegin(); + + prot.readFieldBegin(); + String hello = prot.readString(); + prot.readFieldEnd(); + + assertTrue(hello.equals(foo)); + + prot.readFieldBegin(); + assertTrue(prot.readString().equals("")); + prot.readFieldEnd(); + + prot.readFieldBegin(); + assertTrue(prot.readString().equals(bar)); + prot.readFieldEnd(); + + prot.readFieldBegin(); + TMap mapHeader = prot.readMapBegin(); + assertTrue(mapHeader.size == 2); + + assertTrue(prot.readI32() == 22); + assertTrue(prot.readString().equals(value)); + assertTrue(prot.readI32() == 24); + assertTrue(prot.readString().equals(value2)); + prot.readMapEnd(); + prot.readFieldEnd(); + + prot.readFieldBegin(); + hello = prot.readString(); + prot.readFieldEnd(); + assertTrue(hello.length() == 0); + + prot.readStructEnd(); + + } catch(Exception e) { + e.printStackTrace(); + } + } + + + public void testWrites() throws Exception { + try { + TMemoryBuffer trans = new TMemoryBuffer(1024); + TCTLSeparatedProtocol prot = new TCTLSeparatedProtocol(trans, 3); + + prot.writeStructBegin(new TStruct()); + prot.writeFieldBegin(new TField()); + prot.writeI32(100); + prot.writeFieldEnd(); + + prot.writeFieldBegin(new TField()); + prot.writeListBegin(new TList()); + prot.writeDouble(348.55); + prot.writeDouble(234.22); + prot.writeListEnd(); + prot.writeFieldEnd(); + + prot.writeFieldBegin(new TField()); + prot.writeString("hello world!"); + prot.writeFieldEnd(); + + prot.writeFieldBegin(new TField()); + prot.writeMapBegin(new TMap()); + prot.writeString("key1"); + prot.writeString("val1"); + prot.writeString("key2"); + prot.writeString("val2"); + prot.writeString("key3"); + prot.writeString("val3"); + 
prot.writeMapEnd(); + prot.writeFieldEnd(); + + prot.writeFieldBegin(new TField()); + prot.writeListBegin(new TList()); + prot.writeString("elem1"); + prot.writeString("elem2"); + prot.writeListEnd(); + prot.writeFieldEnd(); + + + prot.writeFieldBegin(new TField()); + prot.writeString("bye!"); + prot.writeFieldEnd(); + + prot.writeStructEnd(); + trans.flush(); + byte b[] = new byte[3*1024]; + int len = trans.read(b,0,b.length); + String test = new String(b, 0, len); + + String testRef = "100348.55234.22hello world!key1val1key2val2key3val3elem1elem2bye!"; + // System.err.println("test=" + test + ">"); + // System.err.println(" ref=" + testRef + ">"); + assertTrue(test.equals(testRef)); + + trans = new TMemoryBuffer(1023); + trans.write(b, 0, len); + + // + // read back! + // + + prot = new TCTLSeparatedProtocol(trans, 10); + + // 100 is the start + prot.readStructBegin(); + prot.readFieldBegin(); + assertTrue(prot.readI32() == 100); + prot.readFieldEnd(); + + // let's see if doubles work ok + prot.readFieldBegin(); + TList l = prot.readListBegin(); + assertTrue(l.size == 2); + assertTrue(prot.readDouble() == 348.55); + assertTrue(prot.readDouble() == 234.22); + prot.readListEnd(); + prot.readFieldEnd(); + + // nice message + prot.readFieldBegin(); + assertTrue(prot.readString().equals("hello world!")); + prot.readFieldEnd(); + + // 3 element map + prot.readFieldBegin(); + TMap m = prot.readMapBegin(); + assertTrue(m.size == 3); + assertTrue(prot.readString().equals("key1")); + assertTrue(prot.readString().equals("val1")); + assertTrue(prot.readString().equals("key2")); + assertTrue(prot.readString().equals("val2")); + assertTrue(prot.readString().equals("key3")); + assertTrue(prot.readString().equals("val3")); + prot.readMapEnd(); + prot.readFieldEnd(); + + // the 2 element list + prot.readFieldBegin(); + l = prot.readListBegin(); + assertTrue(l.size == 2); + assertTrue(prot.readString().equals("elem1")); + assertTrue(prot.readString().equals("elem2")); + 
prot.readListEnd(); + prot.readFieldEnd(); + + // final string + prot.readFieldBegin(); + assertTrue(prot.readString().equals("bye!")); + prot.readFieldEnd(); + + // should return nulls at end + prot.readFieldBegin(); + assertTrue(prot.readString().equals("")); + prot.readFieldEnd(); + + // should return nulls at end + prot.readFieldBegin(); + assertTrue(prot.readString().equals("")); + prot.readFieldEnd(); + + prot.readStructEnd(); + + + } catch(Exception e) { + e.printStackTrace(); + } + } + +} Added: hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/dynamic_type/TestDynamicSerDe.java URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/dynamic_type/TestDynamicSerDe.java?rev=706704&view=auto ============================================================================== --- hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/dynamic_type/TestDynamicSerDe.java (added) +++ hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/dynamic_type/TestDynamicSerDe.java Tue Oct 21 11:11:05 2008 @@ -0,0 +1,204 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.serde2.dynamic_type; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Properties; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.serde.Constants; +import org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol; +import org.apache.hadoop.hive.serde.Constants; + +import junit.framework.TestCase; +import org.apache.hadoop.io.BytesWritable; + +public class TestDynamicSerDe extends TestCase { + + public void testDynamicSerDe() throws Throwable { + try { + + // Try to construct an object + ArrayList<String> bye = new ArrayList<String>(); + bye.add("firstString"); + bye.add("secondString"); + HashMap<String, Integer> another = new HashMap<String, Integer>(); + another.put("firstKey", 1); + another.put("secondKey", 2); + ArrayList<Object> struct = new ArrayList<Object>(); + struct.add(Integer.valueOf(234)); + struct.add(bye); + struct.add(another); + + // All protocols + ArrayList<String> protocols = new ArrayList<String>(); + ArrayList<Boolean> isBinaries = new ArrayList<Boolean>(); + + protocols.add(com.facebook.thrift.protocol.TBinaryProtocol.class.getName()); + isBinaries.add(true); + + protocols.add(com.facebook.thrift.protocol.TJSONProtocol.class.getName()); + isBinaries.add(false); + + // TSimpleJSONProtocol does not support deserialization. + // protocols.add(com.facebook.thrift.protocol.TSimpleJSONProtocol.class.getName()); + // isBinaries.add(false); + + // TCTLSeparatedProtocol is not done yet. 
+ protocols.add(org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol.class.getName()); + isBinaries.add(false); + + System.out.println("input struct = " + struct); + + for(int pp = 0; pp<protocols.size(); pp++) { + + String protocol = protocols.get(pp); + boolean isBinary = isBinaries.get(pp); + + System.out.println("Testing protocol: " + protocol); + Properties schema = new Properties(); + schema.setProperty(Constants.SERIALIZATION_FORMAT, protocol); + schema.setProperty(org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_NAME, "test"); + schema.setProperty(Constants.SERIALIZATION_DDL, + "struct test { i32 hello, list<string> bye, map<string,i32> another}"); + schema.setProperty(Constants.SERIALIZATION_LIB, new DynamicSerDe().getClass().toString()); + + DynamicSerDe serde = new DynamicSerDe(); + serde.initialize(new Configuration(), schema); + + // Try getObjectInspector + ObjectInspector oi = serde.getObjectInspector(); + System.out.println("TypeName = " + oi.getTypeName()); + + + // Try to serialize + BytesWritable bytes = (BytesWritable) serde.serialize(struct, oi); + + StringBuilder sb = new StringBuilder(); + for (int i=0; i<bytes.getSize(); i++) { + byte b = bytes.get()[i]; + int v = (b<0 ? 
256 + b : b); + sb.append(String.format("x%02x", v)); + } + System.out.println("bytes =" + sb); + + if (!isBinary) { + System.out.println("bytes in text =" + new String(bytes.get(), 0, bytes.getSize())); + } + + // Try to deserialize + Object o = serde.deserialize(bytes); + System.out.println("o class = " + o.getClass()); + List<?> olist = (List<?>)o; + System.out.println("o size = " + olist.size()); + System.out.println("o[0] class = " + olist.get(0).getClass()); + System.out.println("o[1] class = " + olist.get(1).getClass()); + System.out.println("o[2] class = " + olist.get(2).getClass()); + System.out.println("o = " + o); + + assertEquals(o, struct); + } + + } catch (Throwable e) { + e.printStackTrace(); + throw e; + } + + } + + + + + public void testConfigurableTCTLSeparated() throws Throwable { + try { + + + // Try to construct an object + ArrayList<String> bye = new ArrayList<String>(); + bye.add("firstString"); + bye.add("secondString"); + HashMap<String, Integer> another = new HashMap<String, Integer>(); + another.put("firstKey", 1); + another.put("secondKey", 2); + ArrayList<Object> struct = new ArrayList<Object>(); + struct.add(Integer.valueOf(234)); + struct.add(bye); + struct.add(another); + + Properties schema = new Properties(); + schema.setProperty(Constants.SERIALIZATION_FORMAT, org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol.class.getName()); + schema.setProperty(org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_NAME, "test"); + schema.setProperty(Constants.SERIALIZATION_DDL, + "struct test { i32 hello, list<string> bye, map<string,i32> another}"); + schema.setProperty(Constants.SERIALIZATION_LIB, new DynamicSerDe().getClass().toString()); + + schema.setProperty(Constants.FIELD_DELIM, "9"); + schema.setProperty(Constants.COLLECTION_DELIM, "1"); + schema.setProperty(Constants.LINE_DELIM, "2"); + schema.setProperty(Constants.MAPKEY_DELIM, "4"); + + DynamicSerDe serde = new DynamicSerDe(); + serde.initialize(new Configuration(), 
schema); + + TCTLSeparatedProtocol prot = (TCTLSeparatedProtocol)serde.oprot_; + assertTrue(prot.getPrimarySeparator() == 9); + + ObjectInspector oi = serde.getObjectInspector(); + + // Try to serialize + BytesWritable bytes = (BytesWritable) serde.serialize(struct, oi); + + StringBuilder sb = new StringBuilder(); + for (int i=0; i<bytes.getSize(); i++) { + byte b = bytes.get()[i]; + int v = (b<0 ? 256 + b : b); + sb.append(String.format("x%02x", v)); + } + System.out.println("bytes =" + sb); + + String compare = "234" + "\u0009" + "firstString" + "\u0001" + "secondString" + "\u0009" + "firstKey" + "\u0004" + "1" + "\u0001" + "secondKey" + "\u0004" + "2"; + + System.out.println("bytes in text =" + new String(bytes.get(), 0, bytes.getSize()) + ">"); + System.out.println("compare to =" + compare + ">"); + + assertTrue(compare.equals( new String(bytes.get(), 0, bytes.getSize()))); + + // Try to deserialize + Object o = serde.deserialize(bytes); + System.out.println("o class = " + o.getClass()); + List<?> olist = (List<?>)o; + System.out.println("o size = " + olist.size()); + System.out.println("o[0] class = " + olist.get(0).getClass()); + System.out.println("o[1] class = " + olist.get(1).getClass()); + System.out.println("o[2] class = " + olist.get(2).getClass()); + System.out.println("o = " + o); + + assertEquals(o, struct); + + } catch (Throwable e) { + e.printStackTrace(); + throw e; + } + + } +} Modified: hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestObjectInspectorUtils.java URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestObjectInspectorUtils.java?rev=706704&r1=706703&r2=706704&view=diff ============================================================================== --- hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestObjectInspectorUtils.java (original) +++ 
hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestObjectInspectorUtils.java Tue Oct 21 11:11:05 2008 @@ -22,8 +22,8 @@ import java.util.List; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; -import org.apache.hadoop.hive.serde2.thrift_test.Complex; -import org.apache.hadoop.hive.serde2.thrift_test.IntString; +import org.apache.hadoop.hive.serde2.thrift.test.Complex; +import org.apache.hadoop.hive.serde2.thrift.test.IntString; import junit.framework.TestCase; @@ -50,14 +50,14 @@ // real object Complex cc = new Complex(); cc.aint = 1; - cc.astring = "test"; + cc.aString = "test"; List<Integer> c2 = Arrays.asList(new Integer[]{1,2,3}); cc.lint = c2; List<String> c3 = Arrays.asList(new String[]{"one", "two"}); - cc.lstring = c3; + cc.lString = c3; List<IntString> c4 = new ArrayList<IntString>(); - cc.lintstring = c4; - cc.mstringstring = null; + cc.lintString = c4; + cc.mStringString = null; // standard object Object c = ObjectInspectorUtils.getStandardObject(cc, oi1); Modified: hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestThriftObjectInspectors.java URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestThriftObjectInspectors.java?rev=706704&r1=706703&r2=706704&view=diff ============================================================================== --- hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestThriftObjectInspectors.java (original) +++ hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestThriftObjectInspectors.java Tue Oct 21 11:11:05 2008 @@ -22,8 +22,8 @@ import java.util.List; import junit.framework.TestCase; -import org.apache.hadoop.hive.serde2.thrift_test.Complex; -import org.apache.hadoop.hive.serde2.thrift_test.IntString; +import 
org.apache.hadoop.hive.serde2.thrift.test.Complex; +import org.apache.hadoop.hive.serde2.thrift.test.IntString; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; @@ -53,14 +53,14 @@ // real object Complex c = new Complex(); c.aint = 1; - c.astring = "test"; + c.aString = "test"; List<Integer> c2 = Arrays.asList(new Integer[]{1,2,3}); c.lint = c2; List<String> c3 = Arrays.asList(new String[]{"one", "two"}); - c.lstring = c3; + c.lString = c3; List<IntString> c4 = new ArrayList<IntString>(); - c.lintstring = c4; - c.mstringstring = null; + c.lintString = c4; + c.mStringString = null; assertEquals(1, soi.getStructFieldData(c, fields.get(0))); assertEquals("test", soi.getStructFieldData(c, fields.get(1))); Modified: hadoop/core/trunk/src/contrib/hive/testutils/run_tests URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/testutils/run_tests?rev=706704&r1=706703&r2=706704&view=diff ============================================================================== --- hadoop/core/trunk/src/contrib/hive/testutils/run_tests (original) +++ hadoop/core/trunk/src/contrib/hive/testutils/run_tests Tue Oct 21 11:11:05 2008 @@ -2,9 +2,11 @@ d=`mktemp -d /tmp/hivetest_XXXX` for i in `find . -name Test\*\.* | grep -v svn | egrep "java$|vm$" | sed 's/.*\/Test/Test/g' | sed 's/\.java//g' | sed 's/\.vm//g' | sort`; do - cmd="ant -lib ../../../lib -Dtestcase=$i clean-test test -logfile $d/$i.log" - echo $cmd; - $cmd; + if [ "$i" != "TestSerDe" ]; then + cmd="ant -lib ../../../lib -Dtestcase=$i clean-test test -logfile $d/$i.log" + echo $cmd; + $cmd; + fi done cat $d/*.log | grep junit | egrep "Running org|Tests run" echo Logs at $d
