Propchange: hive/branches/spark-new/
------------------------------------------------------------------------------
  Merged /hive/branches/cbo:r1605012-1627125
  Merged /hive/trunk:r1626482-1629477
Modified: hive/branches/spark-new/accumulo-handler/pom.xml URL: http://svn.apache.org/viewvc/hive/branches/spark-new/accumulo-handler/pom.xml?rev=1629544&r1=1629543&r2=1629544&view=diff ============================================================================== --- hive/branches/spark-new/accumulo-handler/pom.xml (original) +++ hive/branches/spark-new/accumulo-handler/pom.xml Sun Oct 5 22:26:43 2014 @@ -112,6 +112,12 @@ <dependencies> <dependency> <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-client</artifactId> + <version>${hadoop-20S.version}</version> + <optional>true</optional> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-core</artifactId> <version>${hadoop-20S.version}</version> <optional>true</optional> @@ -123,6 +129,12 @@ <dependencies> <dependency> <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-client</artifactId> + <version>${hadoop-23.version}</version> + <optional>true</optional> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-common</artifactId> <version>${hadoop-23.version}</version> <optional>true</optional> Modified: hive/branches/spark-new/beeline/pom.xml URL: http://svn.apache.org/viewvc/hive/branches/spark-new/beeline/pom.xml?rev=1629544&r1=1629543&r2=1629544&view=diff ============================================================================== --- hive/branches/spark-new/beeline/pom.xml (original) +++ hive/branches/spark-new/beeline/pom.xml Sun Oct 5 22:26:43 2014 @@ -49,6 +49,11 @@ <artifactId>hive-shims</artifactId> <version>${project.version}</version> </dependency> + <dependency> + <groupId>org.apache.hive</groupId> + <artifactId>hive-jdbc</artifactId> + <version>${project.version}</version> + </dependency> <!-- inter-project --> <dependency> <groupId>commons-cli</groupId> @@ -88,12 +93,6 @@ <!-- test intra-project --> <dependency> <groupId>org.apache.hive</groupId> - <artifactId>hive-jdbc</artifactId> - <version>${project.version}</version> - <scope>test</scope> - </dependency> - <dependency> - <groupId>org.apache.hive</groupId> <artifactId>hive-exec</artifactId> <version>${project.version}</version> <classifier>tests</classifier> Modified: hive/branches/spark-new/beeline/src/java/org/apache/hive/beeline/BeeLine.java URL: http://svn.apache.org/viewvc/hive/branches/spark-new/beeline/src/java/org/apache/hive/beeline/BeeLine.java?rev=1629544&r1=1629543&r2=1629544&view=diff ============================================================================== --- hive/branches/spark-new/beeline/src/java/org/apache/hive/beeline/BeeLine.java (original) +++ hive/branches/spark-new/beeline/src/java/org/apache/hive/beeline/BeeLine.java Sun Oct 5 22:26:43 2014 @@ -692,10 +692,6 @@ public class BeeLine implements Closeabl int code = 0; if (!commands.isEmpty()) { - // for single command execute, disable color - getOpts().setColor(false); - getOpts().setHeaderInterval(-1); - for (Iterator<String> i = commands.iterator(); i.hasNext();) { String command = i.next().toString(); debug(loc("executing-command", command)); Modified: hive/branches/spark-new/beeline/src/java/org/apache/hive/beeline/Commands.java URL: http://svn.apache.org/viewvc/hive/branches/spark-new/beeline/src/java/org/apache/hive/beeline/Commands.java?rev=1629544&r1=1629543&r2=1629544&view=diff ============================================================================== --- hive/branches/spark-new/beeline/src/java/org/apache/hive/beeline/Commands.java (original) +++ 
hive/branches/spark-new/beeline/src/java/org/apache/hive/beeline/Commands.java Sun Oct 5 22:26:43 2014 @@ -38,6 +38,7 @@ import java.sql.Driver; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; +import java.sql.SQLWarning; import java.util.Arrays; import java.util.Iterator; import java.util.LinkedList; @@ -47,10 +48,13 @@ import java.util.Set; import java.util.TreeSet; import org.apache.hadoop.hive.common.cli.ShellCmdExecutor; +import org.apache.hive.jdbc.HiveStatement; public class Commands { private final BeeLine beeLine; + private static final int DEFAULT_QUERY_PROGRESS_INTERVAL = 1000; + private static final int DEFAULT_QUERY_PROGRESS_THREAD_TIMEOUT = 10 * 1000; /** * @param beeLine @@ -728,7 +732,7 @@ public class Commands { beeLine.handleException(e); } - + line = line.trim(); if (line.endsWith(";")) { line = line.substring(0, line.length() - 1); } @@ -758,6 +762,7 @@ public class Commands { try { Statement stmnt = null; boolean hasResults; + Thread logThread = null; try { long start = System.currentTimeMillis(); @@ -767,7 +772,15 @@ public class Commands { hasResults = ((CallableStatement) stmnt).execute(); } else { stmnt = beeLine.createStatement(); - hasResults = stmnt.execute(sql); + if (beeLine.getOpts().isSilent()) { + hasResults = stmnt.execute(sql); + } else { + logThread = new Thread(createLogRunnable(stmnt)); + logThread.setDaemon(true); + logThread.start(); + hasResults = stmnt.execute(sql); + logThread.interrupt(); + } } beeLine.showWarnings(); @@ -782,6 +795,11 @@ public class Commands { beeLine.info(beeLine.loc("rows-selected", count) + " " + beeLine.locElapsedTime(end - start)); } finally { + if (logThread != null) { + logThread.join(DEFAULT_QUERY_PROGRESS_THREAD_TIMEOUT); + showRemainingLogsIfAny(stmnt); + logThread = null; + } rs.close(); } } while (BeeLine.getMoreResults(stmnt)); @@ -792,6 +810,13 @@ public class Commands { + " " + beeLine.locElapsedTime(end - start)); } } finally { + if (logThread != null) { + if (!logThread.isInterrupted()) { + logThread.interrupt(); + } + logThread.join(DEFAULT_QUERY_PROGRESS_THREAD_TIMEOUT); + showRemainingLogsIfAny(stmnt); + } if (stmnt != null) { stmnt.close(); } @@ -803,6 +828,61 @@ public class Commands { return true; } + private Runnable createLogRunnable(Statement statement) { + if (statement instanceof HiveStatement) { + final HiveStatement hiveStatement = (HiveStatement) statement; + + Runnable runnable = new Runnable() { + @Override + public void run() { + while (hiveStatement.hasMoreLogs()) { + try { + // fetch the log periodically and output to beeline console + for (String log : hiveStatement.getQueryLog()) { + beeLine.info(log); + } + Thread.sleep(DEFAULT_QUERY_PROGRESS_INTERVAL); + } catch (SQLException e) { + beeLine.error(new SQLWarning(e)); + return; + } catch (InterruptedException e) { + beeLine.debug("Getting log thread is interrupted, since query is done!"); + return; + } + } + } + }; + return runnable; + } else { + beeLine.debug("The statement instance is not HiveStatement type: " + statement.getClass()); + return new Runnable() { + @Override + public void run() { + // do nothing. 
+ } + }; + } + } + + private void showRemainingLogsIfAny(Statement statement) { + if (statement instanceof HiveStatement) { + HiveStatement hiveStatement = (HiveStatement) statement; + List<String> logs; + do { + try { + logs = hiveStatement.getQueryLog(); + } catch (SQLException e) { + beeLine.error(new SQLWarning(e)); + return; + } + for (String log : logs) { + beeLine.info(log); + } + } while (logs.size() > 0); + } else { + beeLine.debug("The statement instance is not HiveStatement type: " + statement.getClass()); + } + } public boolean quit(String line) { beeLine.setExit(true); Modified: hive/branches/spark-new/bin/beeline.cmd URL: http://svn.apache.org/viewvc/hive/branches/spark-new/bin/beeline.cmd?rev=1629544&r1=1629543&r2=1629544&view=diff ============================================================================== --- hive/branches/spark-new/bin/beeline.cmd (original) +++ hive/branches/spark-new/bin/beeline.cmd Sun Oct 5 22:26:43 2014 @@ -43,7 +43,22 @@ if not exist %HADOOP_HOME%\libexec\hadoo @rem supress the HADOOP_HOME warnings in 1.x.x set HADOOP_HOME_WARN_SUPPRESS=true call %HADOOP_HOME%\libexec\hadoop-config.cmd -set CLASSPATH=%CLASSPATH%;%HIVE_HOME%\lib\*; + +@rem include only the beeline client jar and its dependencies +pushd %HIVE_HOME%\lib +for /f %%a IN ('dir /b hive-beeline-**.jar') do ( + set CLASSPATH=%CLASSPATH%;%HIVE_HOME%\lib\%%a +) +for /f %%a IN ('dir /b super-csv-**.jar') do ( + set CLASSPATH=%CLASSPATH%;%HIVE_HOME%\lib\%%a +) +for /f %%a IN ('dir /b jline-**.jar') do ( + set CLASSPATH=%CLASSPATH%;%HIVE_HOME%\lib\%%a +) +for /f %%a IN ('dir /b hive-jdbc-**-standalone.jar') do ( + set CLASSPATH=%CLASSPATH%;%HIVE_HOME%\lib\%%a +) +popd call %JAVA_HOME%\bin\java %JAVA_HEAP_MAX% %HADOOP_OPTS% -classpath %CLASSPATH% org.apache.hive.beeline.BeeLine %* Modified: hive/branches/spark-new/bin/ext/beeline.sh URL: http://svn.apache.org/viewvc/hive/branches/spark-new/bin/ext/beeline.sh?rev=1629544&r1=1629543&r2=1629544&view=diff ============================================================================== --- hive/branches/spark-new/bin/ext/beeline.sh (original) +++ hive/branches/spark-new/bin/ext/beeline.sh Sun Oct 5 22:26:43 2014 @@ -19,11 +19,17 @@ export SERVICE_LIST="${SERVICE_LIST}${TH beeline () { CLASS=org.apache.hive.beeline.BeeLine; - execHiveCmd $CLASS "$@" + + # include only the beeline client jar and its dependencies + beelineJarPath=`ls ${HIVE_LIB}/hive-beeline-*.jar` + superCsvJarPath=`ls ${HIVE_LIB}/super-csv-*.jar` + jlineJarPath=`ls ${HIVE_LIB}/jline-*.jar` + jdbcStandaloneJarPath=`ls ${HIVE_LIB}/hive-jdbc-*-standalone.jar` + export HADOOP_CLASSPATH=${beelineJarPath}:${superCsvJarPath}:${jlineJarPath}:${jdbcStandaloneJarPath} + + exec $HADOOP jar ${beelineJarPath} $CLASS $HIVE_OPTS "$@" } beeline_help () { - CLASS=org.apache.hive.beeline.BeeLine; - execHiveCmd $CLASS "--help" + beeline "--help" } - Modified: hive/branches/spark-new/common/pom.xml URL: http://svn.apache.org/viewvc/hive/branches/spark-new/common/pom.xml?rev=1629544&r1=1629543&r2=1629544&view=diff ============================================================================== --- hive/branches/spark-new/common/pom.xml (original) +++ hive/branches/spark-new/common/pom.xml Sun Oct 5 22:26:43 2014 @@ -72,6 +72,12 @@ </dependency> <!-- test inter-project --> <dependency> + <groupId>com.google.code.tempus-fugit</groupId> + <artifactId>tempus-fugit</artifactId> + <version>${tempus-fugit.version}</version> + <scope>test</scope> + </dependency> + <dependency> <groupId>junit</groupId> 
<artifactId>junit</artifactId> <version>${junit.version}</version> Modified: hive/branches/spark-new/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java URL: http://svn.apache.org/viewvc/hive/branches/spark-new/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java?rev=1629544&r1=1629543&r2=1629544&view=diff ============================================================================== --- hive/branches/spark-new/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (original) +++ hive/branches/spark-new/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java Sun Oct 5 22:26:43 2014 @@ -649,6 +649,9 @@ public class HiveConf extends Configurat HIVEJOINCACHESIZE("hive.join.cache.size", 25000, "How many rows in the joining tables (except the streaming table) should be cached in memory."), + // CBO related + HIVE_CBO_ENABLED("hive.cbo.enable", false, "Flag to control enabling Cost Based Optimizations using Optiq framework."), + // hive.mapjoin.bucket.cache.size has been replaced by hive.smbjoin.cache.row, // need to remove by hive .13. Also, do not change default (see SMB operator) HIVEMAPJOINBUCKETCACHESIZE("hive.mapjoin.bucket.cache.size", 100, ""), @@ -1051,7 +1054,7 @@ public class HiveConf extends Configurat "That means if reducer-num of the child RS is fixed (order by or forced bucketing) and small, it can make very slow, single MR.\n" + "The optimization will be automatically disabled if number of reducers would be less than specified value."), - HIVEOPTSORTDYNAMICPARTITION("hive.optimize.sort.dynamic.partition", true, + HIVEOPTSORTDYNAMICPARTITION("hive.optimize.sort.dynamic.partition", false, "When enabled dynamic partitioning column will be globally sorted.\n" + "This way we can keep only one record writer open for each partition value\n" + "in the reducer thereby reducing the memory pressure on reducers."), @@ -1196,13 +1199,6 @@ public class HiveConf extends Configurat "Average row size is computed from average column size of all columns in the row. In the absence\n" + "of column statistics and for variable length complex columns like map, the average number of\n" + "entries/values can be specified using this config."), - // to accurately compute statistics for GROUPBY map side parallelism needs to be known - HIVE_STATS_MAP_SIDE_PARALLELISM("hive.stats.map.parallelism", 1, - "Hive/Tez optimizer estimates the data size flowing through each of the operators.\n" + - "For GROUPBY operator, to accurately compute the data size map-side parallelism needs to\n" + - "be known. By default, this value is set to 1 since optimizer is not aware of the number of\n" + - "mappers during compile-time. This Hive config can be used to specify the number of mappers\n" + - "to be used for data size computation of GROUPBY operator."), // statistics annotation fetches stats for each partition, which can be expensive. turning // this off will result in basic sizes being fetched from namenode instead HIVE_STATS_FETCH_PARTITION_STATS("hive.stats.fetch.partition.stats", true, @@ -1384,6 +1380,8 @@ public class HiveConf extends Configurat "authorization manager class name to be used in the metastore for authorization.\n" + "The user defined authorization class should implement interface \n" + "org.apache.hadoop.hive.ql.security.authorization.HiveMetastoreAuthorizationProvider. 
"), + HIVE_METASTORE_AUTHORIZATION_AUTH_READS("hive.security.metastore.authorization.auth.reads", true, + "If this is true, metastore authorizer authorizes read actions on database, table"), HIVE_METASTORE_AUTHENTICATOR_MANAGER("hive.security.metastore.authenticator.manager", "org.apache.hadoop.hive.ql.security.HadoopDefaultMetastoreAuthenticator", "authenticator manager class name to be used in the metastore for authentication. \n" + @@ -1479,10 +1477,10 @@ public class HiveConf extends Configurat HIVE_INSERT_INTO_MULTILEVEL_DIRS("hive.insert.into.multilevel.dirs", false, "Where to insert into multilevel directories like\n" + "\"insert directory '/HIVEFT25686/chinna/' from table\""), - HIVE_WAREHOUSE_SUBDIR_INHERIT_PERMS("hive.warehouse.subdir.inherit.perms", false, - "Set this to true if the the table directories should inherit the\n" + - "permission of the warehouse or database directory instead of being created\n" + - "with the permissions derived from dfs umask"), + HIVE_WAREHOUSE_SUBDIR_INHERIT_PERMS("hive.warehouse.subdir.inherit.perms", true, + "Set this to false if the table directories should be created\n" + + "with the permissions derived from dfs umask instead of\n" + + "inheriting the permission of the warehouse or database directory."), HIVE_INSERT_INTO_EXTERNAL_TABLES("hive.insert.into.external.tables", true, "whether insert into external tables is allowed"), @@ -1513,8 +1511,8 @@ public class HiveConf extends Configurat "The parent node in ZooKeeper used by HiveServer2 when supporting dynamic service discovery."), // HiveServer2 global init file location HIVE_SERVER2_GLOBAL_INIT_FILE_LOCATION("hive.server2.global.init.file.location", "${env:HIVE_CONF_DIR}", - "The location of HS2 global init file (.hiverc).\n" + - "If the property is reset, the value must be a valid path where the init file is located."), + "Either the location of a HS2 global init file or a directory containing a .hiverc file. 
If the \n" + + "property is set, the value must be a valid path to an init file or directory where the init file is located."), HIVE_SERVER2_TRANSPORT_MODE("hive.server2.transport.mode", "binary", new StringSet("binary", "http"), "Transport mode of HiveServer2."), HIVE_SERVER2_THRIFT_BIND_HOST("hive.server2.thrift.bind.host", "", @@ -1722,6 +1720,9 @@ public class HiveConf extends Configurat HIVE_VECTORIZATION_REDUCE_ENABLED("hive.vectorized.execution.reduce.enabled", true, "This flag should be set to true to enable vectorized mode of the reduce-side of query execution.\n" + "The default value is true."), + HIVE_VECTORIZATION_REDUCE_GROUPBY_ENABLED("hive.vectorized.execution.reduce.groupby.enabled", true, + "This flag should be set to true to enable vectorized mode of the reduce-side GROUP BY query execution.\n" + + "The default value is true."), HIVE_VECTORIZATION_GROUPBY_CHECKINTERVAL("hive.vectorized.groupby.checkinterval", 100000, "Number of entries added to the group by aggregation hash before a recomputation of average entry size is performed."), HIVE_VECTORIZATION_GROUPBY_MAXENTRIES("hive.vectorized.groupby.maxentries", 1000000, Modified: hive/branches/spark-new/common/src/test/org/apache/hadoop/hive/common/type/TestHiveChar.java URL: http://svn.apache.org/viewvc/hive/branches/spark-new/common/src/test/org/apache/hadoop/hive/common/type/TestHiveChar.java?rev=1629544&r1=1629543&r2=1629544&view=diff ============================================================================== --- hive/branches/spark-new/common/src/test/org/apache/hadoop/hive/common/type/TestHiveChar.java (original) +++ hive/branches/spark-new/common/src/test/org/apache/hadoop/hive/common/type/TestHiveChar.java Sun Oct 5 22:26:43 2014 @@ -18,10 +18,19 @@ package org.apache.hadoop.hive.common.type; -import junit.framework.TestCase; - -public class TestHiveChar extends TestCase { - +import com.google.code.tempusfugit.concurrency.annotations.*; +import com.google.code.tempusfugit.concurrency.*; +import org.junit.*; +import static org.junit.Assert.*; + +public class TestHiveChar { + + @Rule public ConcurrentRule concurrentRule = new ConcurrentRule(); + @Rule public RepeatingRule repeatingRule = new RepeatingRule(); + + @Test + @Concurrent(count=4) + @Repeating(repetition=100) public void testBasic() { HiveChar hc = new HiveChar("abc", 10); assertEquals("abc ", hc.toString()); @@ -47,6 +56,9 @@ public class TestHiveChar extends TestCa assertEquals(3, hc.getCharacterLength()); } + @Test + @Concurrent(count=4) + @Repeating(repetition=100) public void testStringLength() { HiveChar hc = new HiveChar(); @@ -60,6 +72,9 @@ public class TestHiveChar extends TestCa assertEquals("0123456789 ", hc.toString()); } + @Test + @Concurrent(count=4) + @Repeating(repetition=100) public void testComparison() { HiveChar hc1 = new HiveChar(); HiveChar hc2 = new HiveChar(); Modified: hive/branches/spark-new/common/src/test/org/apache/hadoop/hive/common/type/TestHiveDecimal.java URL: http://svn.apache.org/viewvc/hive/branches/spark-new/common/src/test/org/apache/hadoop/hive/common/type/TestHiveDecimal.java?rev=1629544&r1=1629543&r2=1629544&view=diff ============================================================================== --- hive/branches/spark-new/common/src/test/org/apache/hadoop/hive/common/type/TestHiveDecimal.java (original) +++ hive/branches/spark-new/common/src/test/org/apache/hadoop/hive/common/type/TestHiveDecimal.java Sun Oct 5 22:26:43 2014 @@ -20,12 +20,19 @@ package org.apache.hadoop.hive.common.ty import java.math.BigDecimal; 
import java.math.BigInteger; -import org.junit.Assert; -import org.junit.Test; +import com.google.code.tempusfugit.concurrency.annotations.*; +import com.google.code.tempusfugit.concurrency.*; +import org.junit.*; +import static org.junit.Assert.*; public class TestHiveDecimal { + @Rule public ConcurrentRule concurrentRule = new ConcurrentRule(); + @Rule public RepeatingRule repeatingRule = new RepeatingRule(); + @Test + @Concurrent(count=4) + @Repeating(repetition=100) public void testPrecisionScaleEnforcement() { String decStr = "1786135888657847525803324040144343378.09799306448796128931113691624"; HiveDecimal dec = HiveDecimal.create(decStr); @@ -82,6 +89,8 @@ public class TestHiveDecimal { } @Test + @Concurrent(count=4) + @Repeating(repetition=100) public void testMultiply() { HiveDecimal dec1 = HiveDecimal.create("0.00001786135888657847525803"); HiveDecimal dec2 = HiveDecimal.create("3.0000123456789"); @@ -105,6 +114,8 @@ public class TestHiveDecimal { } @Test + @Concurrent(count=4) + @Repeating(repetition=100) public void testPow() { HiveDecimal dec = HiveDecimal.create("3.00001415926"); Assert.assertEquals(dec.pow(2), dec.multiply(dec)); @@ -118,6 +129,8 @@ public class TestHiveDecimal { } @Test + @Concurrent(count=4) + @Repeating(repetition=100) public void testDivide() { HiveDecimal dec1 = HiveDecimal.create("3.14"); HiveDecimal dec2 = HiveDecimal.create("3"); @@ -133,6 +146,8 @@ public class TestHiveDecimal { } @Test + @Concurrent(count=4) + @Repeating(repetition=100) public void testPlus() { HiveDecimal dec1 = HiveDecimal.create("99999999999999999999999999999999999"); HiveDecimal dec2 = HiveDecimal.create("1"); @@ -145,6 +160,8 @@ public class TestHiveDecimal { @Test + @Concurrent(count=4) + @Repeating(repetition=100) public void testSubtract() { HiveDecimal dec1 = HiveDecimal.create("3.140"); HiveDecimal dec2 = HiveDecimal.create("1.00"); @@ -152,6 +169,8 @@ public class TestHiveDecimal { } @Test + @Concurrent(count=4) + @Repeating(repetition=100) public void testPosMod() { HiveDecimal hd1 = HiveDecimal.create("-100.91"); HiveDecimal hd2 = HiveDecimal.create("9.8"); @@ -160,12 +179,16 @@ public class TestHiveDecimal { } @Test + @Concurrent(count=4) + @Repeating(repetition=100) public void testHashCode() { Assert.assertEquals(HiveDecimal.create("9").hashCode(), HiveDecimal.create("9.00").hashCode()); Assert.assertEquals(HiveDecimal.create("0").hashCode(), HiveDecimal.create("0.00").hashCode()); } @Test + @Concurrent(count=4) + @Repeating(repetition=100) public void testException() { HiveDecimal dec = HiveDecimal.create("3.1415.926"); Assert.assertNull(dec); @@ -174,6 +197,8 @@ public class TestHiveDecimal { } @Test + @Concurrent(count=4) + @Repeating(repetition=100) public void testBinaryConversion() { testBinaryConversion("0.00"); testBinaryConversion("-12.25"); Modified: hive/branches/spark-new/common/src/test/org/apache/hadoop/hive/common/type/TestHiveVarchar.java URL: http://svn.apache.org/viewvc/hive/branches/spark-new/common/src/test/org/apache/hadoop/hive/common/type/TestHiveVarchar.java?rev=1629544&r1=1629543&r2=1629544&view=diff ============================================================================== --- hive/branches/spark-new/common/src/test/org/apache/hadoop/hive/common/type/TestHiveVarchar.java (original) +++ hive/branches/spark-new/common/src/test/org/apache/hadoop/hive/common/type/TestHiveVarchar.java Sun Oct 5 22:26:43 2014 @@ -17,7 +17,6 @@ */ package org.apache.hadoop.hive.common.type; -import junit.framework.TestCase; import 
org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.common.LogUtils; @@ -28,8 +27,15 @@ import java.io.IOException; import java.io.InputStreamReader; import java.util.Random; +import com.google.code.tempusfugit.concurrency.annotations.*; +import com.google.code.tempusfugit.concurrency.*; +import org.junit.*; +import static org.junit.Assert.*; + +public class TestHiveVarchar { + @Rule public ConcurrentRule concurrentRule = new ConcurrentRule(); + @Rule public RepeatingRule repeatingRule = new RepeatingRule(); -public class TestHiveVarchar extends TestCase { public TestHiveVarchar() { super(); } @@ -65,6 +71,9 @@ public class TestHiveVarchar extends Tes } } + @Test + @Concurrent(count=4) + @Repeating(repetition=100) public void testStringLength() throws Exception { int strLen = 20; int[] lengths = { 15, 20, 25 }; @@ -124,6 +133,9 @@ public class TestHiveVarchar extends Tes assertEquals(5, vc1.getCharacterLength()); } + @Test + @Concurrent(count=4) + @Repeating(repetition=100) public void testComparison() throws Exception { HiveVarchar hc1 = new HiveVarchar("abcd", 20); HiveVarchar hc2 = new HiveVarchar("abcd", 20); Modified: hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udaf/example/UDAFExampleAvg.java URL: http://svn.apache.org/viewvc/hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udaf/example/UDAFExampleAvg.java?rev=1629544&r1=1629543&r2=1629544&view=diff ============================================================================== --- hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udaf/example/UDAFExampleAvg.java (original) +++ hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udaf/example/UDAFExampleAvg.java Sun Oct 5 22:26:43 2014 @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.contrib.udaf.example; +import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDAF; import org.apache.hadoop.hive.ql.exec.UDAFEvaluator; @@ -32,6 +33,8 @@ import org.apache.hadoop.hive.ql.exec.UD * more efficient. * */ +@Description(name = "example_avg", +value = "_FUNC_(col) - Example UDAF to compute average") public final class UDAFExampleAvg extends UDAF { /** Modified: hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udaf/example/UDAFExampleGroupConcat.java URL: http://svn.apache.org/viewvc/hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udaf/example/UDAFExampleGroupConcat.java?rev=1629544&r1=1629543&r2=1629544&view=diff ============================================================================== --- hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udaf/example/UDAFExampleGroupConcat.java (original) +++ hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udaf/example/UDAFExampleGroupConcat.java Sun Oct 5 22:26:43 2014 @@ -21,6 +21,7 @@ package org.apache.hadoop.hive.contrib.u import java.util.ArrayList; import java.util.Collections; +import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDAF; import org.apache.hadoop.hive.ql.exec.UDAFEvaluator; @@ -35,6 +36,8 @@ import org.apache.hadoop.hive.ql.exec.UD * implement built-in aggregation functions, which are harder to program but * more efficient. 
*/ +@Description(name = "example_group_concat", +value = "_FUNC_(col) - Example UDAF that concatenates all arguments from different rows into a single string") public class UDAFExampleGroupConcat extends UDAF { /** Modified: hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udaf/example/UDAFExampleMaxN.java URL: http://svn.apache.org/viewvc/hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udaf/example/UDAFExampleMaxN.java?rev=1629544&r1=1629543&r2=1629544&view=diff ============================================================================== --- hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udaf/example/UDAFExampleMaxN.java (original) +++ hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udaf/example/UDAFExampleMaxN.java Sun Oct 5 22:26:43 2014 @@ -19,11 +19,13 @@ package org.apache.hadoop.hive.contrib.udaf.example; +import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDAF; /** * Returns the max N double values. */ +@Description(name = "example_max_n", value = "_FUNC_(expr) - Example UDAF that returns the max N double values") public class UDAFExampleMaxN extends UDAF { /** Modified: hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udaf/example/UDAFExampleMinN.java URL: http://svn.apache.org/viewvc/hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udaf/example/UDAFExampleMinN.java?rev=1629544&r1=1629543&r2=1629544&view=diff ============================================================================== --- hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udaf/example/UDAFExampleMinN.java (original) +++ hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udaf/example/UDAFExampleMinN.java Sun Oct 5 22:26:43 2014 @@ -19,11 +19,13 @@ package org.apache.hadoop.hive.contrib.udaf.example; +import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDAF; /** * Returns the min N double values. */ +@Description(name = "example_min_n", value = "_FUNC_(expr) - Example UDAF that returns the min N double values") public class UDAFExampleMinN extends UDAF{ /** Modified: hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udf/example/UDFExampleAdd.java URL: http://svn.apache.org/viewvc/hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udf/example/UDFExampleAdd.java?rev=1629544&r1=1629543&r2=1629544&view=diff ============================================================================== --- hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udf/example/UDFExampleAdd.java (original) +++ hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udf/example/UDFExampleAdd.java Sun Oct 5 22:26:43 2014 @@ -17,12 +17,14 @@ */ package org.apache.hadoop.hive.contrib.udf.example; +import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; /** * UDFExampleAdd. * */ +@Description(name = "example_add", value = "_FUNC_(expr) - Example UDAF that returns the sum") public class UDFExampleAdd extends UDF { public Integer evaluate(Integer... 
a) { Modified: hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udf/example/UDFExampleArraySum.java URL: http://svn.apache.org/viewvc/hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udf/example/UDFExampleArraySum.java?rev=1629544&r1=1629543&r2=1629544&view=diff ============================================================================== --- hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udf/example/UDFExampleArraySum.java (original) +++ hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udf/example/UDFExampleArraySum.java Sun Oct 5 22:26:43 2014 @@ -19,12 +19,14 @@ package org.apache.hadoop.hive.contrib.u import java.util.List; +import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; /** * UDFExampleArraySum. * */ +@Description(name = "example_arraysum", value = "_FUNC_(expr) - Example UDAF that returns the sum") public class UDFExampleArraySum extends UDF { public Double evaluate(List<Double> a) { Modified: hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udf/example/UDFExampleFormat.java URL: http://svn.apache.org/viewvc/hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udf/example/UDFExampleFormat.java?rev=1629544&r1=1629543&r2=1629544&view=diff ============================================================================== --- hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udf/example/UDFExampleFormat.java (original) +++ hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udf/example/UDFExampleFormat.java Sun Oct 5 22:26:43 2014 @@ -17,12 +17,14 @@ */ package org.apache.hadoop.hive.contrib.udf.example; +import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; /** * UDFExampleFormat. * */ +@Description(name = "example_format", value = "_FUNC_(expr) - Example UDAF that returns formated String") public class UDFExampleFormat extends UDF { public String evaluate(String format, Object... args) { Modified: hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udf/example/UDFExampleMapConcat.java URL: http://svn.apache.org/viewvc/hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udf/example/UDFExampleMapConcat.java?rev=1629544&r1=1629543&r2=1629544&view=diff ============================================================================== --- hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udf/example/UDFExampleMapConcat.java (original) +++ hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udf/example/UDFExampleMapConcat.java Sun Oct 5 22:26:43 2014 @@ -21,12 +21,15 @@ import java.util.ArrayList; import java.util.Collections; import java.util.Map; +import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; /** * UDFExampleMapConcat. 
* */ +@Description(name = "example_mapconcat", +value = "_FUNC_(expr) - Example UDAF that returns contents of Map as a formated String") public class UDFExampleMapConcat extends UDF { public String evaluate(Map<String, String> a) { Modified: hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udf/example/UDFExampleStructPrint.java URL: http://svn.apache.org/viewvc/hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udf/example/UDFExampleStructPrint.java?rev=1629544&r1=1629543&r2=1629544&view=diff ============================================================================== --- hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udf/example/UDFExampleStructPrint.java (original) +++ hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udf/example/UDFExampleStructPrint.java Sun Oct 5 22:26:43 2014 @@ -19,12 +19,15 @@ package org.apache.hadoop.hive.contrib.u import java.util.List; +import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; /** * UDFExampleStructPrint. * */ +@Description(name = "example_structprint", +value = "_FUNC_(obj) - Example UDAF that returns contents of an object") public class UDFExampleStructPrint extends UDF { public String evaluate(Object a) { Modified: hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udtf/example/GenericUDTFCount2.java URL: http://svn.apache.org/viewvc/hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udtf/example/GenericUDTFCount2.java?rev=1629544&r1=1629543&r2=1629544&view=diff ============================================================================== --- hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udtf/example/GenericUDTFCount2.java (original) +++ hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udtf/example/GenericUDTFCount2.java Sun Oct 5 22:26:43 2014 @@ -20,6 +20,7 @@ package org.apache.hadoop.hive.contrib.u import java.util.ArrayList; +import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF; @@ -34,6 +35,8 @@ import org.apache.hadoop.hive.serde2.obj * to test outputting of rows on close with lateral view. 
* */ +@Description(name = "udtfCount2", +value = "_FUNC_(col) - UDF outputs the number of rows seen, twice.") public class GenericUDTFCount2 extends GenericUDTF { private transient Integer count = Integer.valueOf(0); Modified: hive/branches/spark-new/contrib/src/test/results/clientpositive/udf_example_add.q.out URL: http://svn.apache.org/viewvc/hive/branches/spark-new/contrib/src/test/results/clientpositive/udf_example_add.q.out?rev=1629544&r1=1629543&r2=1629544&view=diff ============================================================================== --- hive/branches/spark-new/contrib/src/test/results/clientpositive/udf_example_add.q.out (original) +++ hive/branches/spark-new/contrib/src/test/results/clientpositive/udf_example_add.q.out Sun Oct 5 22:26:43 2014 @@ -25,36 +25,24 @@ SELECT example_add(1, 2), FROM src LIMIT 1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 3 (type: int), 6 (type: int), 10 (type: int), 3.3000000000000003 (type: double), 6.6 (type: double), 11.0 (type: double), 10.4 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 500 Data size: 22000 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 1 - Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: 1 Processor Tree: - ListSink + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 3 (type: int), 6 (type: int), 10 (type: int), 3.3000000000000003 (type: double), 6.6 (type: double), 11.0 (type: double), 10.4 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 500 Data size: 22000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + ListSink PREHOOK: query: SELECT example_add(1, 2), example_add(1, 2, 3), Modified: hive/branches/spark-new/contrib/src/test/results/clientpositive/udf_example_format.q.out URL: http://svn.apache.org/viewvc/hive/branches/spark-new/contrib/src/test/results/clientpositive/udf_example_format.q.out?rev=1629544&r1=1629543&r2=1629544&view=diff ============================================================================== --- hive/branches/spark-new/contrib/src/test/results/clientpositive/udf_example_format.q.out (original) +++ hive/branches/spark-new/contrib/src/test/results/clientpositive/udf_example_format.q.out Sun Oct 5 22:26:43 2014 @@ -19,36 +19,24 @@ SELECT example_format("abc"), FROM src LIMIT 1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column 
stats: COMPLETE - Select Operator - expressions: 'abc' (type: string), '1.1' (type: string), '1.1 1.200000e+00' (type: string), 'a 12 10' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 500 Data size: 182500 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 1 - Statistics: Num rows: 1 Data size: 365 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 365 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: 1 Processor Tree: - ListSink + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'abc' (type: string), '1.1' (type: string), '1.1 1.200000e+00' (type: string), 'a 12 10' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 182500 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 365 Basic stats: COMPLETE Column stats: COMPLETE + ListSink PREHOOK: query: SELECT example_format("abc"), example_format("%1$s", 1.1), Modified: hive/branches/spark-new/data/files/parquet_types.txt URL: http://svn.apache.org/viewvc/hive/branches/spark-new/data/files/parquet_types.txt?rev=1629544&r1=1629543&r2=1629544&view=diff ============================================================================== --- hive/branches/spark-new/data/files/parquet_types.txt (original) +++ hive/branches/spark-new/data/files/parquet_types.txt Sun Oct 5 22:26:43 2014 @@ -1,21 +1,21 @@ -100|1|1|1.0|0.0|abc|2011-01-01 01:01:01.111111111|a |a -101|2|2|1.1|0.3|def|2012-02-02 02:02:02.222222222|ab |ab -102|3|3|1.2|0.6|ghi|2013-03-03 03:03:03.333333333|abc|abc -103|1|4|1.3|0.9|jkl|2014-04-04 04:04:04.444444444|abcd|abcd -104|2|5|1.4|1.2|mno|2015-05-05 05:05:05.555555555|abcde|abcde -105|3|1|1.0|1.5|pqr|2016-06-06 06:06:06.666666666|abcdef|abcdef -106|1|2|1.1|1.8|stu|2017-07-07 07:07:07.777777777|abcdefg|abcdefg -107|2|3|1.2|2.1|vwx|2018-08-08 08:08:08.888888888|bcdefg|abcdefgh -108|3|4|1.3|2.4|yza|2019-09-09 09:09:09.999999999|cdefg|abcdefghijklmnop -109|1|5|1.4|2.7|bcd|2020-10-10 10:10:10.101010101|klmno|abcdedef -110|2|1|1.0|3.0|efg|2021-11-11 11:11:11.111111111|pqrst|abcdede -111|3|2|1.1|3.3|hij|2022-12-12 12:12:12.121212121|nopqr|abcded -112|1|3|1.2|3.6|klm|2023-01-02 13:13:13.131313131|opqrs|abcdd -113|2|4|1.3|3.9|nop|2024-02-02 14:14:14.141414141|pqrst|abc -114|3|5|1.4|4.2|qrs|2025-03-03 15:15:15.151515151|qrstu|b -115|1|1|1.0|4.5|tuv|2026-04-04 16:16:16.161616161|rstuv|abcded -116|2|2|1.1|4.8|wxy|2027-05-05 17:17:17.171717171|stuvw|abcded -117|3|3|1.2|5.1|zab|2028-06-06 18:18:18.181818181|tuvwx|abcded -118|1|4|1.3|5.4|cde|2029-07-07 19:19:19.191919191|uvwzy|abcdede -119|2|5|1.4|5.7|fgh|2030-08-08 20:20:20.202020202|vwxyz|abcdede -120|3|1|1.0|6.0|ijk|2031-09-09 21:21:21.212121212|wxyza|abcde +100|1|1|1.0|0.0|abc|2011-01-01 01:01:01.111111111|a |a |k1:v1|101,200|10,abc +101|2|2|1.1|0.3|def|2012-02-02 02:02:02.222222222|ab |ab |k2:v2|102,200|10,def +102|3|3|1.2|0.6|ghi|2013-03-03 03:03:03.333333333|abc|abc|k3:v3|103,200|10,ghi +103|1|4|1.3|0.9|jkl|2014-04-04 04:04:04.444444444|abcd|abcd|k4:v4|104,200|10,jkl +104|2|5|1.4|1.2|mno|2015-05-05 
05:05:05.555555555|abcde|abcde|k5:v5|105,200|10,mno +105|3|1|1.0|1.5|pqr|2016-06-06 06:06:06.666666666|abcdef|abcdef|k6:v6|106,200|10,pqr +106|1|2|1.1|1.8|stu|2017-07-07 07:07:07.777777777|abcdefg|abcdefg|k7:v7|107,200|10,stu +107|2|3|1.2|2.1|vwx|2018-08-08 08:08:08.888888888|bcdefg|abcdefgh|k8:v8|108,200|10,vwx +108|3|4|1.3|2.4|yza|2019-09-09 09:09:09.999999999|cdefg|abcdefghijklmnop|k9:v9|109,200|10,yza +109|1|5|1.4|2.7|bcd|2020-10-10 10:10:10.101010101|klmno|abcdedef|k10:v10|110,200|10,bcd +110|2|1|1.0|3.0|efg|2021-11-11 11:11:11.111111111|pqrst|abcdede|k11:v11|111,200|10,efg +111|3|2|1.1|3.3|hij|2022-12-12 12:12:12.121212121|nopqr|abcded|k12:v12|112,200|10,hij +112|1|3|1.2|3.6|klm|2023-01-02 13:13:13.131313131|opqrs|abcdd|k13:v13|113,200|10,klm +113|2|4|1.3|3.9|nop|2024-02-02 14:14:14.141414141|pqrst|abc|k14:v14|114,200|10,nop +114|3|5|1.4|4.2|qrs|2025-03-03 15:15:15.151515151|qrstu|b|k15:v15|115,200|10,qrs +115|1|1|1.0|4.5|qrs|2026-04-04 16:16:16.161616161|rstuv|abcded|k16:v16|116,200|10,qrs +116|2|2|1.1|4.8|wxy|2027-05-05 17:17:17.171717171|stuvw|abcded|k17:v17|117,200|10,wxy +117|3|3|1.2|5.1|zab|2028-06-06 18:18:18.181818181|tuvwx|abcded|k18:v18|118,200|10,zab +118|1|4|1.3|5.4|cde|2029-07-07 19:19:19.191919191|uvwzy|abcdede|k19:v19|119,200|10,cde +119|2|5|1.4|5.7|fgh|2030-08-08 20:20:20.202020202|vwxyz|abcdede|k20:v20|120,200|10,fgh +120|3|1|1.0|6.0|ijk|2031-09-09 21:21:21.212121212|wxyza|abcde|k21:v21|121,200|10,ijk Propchange: hive/branches/spark-new/hbase-handler/pom.xml ------------------------------------------------------------------------------ Merged /hive/branches/cbo/hbase-handler/pom.xml:r1605012-1627125 Merged /hive/trunk/hbase-handler/pom.xml:r1625359-1629477 Modified: hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/DefaultHBaseKeyFactory.java URL: http://svn.apache.org/viewvc/hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/DefaultHBaseKeyFactory.java?rev=1629544&r1=1629543&r2=1629544&view=diff ============================================================================== --- hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/DefaultHBaseKeyFactory.java (original) +++ hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/DefaultHBaseKeyFactory.java Sun Oct 5 22:26:43 2014 @@ -18,6 +18,9 @@ package org.apache.hadoop.hive.hbase; +import java.io.IOException; +import java.util.Properties; + import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.lazy.LazyFactory; import org.apache.hadoop.hive.serde2.lazy.LazyObjectBase; @@ -26,9 +29,6 @@ import org.apache.hadoop.hive.serde2.obj import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import java.io.IOException; -import java.util.Properties; - public class DefaultHBaseKeyFactory extends AbstractHBaseKeyFactory implements HBaseKeyFactory { protected LazySimpleSerDe.SerDeParameters serdeParams; Modified: hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java URL: http://svn.apache.org/viewvc/hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java?rev=1629544&r1=1629543&r2=1629544&view=diff ============================================================================== --- hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java (original) +++ 
hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java Sun Oct 5 22:26:43 2014 @@ -53,6 +53,7 @@ public class HBaseSerDe extends Abstract public static final String HBASE_COMPOSITE_KEY_CLASS = "hbase.composite.key.class"; public static final String HBASE_COMPOSITE_KEY_TYPES = "hbase.composite.key.types"; public static final String HBASE_COMPOSITE_KEY_FACTORY = "hbase.composite.key.factory"; + public static final String HBASE_STRUCT_SERIALIZER_CLASS = "hbase.struct.serialization.class"; public static final String HBASE_SCAN_CACHE = "hbase.scan.cache"; public static final String HBASE_SCAN_CACHEBLOCKS = "hbase.scan.cacheblock"; public static final String HBASE_SCAN_BATCH = "hbase.scan.batch"; @@ -98,7 +99,7 @@ public class HBaseSerDe extends Abstract cachedHBaseRow = new LazyHBaseRow( (LazySimpleStructObjectInspector) cachedObjectInspector, - serdeParams.getKeyIndex(), serdeParams.getKeyFactory()); + serdeParams.getKeyIndex(), serdeParams.getKeyFactory(), serdeParams.getValueFactories()); serializer = new HBaseRowSerializer(serdeParams); Modified: hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeHelper.java URL: http://svn.apache.org/viewvc/hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeHelper.java?rev=1629544&r1=1629543&r2=1629544&view=diff ============================================================================== --- hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeHelper.java (original) +++ hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeHelper.java Sun Oct 5 22:26:43 2014 @@ -41,6 +41,10 @@ import org.apache.hadoop.hive.serde.serd import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.avro.AvroObjectInspectorGenerator; import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils; +import org.apache.hadoop.hive.serde2.lazy.LazyFactory; +import org.apache.hadoop.hive.serde2.lazy.LazyObjectBase; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.util.StringUtils; @@ -371,6 +375,19 @@ public class HBaseSerDeHelper { } /** + * Create the {@link LazyObjectBase lazy field} + * */ + public static LazyObjectBase createLazyField(ColumnMapping[] columnMappings, int fieldID, + ObjectInspector inspector) { + ColumnMapping colMap = columnMappings[fieldID]; + if (colMap.getQualifierName() == null && !colMap.isHbaseRowKey()) { + // a column family + return new LazyHBaseCellMap((LazyMapObjectInspector) inspector); + } + return LazyFactory.createLazyObject(inspector, colMap.getBinaryStorage().get(0)); + } + + /** * Auto-generates the key struct for composite keys * * @param compositeKeyParts map of composite key part name to its type. 
Usually this would be Modified: hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java URL: http://svn.apache.org/viewvc/hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java?rev=1629544&r1=1629543&r2=1629544&view=diff ============================================================================== --- hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java (original) +++ hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java Sun Oct 5 22:26:43 2014 @@ -29,6 +29,7 @@ import org.apache.hadoop.hive.hbase.Colu import org.apache.hadoop.hive.hbase.struct.AvroHBaseValueFactory; import org.apache.hadoop.hive.hbase.struct.DefaultHBaseValueFactory; import org.apache.hadoop.hive.hbase.struct.HBaseValueFactory; +import org.apache.hadoop.hive.hbase.struct.StructHBaseValueFactory; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils; @@ -204,11 +205,21 @@ public class HBaseSerDeParameters { for (int i = 0; i < columnMappings.size(); i++) { String serType = getSerializationType(conf, tbl, columnMappings.getColumnsMapping()[i]); - if (serType != null && serType.equals(AVRO_SERIALIZATION_TYPE)) { + if (AVRO_SERIALIZATION_TYPE.equals(serType)) { Schema schema = getSchema(conf, tbl, columnMappings.getColumnsMapping()[i]); - valueFactories.add(new AvroHBaseValueFactory(schema)); + valueFactories.add(new AvroHBaseValueFactory(i, schema)); + } else if (STRUCT_SERIALIZATION_TYPE.equals(serType)) { + String structValueClassName = tbl.getProperty(HBaseSerDe.HBASE_STRUCT_SERIALIZER_CLASS); + + if (structValueClassName == null) { + throw new IllegalArgumentException(HBaseSerDe.HBASE_STRUCT_SERIALIZER_CLASS + + " must be set for hbase columns of type [" + STRUCT_SERIALIZATION_TYPE + "]"); + } + + Class<?> structValueClass = job.getClassByName(structValueClassName); + valueFactories.add(new StructHBaseValueFactory(i, structValueClass)); } else { - valueFactories.add(new DefaultHBaseValueFactory()); + valueFactories.add(new DefaultHBaseValueFactory(i)); } } } catch (Exception e) { Modified: hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseRow.java URL: http://svn.apache.org/viewvc/hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseRow.java?rev=1629544&r1=1629543&r2=1629544&view=diff ============================================================================== --- hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseRow.java (original) +++ hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseRow.java Sun Oct 5 22:26:43 2014 @@ -20,15 +20,15 @@ package org.apache.hadoop.hive.hbase; import java.util.ArrayList; import java.util.Arrays; +import java.util.List; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hive.hbase.ColumnMappings.ColumnMapping; +import org.apache.hadoop.hive.hbase.struct.HBaseValueFactory; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; -import org.apache.hadoop.hive.serde2.lazy.LazyFactory; import org.apache.hadoop.hive.serde2.lazy.LazyObjectBase; import org.apache.hadoop.hive.serde2.lazy.LazyStruct; -import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector; import 
org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; @@ -47,18 +47,21 @@ public class LazyHBaseRow extends LazySt private final int iKey; private final HBaseKeyFactory keyFactory; + private final List<HBaseValueFactory> valueFactories; public LazyHBaseRow(LazySimpleStructObjectInspector oi) { - this(oi, -1, null); + this(oi, -1, null, null); } /** * Construct a LazyHBaseRow object with the ObjectInspector. */ - public LazyHBaseRow(LazySimpleStructObjectInspector oi, int iKey, HBaseKeyFactory keyFactory) { + public LazyHBaseRow(LazySimpleStructObjectInspector oi, int iKey, HBaseKeyFactory keyFactory, + List<HBaseValueFactory> valueFactories) { super(oi); this.iKey = iKey; this.keyFactory = keyFactory; + this.valueFactories = valueFactories; } /** @@ -76,13 +79,14 @@ public class LazyHBaseRow extends LazySt if (fieldID == iKey) { return keyFactory.createKey(fieldRef.getFieldObjectInspector()); } - ColumnMapping colMap = columnsMapping[fieldID]; - if (colMap.qualifierName == null && !colMap.hbaseRowKey) { - // a column family - return new LazyHBaseCellMap((LazyMapObjectInspector) fieldRef.getFieldObjectInspector()); + + if (valueFactories != null) { + return valueFactories.get(fieldID).createValueObject(fieldRef.getFieldObjectInspector()); } - return LazyFactory.createLazyObject(fieldRef.getFieldObjectInspector(), - colMap.binaryStorage.get(0)); + + // fallback to default + return HBaseSerDeHelper.createLazyField(columnsMapping, fieldID, + fieldRef.getFieldObjectInspector()); } /** Modified: hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/AvroHBaseValueFactory.java URL: http://svn.apache.org/viewvc/hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/AvroHBaseValueFactory.java?rev=1629544&r1=1629543&r2=1629544&view=diff ============================================================================== --- hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/AvroHBaseValueFactory.java (original) +++ hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/AvroHBaseValueFactory.java Sun Oct 5 22:26:43 2014 @@ -48,7 +48,8 @@ public class AvroHBaseValueFactory exten * * @param schema the associated {@link Schema schema} * */ - public AvroHBaseValueFactory(Schema schema) { + public AvroHBaseValueFactory(int fieldID, Schema schema) { + super(fieldID); this.schema = schema; } Modified: hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/DefaultHBaseValueFactory.java URL: http://svn.apache.org/viewvc/hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/DefaultHBaseValueFactory.java?rev=1629544&r1=1629543&r2=1629544&view=diff ============================================================================== --- hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/DefaultHBaseValueFactory.java (original) +++ hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/DefaultHBaseValueFactory.java Sun Oct 5 22:26:43 2014 @@ -21,9 +21,12 @@ import java.io.IOException; import java.util.Properties; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.hbase.ColumnMappings; +import org.apache.hadoop.hive.hbase.HBaseSerDeHelper; import org.apache.hadoop.hive.hbase.HBaseSerDeParameters; import org.apache.hadoop.hive.serde2.SerDeException; import 
org.apache.hadoop.hive.serde2.lazy.LazyFactory; +import org.apache.hadoop.hive.serde2.lazy.LazyObjectBase; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; @@ -35,15 +38,23 @@ import org.apache.hadoop.hive.serde2.typ public class DefaultHBaseValueFactory implements HBaseValueFactory{ protected LazySimpleSerDe.SerDeParameters serdeParams; + protected ColumnMappings columnMappings; protected HBaseSerDeParameters hbaseParams; protected Properties properties; protected Configuration conf; + private int fieldID; + + public DefaultHBaseValueFactory(int fieldID) { + this.fieldID = fieldID; + } + @Override public void init(HBaseSerDeParameters hbaseParams, Configuration conf, Properties properties) throws SerDeException { this.hbaseParams = hbaseParams; this.serdeParams = hbaseParams.getSerdeParams(); + this.columnMappings = hbaseParams.getColumnMappings(); this.properties = properties; this.conf = conf; } @@ -55,6 +66,11 @@ public class DefaultHBaseValueFactory im 1, serdeParams.getNullSequence(), serdeParams.isEscaped(), serdeParams.getEscapeChar()); } + @Override + public LazyObjectBase createValueObject(ObjectInspector inspector) throws SerDeException { + return HBaseSerDeHelper.createLazyField(columnMappings.getColumnsMapping(), fieldID, inspector); + } + @Override public byte[] serializeValue(Object object, StructField field) throws IOException { Modified: hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/HBaseValueFactory.java URL: http://svn.apache.org/viewvc/hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/HBaseValueFactory.java?rev=1629544&r1=1629543&r2=1629544&view=diff ============================================================================== --- hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/HBaseValueFactory.java (original) +++ hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/HBaseValueFactory.java Sun Oct 5 22:26:43 2014 @@ -22,8 +22,10 @@ import java.io.IOException; import java.util.Properties; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.hbase.HBaseKeyFactory; import org.apache.hadoop.hive.hbase.HBaseSerDeParameters; import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.lazy.LazyObjectBase; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; @@ -53,6 +55,13 @@ public interface HBaseValueFactory { ObjectInspector createValueObjectInspector(TypeInfo type) throws SerDeException; /** + * create custom object for hbase value + * + * @param inspector OI create by {@link HBaseKeyFactory#createKeyObjectInspector} + */ + LazyObjectBase createValueObject(ObjectInspector inspector) throws SerDeException; + + /** * Serialize the given hive object * * @param object the object to be serialized Modified: hive/branches/spark-new/hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestHBaseSerDe.java URL: http://svn.apache.org/viewvc/hive/branches/spark-new/hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestHBaseSerDe.java?rev=1629544&r1=1629543&r2=1629544&view=diff ============================================================================== --- 
hive/branches/spark-new/hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestHBaseSerDe.java (original) +++ hive/branches/spark-new/hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestHBaseSerDe.java Sun Oct 5 22:26:43 2014 @@ -27,6 +27,7 @@ import java.util.List; import java.util.Map; import java.util.Properties; +import junit.framework.Assert; import junit.framework.TestCase; import org.apache.avro.Schema; @@ -61,6 +62,7 @@ import org.apache.hadoop.hive.serde2.io. import org.apache.hadoop.hive.serde2.io.ShortWritable; import org.apache.hadoop.hive.serde2.lazy.LazyPrimitive; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; +import org.apache.hadoop.hive.serde2.lazy.LazyStruct; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.io.BooleanWritable; @@ -135,6 +137,27 @@ public class TestHBaseSerDe extends Test " ]\n" + "}"; + private static final String EXPECTED_DESERIALIZED_AVRO_STRING = + "{\"key\":\"test-row1\",\"cola_avro\":{\"arecord\":{\"int1\":42,\"boolean1\":true," + + "\"long1\":42432234234}}}"; + + private static final String EXPECTED_DESERIALIZED_AVRO_STRING_2 = + "{\"key\":\"test-row1\"," + + "\"cola_avro\":{\"employeename\":\"Avro Employee1\"," + + "\"employeeid\":11111,\"age\":25,\"gender\":\"FEMALE\"," + + "\"contactinfo\":{\"address\":[{\"address1\":\"Avro First Address1\",\"address2\":" + + "\"Avro Second Address1\",\"city\":\"Avro City1\",\"zipcode\":123456,\"county\":" + + "{0:{\"areacode\":999,\"number\":1234567890}},\"aliases\":null,\"metadata\":" + + "{\"testkey\":\"testvalue\"}},{\"address1\":\"Avro First Address1\",\"address2\":" + + "\"Avro Second Address1\",\"city\":\"Avro City1\",\"zipcode\":123456,\"county\":" + + "{0:{\"areacode\":999,\"number\":1234567890}},\"aliases\":null,\"metadata\":" + + "{\"testkey\":\"testvalue\"}}],\"homephone\":{\"areacode\":999,\"number\":1234567890}," + + "\"officephone\":{\"areacode\":999,\"number\":1234455555}}}}"; + + private static final String EXPECTED_DESERIALIZED_AVRO_STRING_3 = + "{\"key\":\"test-row1\",\"cola_avro\":{\"arecord\":{\"int1\":42,\"string1\":\"test\"," + + "\"boolean1\":true,\"long1\":42432234234}}}"; + /** * Test the default behavior of the Lazy family of objects and object inspectors. 
*/ @@ -1047,7 +1070,8 @@ public class TestHBaseSerDe extends Test Properties tbl = createPropertiesForHiveAvroSchemaInline(); serDe.initialize(conf, tbl); - deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData); + deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData, + EXPECTED_DESERIALIZED_AVRO_STRING); } private Properties createPropertiesForHiveAvroSchemaInline() { @@ -1092,7 +1116,8 @@ public class TestHBaseSerDe extends Test Properties tbl = createPropertiesForHiveAvroForwardEvolvedSchema(); serDe.initialize(conf, tbl); - deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData); + deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData, + EXPECTED_DESERIALIZED_AVRO_STRING_3); } private Properties createPropertiesForHiveAvroForwardEvolvedSchema() { @@ -1136,7 +1161,8 @@ public class TestHBaseSerDe extends Test Properties tbl = createPropertiesForHiveAvroBackwardEvolvedSchema(); serDe.initialize(conf, tbl); - deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData); + deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData, + EXPECTED_DESERIALIZED_AVRO_STRING); } private Properties createPropertiesForHiveAvroBackwardEvolvedSchema() { @@ -1185,7 +1211,8 @@ public class TestHBaseSerDe extends Test Properties tbl = createPropertiesForHiveAvroSerClass(); serDe.initialize(conf, tbl); - deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData); + deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData, + EXPECTED_DESERIALIZED_AVRO_STRING_2); } private Properties createPropertiesForHiveAvroSerClass() { @@ -1243,7 +1270,8 @@ public class TestHBaseSerDe extends Test Properties tbl = createPropertiesForHiveAvroSchemaUrl(onHDFS); serDe.initialize(conf, tbl); - deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData); + deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData, + EXPECTED_DESERIALIZED_AVRO_STRING); } finally { // Teardown the cluster if (miniDfs != null) { @@ -1298,7 +1326,8 @@ public class TestHBaseSerDe extends Test Properties tbl = createPropertiesForHiveAvroExternalSchema(); serDe.initialize(conf, tbl); - deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData); + deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData, + EXPECTED_DESERIALIZED_AVRO_STRING_2); } private Properties createPropertiesForHiveAvroExternalSchema() { @@ -1389,8 +1418,87 @@ public class TestHBaseSerDe extends Test return tbl; } + public void testHBaseSerDeCustomStructValue() throws IOException, SerDeException { + + byte[] cfa = "cola".getBytes(); + byte[] qualStruct = "struct".getBytes(); + + TestStruct testStruct = new TestStruct("A", "B", "C", false, (byte) 0); + byte[] key = testStruct.getBytes(); + // Data + List<KeyValue> kvs = new ArrayList<KeyValue>(); + + byte[] testData = testStruct.getBytes(); + kvs.add(new KeyValue(key, cfa, qualStruct, testData)); + + Result r = new Result(kvs); + byte[] putKey = testStruct.getBytesWithDelimiters(); + + Put p = new Put(putKey); + + // Post serialization, separators are automatically inserted between different fields in the + // struct. Currently there is not way to disable that. 
So the work around here is to pad the + // data with the separator bytes before creating a "Put" object + p.add(new KeyValue(putKey, cfa, qualStruct, Bytes.padTail(testData, 2))); + + // Create, initialize, and test the SerDe + HBaseSerDe serDe = new HBaseSerDe(); + Configuration conf = new Configuration(); + Properties tbl = createPropertiesForValueStruct(); + serDe.initialize(conf, tbl); + + deserializeAndSerializeHBaseValueStruct(serDe, r, p); + + } + + private Properties createPropertiesForValueStruct() { + Properties tbl = new Properties(); + tbl.setProperty("cola.struct.serialization.type", "struct"); + tbl.setProperty("cola.struct.test.value", "test value"); + tbl.setProperty(HBaseSerDe.HBASE_STRUCT_SERIALIZER_CLASS, + "org.apache.hadoop.hive.hbase.HBaseTestStructSerializer"); + tbl.setProperty(serdeConstants.LIST_COLUMNS, "key,astring"); + tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, + "struct<col1:string,col2:string,col3:string>,struct<col1:string,col2:string,col3:string>"); + tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING, ":key,cola:struct"); + tbl.setProperty(HBaseSerDe.HBASE_COMPOSITE_KEY_CLASS, + "org.apache.hadoop.hive.hbase.HBaseTestCompositeKey"); + return tbl; + } + + private void deserializeAndSerializeHBaseValueStruct(HBaseSerDe serDe, Result r, Put p) + throws SerDeException, IOException { + StructObjectInspector soi = (StructObjectInspector) serDe.getObjectInspector(); + + List<? extends StructField> fieldRefs = soi.getAllStructFieldRefs(); + + Object row = serDe.deserialize(new ResultWritable(r)); + + Object fieldData = null; + for (int j = 0; j < fieldRefs.size(); j++) { + fieldData = soi.getStructFieldData(row, fieldRefs.get(j)); + assertNotNull(fieldData); + if (fieldData instanceof LazyStruct) { + assertEquals(((LazyStruct) fieldData).getField(0).toString(), "A"); + assertEquals(((LazyStruct) fieldData).getField(1).toString(), "B"); + assertEquals(((LazyStruct) fieldData).getField(2).toString(), "C"); + } else { + Assert.fail("fieldData should be an instance of LazyStruct"); + } + } + + assertEquals( + "{\"key\":{\"col1\":\"A\",\"col2\":\"B\",\"col3\":\"C\"},\"astring\":{\"col1\":\"A\",\"col2\":\"B\",\"col3\":\"C\"}}", + SerDeUtils.getJSONString(row, soi)); + + // Now serialize + Put put = ((PutWritable) serDe.serialize(row, soi)).getPut(); + + assertEquals("Serialized put:", p.toString(), put.toString()); + } + private void deserializeAndSerializeHiveAvro(HBaseSerDe serDe, Result r, Put p, - Object[] expectedFieldsData) + Object[] expectedFieldsData, String expectedDeserializedAvroString) throws SerDeException, IOException { StructObjectInspector soi = (StructObjectInspector) serDe.getObjectInspector(); @@ -1403,6 +1511,8 @@ public class TestHBaseSerDe extends Test assertNotNull(fieldData); assertEquals(expectedFieldsData[j], fieldData.toString().trim()); } + + assertEquals(expectedDeserializedAvroString, SerDeUtils.getJSONString(row, soi)); // Now serialize Put put = ((PutWritable) serDe.serialize(row, soi)).getPut(); Modified: hive/branches/spark-new/hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/SemanticAnalysis/HCatSemanticAnalyzer.java URL: http://svn.apache.org/viewvc/hive/branches/spark-new/hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/SemanticAnalysis/HCatSemanticAnalyzer.java?rev=1629544&r1=1629543&r2=1629544&view=diff ============================================================================== --- hive/branches/spark-new/hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/SemanticAnalysis/HCatSemanticAnalyzer.java 
(original) +++ hive/branches/spark-new/hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/SemanticAnalysis/HCatSemanticAnalyzer.java Sun Oct 5 22:26:43 2014 @@ -23,6 +23,7 @@ import java.util.List; import org.apache.hadoop.hive.metastore.api.Database; import org.apache.hadoop.hive.ql.exec.Task; +import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Partition; @@ -104,6 +105,7 @@ public class HCatSemanticAnalyzer extend case HiveParser.TOK_ALTERVIEW_DROPPARTS: case HiveParser.TOK_ALTERVIEW_PROPERTIES: case HiveParser.TOK_ALTERVIEW_RENAME: + case HiveParser.TOK_ALTERVIEW: case HiveParser.TOK_CREATEVIEW: case HiveParser.TOK_DROPVIEW: @@ -359,7 +361,7 @@ public class HCatSemanticAnalyzer extend AlterTableDesc alterTable = work.getAlterTblDesc(); if (alterTable != null) { Table table = hive.getTable(SessionState.get().getCurrentDatabase(), - alterTable.getOldName(), false); + Utilities.getDbTableName(alterTable.getOldName())[1], false); Partition part = null; if (alterTable.getPartSpec() != null) { Modified: hive/branches/spark-new/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatPartitionPublish.java URL: http://svn.apache.org/viewvc/hive/branches/spark-new/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatPartitionPublish.java?rev=1629544&r1=1629543&r2=1629544&view=diff ============================================================================== --- hive/branches/spark-new/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatPartitionPublish.java (original) +++ hive/branches/spark-new/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatPartitionPublish.java Sun Oct 5 22:26:43 2014 @@ -90,6 +90,7 @@ public class TestHCatPartitionPublish { File workDir = handleWorkDir(); conf.set("yarn.scheduler.capacity.root.queues", "default"); conf.set("yarn.scheduler.capacity.root.default.capacity", "100"); + conf.set("fs.pfile.impl", "org.apache.hadoop.fs.ProxyLocalFileSystem"); fs = FileSystem.get(conf); System.setProperty("hadoop.log.dir", new File(workDir, "/logs").getAbsolutePath()); Modified: hive/branches/spark-new/hcatalog/hcatalog-pig-adapter/pom.xml URL: http://svn.apache.org/viewvc/hive/branches/spark-new/hcatalog/hcatalog-pig-adapter/pom.xml?rev=1629544&r1=1629543&r2=1629544&view=diff ============================================================================== --- hive/branches/spark-new/hcatalog/hcatalog-pig-adapter/pom.xml (original) +++ hive/branches/spark-new/hcatalog/hcatalog-pig-adapter/pom.xml Sun Oct 5 22:26:43 2014 @@ -53,6 +53,13 @@ <classifier>tests</classifier> <scope>test</scope> </dependency> + <dependency> + <groupId>org.apache.hive</groupId> + <artifactId>hive-exec</artifactId> + <version>${project.version}</version> + <type>test-jar</type> + <scope>test</scope> + </dependency> </dependencies> Modified: hive/branches/spark-new/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoader.java URL: http://svn.apache.org/viewvc/hive/branches/spark-new/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoader.java?rev=1629544&r1=1629543&r2=1629544&view=diff ============================================================================== --- hive/branches/spark-new/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoader.java (original) +++ 
hive/branches/spark-new/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoader.java Sun Oct 5 22:26:43 2014 @@ -28,10 +28,12 @@ import java.sql.Timestamp; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Properties; +import java.util.Set; import org.apache.commons.io.FileUtils; @@ -42,6 +44,8 @@ import org.apache.hadoop.hive.cli.CliSes import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.CommandNeedRetryException; import org.apache.hadoop.hive.ql.Driver; +import org.apache.hadoop.hive.ql.io.IOConstants; +import org.apache.hadoop.hive.ql.io.StorageFormats; import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; @@ -69,12 +73,16 @@ import org.joda.time.DateTime; import org.junit.After; import org.junit.Before; import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import static org.junit.Assert.*; +import static org.junit.Assume.assumeTrue; +@RunWith(Parameterized.class) public class TestHCatLoader { private static final Logger LOG = LoggerFactory.getLogger(TestHCatLoader.class); private static final String TEST_DATA_DIR = HCatUtil.makePathASafeFileName(System.getProperty("java.io.tmpdir") + @@ -91,9 +99,30 @@ public class TestHCatLoader { private Driver driver; private Map<Integer, Pair<Integer, String>> basicInputData; - protected String storageFormat() { - return "RCFILE tblproperties('hcat.isd'='org.apache.hive.hcatalog.rcfile.RCFileInputDriver'," + - "'hcat.osd'='org.apache.hive.hcatalog.rcfile.RCFileOutputDriver')"; + private static final Map<String, Set<String>> DISABLED_STORAGE_FORMATS = + new HashMap<String, Set<String>>() {{ + put(IOConstants.AVRO, new HashSet<String>() {{ + add("testReadDataBasic"); + add("testReadPartitionedBasic"); + add("testProjectionsBasic"); + add("testSchemaLoadPrimitiveTypes"); + }}); + put(IOConstants.PARQUETFILE, new HashSet<String>() {{ + add("testReadDataBasic"); + add("testReadPartitionedBasic"); + add("testProjectionsBasic"); + }}); + }}; + + private String storageFormat; + + @Parameterized.Parameters + public static Collection<Object[]> generateParameters() { + return StorageFormats.names(); + } + + public TestHCatLoader(String storageFormat) { + this.storageFormat = storageFormat; } private void dropTable(String tablename) throws IOException, CommandNeedRetryException { @@ -105,7 +134,7 @@ public class TestHCatLoader { } private void createTable(String tablename, String schema, String partitionedBy) throws IOException, CommandNeedRetryException { - createTable(tablename, schema, partitionedBy, driver, storageFormat()); + createTable(tablename, schema, partitionedBy, driver, storageFormat); } static void createTable(String tablename, String schema, String partitionedBy, Driver driver, String storageFormat) @@ -209,17 +238,18 @@ public class TestHCatLoader { server.registerQuery("D = load '" + COMPLEX_FILE_NAME + "' as (name:chararray, studentid:int, contact:tuple(phno:chararray,email:chararray), currently_registered_courses:bag{innertup:tuple(course:chararray)}, current_grades:map[ ] , phnos :bag{innertup:tuple(phno:chararray,type:chararray)});", ++i); server.registerQuery("store D into '" + COMPLEX_TABLE + "' using 
org.apache.hive.hcatalog.pig.HCatStorer();", ++i); server.executeBatch(); - } @After public void tearDown() throws Exception { try { - dropTable(BASIC_TABLE); - dropTable(COMPLEX_TABLE); - dropTable(PARTITIONED_TABLE); - dropTable(SPECIFIC_SIZE_TABLE); - dropTable(AllTypesTable.ALL_PRIMITIVE_TYPES_TABLE); + if (driver != null) { + dropTable(BASIC_TABLE); + dropTable(COMPLEX_TABLE); + dropTable(PARTITIONED_TABLE); + dropTable(SPECIFIC_SIZE_TABLE); + dropTable(AllTypesTable.ALL_PRIMITIVE_TYPES_TABLE); + } } finally { FileUtils.deleteDirectory(new File(TEST_DATA_DIR)); } @@ -227,6 +257,7 @@ public class TestHCatLoader { @Test public void testSchemaLoadBasic() throws IOException { + assumeTrue(!TestUtil.shouldSkip(storageFormat, DISABLED_STORAGE_FORMATS)); PigServer server = new PigServer(ExecType.LOCAL); @@ -241,23 +272,28 @@ public class TestHCatLoader { assertTrue(Xfields.get(1).type == DataType.CHARARRAY); } + /** * Test that we properly translate data types in Hive/HCat table schema into Pig schema */ @Test public void testSchemaLoadPrimitiveTypes() throws IOException { + assumeTrue(!TestUtil.shouldSkip(storageFormat, DISABLED_STORAGE_FORMATS)); AllTypesTable.testSchemaLoadPrimitiveTypes(); } + /** * Test that value from Hive table are read properly in Pig */ @Test public void testReadDataPrimitiveTypes() throws Exception { + assumeTrue(!TestUtil.shouldSkip(storageFormat, DISABLED_STORAGE_FORMATS)); AllTypesTable.testReadDataPrimitiveTypes(); } @Test public void testReadDataBasic() throws IOException { + assumeTrue(!TestUtil.shouldSkip(storageFormat, DISABLED_STORAGE_FORMATS)); PigServer server = new PigServer(ExecType.LOCAL); server.registerQuery("X = load '" + BASIC_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();"); @@ -279,6 +315,7 @@ public class TestHCatLoader { @Test public void testSchemaLoadComplex() throws IOException { + assumeTrue(!TestUtil.shouldSkip(storageFormat, DISABLED_STORAGE_FORMATS)); PigServer server = new PigServer(ExecType.LOCAL); @@ -337,6 +374,7 @@ public class TestHCatLoader { @Test public void testReadPartitionedBasic() throws IOException, CommandNeedRetryException { + assumeTrue(!TestUtil.shouldSkip(storageFormat, DISABLED_STORAGE_FORMATS)); PigServer server = new PigServer(ExecType.LOCAL); driver.run("select * from " + PARTITIONED_TABLE); @@ -404,6 +442,7 @@ public class TestHCatLoader { @Test public void testProjectionsBasic() throws IOException { + assumeTrue(!TestUtil.shouldSkip(storageFormat, DISABLED_STORAGE_FORMATS)); PigServer server = new PigServer(ExecType.LOCAL); @@ -453,6 +492,7 @@ public class TestHCatLoader { @Test public void testColumnarStorePushdown() throws Exception { + assumeTrue(!TestUtil.shouldSkip(storageFormat, DISABLED_STORAGE_FORMATS)); String PIGOUTPUT_DIR = TEST_DATA_DIR+ "/colpushdownop"; String PIG_FILE = "test.pig"; String expectedCols = "0,1"; @@ -486,6 +526,7 @@ public class TestHCatLoader { @Test public void testGetInputBytes() throws Exception { + assumeTrue(!TestUtil.shouldSkip(storageFormat, DISABLED_STORAGE_FORMATS)); File file = new File(TEST_WAREHOUSE_DIR + "/" + SPECIFIC_SIZE_TABLE + "/part-m-00000"); file.deleteOnExit(); RandomAccessFile randomAccessFile = new RandomAccessFile(file, "rw"); @@ -501,6 +542,7 @@ public class TestHCatLoader { @Test public void testConvertBooleanToInt() throws Exception { + assumeTrue(!TestUtil.shouldSkip(storageFormat, DISABLED_STORAGE_FORMATS)); String tbl = "test_convert_boolean_to_int"; String inputFileName = TEST_DATA_DIR + "/testConvertBooleanToInt/data.txt"; File 
inputDataDir = new File(inputFileName).getParentFile(); @@ -600,7 +642,11 @@ public class TestHCatLoader { * Test that value from Hive table are read properly in Pig */ private static void testReadDataPrimitiveTypes() throws Exception { - PigServer server = new PigServer(ExecType.LOCAL); + // testConvertBooleanToInt() sets HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER=true, and + // might be the last one to call HCatContext.INSTANCE.setConf(). Make sure setting is false. + Properties properties = new Properties(); + properties.setProperty(HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER, "false"); + PigServer server = new PigServer(ExecType.LOCAL, properties); server.registerQuery("X = load '" + ALL_PRIMITIVE_TYPES_TABLE + "' using " + HCatLoader.class.getName() + "();"); Iterator<Tuple> XIter = server.openIterator("X"); int numTuplesRead = 0; @@ -608,22 +654,26 @@ public class TestHCatLoader { Tuple t = XIter.next(); assertEquals(HCatFieldSchema.Type.numPrimitiveTypes(), t.size()); int colPos = 0; - for(Object referenceData : primitiveRows[numTuplesRead]) { - if(referenceData == null) { - assertTrue("rowNum=" + numTuplesRead + " colNum=" + colPos + " Reference data is null; actual " + - t.get(colPos), t.get(colPos) == null); - } - else if(referenceData instanceof java.util.Date) { - assertTrue("rowNum=" + numTuplesRead + " colNum=" + colPos + " Reference data=" + ((java.util.Date)referenceData).getTime() + " actual=" + - ((DateTime)t.get(colPos)).getMillis() + "; types=(" + referenceData.getClass() + "," + t.get(colPos).getClass() + ")", + for (Object referenceData : primitiveRows[numTuplesRead]) { + if (referenceData == null) { + assertTrue("rowNum=" + numTuplesRead + " colNum=" + colPos + + " Reference data is null; actual " + + t.get(colPos), t.get(colPos) == null); + } else if (referenceData instanceof java.util.Date) { + // Note that here we ignore nanos part of Hive Timestamp since nanos are dropped when + // reading Hive from Pig by design. + assertTrue("rowNum=" + numTuplesRead + " colNum=" + colPos + + " Reference data=" + ((java.util.Date)referenceData).getTime() + + " actual=" + ((DateTime)t.get(colPos)).getMillis() + + "; types=(" + referenceData.getClass() + "," + t.get(colPos).getClass() + ")", ((java.util.Date)referenceData).getTime()== ((DateTime)t.get(colPos)).getMillis()); - //note that here we ignore nanos part of Hive Timestamp since nanos are dropped when reading Hive from Pig by design - } - else { - assertTrue("rowNum=" + numTuplesRead + " colNum=" + colPos + " Reference data=" + referenceData + " actual=" + - t.get(colPos) + "; types=(" + referenceData.getClass() + "," + t.get(colPos).getClass() + ")", + } else { + // Doing String comps here as value objects in Hive in Pig are different so equals() + // doesn't work. 
+ assertTrue("rowNum=" + numTuplesRead + " colNum=" + colPos + + " Reference data=" + referenceData + " actual=" + t.get(colPos) + + "; types=(" + referenceData.getClass() + "," + t.get(colPos).getClass() + ") ", referenceData.toString().equals(t.get(colPos).toString())); - //doing String comps here as value objects in Hive in Pig are different so equals() doesn't work } colPos++; } @@ -633,10 +683,10 @@ public class TestHCatLoader { } private static void setupAllTypesTable(Driver driver) throws Exception { String[] primitiveData = new String[primitiveRows.length]; - for(int i = 0; i < primitiveRows.length; i++) { + for (int i = 0; i < primitiveRows.length; i++) { Object[] rowData = primitiveRows[i]; StringBuilder row = new StringBuilder(); - for(Object cell : rowData) { + for (Object cell : rowData) { row.append(row.length() == 0 ? "" : "\t").append(cell == null ? null : cell); } primitiveData[i] = row.toString();
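
[Illustrative sketch, not part of the patch above.] The hbase-handler hunks in this revision add a per-column value hook, HBaseValueFactory.createValueObject(ObjectInspector), and thread a List<HBaseValueFactory> through LazyHBaseRow. The sketch below shows how that hook might be used by a custom factory. It relies only on the signatures visible in the diff (the DefaultHBaseValueFactory(int fieldID) constructor, LazyObjectBase, SerDeException); the class name CustomStructHBaseValueFactory is hypothetical and does not exist in the codebase.

    package org.apache.hadoop.hive.hbase.struct;

    import org.apache.hadoop.hive.serde2.SerDeException;
    import org.apache.hadoop.hive.serde2.lazy.LazyObjectBase;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;

    /**
     * Hypothetical value factory sketch. A real implementation would return a
     * custom LazyObjectBase for its column; this one simply falls back to the
     * default behaviour inherited from DefaultHBaseValueFactory.
     */
    public class CustomStructHBaseValueFactory extends DefaultHBaseValueFactory {

      public CustomStructHBaseValueFactory(int fieldID) {
        super(fieldID); // fieldID identifies the Hive column this factory serves
      }

      @Override
      public LazyObjectBase createValueObject(ObjectInspector inspector) throws SerDeException {
        // Plug in a custom lazy object here; delegating to super keeps the
        // HBaseSerDeHelper.createLazyField() default shown in the diff above.
        return super.createValueObject(inspector);
      }
    }

As the LazyHBaseRow hunk above shows, when a factory list is supplied each non-key field is materialized via valueFactories.get(fieldID).createValueObject(fieldRef.getFieldObjectInspector()); when no factories are supplied the row falls back to HBaseSerDeHelper.createLazyField().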
