Modified: hive/branches/spark/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java URL: http://svn.apache.org/viewvc/hive/branches/spark/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java?rev=1613740&r1=1613739&r2=1613740&view=diff ============================================================================== --- hive/branches/spark/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (original) +++ hive/branches/spark/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java Sat Jul 26 23:45:46 2014 @@ -27,17 +27,16 @@ import java.net.URL; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; -import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Properties; -import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; import javax.security.auth.login.LoginException; +import static org.apache.hadoop.hive.conf.Validator.*; import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -58,16 +57,23 @@ public class HiveConf extends Configurat protected Properties origProp; protected String auxJars; private static final Log l4j = LogFactory.getLog(HiveConf.class); + private static boolean loadMetastoreConfig = false; + private static boolean loadHiveServer2Config = false; private static URL hiveDefaultURL = null; private static URL hiveSiteURL = null; + private static URL hivemetastoreSiteUrl = null; + private static URL hiveServer2SiteUrl = null; + private static byte[] confVarByteArray = null; + private static final Map<String, ConfVars> vars = new HashMap<String, ConfVars>(); private final List<String> restrictList = new ArrayList<String>(); private boolean isWhiteListRestrictionEnabled = false; private final List<String> modWhiteList = new ArrayList<String>(); + static { ClassLoader classLoader = Thread.currentThread().getContextClassLoader(); if (classLoader 
== null) { @@ -78,6 +84,9 @@ public class HiveConf extends Configurat // Look for hive-site.xml on the CLASSPATH and log its location if found. hiveSiteURL = classLoader.getResource("hive-site.xml"); + hivemetastoreSiteUrl = classLoader.getResource("hivemetastore-site.xml"); + hiveServer2SiteUrl = classLoader.getResource("hiveserver2-site.xml"); + for (ConfVars confVar : ConfVars.values()) { vars.put(confVar.varname, confVar); } @@ -175,973 +184,1605 @@ public class HiveConf extends Configurat */ public static enum ConfVars { // QL execution stuff - SCRIPTWRAPPER("hive.exec.script.wrapper", null), - PLAN("hive.exec.plan", ""), - PLAN_SERIALIZATION("hive.plan.serialization.format","kryo"), - SCRATCHDIR("hive.exec.scratchdir", "/tmp/hive-" + System.getProperty("user.name")), - LOCALSCRATCHDIR("hive.exec.local.scratchdir", System.getProperty("java.io.tmpdir") + File.separator + System.getProperty("user.name")), - SCRATCHDIRPERMISSION("hive.scratch.dir.permission", "700"), - SUBMITVIACHILD("hive.exec.submitviachild", false), - SUBMITLOCALTASKVIACHILD("hive.exec.submit.local.task.via.child", true), - SCRIPTERRORLIMIT("hive.exec.script.maxerrsize", 100000), - ALLOWPARTIALCONSUMP("hive.exec.script.allow.partial.consumption", false), - STREAMREPORTERPERFIX("stream.stderr.reporter.prefix", "reporter:"), - STREAMREPORTERENABLED("stream.stderr.reporter.enabled", true), - COMPRESSRESULT("hive.exec.compress.output", false), - COMPRESSINTERMEDIATE("hive.exec.compress.intermediate", false), - COMPRESSINTERMEDIATECODEC("hive.intermediate.compression.codec", ""), - COMPRESSINTERMEDIATETYPE("hive.intermediate.compression.type", ""), - BYTESPERREDUCER("hive.exec.reducers.bytes.per.reducer", (long) (256 * 1000 * 1000)), - MAXREDUCERS("hive.exec.reducers.max", 1009), // pick a prime - PREEXECHOOKS("hive.exec.pre.hooks", ""), - POSTEXECHOOKS("hive.exec.post.hooks", ""), - ONFAILUREHOOKS("hive.exec.failure.hooks", ""), - CLIENTSTATSPUBLISHERS("hive.client.stats.publishers", ""), - 
EXECPARALLEL("hive.exec.parallel", false), // parallel query launching - EXECPARALLETHREADNUMBER("hive.exec.parallel.thread.number", 8), - HIVESPECULATIVEEXECREDUCERS("hive.mapred.reduce.tasks.speculative.execution", true), - HIVECOUNTERSPULLINTERVAL("hive.exec.counters.pull.interval", 1000L), - DYNAMICPARTITIONING("hive.exec.dynamic.partition", true), - DYNAMICPARTITIONINGMODE("hive.exec.dynamic.partition.mode", "strict"), - DYNAMICPARTITIONMAXPARTS("hive.exec.max.dynamic.partitions", 1000), - DYNAMICPARTITIONMAXPARTSPERNODE("hive.exec.max.dynamic.partitions.pernode", 100), - MAXCREATEDFILES("hive.exec.max.created.files", 100000L), + SCRIPTWRAPPER("hive.exec.script.wrapper", null, ""), + PLAN("hive.exec.plan", "", ""), + PLAN_SERIALIZATION("hive.plan.serialization.format", "kryo", + "Query plan format serialization between client and task nodes. \n" + + "Two supported values are : kryo and javaXML. Kryo is default."), + SCRATCHDIR("hive.exec.scratchdir", "/tmp/hive-${system:user.name}", "Scratch space for Hive jobs"), + LOCALSCRATCHDIR("hive.exec.local.scratchdir", + "${system:java.io.tmpdir}" + File.separator + "${system:user.name}", + "Local scratch space for Hive jobs"), + SCRATCHDIRPERMISSION("hive.scratch.dir.permission", "700", ""), + SUBMITVIACHILD("hive.exec.submitviachild", false, ""), + SUBMITLOCALTASKVIACHILD("hive.exec.submit.local.task.via.child", true, + "Determines whether local tasks (typically mapjoin hashtable generation phase) runs in \n" + + "separate JVM (true recommended) or not. \n" + + "Avoids the overhead of spawning new JVM, but can lead to out-of-memory issues."), + SCRIPTERRORLIMIT("hive.exec.script.maxerrsize", 100000, + "Maximum number of bytes a script is allowed to emit to standard error (per map-reduce task). 
\n" + + "This prevents runaway scripts from filling logs partitions to capacity"), + ALLOWPARTIALCONSUMP("hive.exec.script.allow.partial.consumption", false, + "When enabled, this option allows a user script to exit successfully without consuming \n" + + "all the data from the standard input."), + STREAMREPORTERPERFIX("stream.stderr.reporter.prefix", "reporter:", + "Streaming jobs that log to standard error with this prefix can log counter or status information."), + STREAMREPORTERENABLED("stream.stderr.reporter.enabled", true, + "Enable consumption of status and counter messages for streaming jobs."), + COMPRESSRESULT("hive.exec.compress.output", false, + "This controls whether the final outputs of a query (to a local/HDFS file or a Hive table) is compressed. \n" + + "The compression codec and other options are determined from Hadoop config variables mapred.output.compress*"), + COMPRESSINTERMEDIATE("hive.exec.compress.intermediate", false, + "This controls whether intermediate files produced by Hive between multiple map-reduce jobs are compressed. \n" + + "The compression codec and other options are determined from Hadoop config variables mapred.output.compress*"), + COMPRESSINTERMEDIATECODEC("hive.intermediate.compression.codec", "", ""), + COMPRESSINTERMEDIATETYPE("hive.intermediate.compression.type", "", ""), + BYTESPERREDUCER("hive.exec.reducers.bytes.per.reducer", (long) (1000 * 1000 * 1000), + "size per reducer.The default is 1G, i.e if the input size is 10G, it will use 10 reducers."), + MAXREDUCERS("hive.exec.reducers.max", 999, + "max number of reducers will be used. If the one specified in the configuration parameter mapred.reduce.tasks is\n" + + "negative, Hive will use this one as the max number of reducers when automatically determine number of reducers."), + PREEXECHOOKS("hive.exec.pre.hooks", "", + "Comma-separated list of pre-execution hooks to be invoked for each statement. 
\n" + + "A pre-execution hook is specified as the name of a Java class which implements the \n" + + "org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface."), + POSTEXECHOOKS("hive.exec.post.hooks", "", + "Comma-separated list of post-execution hooks to be invoked for each statement. \n" + + "A post-execution hook is specified as the name of a Java class which implements the \n" + + "org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface."), + ONFAILUREHOOKS("hive.exec.failure.hooks", "", + "Comma-separated list of on-failure hooks to be invoked for each statement. \n" + + "An on-failure hook is specified as the name of Java class which implements the \n" + + "org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface."), + CLIENTSTATSPUBLISHERS("hive.client.stats.publishers", "", + "Comma-separated list of statistics publishers to be invoked on counters on each job. \n" + + "A client stats publisher is specified as the name of a Java class which implements the \n" + + "org.apache.hadoop.hive.ql.stats.ClientStatsPublisher interface."), + EXECPARALLEL("hive.exec.parallel", false, "Whether to execute jobs in parallel"), + EXECPARALLETHREADNUMBER("hive.exec.parallel.thread.number", 8, + "How many jobs at most can be executed in parallel"), + HIVESPECULATIVEEXECREDUCERS("hive.mapred.reduce.tasks.speculative.execution", true, + "Whether speculative execution for reducers should be turned on. "), + HIVECOUNTERSPULLINTERVAL("hive.exec.counters.pull.interval", 1000L, + "The interval with which to poll the JobTracker for the counters the running job. 
\n" + + "The smaller it is the more load there will be on the jobtracker, the higher it is the less granular the caught will be."), + DYNAMICPARTITIONING("hive.exec.dynamic.partition", true, + "Whether or not to allow dynamic partitions in DML/DDL."), + DYNAMICPARTITIONINGMODE("hive.exec.dynamic.partition.mode", "strict", + "In strict mode, the user must specify at least one static partition \n" + + "in case the user accidentally overwrites all partitions."), + DYNAMICPARTITIONMAXPARTS("hive.exec.max.dynamic.partitions", 1000, + "Maximum number of dynamic partitions allowed to be created in total."), + DYNAMICPARTITIONMAXPARTSPERNODE("hive.exec.max.dynamic.partitions.pernode", 100, + "Maximum number of dynamic partitions allowed to be created in each mapper/reducer node."), + MAXCREATEDFILES("hive.exec.max.created.files", 100000L, + "Maximum number of HDFS files created by all mappers/reducers in a MapReduce job."), DOWNLOADED_RESOURCES_DIR("hive.downloaded.resources.dir", - System.getProperty("java.io.tmpdir") + File.separator + "${hive.session.id}_resources"), - DEFAULTPARTITIONNAME("hive.exec.default.partition.name", "__HIVE_DEFAULT_PARTITION__"), - DEFAULT_ZOOKEEPER_PARTITION_NAME("hive.lockmgr.zookeeper.default.partition.name", "__HIVE_DEFAULT_ZOOKEEPER_PARTITION__"), - // Whether to show a link to the most failed task + debugging tips - SHOW_JOB_FAIL_DEBUG_INFO("hive.exec.show.job.failure.debug.info", true), - JOB_DEBUG_CAPTURE_STACKTRACES("hive.exec.job.debug.capture.stacktraces", true), - JOB_DEBUG_TIMEOUT("hive.exec.job.debug.timeout", 30000), - TASKLOG_DEBUG_TIMEOUT("hive.exec.tasklog.debug.timeout", 20000), - OUTPUT_FILE_EXTENSION("hive.output.file.extension", null), - - HIVE_IN_TEST("hive.in.test", false), // internal usage only, true in test mode - - // should hive determine whether to run in local mode automatically ? - LOCALMODEAUTO("hive.exec.mode.local.auto", false), - // if yes: - // run in local mode only if input bytes is less than this. 
128MB by default - LOCALMODEMAXBYTES("hive.exec.mode.local.auto.inputbytes.max", 134217728L), - // run in local mode only if number of tasks (for map and reduce each) is - // less than this - LOCALMODEMAXINPUTFILES("hive.exec.mode.local.auto.input.files.max", 4), - // if true, DROP TABLE/VIEW does not fail if table/view doesn't exist and IF EXISTS is - // not specified - DROPIGNORESNONEXISTENT("hive.exec.drop.ignorenonexistent", true), - - // ignore the mapjoin hint - HIVEIGNOREMAPJOINHINT("hive.ignore.mapjoin.hint", true), + "${system:java.io.tmpdir}" + File.separator + "${hive.session.id}_resources", + "Temporary local directory for added resources in the remote file system."), + DEFAULTPARTITIONNAME("hive.exec.default.partition.name", "__HIVE_DEFAULT_PARTITION__", + "The default partition name in case the dynamic partition column value is null/empty string or any other values that cannot be escaped. \n" + + "This value must not contain any special character used in HDFS URI (e.g., ':', '%', '/' etc). \n" + + "The user has to be aware that the dynamic partition value should not contain this value to avoid confusions."), + DEFAULT_ZOOKEEPER_PARTITION_NAME("hive.lockmgr.zookeeper.default.partition.name", "__HIVE_DEFAULT_ZOOKEEPER_PARTITION__", ""), - // Max number of lines of footer user can set for a table file. 
- HIVE_FILE_MAX_FOOTER("hive.file.max.footer", 100), - - // Make column names unique in the result set by using table alias if needed - HIVE_RESULTSET_USE_UNIQUE_COLUMN_NAMES("hive.resultset.use.unique.column.names", true), + // Whether to show a link to the most failed task + debugging tips + SHOW_JOB_FAIL_DEBUG_INFO("hive.exec.show.job.failure.debug.info", true, + "If a job fails, whether to provide a link in the CLI to the task with the\n" + + "most failures, along with debugging hints if applicable."), + JOB_DEBUG_CAPTURE_STACKTRACES("hive.exec.job.debug.capture.stacktraces", true, + "Whether or not stack traces parsed from the task logs of a sampled failed task \n" + + "for each failed job should be stored in the SessionState"), + JOB_DEBUG_TIMEOUT("hive.exec.job.debug.timeout", 30000, ""), + TASKLOG_DEBUG_TIMEOUT("hive.exec.tasklog.debug.timeout", 20000, ""), + OUTPUT_FILE_EXTENSION("hive.output.file.extension", null, + "String used as a file extension for output files. \n" + + "If not set, defaults to the codec extension for text files (e.g. 
\".gz\"), or no extension otherwise."), + + HIVE_IN_TEST("hive.in.test", false, "internal usage only, true in test mode", true), + + LOCALMODEAUTO("hive.exec.mode.local.auto", false, + "Let Hive determine whether to run in local mode automatically"), + LOCALMODEMAXBYTES("hive.exec.mode.local.auto.inputbytes.max", 134217728L, + "When hive.exec.mode.local.auto is true, input bytes should less than this for local mode."), + LOCALMODEMAXINPUTFILES("hive.exec.mode.local.auto.input.files.max", 4, + "When hive.exec.mode.local.auto is true, the number of tasks should less than this for local mode."), + + DROPIGNORESNONEXISTENT("hive.exec.drop.ignorenonexistent", true, + "Do not report an error if DROP TABLE/VIEW specifies a non-existent table/view"), + + HIVEIGNOREMAPJOINHINT("hive.ignore.mapjoin.hint", true, "Ignore the mapjoin hint"), + + HIVE_FILE_MAX_FOOTER("hive.file.max.footer", 100, + "maximum number of lines for footer user can define for a table file"), + + HIVE_RESULTSET_USE_UNIQUE_COLUMN_NAMES("hive.resultset.use.unique.column.names", true, + "Make column names unique in the result set by qualifying column names with table alias if needed.\n" + + "Table alias will be added to column names for queries of type \"select *\" or \n" + + "if query explicitly uses table alias \"select r1.x..\"."), // Hadoop Configuration Properties // Properties with null values are ignored and exist only for the purpose of giving us // a symbolic name to reference in the Hive source code. Properties with non-null // values will override any values set in the underlying Hadoop configuration. 
- HADOOPBIN("hadoop.bin.path", findHadoopBinary()), - HIVE_FS_HAR_IMPL("fs.har.impl", "org.apache.hadoop.hive.shims.HiveHarFileSystem"), - HADOOPFS(ShimLoader.getHadoopShims().getHadoopConfNames().get("HADOOPFS"), null), - HADOOPMAPFILENAME(ShimLoader.getHadoopShims().getHadoopConfNames().get("HADOOPMAPFILENAME"), null), - HADOOPMAPREDINPUTDIR(ShimLoader.getHadoopShims().getHadoopConfNames().get("HADOOPMAPREDINPUTDIR"), null), - HADOOPMAPREDINPUTDIRRECURSIVE(ShimLoader.getHadoopShims().getHadoopConfNames().get("HADOOPMAPREDINPUTDIRRECURSIVE"), false), - MAPREDMAXSPLITSIZE(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMAXSPLITSIZE"), 256000000L), - MAPREDMINSPLITSIZE(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMINSPLITSIZE"), 1L), - MAPREDMINSPLITSIZEPERNODE(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMINSPLITSIZEPERNODE"), 1L), - MAPREDMINSPLITSIZEPERRACK(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMINSPLITSIZEPERRACK"), 1L), + HADOOPBIN("hadoop.bin.path", findHadoopBinary(), "", true), + HIVE_FS_HAR_IMPL("fs.har.impl", "org.apache.hadoop.hive.shims.HiveHarFileSystem", + "The implementation for accessing Hadoop Archives. 
Note that this won't be applicable to Hadoop versions less than 0.20"), + HADOOPFS(ShimLoader.getHadoopShims().getHadoopConfNames().get("HADOOPFS"), null, "", true), + HADOOPMAPFILENAME(ShimLoader.getHadoopShims().getHadoopConfNames().get("HADOOPMAPFILENAME"), null, "", true), + HADOOPMAPREDINPUTDIR(ShimLoader.getHadoopShims().getHadoopConfNames().get("HADOOPMAPREDINPUTDIR"), null, "", true), + HADOOPMAPREDINPUTDIRRECURSIVE(ShimLoader.getHadoopShims().getHadoopConfNames().get("HADOOPMAPREDINPUTDIRRECURSIVE"), false, "", true), + MAPREDMAXSPLITSIZE(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMAXSPLITSIZE"), 256000000L, "", true), + MAPREDMINSPLITSIZE(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMINSPLITSIZE"), 1L, "", true), + MAPREDMINSPLITSIZEPERNODE(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMINSPLITSIZEPERNODE"), 1L, "", true), + MAPREDMINSPLITSIZEPERRACK(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMINSPLITSIZEPERRACK"), 1L, "", true), // The number of reduce tasks per job. Hadoop sets this value to 1 by default // By setting this property to -1, Hive will automatically determine the correct // number of reducers. 
- HADOOPNUMREDUCERS(ShimLoader.getHadoopShims().getHadoopConfNames().get("HADOOPNUMREDUCERS"), -1), - HADOOPJOBNAME(ShimLoader.getHadoopShims().getHadoopConfNames().get("HADOOPJOBNAME"), null), - HADOOPSPECULATIVEEXECREDUCERS(ShimLoader.getHadoopShims().getHadoopConfNames().get("HADOOPSPECULATIVEEXECREDUCERS"), true), - MAPREDSETUPCLEANUPNEEDED(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDSETUPCLEANUPNEEDED"), false), - MAPREDTASKCLEANUPNEEDED(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDTASKCLEANUPNEEDED"), false), + HADOOPNUMREDUCERS(ShimLoader.getHadoopShims().getHadoopConfNames().get("HADOOPNUMREDUCERS"), -1, "", true), + HADOOPJOBNAME(ShimLoader.getHadoopShims().getHadoopConfNames().get("HADOOPJOBNAME"), null, "", true), + HADOOPSPECULATIVEEXECREDUCERS(ShimLoader.getHadoopShims().getHadoopConfNames().get("HADOOPSPECULATIVEEXECREDUCERS"), true, "", true), + MAPREDSETUPCLEANUPNEEDED(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDSETUPCLEANUPNEEDED"), false, "", true), + MAPREDTASKCLEANUPNEEDED(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDTASKCLEANUPNEEDED"), false, "", true), // Metastore stuff. Be sure to update HiveConf.metaVars when you add // something here! 
- METASTOREDIRECTORY("hive.metastore.metadb.dir", ""), - METASTOREWAREHOUSE("hive.metastore.warehouse.dir", "/user/hive/warehouse"), - METASTOREURIS("hive.metastore.uris", ""), - // Number of times to retry a connection to a Thrift metastore server - METASTORETHRIFTCONNECTIONRETRIES("hive.metastore.connect.retries", 3), - // Number of times to retry a Thrift metastore call upon failure - METASTORETHRIFTFAILURERETRIES("hive.metastore.failure.retries", 1), - - // Number of seconds the client should wait between connection attempts - METASTORE_CLIENT_CONNECT_RETRY_DELAY("hive.metastore.client.connect.retry.delay", 1), - // Socket timeout for the client connection (in seconds) - METASTORE_CLIENT_SOCKET_TIMEOUT("hive.metastore.client.socket.timeout", 600), - METASTOREPWD("javax.jdo.option.ConnectionPassword", "mine"), - // Class name of JDO connection url hook - METASTORECONNECTURLHOOK("hive.metastore.ds.connection.url.hook", ""), - METASTOREMULTITHREADED("javax.jdo.option.Multithreaded", true), - // Name of the connection url in the configuration + METASTOREDIRECTORY("hive.metastore.metadb.dir", "", ""), + METASTOREWAREHOUSE("hive.metastore.warehouse.dir", "/user/hive/warehouse", + "location of default database for the warehouse"), + METASTOREURIS("hive.metastore.uris", "", + "Thrift URI for the remote metastore. 
Used by metastore client to connect to remote metastore."), + + METASTORETHRIFTCONNECTIONRETRIES("hive.metastore.connect.retries", 3, + "Number of retries while opening a connection to metastore"), + METASTORETHRIFTFAILURERETRIES("hive.metastore.failure.retries", 1, + "Number of retries upon failure of Thrift metastore calls"), + + METASTORE_CLIENT_CONNECT_RETRY_DELAY("hive.metastore.client.connect.retry.delay", 1, + "Number of seconds for the client to wait between consecutive connection attempts"), + METASTORE_CLIENT_SOCKET_TIMEOUT("hive.metastore.client.socket.timeout", 600, + "MetaStore Client socket timeout in seconds"), + METASTOREPWD("javax.jdo.option.ConnectionPassword", "mine", + "password to use against metastore database"), + METASTORECONNECTURLHOOK("hive.metastore.ds.connection.url.hook", "", + "Name of the hook to use for retrieving the JDO connection URL. If empty, the value in javax.jdo.option.ConnectionURL is used"), + METASTOREMULTITHREADED("javax.jdo.option.Multithreaded", true, + "Set this to true if multiple threads access metastore through JDO concurrently."), METASTORECONNECTURLKEY("javax.jdo.option.ConnectionURL", - "jdbc:derby:;databaseName=metastore_db;create=true"), - // Whether to force reloading of the metastore configuration (including - // the connection URL, before the next metastore query that accesses the - // datastore. Once reloaded, this value is reset to false. Used for - // testing only. - METASTOREFORCERELOADCONF("hive.metastore.force.reload.conf", false), - // Number of attempts to retry connecting after there is a JDO datastore err - HMSHANDLERATTEMPTS("hive.hmshandler.retry.attempts", 1), - // Number of miliseconds to wait between attepting - HMSHANDLERINTERVAL("hive.hmshandler.retry.interval", 1000), - // Whether to force reloading of the HMSHandler configuration (including - // the connection URL, before the next metastore query that accesses the - // datastore. Once reloaded, this value is reset to false. 
Used for - // testing only. - HMSHANDLERFORCERELOADCONF("hive.hmshandler.force.reload.conf", false), - METASTORESERVERMINTHREADS("hive.metastore.server.min.threads", 200), - METASTORESERVERMAXTHREADS("hive.metastore.server.max.threads", 100000), - METASTORE_TCP_KEEP_ALIVE("hive.metastore.server.tcp.keepalive", true), - // Intermediate dir suffixes used for archiving. Not important what they - // are, as long as collisions are avoided + "jdbc:derby:;databaseName=metastore_db;create=true", + "JDBC connect string for a JDBC metastore"), + + METASTOREFORCERELOADCONF("hive.metastore.force.reload.conf", false, + "Whether to force reloading of the metastore configuration (including\n" + + "the connection URL, before the next metastore query that accesses the\n" + + "datastore. Once reloaded, this value is reset to false. Used for\n" + + "testing only."), + HMSHANDLERATTEMPTS("hive.hmshandler.retry.attempts", 1, + "The number of times to retry a HMSHandler call if there were a connection error"), + HMSHANDLERINTERVAL("hive.hmshandler.retry.interval", 1000, + "The number of milliseconds between HMSHandler retry attempts"), + HMSHANDLERFORCERELOADCONF("hive.hmshandler.force.reload.conf", false, + "Whether to force reloading of the HMSHandler configuration (including\n" + + "the connection URL, before the next metastore query that accesses the\n" + + "datastore. Once reloaded, this value is reset to false. Used for\n" + + "testing only."), + METASTORESERVERMINTHREADS("hive.metastore.server.min.threads", 200, + "Minimum number of worker threads in the Thrift server's pool."), + METASTORESERVERMAXTHREADS("hive.metastore.server.max.threads", 100000, + "Maximum number of worker threads in the Thrift server's pool."), + METASTORE_TCP_KEEP_ALIVE("hive.metastore.server.tcp.keepalive", true, + "Whether to enable TCP keepalive for the metastore server. 
Keepalive will prevent accumulation of half-open connections."), + METASTORE_INT_ORIGINAL("hive.metastore.archive.intermediate.original", - "_INTERMEDIATE_ORIGINAL"), + "_INTERMEDIATE_ORIGINAL", + "Intermediate dir suffixes used for archiving. Not important what they\n" + + "are, as long as collisions are avoided"), METASTORE_INT_ARCHIVED("hive.metastore.archive.intermediate.archived", - "_INTERMEDIATE_ARCHIVED"), + "_INTERMEDIATE_ARCHIVED", ""), METASTORE_INT_EXTRACTED("hive.metastore.archive.intermediate.extracted", - "_INTERMEDIATE_EXTRACTED"), - METASTORE_KERBEROS_KEYTAB_FILE("hive.metastore.kerberos.keytab.file", ""), + "_INTERMEDIATE_EXTRACTED", ""), + METASTORE_KERBEROS_KEYTAB_FILE("hive.metastore.kerberos.keytab.file", "", + "The path to the Kerberos Keytab file containing the metastore Thrift server's service principal."), METASTORE_KERBEROS_PRINCIPAL("hive.metastore.kerberos.principal", - "hive-metastore/_HOST@EXAMPLE.COM"), - METASTORE_USE_THRIFT_SASL("hive.metastore.sasl.enabled", false), - METASTORE_USE_THRIFT_FRAMED_TRANSPORT("hive.metastore.thrift.framed.transport.enabled", false), - METASTORE_CLUSTER_DELEGATION_TOKEN_STORE_CLS( - "hive.cluster.delegation.token.store.class", - "org.apache.hadoop.hive.thrift.MemoryTokenStore"), + "hive-metastore/_HOST@EXAMPLE.COM", + "The service principal for the metastore Thrift server. \n" + + "The special string _HOST will be replaced automatically with the correct host name."), + METASTORE_USE_THRIFT_SASL("hive.metastore.sasl.enabled", false, + "If true, the metastore Thrift interface will be secured with SASL. Clients must authenticate with Kerberos."), + METASTORE_USE_THRIFT_FRAMED_TRANSPORT("hive.metastore.thrift.framed.transport.enabled", false, + "If true, the metastore Thrift interface will use TFramedTransport. 
When false (default) a standard TTransport is used."), + METASTORE_CLUSTER_DELEGATION_TOKEN_STORE_CLS("hive.cluster.delegation.token.store.class", + "org.apache.hadoop.hive.thrift.MemoryTokenStore", + "The delegation token store implementation. Set to org.apache.hadoop.hive.thrift.ZooKeeperTokenStore for load-balanced cluster."), METASTORE_CLUSTER_DELEGATION_TOKEN_STORE_ZK_CONNECTSTR( - "hive.cluster.delegation.token.store.zookeeper.connectString", ""), + "hive.cluster.delegation.token.store.zookeeper.connectString", "", + "The ZooKeeper token store connect string."), METASTORE_CLUSTER_DELEGATION_TOKEN_STORE_ZK_ZNODE( - "hive.cluster.delegation.token.store.zookeeper.znode", "/hive/cluster/delegation"), + "hive.cluster.delegation.token.store.zookeeper.znode", "/hive/cluster/delegation", + "The root path for token store data."), METASTORE_CLUSTER_DELEGATION_TOKEN_STORE_ZK_ACL( - "hive.cluster.delegation.token.store.zookeeper.acl", ""), - METASTORE_CACHE_PINOBJTYPES("hive.metastore.cache.pinobjtypes", "Table,StorageDescriptor,SerDeInfo,Partition,Database,Type,FieldSchema,Order"), - METASTORE_CONNECTION_POOLING_TYPE("datanucleus.connectionPoolingType", "BONECP"), - METASTORE_VALIDATE_TABLES("datanucleus.validateTables", false), - METASTORE_VALIDATE_COLUMNS("datanucleus.validateColumns", false), - METASTORE_VALIDATE_CONSTRAINTS("datanucleus.validateConstraints", false), - METASTORE_STORE_MANAGER_TYPE("datanucleus.storeManagerType", "rdbms"), - METASTORE_AUTO_CREATE_SCHEMA("datanucleus.autoCreateSchema", true), - METASTORE_FIXED_DATASTORE("datanucleus.fixedDatastore", false), - METASTORE_SCHEMA_VERIFICATION("hive.metastore.schema.verification", false), - METASTORE_AUTO_START_MECHANISM_MODE("datanucleus.autoStartMechanismMode", "checked"), - METASTORE_TRANSACTION_ISOLATION("datanucleus.transactionIsolation", "read-committed"), - METASTORE_CACHE_LEVEL2("datanucleus.cache.level2", false), - METASTORE_CACHE_LEVEL2_TYPE("datanucleus.cache.level2.type", "none"), - 
METASTORE_IDENTIFIER_FACTORY("datanucleus.identifierFactory", "datanucleus1"), - METASTORE_USE_LEGACY_VALUE_STRATEGY("datanucleus.rdbms.useLegacyNativeValueStrategy", true), - METASTORE_PLUGIN_REGISTRY_BUNDLE_CHECK("datanucleus.plugin.pluginRegistryBundleCheck", "LOG"), - METASTORE_BATCH_RETRIEVE_MAX("hive.metastore.batch.retrieve.max", 300), + "hive.cluster.delegation.token.store.zookeeper.acl", "", + "ACL for token store entries. List comma separated all server principals for the cluster."), + METASTORE_CACHE_PINOBJTYPES("hive.metastore.cache.pinobjtypes", "Table,StorageDescriptor,SerDeInfo,Partition,Database,Type,FieldSchema,Order", + "List of comma separated metastore object types that should be pinned in the cache"), + METASTORE_CONNECTION_POOLING_TYPE("datanucleus.connectionPoolingType", "BONECP", + "Specify connection pool library for datanucleus"), + METASTORE_VALIDATE_TABLES("datanucleus.validateTables", false, + "validates existing schema against code. turn this on if you want to verify existing schema"), + METASTORE_VALIDATE_COLUMNS("datanucleus.validateColumns", false, + "validates existing schema against code. turn this on if you want to verify existing schema"), + METASTORE_VALIDATE_CONSTRAINTS("datanucleus.validateConstraints", false, + "validates existing schema against code. turn this on if you want to verify existing schema"), + METASTORE_STORE_MANAGER_TYPE("datanucleus.storeManagerType", "rdbms", "metadata store type"), + METASTORE_AUTO_CREATE_SCHEMA("datanucleus.autoCreateSchema", true, + "creates necessary schema on a startup if one doesn't exist. set this to false, after creating it once"), + METASTORE_FIXED_DATASTORE("datanucleus.fixedDatastore", false, ""), + METASTORE_SCHEMA_VERIFICATION("hive.metastore.schema.verification", false, + "Enforce metastore schema version consistency.\n" + + "True: Verify that version information stored in metastore matches with one from Hive jars. Also disable automatic\n" + + " schema migration attempt. 
Users are required to manually migrate schema after Hive upgrade which ensures\n" + + " proper metastore schema migration. (Default)\n" + + "False: Warn if the version information stored in metastore doesn't match with one from in Hive jars."), + METASTORE_AUTO_START_MECHANISM_MODE("datanucleus.autoStartMechanismMode", "checked", + "throw exception if metadata tables are incorrect"), + METASTORE_TRANSACTION_ISOLATION("datanucleus.transactionIsolation", "read-committed", + "Default transaction isolation level for identity generation."), + METASTORE_CACHE_LEVEL2("datanucleus.cache.level2", false, + "Use a level 2 cache. Turn this off if metadata is changed independently of Hive metastore server"), + METASTORE_CACHE_LEVEL2_TYPE("datanucleus.cache.level2.type", "none", ""), + METASTORE_IDENTIFIER_FACTORY("datanucleus.identifierFactory", "datanucleus1", + "Name of the identifier factory to use when generating table/column names etc. \n" + + "'datanucleus1' is used for backward compatibility with DataNucleus v1"), + METASTORE_USE_LEGACY_VALUE_STRATEGY("datanucleus.rdbms.useLegacyNativeValueStrategy", true, ""), + METASTORE_PLUGIN_REGISTRY_BUNDLE_CHECK("datanucleus.plugin.pluginRegistryBundleCheck", "LOG", + "Defines what happens when plugin bundles are found and are duplicated [EXCEPTION|LOG|NONE]"), + METASTORE_BATCH_RETRIEVE_MAX("hive.metastore.batch.retrieve.max", 300, + "Maximum number of objects (tables/partitions) can be retrieved from metastore in one batch. 
\n" + + "The higher the number, the less the number of round trips is needed to the Hive metastore server, \n" + + "but it may also cause higher memory requirement at the client side."), METASTORE_BATCH_RETRIEVE_TABLE_PARTITION_MAX( - "hive.metastore.batch.retrieve.table.partition.max", 1000), - // A comma separated list of hooks which implement MetaStoreInitListener and will be run at - // the beginning of HMSHandler initialization - METASTORE_INIT_HOOKS("hive.metastore.init.hooks", ""), - METASTORE_PRE_EVENT_LISTENERS("hive.metastore.pre.event.listeners", ""), - METASTORE_EVENT_LISTENERS("hive.metastore.event.listeners", ""), - // should we do checks against the storage (usually hdfs) for operations like drop_partition - METASTORE_AUTHORIZATION_STORAGE_AUTH_CHECKS("hive.metastore.authorization.storage.checks", false), - METASTORE_EVENT_CLEAN_FREQ("hive.metastore.event.clean.freq",0L), - METASTORE_EVENT_EXPIRY_DURATION("hive.metastore.event.expiry.duration",0L), - METASTORE_EXECUTE_SET_UGI("hive.metastore.execute.setugi", true), - METASTORE_PARTITION_NAME_WHITELIST_PATTERN( - "hive.metastore.partition.name.whitelist.pattern", ""), - // Whether to enable integral JDO pushdown. For partition columns storing integers - // in non-canonical form, (e.g. '012'), it may not work, so it's off by default. - METASTORE_INTEGER_JDO_PUSHDOWN("hive.metastore.integral.jdo.pushdown", false), - METASTORE_TRY_DIRECT_SQL("hive.metastore.try.direct.sql", true), - METASTORE_TRY_DIRECT_SQL_DDL("hive.metastore.try.direct.sql.ddl", true), - METASTORE_DISALLOW_INCOMPATIBLE_COL_TYPE_CHANGES( - "hive.metastore.disallow.incompatible.col.type.changes", false), + "hive.metastore.batch.retrieve.table.partition.max", 1000, + "Maximum number of table partitions that metastore internally retrieves in one batch."), - // Default parameters for creating tables - NEWTABLEDEFAULTPARA("hive.table.parameters.default", ""), - // Parameters to copy over when creating a table with Create Table Like. 
- DDL_CTL_PARAMETERS_WHITELIST("hive.ddl.createtablelike.properties.whitelist", ""), - METASTORE_RAW_STORE_IMPL("hive.metastore.rawstore.impl", - "org.apache.hadoop.hive.metastore.ObjectStore"), - METASTORE_CONNECTION_DRIVER("javax.jdo.option.ConnectionDriverName", - "org.apache.derby.jdbc.EmbeddedDriver"), + METASTORE_INIT_HOOKS("hive.metastore.init.hooks", "", + "A comma separated list of hooks to be invoked at the beginning of HMSHandler initialization. \n" + + "An init hook is specified as the name of Java class which extends org.apache.hadoop.hive.metastore.MetaStoreInitListener."), + METASTORE_PRE_EVENT_LISTENERS("hive.metastore.pre.event.listeners", "", + "List of comma separated listeners for metastore events."), + METASTORE_EVENT_LISTENERS("hive.metastore.event.listeners", "", ""), + METASTORE_AUTHORIZATION_STORAGE_AUTH_CHECKS("hive.metastore.authorization.storage.checks", false, + "Should the metastore do authorization checks against the underlying storage (usually hdfs) \n" + + "for operations like drop-partition (disallow the drop-partition if the user in\n" + + "question doesn't have permissions to delete the corresponding directory\n" + + "on the storage)."), + METASTORE_EVENT_CLEAN_FREQ("hive.metastore.event.clean.freq", 0L, + "Frequency at which timer task runs to purge expired events in metastore(in seconds)."), + METASTORE_EVENT_EXPIRY_DURATION("hive.metastore.event.expiry.duration", 0L, + "Duration after which events expire from events table (in seconds)"), + METASTORE_EXECUTE_SET_UGI("hive.metastore.execute.setugi", true, + "In unsecure mode, setting this property to true will cause the metastore to execute DFS operations using \n" + + "the client's reported user and group permissions. Note that this property must be set on \n" + + "both the client and server sides. Further note that its best effort. 
\n" + + "If client sets its to true and server sets it to false, client setting will be ignored."), + METASTORE_PARTITION_NAME_WHITELIST_PATTERN("hive.metastore.partition.name.whitelist.pattern", "", + "Partition names will be checked against this regex pattern and rejected if not matched."), + + METASTORE_INTEGER_JDO_PUSHDOWN("hive.metastore.integral.jdo.pushdown", false, + "Allow JDO query pushdown for integral partition columns in metastore. Off by default. This\n" + + "improves metastore perf for integral columns, especially if there's a large number of partitions.\n" + + "However, it doesn't work correctly with integral values that are not normalized (e.g. have\n" + + "leading zeroes, like 0012). If metastore direct SQL is enabled and works, this optimization\n" + + "is also irrelevant."), + METASTORE_TRY_DIRECT_SQL("hive.metastore.try.direct.sql", true, ""), + METASTORE_TRY_DIRECT_SQL_DDL("hive.metastore.try.direct.sql.ddl", true, ""), + METASTORE_DISALLOW_INCOMPATIBLE_COL_TYPE_CHANGES( + "hive.metastore.disallow.incompatible.col.type.changes", false, + "If true (default is false), ALTER TABLE operations which change the type of \n" + + "a column (say STRING) to an incompatible type (say MAP<STRING, STRING>) are disallowed. \n" + + "RCFile default SerDe (ColumnarSerDe) serializes the values in such a way that the\n" + + "datatypes can be converted from string to any type. The map is also serialized as\n" + + "a string, which can be read as a string as well. However, with any binary \n" + + "serialization, this is not true. Blocking the ALTER TABLE prevents ClassCastExceptions\n" + + "when subsequently trying to access old partitions. \n" + + "\n" + + "Primitive types like INT, STRING, BIGINT, etc are compatible with each other and are \n" + + "not blocked. 
\n" + + "\n" + + "See HIVE-4409 for more details."), + + NEWTABLEDEFAULTPARA("hive.table.parameters.default", "", + "Default property values for newly created tables"), + DDL_CTL_PARAMETERS_WHITELIST("hive.ddl.createtablelike.properties.whitelist", "", + "Table Properties to copy over when executing a Create Table Like."), + METASTORE_RAW_STORE_IMPL("hive.metastore.rawstore.impl", "org.apache.hadoop.hive.metastore.ObjectStore", + "Name of the class that implements org.apache.hadoop.hive.metastore.rawstore interface. \n" + + "This class is used to store and retrieval of raw metadata objects such as table, database"), + METASTORE_CONNECTION_DRIVER("javax.jdo.option.ConnectionDriverName", "org.apache.derby.jdbc.EmbeddedDriver", + "Driver class name for a JDBC metastore"), METASTORE_MANAGER_FACTORY_CLASS("javax.jdo.PersistenceManagerFactoryClass", - "org.datanucleus.api.jdo.JDOPersistenceManagerFactory"), + "org.datanucleus.api.jdo.JDOPersistenceManagerFactory", + "class implementing the jdo persistence"), METASTORE_EXPRESSION_PROXY_CLASS("hive.metastore.expression.proxy", - "org.apache.hadoop.hive.ql.optimizer.ppr.PartitionExpressionForMetastore"), - METASTORE_DETACH_ALL_ON_COMMIT("javax.jdo.option.DetachAllOnCommit", true), - METASTORE_NON_TRANSACTIONAL_READ("javax.jdo.option.NonTransactionalRead", true), - METASTORE_CONNECTION_USER_NAME("javax.jdo.option.ConnectionUserName", "APP"), - METASTORE_END_FUNCTION_LISTENERS("hive.metastore.end.function.listeners", ""), - METASTORE_PART_INHERIT_TBL_PROPS("hive.metastore.partition.inherit.table.properties",""), + "org.apache.hadoop.hive.ql.optimizer.ppr.PartitionExpressionForMetastore", ""), + METASTORE_DETACH_ALL_ON_COMMIT("javax.jdo.option.DetachAllOnCommit", true, + "Detaches all objects from session so that they can be used after transaction is committed"), + METASTORE_NON_TRANSACTIONAL_READ("javax.jdo.option.NonTransactionalRead", true, + "Reads outside of transactions"), + 
METASTORE_CONNECTION_USER_NAME("javax.jdo.option.ConnectionUserName", "APP", + "Username to use against metastore database"), + METASTORE_END_FUNCTION_LISTENERS("hive.metastore.end.function.listeners", "", + "List of comma separated listeners for the end of metastore functions."), + METASTORE_PART_INHERIT_TBL_PROPS("hive.metastore.partition.inherit.table.properties", "", + "List of comma separated keys occurring in table properties which will get inherited to newly created partitions. \n" + + "* implies all the keys will get inherited."), // Parameters for exporting metadata on table drop (requires the use of the) // org.apache.hadoop.hive.ql.parse.MetaDataExportListener preevent listener - METADATA_EXPORT_LOCATION("hive.metadata.export.location", ""), - MOVE_EXPORTED_METADATA_TO_TRASH("hive.metadata.move.exported.metadata.to.trash", true), + METADATA_EXPORT_LOCATION("hive.metadata.export.location", "", + "When used in conjunction with the org.apache.hadoop.hive.ql.parse.MetaDataExportListener pre event listener, \n" + + "it is the location to which the metadata will be exported. The default is an empty string, which results in the \n" + + "metadata being exported to the current user's home directory on HDFS."), + MOVE_EXPORTED_METADATA_TO_TRASH("hive.metadata.move.exported.metadata.to.trash", true, + "When used in conjunction with the org.apache.hadoop.hive.ql.parse.MetaDataExportListener pre event listener, \n" + + "this setting determines if the metadata that is exported will subsequently be moved to the user's trash directory \n" + + "alongside the dropped table data. 
This ensures that the metadata will be cleaned up along with the dropped table data."), // CLI - CLIIGNOREERRORS("hive.cli.errors.ignore", false), - CLIPRINTCURRENTDB("hive.cli.print.current.db", false), - CLIPROMPT("hive.cli.prompt", "hive"), - CLIPRETTYOUTPUTNUMCOLS("hive.cli.pretty.output.num.cols", -1), + CLIIGNOREERRORS("hive.cli.errors.ignore", false, ""), + CLIPRINTCURRENTDB("hive.cli.print.current.db", false, + "Whether to include the current database in the Hive prompt."), + CLIPROMPT("hive.cli.prompt", "hive", + "Command line prompt configuration value. Other hiveconf can be used in this configuration value. \n" + + "Variable substitution will only be invoked at the Hive CLI startup."), + CLIPRETTYOUTPUTNUMCOLS("hive.cli.pretty.output.num.cols", -1, + "The number of columns to use when formatting output generated by the DESCRIBE PRETTY table_name command.\n" + + "If the value of this property is -1, then Hive will use the auto-detected terminal width."), - HIVE_METASTORE_FS_HANDLER_CLS("hive.metastore.fs.handler.class", "org.apache.hadoop.hive.metastore.HiveMetaStoreFsImpl"), + HIVE_METASTORE_FS_HANDLER_CLS("hive.metastore.fs.handler.class", "org.apache.hadoop.hive.metastore.HiveMetaStoreFsImpl", ""), // Things we log in the jobconf // session identifier - HIVESESSIONID("hive.session.id", ""), + HIVESESSIONID("hive.session.id", "", ""), // whether session is running in silent mode or not - HIVESESSIONSILENT("hive.session.silent", false), + HIVESESSIONSILENT("hive.session.silent", false, ""), - // Whether to enable history for this session - HIVE_SESSION_HISTORY_ENABLED("hive.session.history.enabled", false), + HIVE_SESSION_HISTORY_ENABLED("hive.session.history.enabled", false, + "Whether to log Hive query, query plan, runtime statistics etc."), - // query being executed (multiple per session) - HIVEQUERYSTRING("hive.query.string", ""), + HIVEQUERYSTRING("hive.query.string", "", + "Query being executed (might be multiple per a session)"), - // id of query 
being executed (multiple per session) - HIVEQUERYID("hive.query.id", ""), + HIVEQUERYID("hive.query.id", "", + "ID for query being executed (might be multiple per a session)"), - // id of the mapred plan being executed (multiple per query) - HIVEPLANID("hive.query.planid", ""), - // max jobname length - HIVEJOBNAMELENGTH("hive.jobname.length", 50), + HIVEJOBNAMELENGTH("hive.jobname.length", 50, "max jobname length"), // hive jar - HIVEJAR("hive.jar.path", ""), - HIVEAUXJARS("hive.aux.jars.path", ""), + HIVEJAR("hive.jar.path", "", ""), + HIVEAUXJARS("hive.aux.jars.path", "", ""), // hive added files and jars - HIVEADDEDFILES("hive.added.files.path", ""), - HIVEADDEDJARS("hive.added.jars.path", ""), - HIVEADDEDARCHIVES("hive.added.archives.path", ""), + HIVEADDEDFILES("hive.added.files.path", "", ""), + HIVEADDEDJARS("hive.added.jars.path", "", ""), + HIVEADDEDARCHIVES("hive.added.archives.path", "", ""), - HIVE_CURRENT_DATABASE("hive.current.database", ""), // internal usage only + HIVE_CURRENT_DATABASE("hive.current.database", "", "Database name used by current session. 
Internal usage only.", true), // for hive script operator - HIVES_AUTO_PROGRESS_TIMEOUT("hive.auto.progress.timeout", 0), - HIVETABLENAME("hive.table.name", ""), - HIVEPARTITIONNAME("hive.partition.name", ""), - HIVESCRIPTAUTOPROGRESS("hive.script.auto.progress", false), - HIVESCRIPTIDENVVAR("hive.script.operator.id.env.var", "HIVE_SCRIPT_OPERATOR_ID"), - HIVESCRIPTTRUNCATEENV("hive.script.operator.truncate.env", false), - HIVEMAPREDMODE("hive.mapred.mode", "nonstrict"), - HIVEALIAS("hive.alias", ""), - HIVEMAPSIDEAGGREGATE("hive.map.aggr", true), - HIVEGROUPBYSKEW("hive.groupby.skewindata", false), - HIVE_OPTIMIZE_MULTI_GROUPBY_COMMON_DISTINCTS("hive.optimize.multigroupby.common.distincts", - true), - HIVEJOINEMITINTERVAL("hive.join.emit.interval", 1000), - HIVEJOINCACHESIZE("hive.join.cache.size", 25000), + HIVES_AUTO_PROGRESS_TIMEOUT("hive.auto.progress.timeout", 0, + "How long to run autoprogressor for the script/UDTF operators (in seconds).\n" + + "Set to 0 for forever."), + HIVETABLENAME("hive.table.name", "", ""), + HIVEPARTITIONNAME("hive.partition.name", "", ""), + HIVESCRIPTAUTOPROGRESS("hive.script.auto.progress", false, + "Whether Hive Transform/Map/Reduce Clause should automatically send progress information to TaskTracker \n" + + "to avoid the task getting killed because of inactivity. Hive sends progress information when the script is \n" + + "outputting to stderr. 
This option removes the need of periodically producing stderr messages, \n" + + "but users should be cautious because this may prevent infinite loops in the scripts to be killed by TaskTracker."), + HIVESCRIPTIDENVVAR("hive.script.operator.id.env.var", "HIVE_SCRIPT_OPERATOR_ID", + "Name of the environment variable that holds the unique script operator ID in the user's \n" + + "transform function (the custom mapper/reducer that the user has specified in the query)"), + HIVESCRIPTTRUNCATEENV("hive.script.operator.truncate.env", false, + "Truncate each environment variable for external script in scripts operator to 20KB (to fit system limits)"), + HIVEMAPREDMODE("hive.mapred.mode", "nonstrict", + "The mode in which the Hive operations are being performed. \n" + + "In strict mode, some risky queries are not allowed to run. They include:\n" + + " Cartesian Product.\n" + + " No partition being picked up for a query.\n" + + " Comparing bigints and strings.\n" + + " Comparing bigints and doubles.\n" + + " Orderby without limit."), + HIVEALIAS("hive.alias", "", ""), + HIVEMAPSIDEAGGREGATE("hive.map.aggr", true, "Whether to use map-side aggregation in Hive Group By queries"), + HIVEGROUPBYSKEW("hive.groupby.skewindata", false, "Whether there is skew in data to optimize group by queries"), + HIVE_OPTIMIZE_MULTI_GROUPBY_COMMON_DISTINCTS("hive.optimize.multigroupby.common.distincts", true, + "Whether to optimize a multi-groupby query with the same distinct.\n" + + "Consider a query like:\n" + + "\n" + + " from src\n" + + " insert overwrite table dest1 select col1, count(distinct colx) group by col1\n" + + " insert overwrite table dest2 select col2, count(distinct colx) group by col2;\n" + + "\n" + + "With this parameter set to true, first we spray by the distinct value (colx), and then\n" + + "perform the 2 groups bys. This makes sense if map-side aggregation is turned off. 
However,\n" + + "with maps-side aggregation, it might be useful in some cases to treat the 2 inserts independently, \n" + + "thereby performing the query above in 2MR jobs instead of 3 (due to spraying by distinct key first).\n" + + "If this parameter is turned off, we don't consider the fact that the distinct key is the same across\n" + + "different MR jobs."), + HIVEJOINEMITINTERVAL("hive.join.emit.interval", 1000, + "How many rows in the right-most join operand Hive should buffer before emitting the join result."), + HIVEJOINCACHESIZE("hive.join.cache.size", 25000, + "How many rows in the joining tables (except the streaming table) should be cached in memory."), // hive.mapjoin.bucket.cache.size has been replaced by hive.smbjoin.cache.row, // need to remove by hive .13. Also, do not change default (see SMB operator) - HIVEMAPJOINBUCKETCACHESIZE("hive.mapjoin.bucket.cache.size", 100), - HIVEMAPJOINUSEOPTIMIZEDTABLE("hive.mapjoin.optimized.hashtable", true), - HIVEMAPJOINUSEOPTIMIZEDKEYS("hive.mapjoin.optimized.keys", true), - HIVEMAPJOINLAZYHASHTABLE("hive.mapjoin.lazy.hashtable", true), - HIVEHASHTABLEWBSIZE("hive.mapjoin.optimized.hashtable.wbsize", 10 * 1024 * 1024), - - HIVESMBJOINCACHEROWS("hive.smbjoin.cache.rows", 10000), - HIVEGROUPBYMAPINTERVAL("hive.groupby.mapaggr.checkinterval", 100000), - HIVEMAPAGGRHASHMEMORY("hive.map.aggr.hash.percentmemory", (float) 0.5), - HIVEMAPJOINFOLLOWEDBYMAPAGGRHASHMEMORY("hive.mapjoin.followby.map.aggr.hash.percentmemory", (float) 0.3), - HIVEMAPAGGRMEMORYTHRESHOLD("hive.map.aggr.hash.force.flush.memory.threshold", (float) 0.9), - HIVEMAPAGGRHASHMINREDUCTION("hive.map.aggr.hash.min.reduction", (float) 0.5), - HIVEMULTIGROUPBYSINGLEREDUCER("hive.multigroupby.singlereducer", true), - HIVE_MAP_GROUPBY_SORT("hive.map.groupby.sorted", false), - HIVE_MAP_GROUPBY_SORT_TESTMODE("hive.map.groupby.sorted.testmode", false), - HIVE_GROUPBY_ORDERBY_POSITION_ALIAS("hive.groupby.orderby.position.alias", false), - 
HIVE_NEW_JOB_GROUPING_SET_CARDINALITY("hive.new.job.grouping.set.cardinality", 30), + HIVEMAPJOINBUCKETCACHESIZE("hive.mapjoin.bucket.cache.size", 100, ""), + HIVEMAPJOINUSEOPTIMIZEDTABLE("hive.mapjoin.optimized.hashtable", true, + "Whether Hive should use memory-optimized hash table for MapJoin. Only works on Tez,\n" + + "because memory-optimized hashtable cannot be serialized."), + HIVEMAPJOINUSEOPTIMIZEDKEYS("hive.mapjoin.optimized.keys", true, + "Whether MapJoin hashtable should use optimized (size-wise), keys, allowing the table to take less\n" + + "memory. Depending on key, the memory savings for entire table can be 5-15% or so."), + HIVEMAPJOINLAZYHASHTABLE("hive.mapjoin.lazy.hashtable", true, + "Whether MapJoin hashtable should deserialize values on demand. Depending on how many values in\n" + + "the table the join will actually touch, it can save a lot of memory by not creating objects for\n" + + "rows that are not needed. If all rows are needed obviously there's no gain."), + HIVEHASHTABLEWBSIZE("hive.mapjoin.optimized.hashtable.wbsize", 10 * 1024 * 1024, + "Optimized hashtable (see hive.mapjoin.optimized.hashtable) uses a chain of buffers to\n" + + "store data. This is one buffer size. 
HT may be slightly faster if this is larger, but for small\n" + + "joins unnecessary memory will be allocated and then trimmed."), + + HIVESMBJOINCACHEROWS("hive.smbjoin.cache.rows", 10000, + "How many rows with the same key value should be cached in memory per smb joined table."), + HIVEGROUPBYMAPINTERVAL("hive.groupby.mapaggr.checkinterval", 100000, + "Number of rows after which size of the grouping keys/aggregation classes is performed"), + HIVEMAPAGGRHASHMEMORY("hive.map.aggr.hash.percentmemory", (float) 0.5, + "Portion of total memory to be used by map-side group aggregation hash table"), + HIVEMAPJOINFOLLOWEDBYMAPAGGRHASHMEMORY("hive.mapjoin.followby.map.aggr.hash.percentmemory", (float) 0.3, + "Portion of total memory to be used by map-side group aggregation hash table, when this group by is followed by map join"), + HIVEMAPAGGRMEMORYTHRESHOLD("hive.map.aggr.hash.force.flush.memory.threshold", (float) 0.9, + "The max memory to be used by map-side group aggregation hash table.\n" + + "If the memory usage is higher than this number, force to flush data"), + HIVEMAPAGGRHASHMINREDUCTION("hive.map.aggr.hash.min.reduction", (float) 0.5, + "Hash aggregation will be turned off if the ratio between hash table size and input rows is bigger than this number. \n" + + "Set to 1 to make sure hash aggregation is never turned off."), + HIVEMULTIGROUPBYSINGLEREDUCER("hive.multigroupby.singlereducer", true, + "Whether to optimize multi group by query to generate single M/R job plan. If the multi group by query has \n" + + "common group by keys, it will be optimized to generate single M/R job."), + HIVE_MAP_GROUPBY_SORT("hive.map.groupby.sorted", false, + "If the bucketing/sorting properties of the table exactly match the grouping key, whether to perform \n" + + "the group by in the mapper by using BucketizedHiveInputFormat. 
The only downside to this\n" + + "is that it limits the number of mappers to the number of files."), + HIVE_MAP_GROUPBY_SORT_TESTMODE("hive.map.groupby.sorted.testmode", false, + "If the bucketing/sorting properties of the table exactly match the grouping key, whether to perform \n" + + "the group by in the mapper by using BucketizedHiveInputFormat. If the test mode is set, the plan\n" + + "is not converted, but a query property is set to denote the same."), + HIVE_GROUPBY_ORDERBY_POSITION_ALIAS("hive.groupby.orderby.position.alias", false, + "Whether to enable using Column Position Alias in Group By or Order By"), + HIVE_NEW_JOB_GROUPING_SET_CARDINALITY("hive.new.job.grouping.set.cardinality", 30, + "Whether a new map-reduce job should be launched for grouping sets/rollups/cubes.\n" + + "For a query like: select a, b, c, count(1) from T group by a, b, c with rollup;\n" + + "4 rows are created per row: (a, b, c), (a, b, null), (a, null, null), (null, null, null).\n" + + "This can lead to explosion across map-reduce boundary if the cardinality of T is very high,\n" + + "and map-side aggregation does not do a very good job. \n" + + "\n" + + "This parameter decides if Hive should add an additional map-reduce job. 
If the grouping set\n" + + "cardinality (4 in the example above), is more than this value, a new MR job is added under the\n" + + "assumption that the original group by will reduce the data size."), - // for hive udtf operator - HIVEUDTFAUTOPROGRESS("hive.udtf.auto.progress", false), - // Default file format for CREATE TABLE statement - // Options: TextFile, SequenceFile - HIVEDEFAULTFILEFORMAT("hive.default.fileformat", "TextFile", - new StringsValidator("TextFile", "SequenceFile", "RCfile", "ORC")), - HIVEQUERYRESULTFILEFORMAT("hive.query.result.fileformat", "TextFile", - new StringsValidator("TextFile", "SequenceFile", "RCfile")), - HIVECHECKFILEFORMAT("hive.fileformat.check", true), + // for hive udtf operator + HIVEUDTFAUTOPROGRESS("hive.udtf.auto.progress", false, + "Whether Hive should automatically send progress information to TaskTracker \n" + + "when using UDTF's to prevent the task getting killed because of inactivity. Users should be cautious \n" + + "because this may prevent TaskTracker from killing tasks with infinite loops."), + + HIVEDEFAULTFILEFORMAT("hive.default.fileformat", "TextFile", new StringSet("TextFile", "SequenceFile", "RCfile", "ORC"), + "Default file format for CREATE TABLE statement. \n" + + "Options are TextFile, SequenceFile, RCfile and ORC. Users can explicitly override it by CREATE TABLE ... STORED AS [FORMAT]"), + HIVEQUERYRESULTFILEFORMAT("hive.query.result.fileformat", "TextFile", new StringSet("TextFile", "SequenceFile", "RCfile"), + "Default file format for storing result of the query. 
Allows TextFile, SequenceFile and RCfile"), + HIVECHECKFILEFORMAT("hive.fileformat.check", true, "Whether to check file format or not when loading data files"), // default serde for rcfile HIVEDEFAULTRCFILESERDE("hive.default.rcfile.serde", - "org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe"), - - SERDESUSINGMETASTOREFORSCHEMA("hive.serdes.using.metastore.for.schema","org.apache.hadoop.hive.ql.io.orc.OrcSerde," - + "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe,org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe," - + "org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe,org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe," - + "org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe,org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe," - + "org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe"), - //Location of Hive run time structured log file - HIVEHISTORYFILELOC("hive.querylog.location", System.getProperty("java.io.tmpdir") + File.separator + System.getProperty("user.name")), + "org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe", + "The default SerDe Hive will use for the RCFile format"), - // Whether to log the plan's progress every time a job's progress is checked - HIVE_LOG_INCREMENTAL_PLAN_PROGRESS("hive.querylog.enable.plan.progress", true), + HIVEDEFAULTSERDE("hive.default.serde", + "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", + "The default SerDe Hive will use for storage formats that do not specify a SerDe."), + + SERDESUSINGMETASTOREFORSCHEMA("hive.serdes.using.metastore.for.schema", + "org.apache.hadoop.hive.ql.io.orc.OrcSerde,org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe," + + "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe,org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe," + + "org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe,org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe," + + 
"org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe,org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe", + "SerDes retriving schema from metastore. This an internal parameter. Check with the hive dev. team"), + + HIVEHISTORYFILELOC("hive.querylog.location", + "${system:java.io.tmpdir}" + File.separator + "${system:user.name}", + "Location of Hive run time structured log file"), + + HIVE_LOG_INCREMENTAL_PLAN_PROGRESS("hive.querylog.enable.plan.progress", true, + "Whether to log the plan's progress every time a job's progress is checked.\n" + + "These logs are written to the location specified by hive.querylog.location"), + + HIVE_LOG_INCREMENTAL_PLAN_PROGRESS_INTERVAL("hive.querylog.plan.progress.interval", 60000L, + "The interval to wait between logging the plan's progress in milliseconds.\n" + + "If there is a whole number percentage change in the progress of the mappers or the reducers,\n" + + "the progress is logged regardless of this value.\n" + + "The actual interval will be the ceiling of (this value divided by the value of\n" + + "hive.exec.counters.pull.interval) multiplied by the value of hive.exec.counters.pull.interval\n" + + "I.e. if it is not divide evenly by the value of hive.exec.counters.pull.interval it will be\n" + + "logged less frequently than specified.\n" + + "This only has an effect if hive.querylog.enable.plan.progress is set to true."), - // The interval between logging the plan's progress in milliseconds - HIVE_LOG_INCREMENTAL_PLAN_PROGRESS_INTERVAL("hive.querylog.plan.progress.interval", 60000L), - - // Default serde and record reader for user scripts - HIVESCRIPTSERDE("hive.script.serde", "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"), + HIVESCRIPTSERDE("hive.script.serde", "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", + "The default SerDe for transmitting input data to and reading output data from the user scripts. 
"), HIVESCRIPTRECORDREADER("hive.script.recordreader", - "org.apache.hadoop.hive.ql.exec.TextRecordReader"), + "org.apache.hadoop.hive.ql.exec.TextRecordReader", + "The default record reader for reading data from the user scripts. "), HIVESCRIPTRECORDWRITER("hive.script.recordwriter", - "org.apache.hadoop.hive.ql.exec.TextRecordWriter"), - HIVESCRIPTESCAPE("hive.transform.escape.input", false), - HIVEBINARYRECORDMAX("hive.binary.record.max.length", 1000 ), + "org.apache.hadoop.hive.ql.exec.TextRecordWriter", + "The default record writer for writing data to the user scripts. "), + HIVESCRIPTESCAPE("hive.transform.escape.input", false, + "This adds an option to escape special chars (newlines, carriage returns and\n" + + "tabs) when they are passed to the user script. This is useful if the Hive tables\n" + + "can contain data that contains special characters."), + HIVEBINARYRECORDMAX("hive.binary.record.max.length", 1000, + "Read from a binary stream and treat each hive.binary.record.max.length bytes as a record. \n" + + "The last record before the end of stream can have less than hive.binary.record.max.length bytes"), // HWI - HIVEHWILISTENHOST("hive.hwi.listen.host", "0.0.0.0"), - HIVEHWILISTENPORT("hive.hwi.listen.port", "9999"), - HIVEHWIWARFILE("hive.hwi.war.file", System.getenv("HWI_WAR_FILE")), + HIVEHWILISTENHOST("hive.hwi.listen.host", "0.0.0.0", "This is the host address the Hive Web Interface will listen on"), + HIVEHWILISTENPORT("hive.hwi.listen.port", "9999", "This is the port the Hive Web Interface will listen on"), + HIVEHWIWARFILE("hive.hwi.war.file", "${system:HWI_WAR_FILE}", + "This sets the path to the HWI war file, relative to ${HIVE_HOME}. 
"), - // mapper/reducer memory in local mode - HIVEHADOOPMAXMEM("hive.mapred.local.mem", 0), + HIVEHADOOPMAXMEM("hive.mapred.local.mem", 0, "mapper/reducer memory in local mode"), //small table file size - HIVESMALLTABLESFILESIZE("hive.mapjoin.smalltable.filesize",25000000L), //25M + HIVESMALLTABLESFILESIZE("hive.mapjoin.smalltable.filesize", 25000000L, + "The threshold for the input file size of the small tables; if the file size is smaller \n" + + "than this threshold, it will try to convert the common join into map join"), - // random number for split sampling - HIVESAMPLERANDOMNUM("hive.sample.seednumber", 0), + HIVESAMPLERANDOMNUM("hive.sample.seednumber", 0, + "A number used to percentage sampling. By changing this number, user will change the subsets of data sampled."), // test mode in hive mode - HIVETESTMODE("hive.test.mode", false), - HIVETESTMODEPREFIX("hive.test.mode.prefix", "test_"), - HIVETESTMODESAMPLEFREQ("hive.test.mode.samplefreq", 32), - HIVETESTMODENOSAMPLE("hive.test.mode.nosamplelist", ""), - HIVETESTMODEDUMMYSTATAGGR("hive.test.dummystats.aggregator", ""), // internal variable - HIVETESTMODEDUMMYSTATPUB("hive.test.dummystats.publisher", ""), // internal variable - - HIVEMERGEMAPFILES("hive.merge.mapfiles", true), - HIVEMERGEMAPREDFILES("hive.merge.mapredfiles", false), - HIVEMERGETEZFILES("hive.merge.tezfiles", false), - HIVEMERGEMAPFILESSIZE("hive.merge.size.per.task", (long) (256 * 1000 * 1000)), - HIVEMERGEMAPFILESAVGSIZE("hive.merge.smallfiles.avgsize", (long) (16 * 1000 * 1000)), - HIVEMERGERCFILEBLOCKLEVEL("hive.merge.rcfile.block.level", true), + HIVETESTMODE("hive.test.mode", false, + "Whether Hive is running in test mode. 
If yes, it turns on sampling and prefixes the output tablename."), + HIVETESTMODEPREFIX("hive.test.mode.prefix", "test_", + "In test mode, specfies prefixes for the output table"), + HIVETESTMODESAMPLEFREQ("hive.test.mode.samplefreq", 32, + "In test mode, specfies sampling frequency for table, which is not bucketed,\n" + + "For example, the following query:\n" + + " INSERT OVERWRITE TABLE dest SELECT col1 from src\n" + + "would be converted to\n" + + " INSERT OVERWRITE TABLE test_dest\n" + + " SELECT col1 from src TABLESAMPLE (BUCKET 1 out of 32 on rand(1))"), + HIVETESTMODENOSAMPLE("hive.test.mode.nosamplelist", "", + "In test mode, specifies comma separated table names which would not apply sampling"), + HIVETESTMODEDUMMYSTATAGGR("hive.test.dummystats.aggregator", "", "internal variable for test"), + HIVETESTMODEDUMMYSTATPUB("hive.test.dummystats.publisher", "", "internal variable for test"), + + HIVEMERGEMAPFILES("hive.merge.mapfiles", true, + "Merge small files at the end of a map-only job"), + HIVEMERGEMAPREDFILES("hive.merge.mapredfiles", false, + "Merge small files at the end of a map-reduce job"), + HIVEMERGETEZFILES("hive.merge.tezfiles", false, "Merge small files at the end of a Tez DAG"), + HIVEMERGEMAPFILESSIZE("hive.merge.size.per.task", (long) (256 * 1000 * 1000), + "Size of merged files at the end of the job"), + HIVEMERGEMAPFILESAVGSIZE("hive.merge.smallfiles.avgsize", (long) (16 * 1000 * 1000), + "When the average output file size of a job is less than this number, Hive will start an additional \n" + + "map-reduce job to merge the output files into bigger files. 
This is only done for map-only jobs \n" + + "if hive.merge.mapfiles is true, and for map-reduce jobs if hive.merge.mapredfiles is true."), + HIVEMERGERCFILEBLOCKLEVEL("hive.merge.rcfile.block.level", true, ""), HIVEMERGEINPUTFORMATBLOCKLEVEL("hive.merge.input.format.block.level", - "org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat"), + "org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat", ""), HIVEMERGECURRENTJOBHASDYNAMICPARTITIONS( - "hive.merge.current.job.has.dynamic.partitions", false), - - HIVEUSEEXPLICITRCFILEHEADER("hive.exec.rcfile.use.explicit.header", true), - HIVEUSERCFILESYNCCACHE("hive.exec.rcfile.use.sync.cache", true), + "hive.merge.current.job.has.dynamic.partitions", false, ""), - HIVE_RCFILE_RECORD_INTERVAL("hive.io.rcfile.record.interval", Integer.MAX_VALUE), - HIVE_RCFILE_COLUMN_NUMBER_CONF("hive.io.rcfile.column.number.conf", 0), - HIVE_RCFILE_TOLERATE_CORRUPTIONS("hive.io.rcfile.tolerate.corruptions", false), - HIVE_RCFILE_RECORD_BUFFER_SIZE("hive.io.rcfile.record.buffer.size", 4194304), // 4M - - // Maximum fraction of heap that can be used by ORC file writers - HIVE_ORC_FILE_MEMORY_POOL("hive.exec.orc.memory.pool", 0.5f), // 50% - // Define the version of the file to write - HIVE_ORC_WRITE_FORMAT("hive.exec.orc.write.format", null), - // Define the default ORC stripe size + HIVEUSEEXPLICITRCFILEHEADER("hive.exec.rcfile.use.explicit.header", true, + "If this is set the header for RCFiles will simply be RCF. If this is not\n" + + "set the header will be that borrowed from sequence files, e.g. 
SEQ- followed\n" + + "by the input and output RCFile formats."), + HIVEUSERCFILESYNCCACHE("hive.exec.rcfile.use.sync.cache", true, ""), + + HIVE_RCFILE_RECORD_INTERVAL("hive.io.rcfile.record.interval", Integer.MAX_VALUE, ""), + HIVE_RCFILE_COLUMN_NUMBER_CONF("hive.io.rcfile.column.number.conf", 0, ""), + HIVE_RCFILE_TOLERATE_CORRUPTIONS("hive.io.rcfile.tolerate.corruptions", false, ""), + HIVE_RCFILE_RECORD_BUFFER_SIZE("hive.io.rcfile.record.buffer.size", 4194304, ""), // 4M + + HIVE_ORC_FILE_MEMORY_POOL("hive.exec.orc.memory.pool", 0.5f, + "Maximum fraction of heap that can be used by ORC file writers"), + HIVE_ORC_WRITE_FORMAT("hive.exec.orc.write.format", null, + "Define the version of the file to write"), HIVE_ORC_DEFAULT_STRIPE_SIZE("hive.exec.orc.default.stripe.size", - 64L * 1024 * 1024), - // Define the default file system block size for ORC - HIVE_ORC_DEFAULT_BLOCK_SIZE("hive.exec.orc.default.block.size", - 256L * 1024 * 1024), - HIVE_ORC_DICTIONARY_KEY_SIZE_THRESHOLD( - "hive.exec.orc.dictionary.key.size.threshold", 0.8f), - // Define the default ORC index stride - HIVE_ORC_DEFAULT_ROW_INDEX_STRIDE("hive.exec.orc.default.row.index.stride" - , 10000), - // Define the default ORC buffer size - HIVE_ORC_DEFAULT_BUFFER_SIZE("hive.exec.orc.default.buffer.size", 256 * 1024), - // Define the default block padding - HIVE_ORC_DEFAULT_BLOCK_PADDING("hive.exec.orc.default.block.padding", - true), - // Define the tolerance for block padding. The total padded length will - // always be less than the specified percentage. 
- HIVE_ORC_BLOCK_PADDING_TOLERANCE("hive.exec.orc.block.padding.tolerance", 0.05f), - // Define the default compression codec for ORC file - HIVE_ORC_DEFAULT_COMPRESS("hive.exec.orc.default.compress", "ZLIB"), - // Define the default encoding strategy to use - HIVE_ORC_ENCODING_STRATEGY("hive.exec.orc.encoding.strategy", "SPEED", - new StringsValidator("SPEED", "COMPRESSION")), - HIVE_ORC_INCLUDE_FILE_FOOTER_IN_SPLITS("hive.orc.splits.include.file.footer", false), - HIVE_ORC_CACHE_STRIPE_DETAILS_SIZE("hive.orc.cache.stripe.details.size", 10000), - HIVE_ORC_COMPUTE_SPLITS_NUM_THREADS("hive.orc.compute.splits.num.threads", 10), - HIVE_ORC_SKIP_CORRUPT_DATA("hive.exec.orc.skip.corrupt.data", false), - - HIVE_ORC_ZEROCOPY("hive.exec.orc.zerocopy", false), - - // Whether extended literal set is allowed for LazySimpleSerde. - HIVE_LAZYSIMPLE_EXTENDED_BOOLEAN_LITERAL("hive.lazysimple.extended_boolean_literal", false), - - HIVESKEWJOIN("hive.optimize.skewjoin", false), - HIVECONVERTJOIN("hive.auto.convert.join", true), - HIVECONVERTJOINNOCONDITIONALTASK("hive.auto.convert.join.noconditionaltask", true), + 64L * 1024 * 1024, + "Define the default ORC stripe size"), + HIVE_ORC_DEFAULT_BLOCK_SIZE("hive.exec.orc.default.block.size", 256L * 1024 * 1024, + "Define the default file system block size for ORC files."), + + HIVE_ORC_DICTIONARY_KEY_SIZE_THRESHOLD("hive.exec.orc.dictionary.key.size.threshold", 0.8f, + "If the number of keys in a dictionary is greater than this fraction of the total number of\n" + + "non-null rows, turn off dictionary encoding. 
Use 1 to always use dictionary encoding."), + HIVE_ORC_DEFAULT_ROW_INDEX_STRIDE("hive.exec.orc.default.row.index.stride", 10000, "Define the default ORC index stride"), + HIVE_ORC_DEFAULT_BUFFER_SIZE("hive.exec.orc.default.buffer.size", 256 * 1024, "Define the default ORC buffer size"), + HIVE_ORC_DEFAULT_BLOCK_PADDING("hive.exec.orc.default.block.padding", true, "Define the default block padding"), + HIVE_ORC_BLOCK_PADDING_TOLERANCE("hive.exec.orc.block.padding.tolerance", 0.05f, + "Define the tolerance for block padding as a percentage of stripe size.\n" + + "For the defaults of 64Mb ORC stripe and 256Mb HDFS blocks, a maximum of 3.2Mb will be reserved for padding within the 256Mb block. \n" + + "In that case, if the available size within the block is more than 3.2Mb, a new smaller stripe will be inserted to fit within that space. \n" + + "This will make sure that no stripe written will cross block boundaries and cause remote reads within a node local task."), + HIVE_ORC_DEFAULT_COMPRESS("hive.exec.orc.default.compress", "ZLIB", "Define the default compression codec for ORC file"), + + HIVE_ORC_ENCODING_STRATEGY("hive.exec.orc.encoding.strategy", "SPEED", new StringSet("SPEED", "COMPRESSION"), + "Define the encoding strategy to use while writing data. Changing this will\n" + + "only affect the light weight encoding for integers. This flag will not\n" + + "change the compression level of higher level compression codec (like ZLIB).\n" + + "Possible options are SPEED and COMPRESSION."), + + HIVE_ORC_INCLUDE_FILE_FOOTER_IN_SPLITS("hive.orc.splits.include.file.footer", false, + "If turned on splits generated by orc will include metadata about the stripes in the file. 
This\n" + + "data is read remotely (from the client or HS2 machine) and sent to all the tasks."), + HIVE_ORC_CACHE_STRIPE_DETAILS_SIZE("hive.orc.cache.stripe.details.size", 10000, + "Cache size for keeping meta info about orc splits cached in the client."), + HIVE_ORC_COMPUTE_SPLITS_NUM_THREADS("hive.orc.compute.splits.num.threads", 10, + "How many threads orc should use to create splits in parallel."), + HIVE_ORC_SKIP_CORRUPT_DATA("hive.exec.orc.skip.corrupt.data", false, + "If ORC reader encounters corrupt data, this value will be used to determine\n" + + "whether to skip the corrupt data or throw exception. The default behavior is to throw exception."), + + HIVE_ORC_ZEROCOPY("hive.exec.orc.zerocopy", false, "Use zerocopy reads with ORC."), + + HIVE_LAZYSIMPLE_EXTENDED_BOOLEAN_LITERAL("hive.lazysimple.extended_boolean_literal", false, + "LazySimpleSerde uses this property to determine if it treats 'T', 't', 'F', 'f',\n" + + "'1', and '0' as extened, legal boolean literal, in addition to 'TRUE' and 'FALSE'.\n" + + "The default is false, which means only 'TRUE' and 'FALSE' are treated as legal\n" + + "boolean literal."), + + HIVESKEWJOIN("hive.optimize.skewjoin", false, + "Whether to enable skew join optimization. \n" + + "The algorithm is as follows: At runtime, detect the keys with a large skew. Instead of\n" + + "processing those keys, store them temporarily in an HDFS directory. In a follow-up map-reduce\n" + + "job, process those skewed keys. 
The same key need not be skewed for all the tables, and so,\n" + + "the follow-up map-reduce job (for the skewed keys) would be much faster, since it would be a\n" + + "map-join."), + HIVECONVERTJOIN("hive.auto.convert.join", true, + "Whether Hive enables the optimization about converting common join into mapjoin based on the input file size"), + HIVECONVERTJOINNOCONDITIONALTASK("hive.auto.convert.join.noconditionaltask", true, + "Whether Hive enables the optimization about converting common join into mapjoin based on the input file size. \n" + + "If this parameter is on, and the sum of size for n-1 of the tables/partitions for a n-way join is smaller than the\n" + + "specified size, the join is directly converted to a mapjoin (there is no conditional task)."), + HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD("hive.auto.convert.join.noconditionaltask.size", - 10000000L), - HIVECONVERTJOINUSENONSTAGED("hive.auto.convert.join.use.nonstaged", false), - HIVESKEWJOINKEY("hive.skewjoin.key", 100000), - HIVESKEWJOINMAPJOINNUMMAPTASK("hive.skewjoin.mapjoin.map.tasks", 10000), - HIVESKEWJOINMAPJOINMINSPLIT("hive.skewjoin.mapjoin.min.split", 33554432L), //32M - - HIVESENDHEARTBEAT("hive.heartbeat.interval", 1000), - HIVELIMITMAXROWSIZE("hive.limit.row.max.size", 100000L), - HIVELIMITOPTLIMITFILE("hive.limit.optimize.limit.file", 10), - HIVELIMITOPTENABLE("hive.limit.optimize.enable", false), - HIVELIMITOPTMAXFETCH("hive.limit.optimize.fetch.max", 50000), - HIVELIMITPUSHDOWNMEMORYUSAGE("hive.limit.pushdown.memory.usage", -1f), - HIVELIMITTABLESCANPARTITION("hive.limit.query.max.table.partition", -1), - - HIVEHASHTABLETHRESHOLD("hive.hashtable.initialCapacity", 100000), - HIVEHASHTABLELOADFACTOR("hive.hashtable.loadfactor", (float) 0.75), - HIVEHASHTABLEFOLLOWBYGBYMAXMEMORYUSAGE("hive.mapjoin.followby.gby.localtask.max.memory.usage", (float) 0.55), - HIVEHASHTABLEMAXMEMORYUSAGE("hive.mapjoin.localtask.max.memory.usage", (float) 0.90), - 
HIVEHASHTABLESCALE("hive.mapjoin.check.memory.rows", (long)100000), - - HIVEDEBUGLOCALTASK("hive.debug.localtask",false), - - HIVEINPUTFORMAT("hive.input.format", "org.apache.hadoop.hive.ql.io.CombineHiveInputFormat"), - HIVETEZINPUTFORMAT("hive.tez.input.format", "org.apache.hadoop.hive.ql.io.HiveInputFormat"), - - HIVETEZCONTAINERSIZE("hive.tez.container.size", -1), - HIVETEZJAVAOPTS("hive.tez.java.opts", null), - HIVETEZLOGLEVEL("hive.tez.log.level", "INFO"), - - HIVEENFORCEBUCKETING("hive.enforce.bucketing", false), - HIVEENFORCESORTING("hive.enforce.sorting", false), - HIVEOPTIMIZEBUCKETINGSORTING("hive.optimize.bucketingsorting", true), - HIVEPARTITIONER("hive.mapred.partitioner", "org.apache.hadoop.hive.ql.io.DefaultHivePartitioner"), - HIVEENFORCESORTMERGEBUCKETMAPJOIN("hive.enforce.sortmergebucketmapjoin", false), - HIVEENFORCEBUCKETMAPJOIN("hive.enforce.bucketmapjoin", false), + 10000000L, + "If hive.auto.convert.join.noconditionaltask is off, this parameter does not take affect. \n" + + "However, if it is on, and the sum of size for n-1 of the tables/partitions for a n-way join is smaller than this size, \n" + + "the join is directly converted to a mapjoin(there is no conditional task). The default is 10MB"), + HIVECONVERTJOINUSENONSTAGED("hive.auto.convert.join.use.nonstaged", false, + "For conditional joins, if input stream from a small alias can be directly applied to join operator without \n" + + "filtering or projection, the alias need not to be pre-staged in distributed cache via mapred local task.\n" + + "Currently, this is not working with vectorization or tez execution engine."), + HIVESKEWJOINKEY("hive.skewjoin.key", 100000, + "Determine if we get a skew key in join. If we see more than the specified number of rows with the same key in join operator,\n" + + "we think the key as a skew join key. 
"), + HIVESKEWJOINMAPJOINNUMMAPTASK("hive.skewjoin.mapjoin.map.tasks", 10000, + "Determine the number of map task used in the follow up map join job for a skew join.\n" + + "It should be used together with hive.skewjoin.mapjoin.min.split to perform a fine grained control."), + HIVESKEWJOINMAPJOINMINSPLIT("hive.skewjoin.mapjoin.min.split", 33554432L, + "Determine the number of map task at most used in the follow up map join job for a skew join by specifying \n" + + "the minimum split size. It should be used together with hive.skewjoin.mapjoin.map.tasks to perform a fine grained control."), + + HIVESENDHEARTBEAT("hive.heartbeat.interval", 1000, + "Send a heartbeat after this interval - used by mapjoin and filter operators"), + HIVELIMITMAXROWSIZE("hive.limit.row.max.size", 100000L, + "When trying a smaller subset of data for simple LIMIT, how much size we need to guarantee each row to have at least."), + HIVELIMITOPTLIMITFILE("hive.limit.optimize.limit.file", 10, + "When trying a smaller subset of data for simple LIMIT, maximum number of files we can sample."), + HIVELIMITOPTENABLE("hive.limit.optimize.enable", false, + "Whether to enable to optimization to trying a smaller subset of data for simple LIMIT first."), + HIVELIMITOPTMAXFETCH("hive.limit.optimize.fetch.max", 50000, + "Maximum number of rows allowed for a smaller subset of data for simple LIMIT, if it is a fetch query. 
\n" + + "Insert queries are not restricted by this limit."), + HIVELIMITPUSHDOWNMEMORYUSAGE("hive.limit.pushdown.memory.usage", -1f, + "The max memory to be used for hash in RS operator for top K selection."), + HIVELIMITTABLESCANPARTITION("hive.limit.query.max.table.partition", -1, + "This controls how many partitions can be scanned for each partitioned table.\n" + + "The default value \"-1\" means no limit."), + + HIVEHASHTABLETHRESHOLD("hive.hashtable.initialCapacity", 100000, ""), + HIVEHASHTABLELOADFACTOR("hive.hashtable.loadfactor", (float) 0.75, ""), + HIVEHASHTABLEFOLLOWBYGBYMAXMEMORYUSAGE("hive.mapjoin.followby.gby.localtask.max.memory.usage", (float) 0.55, + "This number means how much memory the local task can take to hold the key/value into an in-memory hash table \n" + + "when this map join is followed by a group by. If the local task's memory usage is more than this number, \n" + + "the local task will abort by itself. It means the data of the small table is too large to be held in memory."), + HIVEHASHTABLEMAXMEMORYUSAGE("hive.mapjoin.localtask.max.memory.usage", (float) 0.90, + "This number means how much memory the local task can take to hold the key/value into an in-memory hash table. \n" + + "If the local task's memory usage is more than this number, the local task will abort by itself. \n" + + "It means the data of the small table is too large to be held in memory."), + HIVEHASHTABLESCALE("hive.mapjoin.check.memory.rows", (long)100000, + "The number means after how many rows processed it needs to check the memory usage"), + + HIVEDEBUGLOCALTASK("hive.debug.localtask",false, ""), + + HIVEINPUTFORMAT("hive.input.format", "org.apache.hadoop.hive.ql.io.CombineHiveInputFormat", + "The default input format. Set this to HiveInputFormat if you encounter problems with CombineHiveInputFormat."), + HIVETEZINPUTFORMAT("hive.tez.input.format", "org.apache.hadoop.hive.ql.io.HiveInputFormat", + "The default input format for tez. 
Tez groups splits in the AM."), + + HIVETEZCONTAINERSIZE("hive.tez.container.size", -1, + "By default Tez will spawn containers of the size of a mapper. This can be used to overwrite."), + HIVETEZJAVAOPTS("hive.tez.java.opts", null, + "By default Tez will use the Java options from map tasks. This can be used to overwrite."), + HIVETEZLOGLEVEL("hive.tez.log.level", "INFO", + "The log level to use for tasks executing as part of the DAG.\n" + + "Used only if hive.tez.java.opts is used to configure Java options."), + + HIVEENFORCEBUCKETING("hive.enforce.bucketing", false, + "Whether bucketing is enforced. If true, while inserting into the table, bucketing is enforced."), + HIVEENFORCESORTING("hive.enforce.sorting", false, + "Whether sorting is enforced. If true, while inserting into the table, sorting is enforced."), + HIVEOPTIMIZEBUCKETINGSORTING("hive.optimize.bucketingsorting", true, + "If hive.enforce.bucketing or hive.enforce.sorting is true, don't create a reducer for enforcing \n" + + "bucketing/sorting for queries of the form: \n" + + "insert overwrite table T2 select * from T1;\n" + + "where T1 and T2 are bucketed/sorted by the same keys into the same number of buckets."), + HIVEPARTITIONER("hive.mapred.partitioner", "org.apache.hadoop.hive.ql.io.DefaultHivePartitioner", ""), + HIVEENFORCESORTMERGEBUCKETMAPJOIN("hive.enforce.sortmergebucketmapjoin", false, + "If the user asked for sort-merge bucketed map-side join, and it cannot be performed, should the query fail or not ?"), + HIVEENFORCEBUCKETMAPJOIN("hive.enforce.bucketmapjoin", false, + "If the user asked for bucketed map-side join, and it cannot be performed, \n" + + "should the query fail or not ? 
For example, if the buckets in the tables being joined are\n" + + "not a multiple of each other, bucketed map-side join cannot be performed, and the\n" + + "query will fail if hive.enforce.bucketmapjoin is set to true."), - HIVE_AUTO_SORTMERGE_JOIN("hive.auto.convert.sortmerge.join", false), + HIVE_AUTO_SORTMERGE_JOIN("hive.auto.convert.sortmerge.join", false, + "Will the join be automatically converted to a sort-merge join, if the joined tables pass the criteria for sort-merge join."), HIVE_AUTO_SORTMERGE_JOIN_BIGTABLE_SELECTOR( "hive.auto.convert.sortmerge.join.bigtable.selection.policy", - "org.apache.hadoop.hive.ql.optimizer.AvgPartitionSizeBasedBigTableSelectorForAutoSMJ"), + "org.apache.hadoop.hive.ql.optimizer.AvgPartitionSizeBasedBigTableSelectorForAutoSMJ", + "The policy to choose the big table for automatic conversion to sort-merge join. \n" + + "By default, the table with the largest partitions is assigned the big table. All policies are:\n" + + ". based on position of the table - the leftmost table is selected\n" + + "org.apache.hadoop.hive.ql.optimizer.LeftmostBigTableSMJ.\n" + + ". based on total size (all the partitions selected in the query) of the table \n" + + "org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ.\n" + + ". 
based on average size (all the partitions selected in the query) of the table \n" + + "org.apache.hadoop.hive.ql.optimizer.AvgPartitionSizeBasedBigTableSelectorForAutoSMJ.\n" + + "New policies can be added in future."), HIVE_AUTO_SORTMERGE_JOIN_TOMAPJOIN( - "hive.auto.convert.sortmerge.join.to.mapjoin", false), - - HIVESCRIPTOPERATORTRUST("hive.exec.script.trust", false), - HIVEROWOFFSET("hive.exec.rowoffset", false), + "hive.auto.convert.sortmerge.join.to.mapjoin", false, + "If hive.auto.convert.sortmerge.join is set to true, and a join was converted to a sort-merge join, \n" + + "this parameter decides whether each table should be tried as a big table, and effectively a map-join should be\n" + + "tried. That would create a conditional task with n+1 children for a n-way join (1 child for each table as the\n" + + "big table), and the backup task will be the sort-merge join. In some cases, a map-join would be faster than a\n" + + "sort-merge join, if there is no advantage of having the output bucketed and sorted. 
For example, if a very big sorted\n" + + "and bucketed table with few files (say 10 files) are being joined with a very small sorter and bucketed table\n" + + "with few files (10 files), the sort-merge join will only use 10 mappers, and a simple map-only join might be faster\n" + + "if the complete small table can fit in memory, and a map-join can be performed."), + + HIVESCRIPTOPERATORTRUST("hive.exec.script.trust", false, ""), + HIVEROWOFFSET("hive.exec.rowoffset", false, + "Whether to provide the row offset virtual column"), - HIVE_COMBINE_INPUT_FORMAT_SUPPORTS_SPLITTABLE("hive.hadoop.supports.splittable.combineinputformat", false), + HIVE_COMBINE_INPUT_FORMAT_SUPPORTS_SPLITTABLE("hive.hadoop.supports.splittable.combineinputformat", false, ""), // Optimizer - HIVEOPTINDEXFILTER("hive.optimize.index.filter", false), // automatically use indexes - HIVEINDEXAUTOUPDATE("hive.optimize.index.autoupdate", false), //automatically update stale indexes - HIVEOPTPPD("hive.optimize.ppd", true), // predicate pushdown - HIVEPPDRECOGNIZETRANSITIVITY("hive.ppd.recognizetransivity", true), // predicate pushdown - HIVEPPDREMOVEDUPLICATEFILTERS("hive.ppd.remove.duplicatefilters", true), - HIVEMETADATAONLYQUERIES("hive.optimize.metadataonly", true), - // push predicates down to storage handlers - HIVEOPTPPD_STORAGE("hive.optimize.ppd.storage", true), - HIVEOPTGROUPBY("hive.optimize.groupby", true), // optimize group by - HIVEOPTBUCKETMAPJOIN("hive.optimize.bucketmapjoin", false), // optimize bucket map join - HIVEOPTSORTMERGEBUCKETMAPJOIN("hive.optimize.bucketmapjoin.sortedmerge", false), // try to use sorted merge bucket map join - HIVEOPTREDUCEDEDUPLICATION("hive.optimize.reducededuplication", true), - HIVEOPTREDUCEDEDUPLICATIONMINREDUCER("hive.optimize.reducededuplication.min.reducer", 4), - // when enabled dynamic partitioning column will be globally sorted. - // this way we can keep only one record writer open for each partition value
[... 1444 lines stripped ...]
