http://git-wip-us.apache.org/repos/asf/ambari/blob/1863c3b9/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/HIVE/configuration/hive-site.xml ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/HIVE/configuration/hive-site.xml b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/HIVE/configuration/hive-site.xml new file mode 100755 index 0000000..5f2bc18 --- /dev/null +++ b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/HIVE/configuration/hive-site.xml @@ -0,0 +1,1111 @@ +<?xml version="1.0"?> +<?xml-stylesheet type="text/xsl" href="configuration.xsl"?> +<!-- +Licensed to the Apache Software Foundation (ASF) under one or more +contributor license agreements. See the NOTICE file distributed with +this work for additional information regarding copyright ownership. +The ASF licenses this file to You under the Apache License, Version 2.0 +(the "License"); you may not use this file except in compliance with +the License. You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +--> + +<configuration supports_final="true"> + + <property> + <name>hive.cbo.enable</name> + <value>true</value> + <description>Flag to control enabling Cost Based Optimizations using Calcite framework.</description> + </property> + + <property> + <name>hive.zookeeper.quorum</name> + <value>localhost:2181</value> + <description> + List of ZooKeeper servers to talk to. This is needed for: 1. + Read/write locks - when hive.lock.manager is set to + org.apache.hadoop.hive.ql.lockmgr.zookeeper.ZooKeeperHiveLockManager, + 2. 
When HiveServer2 supports service discovery via Zookeeper. + </description> + <value-attributes> + <type>multiLine</type> + <empty-value-valid>true</empty-value-valid> + </value-attributes> + </property> + + <property> + <name>hive.metastore.connect.retries</name> + <value>24</value> + <description>Number of retries while opening a connection to metastore</description> + </property> + + <property> + <name>hive.metastore.failure.retries</name> + <value>24</value> + <description>Number of retries upon failure of Thrift metastore calls</description> + </property> + + <property> + <name>hive.metastore.client.connect.retry.delay</name> + <value>5s</value> + <description> + Expects a time value with unit (d/day, h/hour, m/min, s/sec, ms/msec, us/usec, ns/nsec), which is sec if not specified. + Number of seconds for the client to wait between consecutive connection attempts + </description> + </property> + + <property> + <name>hive.metastore.client.socket.timeout</name> + <value>60</value> + <description> + Expects a time value with unit (d/day, h/hour, m/min, s/sec, ms/msec, us/usec, ns/nsec), which is sec if not specified. + MetaStore Client socket timeout in seconds + </description> + </property> + + <property> + <name>hive.mapjoin.bucket.cache.size</name> + <value>10000</value> + <description> + How many values in each key in the map-joined table should be + cached in memory. + </description> + </property> + + <property> + <name>hive.security.authorization.manager</name> + <value>org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdConfOnlyAuthorizerFactory</value> + <description> + The Hive client authorization manager class name. The user defined authorization class should implement + interface org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider. 
+ </description> + </property> + + <property> + <name>hive.cluster.delegation.token.store.class</name> + <value>org.apache.hadoop.hive.thrift.ZooKeeperTokenStore</value> + <description>The delegation token store implementation. + Set to org.apache.hadoop.hive.thrift.ZooKeeperTokenStore for load-balanced cluster.</description> + </property> + + <property> + <name>hive.cluster.delegation.token.store.zookeeper.connectString</name> + <value>localhost:2181</value> + <description>The ZooKeeper token store connect string.</description> + </property> + + <property> + <name>hive.server2.support.dynamic.service.discovery</name> + <value>true</value> + <description>Whether HiveServer2 supports dynamic service discovery for its clients. + To support this, each instance of HiveServer2 currently uses ZooKeeper to register itself, + when it is brought up. JDBC/ODBC clients should use the ZooKeeper ensemble: hive.zookeeper.quorum + in their connection string. + </description> + <value-attributes> + <type>boolean</type> + </value-attributes> + </property> + + <property> + <name>fs.hdfs.impl.disable.cache</name> + <value>true</value> + <description>Disable HDFS filesystem cache.</description> + </property> + + <property> + <name>fs.file.impl.disable.cache</name> + <value>true</value> + <description>Disable local filesystem cache.</description> + </property> + + <property> + <name>hive.exec.scratchdir</name> + <value>/tmp/hive</value> + <description>HDFS root scratch dir for Hive jobs which gets created with write all (733) permission. 
For each connecting user, an HDFS scratch dir: ${hive.exec.scratchdir}/&lt;username&gt; is created, with ${hive.scratch.dir.permission}.</description> + </property> + + <property> + <name>hive.exec.submitviachild</name> + <value>false</value> + <description/> + </property> + + <property> + <name>hive.exec.submit.local.task.via.child</name> + <value>true</value> + <description> + Determines whether local tasks (typically mapjoin hashtable generation phase) runs in + separate JVM (true recommended) or not. + Avoids the overhead of spawning new JVM, but can lead to out-of-memory issues. + </description> + </property> + + <property> + <name>hive.exec.compress.output</name> + <value>false</value> + <description> + This controls whether the final outputs of a query (to a local/HDFS file or a Hive table) is compressed. + The compression codec and other options are determined from Hadoop config variables mapred.output.compress* + </description> + </property> + + <property> + <name>hive.exec.compress.intermediate</name> + <value>false</value> + <description> + This controls whether intermediate files produced by Hive between multiple map-reduce jobs are compressed. + The compression codec and other options are determined from Hadoop config variables mapred.output.compress* + </description> + </property> + + <property> + <name>hive.exec.reducers.bytes.per.reducer</name> + <value>67108864</value> + <description>Size per reducer. Hive's built-in default is 256 MB; this stack sets 64 MB (67108864 bytes), i.e. if the input size is 1 GB, it will use 16 reducers.</description> + </property> + + <property> + <name>hive.exec.reducers.max</name> + <value>1009</value> + <description> + max number of reducers will be used. If the one specified in the configuration parameter mapred.reduce.tasks is + negative, Hive will use this one as the max number of reducers when automatically determine number of reducers. 
+ </description> + </property> + + <property> + <name>hive.exec.pre.hooks</name> + <value>org.apache.hadoop.hive.ql.hooks.ATSHook</value> + <description> + Comma-separated list of pre-execution hooks to be invoked for each statement. + A pre-execution hook is specified as the name of a Java class which implements the + org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface. + </description> + </property> + + <property> + <name>hive.exec.post.hooks</name> + <value>org.apache.hadoop.hive.ql.hooks.ATSHook</value> + <description> + Comma-separated list of post-execution hooks to be invoked for each statement. + A post-execution hook is specified as the name of a Java class which implements the + org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface. + </description> + </property> + + <property> + <name>hive.exec.failure.hooks</name> + <value>org.apache.hadoop.hive.ql.hooks.ATSHook</value> + <description> + Comma-separated list of on-failure hooks to be invoked for each statement. + An on-failure hook is specified as the name of Java class which implements the + org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface. + </description> + </property> + + <property> + <name>hive.exec.parallel</name> + <value>false</value> + <description>Whether to execute jobs in parallel</description> + </property> + + <property> + <name>hive.exec.parallel.thread.number</name> + <value>8</value> + <description>How many jobs at most can be executed in parallel</description> + </property> + + <property> + <name>hive.mapred.reduce.tasks.speculative.execution</name> + <value>false</value> + <description>Whether speculative execution for reducers should be turned on. 
</description> + </property> + + <property> + <name>hive.exec.dynamic.partition</name> + <value>true</value> + <description>Whether or not to allow dynamic partitions in DML/DDL.</description> + </property> + + <property> + <name>hive.exec.dynamic.partition.mode</name> + <value>nonstrict</value> + <description> + In strict mode, the user must specify at least one static partition + in case the user accidentally overwrites all partitions. + NonStrict allows all partitions of a table to be dynamic. + </description> + </property> + + <property> + <name>hive.exec.max.dynamic.partitions</name> + <value>5000</value> + <description>Maximum number of dynamic partitions allowed to be created in total.</description> + </property> + + <property> + <name>hive.exec.max.dynamic.partitions.pernode</name> + <value>2000</value> + <description>Maximum number of dynamic partitions allowed to be created in each mapper/reducer node.</description> + </property> + + <property> + <name>hive.exec.max.created.files</name> + <value>100000</value> + <description>Maximum number of HDFS files created by all mappers/reducers in a MapReduce job.</description> + </property> + + <property> + <name>hive.metastore.warehouse.dir</name> + <value>/apps/hive/warehouse</value> + <description>location of default database for the warehouse</description> + </property> + + <property> + <name>hive.metastore.uris</name> + <value>thrift://localhost:9083</value> + <description>Thrift URI for the remote metastore. 
Used by metastore client to connect to remote metastore.</description> + </property> + + <property require-input = "true"> + <name>javax.jdo.option.ConnectionPassword</name> + <display-name>Database Password</display-name> + <value></value> + <property-type>PASSWORD</property-type> + <description>password to use against metastore database</description> + <value-attributes> + <overridable>false</overridable> + </value-attributes> + </property> + + <property> + <name>javax.jdo.option.ConnectionURL</name> + <display-name>Database URL</display-name> + <value>jdbc:mysql://localhost/hive?createDatabaseIfNotExist=true</value> + <description>JDBC connect string for a JDBC metastore</description> + <value-attributes> + <overridable>false</overridable> + </value-attributes> + </property> + + <property> + <name>hive.metastore.server.max.threads</name> + <value>100000</value> + <description>Maximum number of worker threads in the Thrift server's pool.</description> + </property> + + <property> + <name>hive.metastore.kerberos.keytab.file</name> + <value>/etc/security/keytabs/hive.service.keytab</value> + <description>The path to the Kerberos Keytab file containing the metastore Thrift server's service principal.</description> + </property> + + <property> + <name>hive.metastore.kerberos.principal</name> + <value>hive/_HOST@EXAMPLE.COM</value> + <description> + The service principal for the metastore Thrift server. + The special string _HOST will be replaced automatically with the correct host name. 
+ </description> + </property> + + <property> + <name>hive.cluster.delegation.token.store.zookeeper.znode</name> + <value>/hive/cluster/delegation</value> + <description>The root path for token store data.</description> + </property> + + <property> + <name>hive.metastore.cache.pinobjtypes</name> + <value>Table,Database,Type,FieldSchema,Order</value> + <description>List of comma separated metastore object types that should be pinned in the cache</description> + </property> + + <property> + <name>hive.metastore.pre.event.listeners</name> + <value>org.apache.hadoop.hive.ql.security.authorization.AuthorizationPreEventListener</value> + <description>Pre-event listener classes to be loaded on the metastore side to run code + whenever databases, tables, and partitions are created, altered, or dropped. + Set to org.apache.hadoop.hive.ql.security.authorization.AuthorizationPreEventListener + if metastore-side authorization is desired.</description> + </property> + + <property> + <name>hive.metastore.authorization.storage.checks</name> + <value>false</value> + <description> + Should the metastore do authorization checks against the underlying storage (usually hdfs) + for operations like drop-partition (disallow the drop-partition if the user in + question doesn't have permissions to delete the corresponding directory + on the storage). 
+ </description> + </property> + + <property> + <name>javax.jdo.option.ConnectionDriverName</name> + <display-name>JDBC Driver Class</display-name> + <value>com.mysql.jdbc.Driver</value> + <description>Driver class name for a JDBC metastore</description> + <value-attributes> + <overridable>false</overridable> + </value-attributes> + </property> + + <property> + <name>javax.jdo.option.ConnectionUserName</name> + <display-name>Database Username</display-name> + <value>hive</value> + <description>Username to use against metastore database</description> + <value-attributes> + <type>db_user</type> + <overridable>false</overridable> + </value-attributes> + </property> + + <property> + <name>hive.map.aggr</name> + <value>true</value> + <description>Whether to use map-side aggregation in Hive Group By queries</description> + </property> + + <property> + <name>hive.smbjoin.cache.rows</name> + <value>10000</value> + <description>How many rows with the same key value should be cached in memory per smb joined table.</description> + </property> + + <property> + <name>hive.map.aggr.hash.percentmemory</name> + <value>0.5</value> + <description>Portion of total memory to be used by map-side group aggregation hash table</description> + </property> + + <property> + <name>hive.map.aggr.hash.force.flush.memory.threshold</name> + <value>0.9</value> + <description> + The max memory to be used by map-side group aggregation hash table. + If the memory usage is higher than this number, force to flush data + </description> + </property> + + <property> + <name>hive.map.aggr.hash.min.reduction</name> + <value>0.5</value> + <description> + Hash aggregation will be turned off if the ratio between hash table size and input rows is bigger than this number. + Set to 1 to make sure hash aggregation is never turned off. 
+ </description> + </property> + + <property> + <name>hive.merge.mapfiles</name> + <value>true</value> + <description>Merge small files at the end of a map-only job</description> + </property> + + <property> + <name>hive.merge.mapredfiles</name> + <value>false</value> + <description>Merge small files at the end of a map-reduce job</description> + </property> + + <property> + <name>hive.merge.size.per.task</name> + <value>256000000</value> + <description>Size of merged files at the end of the job</description> + </property> + + <property> + <name>hive.merge.smallfiles.avgsize</name> + <value>16000000</value> + <description> + When the average output file size of a job is less than this number, Hive will start an additional + map-reduce job to merge the output files into bigger files. This is only done for map-only jobs + if hive.merge.mapfiles is true, and for map-reduce jobs if hive.merge.mapredfiles is true. + </description> + </property> + + <property> + <name>hive.merge.rcfile.block.level</name> + <value>true</value> + <description/> + </property> + + <property> + <name>hive.merge.orcfile.stripe.level</name> + <value>true</value> + <description> + When hive.merge.mapfiles or hive.merge.mapredfiles is enabled while writing a + table with ORC file format, enabling this config will do stripe level fast merge + for small ORC files. Note that enabling this config will not honor padding tolerance + config (hive.exec.orc.block.padding.tolerance). + </description> + </property> + + <property> + <name>hive.exec.orc.default.stripe.size</name> + <value>67108864</value> + <description>Define the default ORC stripe size</description> + </property> + + <property> + <name>hive.exec.orc.default.compress</name> + <value>ZLIB</value> + <description>Define the default compression codec for ORC file</description> + </property> + + <property> + <name>hive.exec.orc.compression.strategy</name> + <value>SPEED</value> + <description> + Expects one of [speed, compression]. 
+ Define the compression strategy to use while writing data. + This changes the compression level of higher level compression codec (like ZLIB). + </description> + </property> + + <property> + <name>hive.orc.splits.include.file.footer</name> + <value>false</value> + <description> + If turned on splits generated by orc will include metadata about the stripes in the file. This + data is read remotely (from the client or HS2 machine) and sent to all the tasks. + </description> + </property> + + <property> + <name>hive.orc.compute.splits.num.threads</name> + <value>10</value> + <description>How many threads orc should use to create splits in parallel.</description> + </property> + + <property> + <name>hive.auto.convert.join</name> + <value>true</value> + <description>Whether Hive enables the optimization about converting common join into mapjoin based on the input file size</description> + </property> + <property> + <name>hive.auto.convert.join.noconditionaltask</name> + <value>true</value> + <description> + Whether Hive enables the optimization about converting common join into mapjoin based on the input file size. + If this parameter is on, and the sum of size for n-1 of the tables/partitions for a n-way join is smaller than the + specified size, the join is directly converted to a mapjoin (there is no conditional task). + </description> + </property> + + <property> + <name>hive.limit.optimize.enable</name> + <value>true</value> + <description>Whether to enable to optimization to trying a smaller subset of data for simple LIMIT first.</description> + </property> + + <property> + <name>hive.enforce.bucketing</name> + <value>true</value> + <description>Whether bucketing is enforced. If true, while inserting into the table, bucketing is enforced.</description> + </property> + <property> + <name>hive.enforce.sorting</name> + <value>true</value> + <description>Whether sorting is enforced. 
If true, while inserting into the table, sorting is enforced.</description> + </property> + + + <property> + <name>hive.enforce.sortmergebucketmapjoin</name> + <value>true</value> + <description>If the user asked for sort-merge bucketed map-side join, and it cannot be performed, should the query fail or not ?</description> + </property> + + <property> + <name>hive.auto.convert.sortmerge.join</name> + <value>true</value> + <description>Will the join be automatically converted to a sort-merge join, if the joined tables pass the criteria for sort-merge join.</description> + </property> + + <property> + <name>hive.auto.convert.sortmerge.join.to.mapjoin</name> + <value>false</value> + <description> + If hive.auto.convert.sortmerge.join is set to true, and a join was converted to a sort-merge join, + this parameter decides whether each table should be tried as a big table, and effectively a map-join should be + tried. That would create a conditional task with n+1 children for a n-way join (1 child for each table as the + big table), and the backup task will be the sort-merge join. In some cases, a map-join would be faster than a + sort-merge join, if there is no advantage of having the output bucketed and sorted. For example, if a very big sorted + and bucketed table with few files (say 10 files) are being joined with a very small sorter and bucketed table + with few files (10 files), the sort-merge join will only use 10 mappers, and a simple map-only join might be faster + if the complete small table can fit in memory, and a map-join can be performed. 
+ </description> + </property> + + <property> + <name>hive.optimize.constant.propagation</name> + <value>true</value> + <description>Whether to enable constant propagation optimizer</description> + </property> + <property> + <name>hive.optimize.metadataonly</name> + <value>true</value> + <description/> + </property> + <property> + <name>hive.optimize.null.scan</name> + <value>true</value> + <description>Dont scan relations which are guaranteed to not generate any rows</description> + </property> + + + <property> + <name>hive.optimize.bucketmapjoin</name> + <value>true</value> + <description>If the tables being joined are bucketized on the join columns, and the number of buckets in one table + is a multiple of the number of buckets in the other table, the buckets can be joined with each other by setting + this parameter as true.</description> + </property> + + <property> + <name>hive.optimize.reducededuplication</name> + <value>true</value> + <description> + Remove extra map-reduce jobs if the data is already clustered by the same key which needs to be used again. + This should always be set to true. Since it is a new feature, it has been made configurable. + </description> + </property> + <property> + <name>hive.optimize.reducededuplication.min.reducer</name> + <value>4</value> + <description> + Reduce deduplication merges two RSs by moving key/parts/reducer-num of the child RS to parent RS. + That means if reducer-num of the child RS is fixed (order by or forced bucketing) and small, it can make very slow, single MR. + The optimization will be automatically disabled if number of reducers would be less than specified value. + </description> + </property> + <property> + <name>hive.optimize.sort.dynamic.partition</name> + <value>false</value> + <description> + When enabled dynamic partitioning column will be globally sorted. + This way we can keep only one record writer open for each partition value + in the reducer thereby reducing the memory pressure on reducers. 
+ </description> + </property> + <property> + <name>hive.stats.autogather</name> + <value>true</value> + <description>A flag to gather statistics automatically during the INSERT OVERWRITE command.</description> + </property> + <property> + <name>hive.stats.dbclass</name> + <value>fs</value> + <description> + Expects one of the pattern in [jdbc(:.*), hbase, counter, custom, fs]. + The storage that stores temporary Hive statistics. Currently, jdbc, hbase, counter and custom type are supported. + </description> + </property> + + <property> + <name>hive.stats.fetch.partition.stats</name> + <value>true</value> + <description> + Annotation of operator tree with statistics information requires partition level basic + statistics like number of rows, data size and file size. Partition statistics are fetched from + metastore. Fetching partition statistics for each needed partition can be expensive when the + number of partitions is high. This flag can be used to disable fetching of partition statistics + from metastore. When this flag is disabled, Hive will make calls to filesystem to get file sizes + and will estimate the number of rows from row schema. + </description> + </property> + <property> + <name>hive.stats.fetch.column.stats</name> + <value>false</value> + <description> + Annotation of operator tree with statistics information requires column statistics. + Column statistics are fetched from metastore. Fetching column statistics for each needed column + can be expensive when the number of columns is high. This flag can be used to disable fetching + of column statistics from metastore. + </description> + </property> + + <property> + <name>hive.zookeeper.client.port</name> + <value>2181</value> + <description>The port of ZooKeeper servers to talk to. 
If the list of Zookeeper servers specified in hive.zookeeper.quorum,does not contain port numbers, this value is used.</description> + </property> + + <property> + <name>hive.zookeeper.namespace</name> + <value>hive_zookeeper_namespace</value> + <description>The parent node under which all ZooKeeper nodes are created.</description> + </property> + + <property> + <name>hive.txn.manager</name> + <value>org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager</value> + <description/> + </property> + + <property> + <name>hive.txn.max.open.batch</name> + <value>1000</value> + <description> + Maximum number of transactions that can be fetched in one call to open_txns(). + Increasing this will decrease the number of delta files created when + streaming data into Hive. But it will also increase the number of + open transactions at any given time, possibly impacting read performance. + </description> + </property> + + <property> + <name>hive.support.concurrency</name> + <value>false</value> + <description> + Support concurrency and use locks, needed for Transactions. Requires Zookeeper. + </description> + </property> + + <property> + <name>hive.cli.print.header</name> + <value>false</value> + <description> + Whether to print the names of the columns in query output. + </description> + </property> + + <property> + <name>hive.compactor.worker.timeout</name> + <value>86400L</value> + <description> + Expects a time value with unit (d/day, h/hour, m/min, s/sec, ms/msec, us/usec, ns/nsec), which is sec if not specified. + Time before a given compaction in working state is declared a failure + and returned to the initiated state. + </description> + </property> + <property> + <name>hive.compactor.check.interval</name> + <value>300L</value> + <description> + Expects a time value with unit (d/day, h/hour, m/min, s/sec, ms/msec, us/usec, ns/nsec), which is sec if not specified. + Time between checks to see if any partitions need compacted. 
+ This should be kept high because each check for compaction requires many calls against the NameNode. + </description> + </property> + + <property> + <name>hive.compactor.delta.pct.threshold</name> + <value>0.1f</value> + <description>Percentage (by size) of base that deltas can be before major compaction is initiated.</description> + </property> + <property> + <name>hive.fetch.task.conversion</name> + <value>more</value> + <description> + Expects one of [none, minimal, more]. + Some select queries can be converted to single FETCH task minimizing latency. + Currently the query should be single sourced not having any subquery and should not have + any aggregations or distincts (which incurs RS), lateral views and joins. + 0. none : disable hive.fetch.task.conversion + 1. minimal : SELECT STAR, FILTER on partition columns, LIMIT only + 2. more : SELECT, FILTER, LIMIT only (support TABLESAMPLE and virtual columns) + </description> + </property> + <property> + <name>hive.fetch.task.conversion.threshold</name> + <value>1073741824</value> + <description> + Input threshold for applying hive.fetch.task.conversion. If target table is native, input length + is calculated by summation of file lengths. If it's not native, storage handler for the table + can optionally implement org.apache.hadoop.hive.ql.metadata.InputEstimator interface. + </description> + </property> + + <property> + <name>hive.fetch.task.aggr</name> + <value>false</value> + <description> + Aggregation queries with no group-by clause (for example, select count(*) from src) execute + final aggregations in single reduce task. If this is set true, Hive delegates final aggregation + stage to fetch task, possibly decreasing the query time. 
+ </description> + </property> + + <property> + <name>hive.security.authorization.enabled</name> + <value>false</value> + <description>enable or disable the Hive client authorization</description> + <value-attributes> + <type>boolean</type> + </value-attributes> + </property> + + <property> + <name>hive.security.authenticator.manager</name> + <value>org.apache.hadoop.hive.ql.security.ProxyUserAuthenticator</value> + <description> + hive client authenticator manager class name. The user defined authenticator should implement + interface org.apache.hadoop.hive.ql.security.HiveAuthenticationProvider. + </description> + </property> + + <property> + <name>hive.security.metastore.authorization.manager</name> + <value>org.apache.hadoop.hive.ql.security.authorization.StorageBasedAuthorizationProvider,org.apache.hadoop.hive.ql.security.authorization.MetaStoreAuthzAPIAuthorizerEmbedOnly</value> + <description> + authorization manager class name to be used in the metastore for authorization. + The user defined authorization class should implement interface + org.apache.hadoop.hive.ql.security.authorization.HiveMetastoreAuthorizationProvider. + </description> + </property> + + <property> + <name>hive.security.metastore.authorization.auth.reads</name> + <value>true</value> + <description>If this is true, metastore authorizer authorizes read actions on database, table</description> + </property> + + <property> + <name>hive.security.metastore.authenticator.manager</name> + <value>org.apache.hadoop.hive.ql.security.HadoopDefaultMetastoreAuthenticator</value> + <description> + authenticator manager class name to be used in the metastore for authentication. + The user defined authenticator should implement interface org.apache.hadoop.hive.ql.security.HiveAuthenticationProvider. 
+ </description> + </property> + + <property> + <name>hive.server2.logging.operation.enabled</name> + <value>true</value> + <description>When true, HS2 will save operation logs</description> + </property> + + <property> + <name>hive.server2.logging.operation.log.location</name> + <value>${java.io.tmpdir}/${user.name}/operation_logs</value> + <description>Top level directory where operation logs are stored if logging functionality is enabled</description> + </property> + + <property> + <name>hive.server2.zookeeper.namespace</name> + <value>hiveserver2</value> + <description>The parent node in ZooKeeper used by HiveServer2 when supporting dynamic service discovery.</description> + </property> + + <property> + <name>hive.server2.transport.mode</name> + <value>binary</value> + <description> + Expects one of [binary, http]. + Transport mode of HiveServer2. + </description> + </property> + + <property> + <name>hive.server2.thrift.http.port</name> + <value>10001</value> + <description>Port number of HiveServer2 Thrift interface when hive.server2.transport.mode is 'http'.</description> + </property> + + <property> + <name>hive.server2.thrift.http.path</name> + <value>cliservice</value> + <description>Path component of URL endpoint when in HTTP mode.</description> + </property> + + <property> + <name>hive.server2.thrift.port</name> + <display-name>HiveServer2 Port</display-name> + <value>10000</value> + <description>Port number of HiveServer2 Thrift interface when hive.server2.transport.mode is 'binary'.</description> + <value-attributes> + <overridable>false</overridable> + <type>int</type> + </value-attributes> + </property> + + <property> + <name>hive.server2.thrift.sasl.qop</name> + <value>auth</value> + <description> + Expects one of [auth, auth-int, auth-conf]. + Sasl QOP value; Set it to one of following values to enable higher levels of + protection for HiveServer2 communication with clients. 
+ "auth" - authentication only (default) + "auth-int" - authentication plus integrity protection + "auth-conf" - authentication plus integrity and confidentiality protection + This is applicable only if HiveServer2 is configured to use Kerberos authentication. + </description> + </property> + + <property> + <name>hive.server2.thrift.max.worker.threads</name> + <value>500</value> + <description>Maximum number of Thrift worker threads</description> + </property> + + <property> + <name>hive.server2.allow.user.substitution</name> + <value>true</value> + <description>Allow alternate user to be specified as part of HiveServer2 open connection request.</description> + </property> + + <property> + <name>hive.server2.authentication.spnego.keytab</name> + <value>/etc/security/keytabs/spnego.service.keytab</value> + <description> + keytab file for SPNego principal, optional, + typical value would look like /etc/security/keytabs/spnego.service.keytab, + This keytab would be used by HiveServer2 when Kerberos security is enabled and + HTTP transport mode is used. + This needs to be set only if SPNEGO is to be used in authentication. + SPNego authentication would be honored only if valid + hive.server2.authentication.spnego.principal + and + hive.server2.authentication.spnego.keytab + are specified. + </description> + </property> + + <property> + <name>hive.server2.authentication</name> + <description>Authentication mode, default NONE. Options are NONE, NOSASL, KERBEROS, LDAP, PAM and CUSTOM</description> + <value>NONE</value> + </property> + + <property> + <name>hive.server2.authentication.spnego.principal</name> + <value>HTTP/_HOST@EXAMPLE.COM</value> + <description> + SPNego service principal, optional, + typical value would look like HTTP/_HOST@EXAMPLE.COM + SPNego service principal would be used by HiveServer2 when Kerberos security is enabled + and HTTP transport mode is used. + This needs to be set only if SPNEGO is to be used in authentication. 
+ </description> + </property> + + <property> + <name>hive.server2.enable.doAs</name> + <value>true</value> + <description> + Impersonate the connected user. By default HiveServer2 performs the query processing as the user who + submitted the query. But if the parameter is set to false, the query will run as the user that the hiveserver2 + process runs as. + </description> + </property> + <property> + <name>hive.server2.table.type.mapping</name> + <value>CLASSIC</value> + <description> + Expects one of [classic, hive]. + This setting reflects how HiveServer2 will report the table types for JDBC and other + client implementations that retrieve the available tables and supported table types + HIVE : Exposes Hive's native table types like MANAGED_TABLE, EXTERNAL_TABLE, VIRTUAL_VIEW + CLASSIC : More generic types like TABLE and VIEW + </description> + </property> + + <property> + <name>hive.server2.use.SSL</name> + <value>false</value> + <description/> + </property> + <property> + <name>hive.server2.keystore.path</name> + <value>/etc/security/keystores/hs2keystore.jks</value> + <description>SSL certificate keystore location</description> + </property> + <property> + <name>hive.server2.keystore.password</name> + <value>password</value> + <property-type>PASSWORD</property-type> + <description>SSL certificate keystore password</description> + </property> + + <property> + <name>hive.conf.restricted.list</name> + <value>hive.security.authenticator.manager,hive.security.authorization.manager,hive.users.in.admin.role</value> + <description>Comma separated list of configuration options which are immutable at runtime</description> + </property> + + <property> + <name>hive.vectorized.groupby.maxentries</name> + <value>100000</value> + <description> + Max number of entries in the vector group by aggregation hashtables. + Exceeding this will trigger a flush irrelevant of memory pressure condition. 
+ </description> + </property> + + <!-- missing from HiveConf --> + <property> + <name>ambari.hive.db.schema.name</name> + <display-name>Database Name</display-name> + <value>hive</value> + <description>Database name used as the Hive Metastore</description> + <value-attributes> + <type>database</type> + <type>host</type> + <overridable>false</overridable> + </value-attributes> + </property> + + <property> + <name>hive.metastore.sasl.enabled</name> + <value>false</value> + <description>If true, the metastore thrift interface will be secured with SASL. + Clients must authenticate with Kerberos.</description> + </property> + + <property> + <name>hive.metastore.execute.setugi</name> + <value>true</value> + <description>In unsecure mode, setting this property to true will cause the metastore to execute DFS operations using the client's reported user and group permissions. Note that this property must be set on both the client and server sides. Further note that it is best effort. If client sets it to true and server sets it to false, client setting will be ignored.</description> + </property> + + <property> + <name>hive.optimize.bucketmapjoin.sortedmerge</name> + <value>false</value> + <description> If the tables being joined are sorted and bucketized on the join columns, and they have the same number + of buckets, a sort-merge join can be performed by setting this parameter as true. + </description> + </property> + + <property> + <name>hive.auto.convert.join.noconditionaltask.size</name> + <value>1000000000</value> + <description>If hive.auto.convert.join.noconditionaltask is off, this parameter does not take effect. However, if it + is on, and the sum of size for n-1 of the tables/partitions for a n-way join is smaller than this size, the join is directly + converted to a mapjoin(there is no conditional task). The default is 10MB. 
+ </description> + </property> + + <property> + <name>hive.vectorized.execution.enabled</name> + <value>true</value> + <description> + This flag should be set to true to enable vectorized mode of query execution. + The default value is false. + </description> + </property> + + <property> + <name>hive.vectorized.execution.reduce.enabled</name> + <value>false</value> + <description> + This flag should be set to true to enable vectorized mode of the reduce-side of query execution. + The default value is true. + </description> + </property> + + <property> + <name>hive.optimize.index.filter</name> + <value>true</value> + <description> + Whether to enable automatic use of indexes + </description> + </property> + + <property> + <name>hive.execution.engine</name> + <value>mr</value> + <description> + Chooses execution engine. The Option is : mr (Map reduce, default) + </description> + </property> + + <property> + <name>hive.vectorized.groupby.checkinterval</name> + <value>1024</value> + <!--value>4096</value--> + <description>Number of entries added to the group by aggregation hash before a recomputation of average entry size is performed.</description> + </property> + + <property> + <name>hive.vectorized.groupby.flush.percent</name> + <value>0.1</value> + <description>Percent of entries in the group by aggregation hash flushed when the memory threshold is exceeded.</description> + </property> + + <property> + <name>hive.compute.query.using.stats</name> + <value>true</value> + <description> + When set to true Hive will answer a few queries like count(1) purely using stats + stored in metastore. For basic stats collection turn on the config hive.stats.autogather to true. + For more advanced stats collection need to run analyze table queries. 
+ </description> + </property> + + <property> + <name>hive.limit.pushdown.memory.usage</name> + <value>0.04</value> + <description>The max memory to be used for hash in RS operator for top K selection.</description> + </property> + + <property> + <name>hive.txn.timeout</name> + <value>300</value> + <description>Time after which transactions are declared aborted if the client has not sent a heartbeat, in seconds.</description> + </property> + + <property> + <name>hive.compactor.initiator.on</name> + <value>false</value> + <description>Whether to run the compactor's initiator thread in this metastore instance or not. If there is more than one instance of the thrift metastore this should only be set to true for one of them.</description> + </property> + + <property> + <name>hive.compactor.worker.threads</name> + <value>0</value> + <description>Number of compactor worker threads to run on this metastore instance. Can be different values on different metastore instances.</description> + </property> + + <property> + <name>hive.compactor.delta.num.threshold</name> + <value>10</value> + <description>Number of delta files that must exist in a directory before the compactor will attempt a minor compaction.</description> + </property> + + <property> + <name>hive.compactor.abortedtxn.threshold</name> + <value>1000</value> + <description>Number of aborted transactions involving a particular table or partition before major compaction is initiated.</description> + </property> + + <property> + <name>datanucleus.cache.level2.type</name> + <value>none</value> + <description>Determines caching mechanism DataNucleus L2 cache will use. It is strongly recommended to use default value of 'none' as other values may cause consistency errors in Hive.</description> + </property> + + <property> + <name>hive.index.compact.query.max.size</name> + <value>10737418240</value> + <description>The maximum number of bytes that a query using the compact index can read. 
+ Negative value is equivalent to infinity. + </description> + </property> + + <property> + <name>hive.warehouse.subdir.inherit.perms</name> + <value>true</value> + <description>Set this to true if table directories should inherit the permissions of the warehouse or database directory instead of being created with permissions derived from dfs umask + </description> + </property> + + <property> + <name>hive.start.cleanup.scratchdir</name> + <value>true</value> + <description>To cleanup the hive scratchdir while starting the hive server.</description> + </property> + +</configuration>
http://git-wip-us.apache.org/repos/asf/ambari/blob/1863c3b9/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/HIVE/configuration/webhcat-env.xml ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/HIVE/configuration/webhcat-env.xml b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/HIVE/configuration/webhcat-env.xml new file mode 100755 index 0000000..2eec231 --- /dev/null +++ b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/HIVE/configuration/webhcat-env.xml @@ -0,0 +1,54 @@ +<?xml version="1.0"?> +<?xml-stylesheet type="text/xsl" href="configuration.xsl"?> +<!-- +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +--> + +<configuration> + <!-- webhcat-env.sh --> + <property> + <name>content</name> + <description>webhcat-env.sh content</description> + <value> +# The file containing the running pid +PID_FILE={{webhcat_pid_file}} + +TEMPLETON_LOG_DIR={{templeton_log_dir}}/ + + +WEBHCAT_LOG_DIR={{templeton_log_dir}}/ + +# The console error log +ERROR_LOG={{templeton_log_dir}}/webhcat-console-error.log + +# The console log +CONSOLE_LOG={{templeton_log_dir}}/webhcat-console.log + +#TEMPLETON_JAR=templeton_jar_name + +#HADOOP_PREFIX=hadoop_prefix + +#HCAT_PREFIX=hive_prefix + +# Set HADOOP_HOME to point to a specific hadoop install directory +export HADOOP_HOME={{hadoop_home}} + </value> + </property> + +</configuration> http://git-wip-us.apache.org/repos/asf/ambari/blob/1863c3b9/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/HIVE/configuration/webhcat-log4j.xml ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/HIVE/configuration/webhcat-log4j.xml b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/HIVE/configuration/webhcat-log4j.xml new file mode 100755 index 0000000..0ded4d4 --- /dev/null +++ b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/HIVE/configuration/webhcat-log4j.xml @@ -0,0 +1,78 @@ +<?xml version="1.0"?> +<?xml-stylesheet type="text/xsl" href="configuration.xsl"?> +<!-- +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +--> + +<configuration supports_final="false"> + + <property> + <name>content</name> + <description>Custom webhcat-log4j.properties</description> + <value> +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Define some default values that can be overridden by system properties +webhcat.root.logger = INFO, standard +webhcat.log.dir = . 
+webhcat.log.file = webhcat.log + +log4j.rootLogger = ${webhcat.root.logger} + +# Logging Threshold +log4j.threshold = DEBUG + +log4j.appender.standard = org.apache.log4j.DailyRollingFileAppender +log4j.appender.standard.File = ${webhcat.log.dir}/${webhcat.log.file} + +# Rollover at midnight +log4j.appender.standard.DatePattern = .yyyy-MM-dd + +# Log line layout + +log4j.appender.standard.layout = org.apache.log4j.PatternLayout +log4j.appender.standard.layout.conversionPattern = %-5p | %d{DATE} | %c | %m%n + +# Class logging settings +log4j.logger.com.sun.jersey = DEBUG +log4j.logger.com.sun.jersey.spi.container.servlet.WebComponent = ERROR +log4j.logger.org.apache.hadoop = INFO +log4j.logger.org.apache.hadoop.conf = WARN +log4j.logger.org.apache.zookeeper = WARN +log4j.logger.org.eclipse.jetty = INFO + + </value> + </property> + +</configuration> http://git-wip-us.apache.org/repos/asf/ambari/blob/1863c3b9/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/HIVE/configuration/webhcat-site.xml ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/HIVE/configuration/webhcat-site.xml b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/HIVE/configuration/webhcat-site.xml new file mode 100755 index 0000000..12dfd8a --- /dev/null +++ b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/HIVE/configuration/webhcat-site.xml @@ -0,0 +1,167 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. 
You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +--> + +<!-- The default settings for Templeton. --> +<!-- Edit templeton-site.xml to change settings for your local --> +<!-- install. --> + +<configuration supports_final="true"> + + <property> + <name>templeton.port</name> + <value>50111</value> + <description>The HTTP port for the main server.</description> + </property> + + <property> + <name>templeton.hadoop.conf.dir</name> + <value>/etc/hadoop/conf</value> + <description>The path to the Hadoop configuration.</description> + </property> + + <property> + <name>templeton.jar</name> + <!-- TODO VERIFY WITH STACK-SELECT value>/usr/lib/hcatalog/share/webhcat/svr/webhcat.jar</value --> + <value>/usr/iop/current/hive-webhcat/share/webhcat/svr/lib/hive-webhcat-*.jar</value> + <description>The path to the Templeton jar file.</description> + </property> + + <property> + <name>templeton.libjars</name> + <!-- TODO VERIFY WITH STACK-SELECT value>/usr/lib/zookeeper/zookeeper.jar</value--> + <value>/usr/iop/current/zookeeper-client/zookeeper.jar</value> + <description>Jars to add to the classpath.</description> + </property> + + <property> + <name>templeton.hadoop</name> + <!-- TODO VERIFY WITH STACK-SELECT value>/usr/bin/hadoop</value--> + <value>/usr/iop/current/hadoop-client/bin/hadoop</value> + <description>The path to the Hadoop executable.</description> + </property> + + <property> + <name>templeton.pig.archive</name> + <value>hdfs:///iop/apps/${iop.version}/pig/pig.tar.gz</value> + <description>The path to the Pig archive in HDFS.</description> + <value-attributes> + <empty-value-valid>true</empty-value-valid> + 
</value-attributes> + </property> + + <property> + <name>templeton.pig.path</name> + <value>pig.tar.gz/pig/bin/pig</value> + <description>The path to the Pig executable.</description> + </property> + + <property> + <name>templeton.hcat</name> + <!-- TODO VERIFY WITH STACK-SELECT value>/usr/bin/hcat</value --> + <value>/usr/iop/current/hive-client/bin/hcat</value> + <description>The path to the hcatalog executable.</description> + </property> + + <property> + <name>templeton.hive.archive</name> + <value>hdfs:///iop/apps/${iop.version}/hive/hive.tar.gz</value> + <description>The path to the Hive archive.</description> + <value-attributes> + <empty-value-valid>true</empty-value-valid> + </value-attributes> + </property> + + <property> + <name>templeton.hive.home</name> + <value>hive.tar.gz/hive</value> + <description>The path to the Hive home within the tar. Has no effect if templeton.hive.archive is not set.</description> + </property> + + <property> + <name>templeton.hcat.home</name> + <value>hive.tar.gz/hive/hcatalog</value> + <description>The path to the HCat home within the tar. 
Has no effect if templeton.hive.archive is not set.</description> + </property> + + <property> + <name>templeton.hive.path</name> + <value>hive.tar.gz/hive/bin/hive</value> + <description>The path to the Hive executable.</description> + </property> + + <property> + <name>templeton.hive.properties</name> + <value>hive.metastore.local=false, hive.metastore.uris=thrift://localhost:9933, hive.metastore.sasl.enabled=false</value> + <description>Properties to set when running hive.</description> + </property> + + <property> + <name>templeton.sqoop.archive</name> + <value>hdfs:///iop/apps/${iop.version}/sqoop/sqoop.tar.gz</value> + <description>The path to the Sqoop archive in HDFS.</description> + </property> + + <property> + <name>templeton.sqoop.path</name> + <value>sqoop.tar.gz/sqoop/bin/sqoop</value> + <description>The path to the Sqoop executable.</description> + </property> + + <property> + <name>templeton.sqoop.home</name> + <value>sqoop.tar.gz/sqoop</value> + <description>The path to the Sqoop home within the tar. Has no effect if + templeton.sqoop.archive is not set. 
+ </description> + </property> + + <property> + <name>templeton.zookeeper.hosts</name> + <value>localhost:2181</value> + <description>ZooKeeper servers, as comma separated host:port pairs</description> + <value-attributes> + <type>multiLine</type> + </value-attributes> + </property> + + <property> + <name>templeton.storage.class</name> + <value>org.apache.hive.hcatalog.templeton.tool.ZooKeeperStorage</value> + <description>The class to use as storage</description> + </property> + + <property> + <name>templeton.override.enabled</name> + <value>false</value> + <description>Enable the override path in templeton.override.jars</description> + </property> + + <property> + <name>templeton.streaming.jar</name> + <value>hdfs:///iop/apps/${iop.version}/mapreduce/hadoop-streaming.jar</value> + <description>The hdfs path to the Hadoop streaming jar file.</description> + </property> + + <property> + <name>templeton.exec.timeout</name> + <value>60000</value> + <description>Time out for templeton api</description> + </property> + +</configuration>