http://git-wip-us.apache.org/repos/asf/ambari/blob/1863c3b9/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/YARN/configuration/yarn-site.xml ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/YARN/configuration/yarn-site.xml b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/YARN/configuration/yarn-site.xml new file mode 100755 index 0000000..397f96f --- /dev/null +++ b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/YARN/configuration/yarn-site.xml @@ -0,0 +1,748 @@ +<?xml version="1.0"?> +<?xml-stylesheet type="text/xsl" href="configuration.xsl"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> + +<!-- Put site-specific property overrides in this file. --> + +<configuration supports_final="true" xmlns:xi="http://www.w3.org/2001/XInclude"> + + <!-- ResourceManager --> + + <property> + <name>yarn.resourcemanager.hostname</name> + <value>localhost</value> + <description>The hostname of the RM.</description> + </property> + + <property> + <name>yarn.resourcemanager.resource-tracker.address</name> + <value>localhost:8025</value> + <description> The address of ResourceManager. 
</description> + </property> + + <property> + <name>yarn.resourcemanager.scheduler.address</name> + <value>localhost:8030</value> + <description>The address of the scheduler interface.</description> + </property> + + <property> + <name>yarn.resourcemanager.address</name> + <value>localhost:8050</value> + <description> + The address of the applications manager interface in the + RM. + </description> + </property> + + <property> + <name>yarn.resourcemanager.admin.address</name> + <value>localhost:8141</value> + <description>The address of the RM admin interface.</description> + </property> + + <property> + <name>yarn.resourcemanager.scheduler.class</name> + <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</value> + <description>The class to use as the resource scheduler.</description> + </property> + + <property> + <name>yarn.scheduler.minimum-allocation-mb</name> + <value>512</value> + <description> + The minimum allocation for every container request at the RM, + in MBs. Memory requests lower than this won't take effect, + and the specified value will get allocated at minimum. + </description> + <display-name>Minimum Container Size (Memory)</display-name> + <value-attributes> + <type>int</type> + <minimum>0</minimum> + <maximum>5120</maximum> + <unit>MB</unit> + <increment-step>250</increment-step> + </value-attributes> + <depends-on> + <property> + <type>yarn-site</type> + <name>yarn.nodemanager.resource.memory-mb</name> + </property> + </depends-on> + </property> + + <property> + <name>yarn.scheduler.maximum-allocation-mb</name> + <value>2048</value> + <description> + The maximum allocation for every container request at the RM, + in MBs. Memory requests higher than this won't take effect, + and will get capped to this value. 
+ </description> + <display-name>Maximum Container Size (Memory)</display-name> + <value-attributes> + <type>int</type> + <minimum>0</minimum> + <maximum>5120</maximum> + <unit>MB</unit> + <increment-step>256</increment-step> + </value-attributes> + <depends-on> + <property> + <type>yarn-site</type> + <name>yarn.nodemanager.resource.memory-mb</name> + </property> + </depends-on> + </property> + + <property> + <name>yarn.acl.enable</name> + <value>false</value> + <description> Are acls enabled. </description> + <value-attributes> + <type>boolean</type> + </value-attributes> + </property> + + <property> + <name>yarn.admin.acl</name> + <value></value> + <description> ACL of who can be admin of the YARN cluster. </description> + <value-attributes> + <empty-value-valid>true</empty-value-valid> + </value-attributes> + </property> + + <!-- NodeManager --> + + <property> + <name>yarn.nodemanager.address</name> + <value>0.0.0.0:45454</value> + <description>The address of the container manager in the NM.</description> + </property> + + <property> + <name>yarn.nodemanager.resource.memory-mb</name> + <value>5120</value> + <description>Amount of physical memory, in MB, that can be allocated + for containers.</description> + <display-name>Memory allocated for all YARN containers on a node</display-name> + <value-attributes> + <type>int</type> + <minimum>0</minimum> + <maximum>268435456</maximum> + <unit>MB</unit> + <increment-step>250</increment-step> + </value-attributes> + </property> + + <property> + <name>yarn.application.classpath</name> + <value>/etc/hadoop/conf,/usr/iop/current/hadoop-client/*,/usr/iop/current/hadoop-client/lib/*,/usr/iop/current/hadoop-hdfs-client/*,/usr/iop/current/hadoop-hdfs-client/lib/*,/usr/iop/current/hadoop-yarn-client/*,/usr/iop/current/hadoop-yarn-client/lib/*</value> + <description>Classpath for typical applications.</description> + </property> + + <property> + <name>yarn.nodemanager.vmem-pmem-ratio</name> + <value>5</value> + 
<description>Ratio between virtual memory to physical memory when + setting memory limits for containers. Container allocations are + expressed in terms of physical memory, and virtual memory usage + is allowed to exceed this allocation by this ratio. + </description> + <display-name>Virtual Memory Ratio</display-name> + <value-attributes> + <type>float</type> + <minimum>0.1</minimum> + <maximum>5.0</maximum> + <increment-step>0.1</increment-step> + </value-attributes> + </property> + + <property> + <name>yarn.nodemanager.container-executor.class</name> + <value>org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor</value> + <description>ContainerExecutor for launching containers</description> + <depends-on> + <property> + <type>yarn-env</type> + <name>yarn_cgroups_enabled</name> + </property> + </depends-on> + </property> + + <property> + <name>yarn.nodemanager.linux-container-executor.group</name> + <value>hadoop</value> + <description>Unix group of the NodeManager</description> + <depends-on> + <property> + <type>yarn-env</type> + <name>yarn_cgroups_enabled</name> + </property> + </depends-on> + </property> + + <property> + <name>yarn.nodemanager.aux-services</name> + <value>mapreduce_shuffle</value> + <description>Auxiliary services of NodeManager. A valid service name should only contain a-zA-Z0-9_ and can + not start with numbers</description> + </property> + + <property> + <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name> + <value>org.apache.hadoop.mapred.ShuffleHandler</value> + <description>The auxiliary service class to use </description> + </property> + + <property> + <name>yarn.nodemanager.log-dirs</name> + <value>/hadoop/yarn/log</value> + <description> + Where to store container logs. An application's localized log directory + will be found in ${yarn.nodemanager.log-dirs}/application_${appid}. + Individual containers' log directories will be below this, in directories + named container_{$contid}. 
Each container directory will contain the files + stderr, stdin, and syslog generated by that container. + </description> + <value-attributes> + <type>directories</type> + </value-attributes> + </property> + + <property> + <name>yarn.nodemanager.local-dirs</name> + <value>/hadoop/yarn/local</value> + <description> + List of directories to store localized files in. An + application's localized file directory will be found in: + ${yarn.nodemanager.local-dirs}/usercache/${user}/appcache/application_${appid}. + Individual containers' work directories, called container_${contid}, will + be subdirectories of this. + </description> + <value-attributes> + <type>directories</type> + </value-attributes> + </property> + + <property> + <name>yarn.nodemanager.container-monitor.interval-ms</name> + <value>3000</value> + <description> + The interval, in milliseconds, for which the node manager + waits between two cycles of monitoring its containers' memory usage. + </description> + </property> + + <!-- + <property> + <name>yarn.nodemanager.health-checker.script.path</name> + <value>/etc/hadoop/conf/health_check_nodemanager</value> + <description>The health check script to run.</description> + </property> + --> + + <property> + <name>yarn.nodemanager.health-checker.interval-ms</name> + <value>135000</value> + <description>Frequency of running node health script.</description> + </property> + + <property> + <name>yarn.nodemanager.health-checker.script.timeout-ms</name> + <value>60000</value> + <description>Script time out period.</description> + </property> + + <property> + <name>yarn.nodemanager.log.retain-second</name> + <value>604800</value> + <description> + Time in seconds to retain user logs. Only applicable if + log aggregation is disabled. + </description> + </property> + + <property> + <name>yarn.log-aggregation-enable</name> + <value>true</value> + <description>Whether to enable log aggregation. 
</description> + <display-name>Enable Log Aggregation</display-name> + <value-attributes> + <type>boolean</type> + </value-attributes> + </property> + + <property> + <name>yarn.nodemanager.remote-app-log-dir</name> + <value>/app-logs</value> + <description>Location to aggregate logs to. </description> + <value-attributes> + <type>directory</type> + </value-attributes> + </property> + + <property> + <name>yarn.nodemanager.remote-app-log-dir-suffix</name> + <value>logs</value> + <description> + The remote log dir will be created at + {yarn.nodemanager.remote-app-log-dir}/${user}/{thisParam}. + </description> + </property> + + <property> + <name>yarn.nodemanager.log-aggregation.compression-type</name> + <value>gz</value> + <description> + T-file compression types used to compress aggregated logs. + </description> + </property> + + <property> + <name>yarn.nodemanager.delete.debug-delay-sec</name> + <value>0</value> + <description> + Number of seconds after an application finishes before the nodemanager's + DeletionService will delete the application's localized file directory + and log directory. + + To diagnose Yarn application problems, set this property's value large + enough (for example, to 600 = 10 minutes) to permit examination of these + directories. After changing the property's value, you must restart the + nodemanager in order for it to have an effect. + + The roots of Yarn applications' work directories is configurable with + the yarn.nodemanager.local-dirs property (see below), and the roots + of the Yarn applications' log directories is configurable with the + yarn.nodemanager.log-dirs property (see also below). + </description> + </property> + + <property> + <name>yarn.log-aggregation.retain-seconds</name> + <value>2592000</value> + <description> + How long to keep aggregation logs before deleting them. -1 disables. + Be careful set this too small and you will spam the name node. 
+ </description> + </property> + + <property> + <name>yarn.nodemanager.admin-env</name> + <value>MALLOC_ARENA_MAX=$MALLOC_ARENA_MAX</value> + <description> + Environment variables that should be forwarded from the NodeManager's + environment to the container's. + </description> + </property> + + <property> + <name>yarn.nodemanager.disk-health-checker.min-healthy-disks</name> + <value>0.25</value> + <description> + The minimum fraction of number of disks to be healthy for the nodemanager + to launch new containers. This correspond to both + yarn-nodemanager.local-dirs and yarn.nodemanager.log-dirs. i.e. + If there are less number of healthy local-dirs (or log-dirs) available, + then new containers will not be launched on this node. + </description> + </property> + + <property> + <name>yarn.resourcemanager.am.max-attempts</name> + <value>2</value> + <description> + The maximum number of application attempts. It's a global + setting for all application masters. Each application master can specify + its individual maximum number of application attempts via the API, but the + individual number cannot be more than the global upper bound. If it is, + the resourcemanager will override it. The default number is set to 2, to + allow at least one retry for AM. + </description> + </property> + + <property> + <name>yarn.resourcemanager.webapp.address</name> + <value>localhost:8088</value> + <description> + The address of the RM web application. + </description> + </property> + + <property> + <name>yarn.nodemanager.vmem-check-enabled</name> + <value>true</value> + <description> + Whether virtual memory limits will be enforced for containers. 
+ </description> + </property> + + <property> + <name>yarn.log.server.url</name> + <value>http://localhost:19888/jobhistory/logs</value> + <description> + URI for the HistoryServer's log resource + </description> + </property> + + <property> + <name>yarn.resourcemanager.nodes.exclude-path</name> + <value>/etc/hadoop/conf/yarn.exclude</value> + <description> + Names a file that contains a list of hosts that are + not permitted to connect to the resource manager. The full pathname of the + file must be specified. If the value is empty, no hosts are + excluded. + </description> + </property> + + <property> + <name>yarn.timeline-service.enabled</name> + <value>true</value> + <description>Indicate to clients whether timeline service is enabled or not. + If enabled, clients will put entities and events to the timeline server. + </description> + <value-attributes> + <type>boolean</type> + </value-attributes> + </property> + + <property> + <name>yarn.timeline-service.store-class</name> + <!--<value>org.apache.hadoop.yarn.server.applicationhistoryservice.timeline.LeveldbTimelineStore</value> --> + <value>org.apache.hadoop.yarn.server.timeline.LeveldbTimelineStore</value> + <description> + Store class name for timeline store + </description> + </property> + + <property> + <name>yarn.timeline-service.generic-application-history.store-class</name> + <value>org.apache.hadoop.yarn.server.applicationhistoryservice.NullApplicationHistoryStore</value> + <description> + Store class name for history store, defaulting to file system store + </description> + </property> + + <property> + <name>yarn.timeline-service.leveldb-timeline-store.path</name> + <value>/var/log/hadoop-yarn/timeline</value> + <description> + Store file name for leveldb timeline store + </description> + <value-attributes> + <type>directory</type> + </value-attributes> + </property> + + <property> + <name>yarn.timeline-service.webapp.address</name> + <value>localhost:8188</value> + <description> + The http address of 
the timeline service web application. + </description> + </property> + + <property> + <name>yarn.timeline-service.webapp.https.address</name> + <value>localhost:8190</value> + <description> + The https address of the timeline service web application. + </description> + </property> + + <property> + <name>yarn.timeline-service.address</name> + <value>localhost:10200</value> + <description> + This is default address for the timeline server to start + the RPC server. + </description> + </property> + <property> + <name>yarn.timeline-service.ttl-enable</name> + <description>Enable age off of timeline store data.</description> + <value>true</value> + <value-attributes> + <type>boolean</type> + </value-attributes> + </property> + <property> + <description>Time to live for timeline store data in milliseconds.</description> + <name>yarn.timeline-service.ttl-ms</name> + <value>2678400000</value> + <value-attributes> + <type>int</type> + </value-attributes> + </property> + <property> + <description>Length of time to wait between deletion cycles of leveldb timeline store in milliseconds.</description> + <name>yarn.timeline-service.leveldb-timeline-store.ttl-interval-ms</name> + <value>300000</value> + <value-attributes> + <type>int</type> + </value-attributes> + </property> + + <!-- Default Values Set for IOP Stack --> + + <!-- Fault Tolerance--> + <property> + <name>yarn.nodemanager.recovery.enabled</name> + <value>false</value> + <description>Enable the node manager to recover after starting</description> + </property> + <property> + <name>yarn.resourcemanager.recovery.enabled</name> + <value>false</value> + <description> + Enable RM to recover state after starting. + If true, then yarn.resourcemanager.store.class must be specified. + </description> + </property> + <property> + <name>yarn.resourcemanager.work-preserving-recovery.enabled</name> + <value>false</value> + <description> + Enable RM work preserving recovery. 
This configuration is private to YARN for experimenting the feature. + </description> + </property> + <property> + <name>yarn.resourcemanager.zk-address</name> + <value>localhost:2181</value> + <description> + List Host:Port of the ZooKeeper servers to be used by the RM. comma separated host:port pairs, each corresponding to a zk server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002" If the optional chroot suffix is used the example would look like: "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002/app/a" where the client would be rooted at "/app/a" and all paths would be relative to this root - ie getting/setting/etc... "/foo/bar" would result in operations being run on "/app/a/foo/bar" (from the server perspective). + </description> + </property> + <property> + <name>yarn.resourcemanager.connect.retry-interval.ms</name> + <value>30000</value> + <description>How often to try connecting to the ResourceManager.</description> + </property> + <property> + <name>yarn.resourcemanager.connect.max-wait.ms</name> + <value>900000</value> + <description>Maximum time to wait to establish connection to ResourceManager</description> + </property> + <property> + <name>yarn.resourcemanager.ha.enabled</name> + <value>false</value> + <description>enable RM HA or not</description> + </property> + + <!-- Isolation --> + <property> + <name>yarn.nodemanager.linux-container-executor.resources-handler.class</name> + <value>org.apache.hadoop.yarn.server.nodemanager.util.DefaultLCEResourcesHandler</value> + <description>Pre-requisite to use CGroups</description> + </property> + <property> + <name>yarn.nodemanager.linux-container-executor.cgroups.hierarchy</name> + <value>hadoop-yarn</value> + <description>Name of the Cgroups hierarchy under which all YARN jobs will be launched</description> + </property> + <property> + <name>yarn.nodemanager.linux-container-executor.cgroups.mount</name> + <value>false</value> + <description>If true, YARN will automount the CGroup, however the directory 
needs to already exist; else, the cgroup should be mounted by the admin</description> + </property> + <property> + <name>yarn.nodemanager.linux-container-executor.cgroups.strict-resource-usage</name> + <value>false</value> + <description>Strictly limit CPU resource usage to allocated usage even if spare CPU is available</description> + </property> + + <!-- Scheduler --> + <property> + <name>yarn.nodemanager.resource.cpu-vcores</name> + <value>8</value> + <description></description> + <display-name>Number of virtual cores</display-name> + <value-attributes> + <type>int</type> + <minimum>0</minimum> + <maximum>32</maximum> + </value-attributes> + <depends-on> + <property> + <type>yarn-site</type> + <name>yarn.nodemanager.resource.percentage-physical-cpu-limit</name> + </property> + </depends-on> + </property> + + <property> + <name>yarn.nodemanager.resource.percentage-physical-cpu-limit</name> + <value>80</value> + <description>The amount of CPU allocated for YARN containers - only effective when used with CGroups</description> + <display-name>Percentage of physical CPU allocated for all containers on a node</display-name> + <value-attributes> + <type>int</type> + <minimum>0</minimum> + <maximum>100</maximum> + <increment-step>1</increment-step> + </value-attributes> + </property> + + <property> + <name>yarn.timeline-service.http-authentication.type</name> + <value>simple</value> + <description> + Defines authentication used for the Timeline Server HTTP endpoint. 
+ Supported values are: simple | kerberos | $AUTHENTICATION_HANDLER_CLASSNAME + </description> + </property> + <property> + <name>yarn.timeline-service.http-authentication.simple.anonymous.allowed</name> + <value>true</value> + <description></description> + </property> + <property> + <name>yarn.resourcemanager.webapp.delegation-token-auth-filter.enabled</name> + <value>false</value> + <description> + Flag to enable override of the default kerberos authentication filter with + the RM authentication filter to allow authentication using delegation + tokens(fallback to kerberos if the tokens are missing). + Only applicable when the http authentication type is kerberos. + </description> + </property> + + <property> + <name>yarn.timeline-service.bind-host</name> + <value>0.0.0.0</value> + </property> + <property> + <name>yarn.nodemanager.bind-host</name> + <value>0.0.0.0</value> + </property> + <property> + <name>yarn.resourcemanager.bind-host</name> + <value>0.0.0.0</value> + </property> + <property> + <name>yarn.resourcemanager.system-metrics-publisher.enabled</name> + <value>true</value> + </property> + <property> + <name>hadoop.registry.rm.enabled</name> + <value>false</value> + <description> Is the registry enabled: does the RM start it up, create the user and system paths, and purge service records when containers, application attempts and applications complete? + </description> + </property> + <property> + <name>hadoop.registry.zk.quorum</name> + <value>localhost:2181</value> + <description> List of hostname:port pairs defining the zookeeper quorum binding for the registry </description> + </property> + + <property> + <name>yarn.node-labels.enabled</name> + <value>false</value> + <description> + Enable node labels to restrict YARN applications so that they run only on cluster nodes that have a specified node label. 
+ </description> + <display-name>Node Labels</display-name> + <value-attributes> + <type>boolean</type> + </value-attributes> + <value-attributes> + <type>value-list</type> + <entries> + <entry> + <value>true</value> + <label>Enabled</label> + </entry> + <entry> + <value>false</value> + <label>Disabled</label> + </entry> + </entries> + <selection-cardinality>1</selection-cardinality> + </value-attributes> + </property> + + <property> + <name>yarn.resourcemanager.scheduler.monitor.enable</name> + <value>false</value> + <display-name>Pre-emption</display-name> + <value-attributes> + <type>value-list</type> + <entries> + <entry> + <value>true</value> + <label>Enabled</label> + </entry> + <entry> + <value>false</value> + <label>Disabled</label> + </entry> + </entries> + <selection-cardinality>1</selection-cardinality> + </value-attributes> + </property> + + <property> + <name>yarn.scheduler.minimum-allocation-vcores</name> + <value>1</value> + <description></description> + <display-name>Minimum Container Size (VCores)</display-name> + <value-attributes> + <type>int</type> + <minimum>0</minimum> + <maximum>8</maximum> + <increment-step>1</increment-step> + </value-attributes> + <depends-on> + <property> + <type>yarn-site</type> + <name>yarn.nodemanager.resource.cpu-vcores</name> + </property> + </depends-on> + </property> + + <property> + <name>yarn.scheduler.maximum-allocation-vcores</name> + <value>8</value> + <description></description> + <display-name>Maximum Container Size (VCores)</display-name> + <value-attributes> + <type>int</type> + <minimum>0</minimum> + <maximum>8</maximum> + <increment-step>1</increment-step> + </value-attributes> + <depends-on> + <property> + <type>yarn-site</type> + <name>yarn.nodemanager.resource.cpu-vcores</name> + </property> + </depends-on> + </property> + + <property> + <name>yarn.http.policy</name> + <value>HTTP_ONLY</value> + <description> + This configures the HTTP endpoint for Yarn Daemons.The following values are supported: - 
HTTP_ONLY : Service is provided only on http - HTTPS_ONLY : Service is provided only on https + </description> + </property> + +</configuration>
http://git-wip-us.apache.org/repos/asf/ambari/blob/1863c3b9/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/YARN/kerberos.json ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/YARN/kerberos.json b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/YARN/kerberos.json new file mode 100755 index 0000000..e3a2d58 --- /dev/null +++ b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/YARN/kerberos.json @@ -0,0 +1,208 @@ +{ + "services": [ + { + "name": "YARN", + "identities": [ + { + "name": "/spnego" + }, + { + "name": "/hdfs" + }, + { + "name": "/smokeuser" + } + ], + "configurations": [ + { + "yarn-site": { + "yarn.timeline-service.enabled": "false", + "yarn.timeline-service.http-authentication.type": "kerberos", + "yarn.acl.enable": "true", + "yarn.timeline-service.http-authentication.signature.secret": "", + "yarn.timeline-service.http-authentication.signature.secret.file": "", + "yarn.timeline-service.http-authentication.signer.secret.provider": "", + "yarn.timeline-service.http-authentication.signer.secret.provider.object": "", + "yarn.timeline-service.http-authentication.token.validity": "", + "yarn.timeline-service.http-authentication.cookie.domain": "", + "yarn.timeline-service.http-authentication.cookie.path": "", + "yarn.timeline-service.http-authentication.proxyusers.*.hosts": "", + "yarn.timeline-service.http-authentication.proxyusers.*.users": "", + "yarn.timeline-service.http-authentication.proxyusers.*.groups": "", + "yarn.timeline-service.http-authentication.kerberos.name.rules": "", + "yarn.resourcemanager.proxyusers.*.groups": "", + "yarn.resourcemanager.proxyusers.*.hosts": "", + "yarn.resourcemanager.proxyusers.*.users": "", + "yarn.resourcemanager.proxy-user-privileges.enabled": "true", + "yarn.nodemanager.linux-container-executor.cgroups.mount-path": "" + } + } + ], + "components": [ + { + "name": 
"NODEMANAGER", + "identities": [ + { + "name": "nodemanager_nm", + "principal": { + "value": "nm/_HOST@${realm}", + "type" : "service", + "configuration": "yarn-site/yarn.nodemanager.principal", + "local_username": "${yarn-env/yarn_user}" + }, + "keytab": { + "file": "${keytab_dir}/nm.service.keytab", + "owner": { + "name": "${yarn-env/yarn_user}", + "access": "r" + }, + "group": { + "name": "${cluster-env/user_group}", + "access": "" + }, + "configuration": "yarn-site/yarn.nodemanager.keytab" + } + }, + { + "name": "/spnego", + "principal": { + "configuration": "yarn-site/yarn.nodemanager.webapp.spnego-principal" + }, + "keytab": { + "configuration": "yarn-site/yarn.nodemanager.webapp.spnego-keytab-file" + } + } + ], + "configurations": [ + { + "yarn-site": { + "yarn.nodemanager.container-executor.class": "org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor" + } + } + ] + }, + { + "name": "RESOURCEMANAGER", + "identities": [ + { + "name": "resource_manager_rm", + "principal": { + "value": "rm/_HOST@${realm}", + "type" : "service", + "configuration": "yarn-site/yarn.resourcemanager.principal", + "local_username": "${yarn-env/yarn_user}" + }, + "keytab": { + "file": "${keytab_dir}/rm.service.keytab", + "owner": { + "name": "${yarn-env/yarn_user}", + "access": "r" + }, + "group": { + "name": "${cluster-env/user_group}", + "access": "" + }, + "configuration": "yarn-site/yarn.resourcemanager.keytab" + } + }, + { + "name": "/spnego", + "principal": { + "configuration": "yarn-site/yarn.resourcemanager.webapp.spnego-principal" + }, + "keytab": { + "configuration": "yarn-site/yarn.resourcemanager.webapp.spnego-keytab-file" + } + } + ] + }, + { + "name": "APP_TIMELINE_SERVER", + "identities": [ + { + "name": "app_timeline_server_yarn", + "principal": { + "value": "yarn/_HOST@${realm}", + "type" : "service", + "configuration": "yarn-site/yarn.timeline-service.principal", + "local_username": "${yarn-env/yarn_user}" + }, + "keytab": { + "file": 
"${keytab_dir}/yarn.service.keytab", + "owner": { + "name": "${yarn-env/yarn_user}", + "access": "r" + }, + "group": { + "name": "${cluster-env/user_group}", + "access": "" + }, + "configuration": "yarn-site/yarn.timeline-service.keytab" + } + }, + { + "name": "/spnego", + "principal": { + "configuration": "yarn-site/yarn.timeline-service.http-authentication.kerberos.principal" + }, + "keytab": { + "configuration": "yarn-site/yarn.timeline-service.http-authentication.kerberos.keytab" + } + } + ] + } + ] + }, + { + "name": "MAPREDUCE2", + "identities": [ + { + "name": "/spnego" + }, + { + "name": "/hdfs" + }, + { + "name": "/smokeuser" + } + ], + "components": [ + { + "name": "HISTORYSERVER", + "identities": [ + { + "name": "history_server_jhs", + "principal": { + "value": "jhs/_HOST@${realm}", + "type" : "service", + "configuration": "mapred-site/mapreduce.jobhistory.principal", + "local_username": "${mapred-env/mapred_user}" + }, + "keytab": { + "file": "${keytab_dir}/jhs.service.keytab", + "owner": { + "name": "${mapred-env/mapred_user}", + "access": "r" + }, + "group": { + "name": "${cluster-env/user_group}", + "access": "" + }, + "configuration": "mapred-site/mapreduce.jobhistory.keytab" + } + }, + { + "name": "/spnego", + "principal": { + "configuration": "mapred-site/mapreduce.jobhistory.webapp.spnego-principal" + }, + "keytab": { + "configuration": "mapred-site/mapreduce.jobhistory.webapp.spnego-keytab-file" + } + } + ] + } + ] + } + ] +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/ambari/blob/1863c3b9/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/YARN/metainfo.xml ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/YARN/metainfo.xml b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/YARN/metainfo.xml new file mode 100755 index 0000000..b28ee8a --- /dev/null +++ 
b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/YARN/metainfo.xml @@ -0,0 +1,264 @@ +<?xml version="1.0"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> + +<metainfo> + <schemaVersion>2.0</schemaVersion> + <services> + <service> + <name>YARN</name> + <displayName>YARN</displayName> + <comment>Apache Hadoop NextGen MapReduce (YARN)</comment> + <version>2.7.1</version> + <components> + + <component> + <name>APP_TIMELINE_SERVER</name> + <displayName>App Timeline Server</displayName> + <category>MASTER</category> + <cardinality>1</cardinality> + <versionAdvertised>true</versionAdvertised> + <commandScript> + <script>scripts/application_timeline_server.py</script> + <scriptType>PYTHON</scriptType> + <timeout>600</timeout> + </commandScript> + </component> + + <component> + <name>RESOURCEMANAGER</name> + <displayName>ResourceManager</displayName> + <category>MASTER</category> + <cardinality>1-2</cardinality> + <versionAdvertised>true</versionAdvertised> + <commandScript> + <script>scripts/resourcemanager.py</script> + <scriptType>PYTHON</scriptType> + <timeout>600</timeout> + </commandScript> + <customCommands> + <customCommand> + <name>DECOMMISSION</name> + <commandScript> + <script>scripts/resourcemanager.py</script> + 
<scriptType>PYTHON</scriptType> + <timeout>600</timeout> + </commandScript> + </customCommand> + <customCommand> + <name>REFRESHQUEUES</name> + <commandScript> + <script>scripts/resourcemanager.py</script> + <scriptType>PYTHON</scriptType> + <timeout>600</timeout> + </commandScript> + </customCommand> + </customCommands> + <configuration-dependencies> + <config-type>capacity-scheduler</config-type> + </configuration-dependencies> + </component> + + <component> + <name>NODEMANAGER</name> + <displayName>NodeManager</displayName> + <category>SLAVE</category> + <cardinality>1+</cardinality> + <versionAdvertised>true</versionAdvertised> + <commandScript> + <script>scripts/nodemanager.py</script> + <scriptType>PYTHON</scriptType> + <timeout>600</timeout> + </commandScript> + </component> + + <component> + <name>YARN_CLIENT</name> + <displayName>YARN Client</displayName> + <category>CLIENT</category> + <cardinality>1+</cardinality> + <versionAdvertised>true</versionAdvertised> + <commandScript> + <script>scripts/yarn_client.py</script> + <scriptType>PYTHON</scriptType> + <timeout>600</timeout> + </commandScript> + <configFiles> + <configFile> + <type>xml</type> + <fileName>yarn-site.xml</fileName> + <dictionaryName>yarn-site</dictionaryName> + </configFile> + <configFile> + <type>xml</type> + <fileName>core-site.xml</fileName> + <dictionaryName>core-site</dictionaryName> + </configFile> + <configFile> + <type>env</type> + <fileName>yarn-env.sh</fileName> + <dictionaryName>yarn-env</dictionaryName> + </configFile> + <configFile> + <type>env</type> + <fileName>log4j.properties</fileName> + <dictionaryName>hdfs-log4j,yarn-log4j</dictionaryName> + </configFile> + <configFile> + <type>xml</type> + <fileName>capacity-scheduler.xml</fileName> + <dictionaryName>capacity-scheduler</dictionaryName> + </configFile> + </configFiles> + </component> + </components> + + <osSpecifics> + <osSpecific> + <osFamily>any</osFamily> + <packages> + <package> + <name>hadoop-yarn</name> + 
</package> + <package> + <name>hadoop-mapreduce</name> + </package> + </packages> + </osSpecific> + </osSpecifics> + + <commandScript> + <script>scripts/service_check.py</script> + <scriptType>PYTHON</scriptType> + <timeout>300</timeout> + </commandScript> + + <requiredServices> + <service>HDFS</service> + </requiredServices> + + <configuration-dependencies> + <config-type>yarn-site</config-type> + <config-type>yarn-env</config-type> + <config-type>core-site</config-type> + <config-type>yarn-log4j</config-type> + </configuration-dependencies> + + <widgetsFileName>YARN_widgets.json</widgetsFileName> + <metricsFileName>YARN_metrics.json</metricsFileName> + + </service> + + <service> + <name>MAPREDUCE2</name> + <displayName>MapReduce2</displayName> + <comment>Apache Hadoop NextGen MapReduce (YARN)</comment> + <version>2.7.1.4.1</version> + <components> + <component> + <name>HISTORYSERVER</name> + <displayName>History Server</displayName> + <category>MASTER</category> + <cardinality>1</cardinality> + <versionAdvertised>true</versionAdvertised> + <auto-deploy> + <enabled>true</enabled> + <co-locate>YARN/RESOURCEMANAGER</co-locate> + </auto-deploy> + <dependencies> + <dependency> + <name>HDFS/HDFS_CLIENT</name> + <scope>host</scope> + <auto-deploy> + <enabled>true</enabled> + </auto-deploy> + </dependency> + </dependencies> + <commandScript> + <script>scripts/historyserver.py</script> + <scriptType>PYTHON</scriptType> + <timeout>600</timeout> + </commandScript> + </component> + + <component> + <name>MAPREDUCE2_CLIENT</name> + <displayName>MapReduce2 Client</displayName> + <category>CLIENT</category> + <cardinality>0+</cardinality> + <versionAdvertised>true</versionAdvertised> + <commandScript> + <script>scripts/mapreduce2_client.py</script> + <scriptType>PYTHON</scriptType> + <timeout>600</timeout> + </commandScript> + <configFiles> + <configFile> + <type>xml</type> + <fileName>mapred-site.xml</fileName> + <dictionaryName>mapred-site</dictionaryName> + </configFile> + 
<configFile> + <type>xml</type> + <fileName>core-site.xml</fileName> + <dictionaryName>core-site</dictionaryName> + </configFile> + <configFile> + <type>env</type> + <fileName>mapred-env.sh</fileName> + <dictionaryName>mapred-env</dictionaryName> + </configFile> + </configFiles> + </component> + </components> + + <osSpecifics> + <osSpecific> + <osFamily>any</osFamily> + <packages> + <package> + <name>hadoop-mapreduce</name> + </package> + </packages> + </osSpecific> + </osSpecifics> + + <commandScript> + <script>scripts/mapred_service_check.py</script> + <scriptType>PYTHON</scriptType> + <timeout>300</timeout> + </commandScript> + + <requiredServices> + <service>YARN</service> + </requiredServices> + + <configuration-dependencies> + <config-type>core-site</config-type> + <config-type>mapred-site</config-type> + <config-type>mapred-env</config-type> + </configuration-dependencies> + + <configuration-dir>configuration-mapred</configuration-dir> + <restartRequiredAfterRackChange>true</restartRequiredAfterRackChange> + <widgetsFileName>MAPREDUCE2_widgets.json</widgetsFileName> + <metricsFileName>MAPREDUCE2_metrics.json</metricsFileName> + + </service> + + </services> +</metainfo> http://git-wip-us.apache.org/repos/asf/ambari/blob/1863c3b9/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/YARN/package/alerts/alert_nodemanager_health.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/YARN/package/alerts/alert_nodemanager_health.py b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/YARN/package/alerts/alert_nodemanager_health.py new file mode 100755 index 0000000..c462081 --- /dev/null +++ b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/YARN/package/alerts/alert_nodemanager_health.py @@ -0,0 +1,143 @@ +#!/usr/bin/env python + +""" +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license 
agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +import json +import socket +import urllib2 +from ambari_commons import OSCheck +from ambari_commons.inet_utils import resolve_address + +RESULT_CODE_OK = 'OK' +RESULT_CODE_CRITICAL = 'CRITICAL' +RESULT_CODE_UNKNOWN = 'UNKNOWN' + +NODEMANAGER_HTTP_ADDRESS_KEY = '{{yarn-site/yarn.nodemanager.webapp.address}}' +NODEMANAGER_HTTPS_ADDRESS_KEY = '{{yarn-site/yarn.nodemanager.webapp.https.address}}' +YARN_HTTP_POLICY_KEY = '{{yarn-site/yarn.http.policy}}' + +OK_MESSAGE = 'NodeManager Healthy' +CRITICAL_CONNECTION_MESSAGE = 'Connection failed to {0} ({1})' +CRITICAL_HTTP_STATUS_MESSAGE = 'HTTP {0} returned from {1} ({2})' +CRITICAL_NODEMANAGER_STATUS_MESSAGE = 'NodeManager returned an unexpected status of "{0}"' +CRITICAL_NODEMANAGER_UNKNOWN_JSON_MESSAGE = 'Unable to determine NodeManager health from unexpected JSON response' + +NODEMANAGER_DEFAULT_PORT = 8042 + +def get_tokens(): + """ + Returns a tuple of tokens in the format {{site/property}} that will be used + to build the dictionary passed into execute + """ + return (NODEMANAGER_HTTP_ADDRESS_KEY,NODEMANAGER_HTTPS_ADDRESS_KEY, + YARN_HTTP_POLICY_KEY) + + +def execute(parameters=None, host_name=None): + """ + Returns a tuple containing the result code and a pre-formatted result label + + Keyword arguments: + parameters (dictionary): a mapping of 
parameter key to value + host_name (string): the name of this host where the alert is running + """ + result_code = RESULT_CODE_UNKNOWN + + if parameters is None: + return (result_code, ['There were no parameters supplied to the script.']) + + scheme = 'http' + http_uri = None + https_uri = None + http_policy = 'HTTP_ONLY' + + if NODEMANAGER_HTTP_ADDRESS_KEY in parameters: + http_uri = parameters[NODEMANAGER_HTTP_ADDRESS_KEY] + + if NODEMANAGER_HTTPS_ADDRESS_KEY in parameters: + https_uri = parameters[NODEMANAGER_HTTPS_ADDRESS_KEY] + + if YARN_HTTP_POLICY_KEY in parameters: + http_policy = parameters[YARN_HTTP_POLICY_KEY] + + # determine the right URI and whether to use SSL + uri = http_uri + if http_policy == 'HTTPS_ONLY': + scheme = 'https' + + if https_uri is not None: + uri = https_uri + + label = '' + url_response = None + node_healthy = 'false' + total_time = 0 + + # some yarn-site structures don't have the web ui address + if uri is None: + if host_name is None: + host_name = socket.getfqdn() + + uri = '{0}:{1}'.format(host_name, NODEMANAGER_DEFAULT_PORT) + + if OSCheck.is_windows_family(): + uri_host, uri_port = uri.split(':') + # on windows 0.0.0.0 is invalid address to connect but on linux it resolved to 127.0.0.1 + uri_host = resolve_address(uri_host) + uri = '{0}:{1}'.format(uri_host, uri_port) + + query = "{0}://{1}/ws/v1/node/info".format(scheme,uri) + + try: + # execute the query for the JSON that includes templeton status + url_response = urllib2.urlopen(query) + except urllib2.HTTPError, httpError: + label = CRITICAL_HTTP_STATUS_MESSAGE.format(str(httpError.code), query, + str(httpError)) + + return (RESULT_CODE_CRITICAL, [label]) + except Exception, exception: + label = CRITICAL_CONNECTION_MESSAGE.format(query, str(exception)) + return (RESULT_CODE_CRITICAL, [label]) + + # URL response received, parse it + try: + json_response = json.loads(url_response.read()) + node_healthy = json_response['nodeInfo']['nodeHealthy'] + + # convert boolean to 
string + node_healthy = str(node_healthy) + except: + return (RESULT_CODE_CRITICAL, [query]) + finally: + if url_response is not None: + try: + url_response.close() + except: + pass + + # proper JSON received, compare against known value + if node_healthy.lower() == 'true': + result_code = RESULT_CODE_OK + label = OK_MESSAGE + else: + result_code = RESULT_CODE_CRITICAL + label = CRITICAL_NODEMANAGER_STATUS_MESSAGE.format(node_healthy) + + return (result_code, [label]) http://git-wip-us.apache.org/repos/asf/ambari/blob/1863c3b9/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/YARN/package/alerts/alert_nodemanagers_summary.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/YARN/package/alerts/alert_nodemanagers_summary.py b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/YARN/package/alerts/alert_nodemanagers_summary.py new file mode 100755 index 0000000..72fe644 --- /dev/null +++ b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/YARN/package/alerts/alert_nodemanagers_summary.py @@ -0,0 +1,122 @@ +#!/usr/bin/env python + +""" +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" + +import urllib2 +import json + +from ambari_commons.urllib_handlers import RefreshHeaderProcessor + +ERROR_LABEL = '{0} NodeManager{1} {2} unhealthy.' +OK_LABEL = 'All NodeManagers are healthy' + +NODEMANAGER_HTTP_ADDRESS_KEY = '{{yarn-site/yarn.resourcemanager.webapp.address}}' +NODEMANAGER_HTTPS_ADDRESS_KEY = '{{yarn-site/yarn.resourcemanager.webapp.https.address}}' +YARN_HTTP_POLICY_KEY = '{{yarn-site/yarn.http.policy}}' + +def get_tokens(): + """ + Returns a tuple of tokens in the format {{site/property}} that will be used + to build the dictionary passed into execute + """ + return NODEMANAGER_HTTP_ADDRESS_KEY, NODEMANAGER_HTTPS_ADDRESS_KEY, \ + YARN_HTTP_POLICY_KEY + + +def execute(parameters=None, host_name=None): + """ + Returns a tuple containing the result code and a pre-formatted result label + + Keyword arguments: + parameters (dictionary): a mapping of parameter key to value + host_name (string): the name of this host where the alert is running + """ + + if parameters is None: + return (('UNKNOWN', ['There were no parameters supplied to the script.'])) + + scheme = 'http' + http_uri = None + https_uri = None + http_policy = 'HTTP_ONLY' + + if NODEMANAGER_HTTP_ADDRESS_KEY in parameters: + http_uri = parameters[NODEMANAGER_HTTP_ADDRESS_KEY] + + if NODEMANAGER_HTTPS_ADDRESS_KEY in parameters: + https_uri = parameters[NODEMANAGER_HTTPS_ADDRESS_KEY] + + if YARN_HTTP_POLICY_KEY in parameters: + http_policy = parameters[YARN_HTTP_POLICY_KEY] + + # determine the right URI and whether to use SSL + uri = http_uri + if http_policy == 'HTTPS_ONLY': + scheme = 'https' + + if https_uri is not None: + uri = https_uri + + live_nodemanagers_qry = "{0}://{1}/jmx?qry=Hadoop:service=ResourceManager,name=RMNMInfo".format(scheme, uri) + + try: + live_nodemanagers = json.loads(get_value_from_jmx(live_nodemanagers_qry, "LiveNodeManagers")) + + unhealthy_count = 0 + + for nodemanager in live_nodemanagers: + health_report = nodemanager['State'] + if health_report == 
'UNHEALTHY': + unhealthy_count += 1 + + if unhealthy_count == 0: + result_code = 'OK' + label = OK_LABEL + else: + result_code = 'CRITICAL' + if unhealthy_count == 1: + label = ERROR_LABEL.format(unhealthy_count, '', 'is') + else: + label = ERROR_LABEL.format(unhealthy_count, 's', 'are') + + except Exception, e: + label = str(e) + result_code = 'UNKNOWN' + + return ((result_code, [label])) + + +def get_value_from_jmx(query, jmx_property): + response = None + + try: + # use a customer header process that will look for the non-standard + # "Refresh" header and attempt to follow the redirect + url_opener = urllib2.build_opener(RefreshHeaderProcessor()) + response = url_opener.open(query) + + data = response.read() + data_dict = json.loads(data) + return data_dict["beans"][0][jmx_property] + finally: + if response is not None: + try: + response.close() + except: + pass http://git-wip-us.apache.org/repos/asf/ambari/blob/1863c3b9/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/YARN/package/files/validateYarnComponentStatus.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/YARN/package/files/validateYarnComponentStatus.py b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/YARN/package/files/validateYarnComponentStatus.py new file mode 100755 index 0000000..862b4c2 --- /dev/null +++ b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/YARN/package/files/validateYarnComponentStatus.py @@ -0,0 +1,170 @@ +#!/usr/bin/env python + +''' +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +''' + +import optparse +import subprocess +import json + +RESOURCEMANAGER = 'rm' +NODEMANAGER = 'nm' +HISTORYSERVER = 'hs' + +STARTED_STATE = 'STARTED' +RUNNING_STATE = 'RUNNING' + +#Return reponse for given path and address +def getResponse(path, address, ssl_enabled): + + command = "curl" + httpGssnegotiate = "--negotiate" + userpswd = "-u:" + insecure = "-k"# This is smoke test, no need to check CA of server + if ssl_enabled: + url = 'https://' + address + path + else: + url = 'http://' + address + path + + command_with_flags = [command,httpGssnegotiate,userpswd,insecure,url] + + proc = subprocess.Popen(command_with_flags, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + (stdout, stderr) = proc.communicate() + response = json.loads(stdout) + if response == None: + print 'There is no response for url: ' + str(url) + raise Exception('There is no response for url: ' + str(url)) + return response + +#Verify that REST api is available for given component +def validateAvailability(component, path, addresses, ssl_enabled): + responses = {} + for address in addresses.split(','): + try: + responses[address] = getResponse(path, address, ssl_enabled) + except Exception as e: + print 'Error checking availability status of component.', e + + if not responses: + exit(1) + + is_valid = validateAvailabilityResponse(component, responses.values()[0]) + if not is_valid: + exit(1) + +#Validate component-specific response +def validateAvailabilityResponse(component, response): + try: + if component == RESOURCEMANAGER: + rm_state = response['clusterInfo']['state'] + if rm_state == 
STARTED_STATE: + return True + else: + print 'Resourcemanager is not started' + return False + + elif component == NODEMANAGER: + node_healthy = bool(response['nodeInfo']['nodeHealthy']) + if node_healthy: + return True + else: + return False + elif component == HISTORYSERVER: + hs_start_time = response['historyInfo']['startedOn'] + if hs_start_time > 0: + return True + else: + return False + else: + return False + except Exception as e: + print 'Error validation of availability response for ' + str(component), e + return False + +#Verify that component has required resources to work +def validateAbility(component, path, addresses, ssl_enabled): + responses = {} + for address in addresses.split(','): + try: + responses[address] = getResponse(path, address, ssl_enabled) + except Exception as e: + print 'Error checking ability of component.', e + + if not responses: + exit(1) + + is_valid = validateAbilityResponse(component, responses.values()[0]) + if not is_valid: + exit(1) + +#Validate component-specific response that it has required resources to work +def validateAbilityResponse(component, response): + try: + if component == RESOURCEMANAGER: + nodes = [] + if response.has_key('nodes') and not response['nodes'] == None and response['nodes'].has_key('node'): + nodes = response['nodes']['node'] + connected_nodes_count = len(nodes) + if connected_nodes_count == 0: + print 'There is no connected nodemanagers to resourcemanager' + return False + active_nodes = filter(lambda x: x['state'] == RUNNING_STATE, nodes) + active_nodes_count = len(active_nodes) + + if connected_nodes_count == 0: + print 'There is no connected active nodemanagers to resourcemanager' + return False + else: + return True + else: + return False + except Exception as e: + print 'Error validation of ability response', e + return False + +# +# Main. 
+# +def main(): + parser = optparse.OptionParser(usage="usage: %prog [options] component ") + parser.add_option("-p", "--port", dest="address", help="Host:Port for REST API of a desired component") + parser.add_option("-s", "--ssl", dest="ssl_enabled", help="Is SSL enabled for UI of component") + + (options, args) = parser.parse_args() + + component = args[0] + + address = options.address + ssl_enabled = (options.ssl_enabled) in 'true' + if component == RESOURCEMANAGER: + path = '/ws/v1/cluster/info' + elif component == NODEMANAGER: + path = '/ws/v1/node/info' + elif component == HISTORYSERVER: + path = '/ws/v1/history/info' + else: + parser.error("Invalid component") + + validateAvailability(component, path, address, ssl_enabled) + + if component == RESOURCEMANAGER: + path = '/ws/v1/cluster/nodes' + validateAbility(component, path, address, ssl_enabled) + +if __name__ == "__main__": + main() http://git-wip-us.apache.org/repos/asf/ambari/blob/1863c3b9/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/YARN/package/scripts/__init__.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/YARN/package/scripts/__init__.py b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/YARN/package/scripts/__init__.py new file mode 100755 index 0000000..35de4bb --- /dev/null +++ b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/YARN/package/scripts/__init__.py @@ -0,0 +1,20 @@ +""" +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +Ambari Agent + +""" http://git-wip-us.apache.org/repos/asf/ambari/blob/1863c3b9/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/YARN/package/scripts/application_timeline_server.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/YARN/package/scripts/application_timeline_server.py b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/YARN/package/scripts/application_timeline_server.py new file mode 100755 index 0000000..5152cf9 --- /dev/null +++ b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/YARN/package/scripts/application_timeline_server.py @@ -0,0 +1,139 @@ +""" +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+ +Ambari Agent + +""" + +from resource_management import * +from resource_management.libraries.functions import conf_select +from resource_management.libraries.functions import stack_select +from resource_management.libraries.functions.version import compare_versions, format_stack_version +from resource_management.libraries.functions.security_commons import build_expectations, \ + cached_kinit_executor, get_params_from_filesystem, validate_security_config_properties,\ + FILE_TYPE_XML +from resource_management.libraries.functions.format import format + +from yarn import yarn +from service import service + +class ApplicationTimelineServer(Script): + + def get_component_name(self): + return "hadoop-yarn-timelineserver" + + def install(self, env): + self.install_packages(env) + #self.configure(env) + + def configure(self, env): + import params + env.set_params(params) + yarn(name='apptimelineserver') + + def pre_upgrade_restart(self, env, upgrade_type=None): + Logger.info("Executing Stack Upgrade pre-restart") + import params + env.set_params(params) + + if params.version and compare_versions(format_stack_version(params.version), '4.0.0.0') >= 0: + conf_select.select(params.stack_name, "hadoop", params.version) + stack_select.select("hadoop-yarn-timelineserver", params.version) + #Execute(format("stack-select set hadoop-yarn-timelineserver {version}")) + + def start(self, env, upgrade_type=None): + import params + env.set_params(params) + self.configure(env) # FOR SECURITY + service('timelineserver', action='start') + + def stop(self, env, upgrade_type=None): + import params + env.set_params(params) + service('timelineserver', action='stop') + + def status(self, env): + import status_params + env.set_params(status_params) + Execute(format("mv {yarn_historyserver_pid_file_old} {yarn_historyserver_pid_file}"), + only_if = format("test -e {yarn_historyserver_pid_file_old}", user=status_params.yarn_user)) + 
functions.check_process_status(status_params.yarn_historyserver_pid_file) + + def security_status(self, env): + import status_params + env.set_params(status_params) + if status_params.security_enabled: + props_value_check = {"yarn.timeline-service.enabled": "true", + "yarn.timeline-service.http-authentication.type": "kerberos", + "yarn.acl.enable": "true"} + props_empty_check = ["yarn.timeline-service.principal", + "yarn.timeline-service.keytab", + "yarn.timeline-service.http-authentication.kerberos.principal", + "yarn.timeline-service.http-authentication.kerberos.keytab"] + + props_read_check = ["yarn.timeline-service.keytab", + "yarn.timeline-service.http-authentication.kerberos.keytab"] + yarn_site_props = build_expectations('yarn-site', props_value_check, props_empty_check, + props_read_check) + + yarn_expectations ={} + yarn_expectations.update(yarn_site_props) + + security_params = get_params_from_filesystem(status_params.hadoop_conf_dir, + {'yarn-site.xml': FILE_TYPE_XML}) + result_issues = validate_security_config_properties(security_params, yarn_expectations) + if not result_issues: # If all validations passed successfully + try: + # Double check the dict before calling execute + if ( 'yarn-site' not in security_params + or 'yarn.timeline-service.keytab' not in security_params['yarn-site'] + or 'yarn.timeline-service.principal' not in security_params['yarn-site']) \ + or 'yarn.timeline-service.http-authentication.kerberos.keytab' not in security_params['yarn-site'] \ + or 'yarn.timeline-service.http-authentication.kerberos.principal' not in security_params['yarn-site']: + self.put_structured_out({"securityState": "UNSECURED"}) + self.put_structured_out( + {"securityIssuesFound": "Keytab file or principal are not set property."}) + return + + cached_kinit_executor(status_params.kinit_path_local, + status_params.yarn_user, + security_params['yarn-site']['yarn.timeline-service.keytab'], + security_params['yarn-site']['yarn.timeline-service.principal'], + 
status_params.hostname, + status_params.tmp_dir) + cached_kinit_executor(status_params.kinit_path_local, + status_params.yarn_user, + security_params['yarn-site']['yarn.timeline-service.http-authentication.kerberos.keytab'], + security_params['yarn-site']['yarn.timeline-service.http-authentication.kerberos.principal'], + status_params.hostname, + status_params.tmp_dir) + self.put_structured_out({"securityState": "SECURED_KERBEROS"}) + except Exception as e: + self.put_structured_out({"securityState": "ERROR"}) + self.put_structured_out({"securityStateErrorInfo": str(e)}) + else: + issues = [] + for cf in result_issues: + issues.append("Configuration file %s did not pass the validation. Reason: %s" % (cf, result_issues[cf])) + self.put_structured_out({"securityIssuesFound": ". ".join(issues)}) + self.put_structured_out({"securityState": "UNSECURED"}) + else: + self.put_structured_out({"securityState": "UNSECURED"}) + + +if __name__ == "__main__": + ApplicationTimelineServer().execute() http://git-wip-us.apache.org/repos/asf/ambari/blob/1863c3b9/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/YARN/package/scripts/historyserver.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/YARN/package/scripts/historyserver.py b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/YARN/package/scripts/historyserver.py new file mode 100755 index 0000000..bb384e4 --- /dev/null +++ b/ambari-server/src/main/resources/stacks/BigInsights/4.0/services/YARN/package/scripts/historyserver.py @@ -0,0 +1,155 @@ +""" +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. 
class HistoryServer(Script):
  """
  Ambari control script for the MapReduce2 JobHistoryServer component.

  Lifecycle entry points (install/configure/start/stop/status) are invoked by
  the Ambari agent; security_status reports Kerberos health for the UI.
  """

  def get_component_name(self):
    # Component name understood by the stack-select tooling.
    return "hadoop-mapreduce-historyserver"

  def install(self, env):
    # Only installs OS packages; configuration is applied separately.
    self.install_packages(env)

  def configure(self, env):
    import params
    env.set_params(params)
    yarn(name="historyserver")

  def pre_upgrade_restart(self, env, upgrade_type=None):
    """Switch binaries/configs to the target stack version before a restart."""
    Logger.info("Executing Stack Upgrade pre-restart")
    import params
    env.set_params(params)

    # Nothing to do unless a target version is set and is at least 4.0.0.0.
    if not (params.version and
            compare_versions(format_stack_version(params.version), '4.0.0.0') >= 0):
      return

    conf_select.select(params.stack_name, "hadoop", params.version)
    stack_select.select("hadoop-mapreduce-historyserver", params.version)

    # Re-upload the framework tarballs for the new version, then flush the
    # queued HDFS operations.
    for tarball in ("mapreduce", "slider"):
      copy_to_hdfs(tarball, params.user_group, params.hdfs_user,
                   host_sys_prepped=params.host_sys_prepped)
    params.HdfsResource(None, action="execute")

  def start(self, env, upgrade_type=None):
    import params
    env.set_params(params)
    self.configure(env)  # ensure configs (incl. security) are in place first

    # Upload framework tarballs to HDFS; only flush the queued HDFS ops when
    # at least one upload actually created something.
    created = False
    for tarball in ("mapreduce", "slider"):
      created = copy_to_hdfs(
        tarball,
        params.user_group,
        params.hdfs_user,
        host_sys_prepped=params.host_sys_prepped) or created
    if created:
      params.HdfsResource(None, action="execute")

    service('historyserver', action='start', serviceName='mapreduce')

  def stop(self, env, upgrade_type=None):
    import params
    env.set_params(params)
    service('historyserver', action='stop', serviceName='mapreduce')

  def status(self, env):
    import status_params
    env.set_params(status_params)
    check_process_status(status_params.mapred_historyserver_pid_file)

  def security_status(self, env):
    """Report Kerberos security state of the JobHistoryServer via structured output."""
    import status_params
    env.set_params(status_params)

    if not status_params.security_enabled:
      self.put_structured_out({"securityState": "UNSECURED"})
      return

    # mapred-site properties that must be present for a secured deployment.
    required_props = [
      'mapreduce.jobhistory.keytab',
      'mapreduce.jobhistory.principal',
      'mapreduce.jobhistory.webapp.spnego-keytab-file',
      'mapreduce.jobhistory.webapp.spnego-principal'
    ]
    expectations = {}
    expectations.update(build_expectations('mapred-site',
                                           None,
                                           required_props,
                                           None))

    security_params = get_params_from_filesystem(status_params.hadoop_conf_dir,
                                                 {'mapred-site.xml': FILE_TYPE_XML})
    result_issues = validate_security_config_properties(security_params, expectations)

    if result_issues:
      # Validation failed: report each offending config file.
      issues = ["Configuration file %s did not pass the validation. Reason: %s"
                % (cf, result_issues[cf]) for cf in result_issues]
      self.put_structured_out({"securityIssuesFound": ". ".join(issues)})
      self.put_structured_out({"securityState": "UNSECURED"})
      return

    try:
      # Re-check the parsed properties before attempting any kinit.
      site = security_params.get('mapred-site', {})
      if any(prop not in site for prop in required_props):
        self.put_structured_out({"securityState": "UNSECURED"})
        self.put_structured_out(
          {"securityIssuesFound": "Keytab file or principal not set."})
        return

      # Verify both the service principal and the SPNEGO principal can kinit.
      cached_kinit_executor(status_params.kinit_path_local,
                            status_params.mapred_user,
                            site['mapreduce.jobhistory.keytab'],
                            site['mapreduce.jobhistory.principal'],
                            status_params.hostname,
                            status_params.tmp_dir)
      cached_kinit_executor(status_params.kinit_path_local,
                            status_params.mapred_user,
                            site['mapreduce.jobhistory.webapp.spnego-keytab-file'],
                            site['mapreduce.jobhistory.webapp.spnego-principal'],
                            status_params.hostname,
                            status_params.tmp_dir)
      self.put_structured_out({"securityState": "SECURED_KERBEROS"})
    except Exception as e:
      self.put_structured_out({"securityState": "ERROR"})
      self.put_structured_out({"securityStateErrorInfo": str(e)})

if __name__ == "__main__":
  HistoryServer().execute()
class MapReduce2ServiceCheck(Script):
  """Smoke test for MapReduce2: run the example wordcount job as the smoke user."""

  def service_check(self, env):
    import params
    env.set_params(params)

    # NOTE: format() interpolates from local variable names, so jar_path,
    # input_file and output_file must keep these exact names — the command
    # templates below reference them.
    jar_path = format("{hadoop_mapred2_jar_location}/{hadoopMapredExamplesJarName}")
    input_file = format("/user/{smokeuser}/mapredsmokeinput")
    output_file = format("/user/{smokeuser}/mapredsmokeoutput")

    # Commands: remove leftovers, stage input, run the job, probe the output.
    cleanup_cmd = format("fs -rm -r -f {output_file} {input_file}")
    stage_input_cmd = format("fs -put /etc/passwd {input_file}")
    probe_output_cmd = format("fs -test -e {output_file}")
    wordcount_cmd = format("jar {jar_path} wordcount {input_file} {output_file}")

    # Make sure the smoke user's log directory exists.
    log_dir = format("{mapred_log_dir_prefix}/{smokeuser}")
    Directory(log_dir, owner=params.smokeuser, create_parents=True)

    if params.security_enabled:
      # Obtain a Kerberos ticket for the smoke user before touching HDFS.
      kinit_cmd = format("{kinit_path_local} -kt {smoke_user_keytab} {smokeuser_principal};")
      Execute(kinit_cmd,
              user=params.smokeuser
              )

    def _run(cmd, **extra):
      # Run a hadoop command as the smoke user with the standard env.
      ExecuteHadoop(cmd,
                    user=params.smokeuser,
                    bin_dir=params.execute_path,
                    conf_dir=params.hadoop_conf_dir,
                    **extra)

    _run(cleanup_cmd, tries=1, try_sleep=5)
    _run(stage_input_cmd, tries=1, try_sleep=5)
    _run(wordcount_cmd, tries=1, try_sleep=5, logoutput=True)
    # Service check passes iff the job produced its output directory.
    _run(probe_output_cmd)

if __name__ == "__main__":
  MapReduce2ServiceCheck().execute()
class MapReduce2Client(Script):
  """
  Client-only MapReduce2 component: installs packages and lays down configs.
  No daemon is managed, so status() always raises ClientComponentHasNoStatus.
  """

  def get_component_name(self):
    # Component name understood by the stack-select tooling.
    return "hadoop-client"

  def pre_upgrade_restart(self, env, upgrade_type=None):
    """Point the hadoop-client binaries/configs at the target stack version."""
    # Fix: import these explicitly (as the sibling historyserver.py does)
    # instead of relying on "from resource_management import *" happening to
    # export them — a NameError here would only surface during an upgrade.
    from resource_management.libraries.functions.version import compare_versions, format_stack_version
    import params
    env.set_params(params)

    # Only act when a target version is set and is at least 4.0.0.0.
    if params.version and compare_versions(format_stack_version(params.version), '4.0.0.0') >= 0:
      conf_select.select(params.stack_name, "hadoop", params.version)
      stack_select.select("hadoop-client", params.version)

  def install(self, env):
    # Install packages, then immediately write out client configuration.
    self.install_packages(env)
    self.configure(env)

  def configure(self, env):
    import params
    env.set_params(params)
    yarn()

  def status(self, env):
    # Clients have no running process to report on.
    raise ClientComponentHasNoStatus()

if __name__ == "__main__":
  MapReduce2Client().execute()