[
https://issues.apache.org/jira/browse/AMBARI-11571?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Yusaku Sako updated AMBARI-11571:
---------------------------------
Description:
When using AMS in distributed mode, we went to issue a stop-all in order to
perform a Namenode move. Since AMS in distributed mode depends on HDFS, the AMS
stop hung since HDFS was unreachable. We had to manually kill AMS and “retry”
the NN move wizard.
{code}
stderr:
2015-05-25 14:35:54,589 - Error while executing command 'stop':
Traceback (most recent call last):
File
"/usr/lib/python2.6/site-packages/resource_management/libraries/script/script.py",
line 214, in execute
method(env)
File
"/var/lib/ambari-agent/cache/common-services/AMBARI_METRICS/0.1.0/package/scripts/metrics_collector.py",
line 55, in stop
self.configure(env) # for security
File
"/var/lib/ambari-agent/cache/common-services/AMBARI_METRICS/0.1.0/package/scripts/metrics_collector.py",
line 39, in configure
hbase('master')
File "/usr/lib/python2.6/site-packages/ambari_commons/os_family_impl.py", line
89, in thunk
return fn(*args, **kwargs)
File
"/var/lib/ambari-agent/cache/common-services/AMBARI_METRICS/0.1.0/package/scripts/hbase.py",
line 197, in hbase
params.HdfsDirectory(None, action="create")
File "/usr/lib/python2.6/site-packages/resource_management/core/base.py", line
148, in __init__
self.env.run()
File
"/usr/lib/python2.6/site-packages/resource_management/core/environment.py",
line 152, in run
self.run_action(resource, action)
File
"/usr/lib/python2.6/site-packages/resource_management/core/environment.py",
line 118, in run_action
provider_action()
File
"/usr/lib/python2.6/site-packages/resource_management/libraries/providers/hdfs_directory.py",
line 107, in action_create
not_if=as_user(format("hadoop --config
{hdp_conf_dir}
fs -ls
{dir_list_str}
"), hdp_hdfs_user)
File "/usr/lib/python2.6/site-packages/resource_management/core/base.py", line
148, in __init__
self.env.run()
File
"/usr/lib/python2.6/site-packages/resource_management/core/environment.py",
line 152, in run
self.run_action(resource, action)
File
"/usr/lib/python2.6/site-packages/resource_management/core/environment.py",
line 118, in run_action
provider_action()
File
"/usr/lib/python2.6/site-packages/resource_management/core/providers/system.py",
line 274, in action_run
raise ex
Fail: Execution of 'hadoop --config /etc/hadoop/conf fs -mkdir -p
hdfs://cluster:8020/ams-hbase /apps/hbase/staging && hadoop --config
/etc/hadoop/conf fs -chmod 775 hdfs://cluster:8020/ams-hbase && hadoop --config
/etc/hadoop/conf fs -chmod 711 /apps/hbase/staging && hadoop --config
/etc/hadoop/conf fs -chown ams hdfs://cluster:8020/ams-hbase
/apps/hbase/staging' returned 1. 15/05/25 14:29:32 INFO
retry.RetryInvocationHandler: Exception while invoking getFileInfo of class
ClientNamenodeProtocolTranslatorPB over
nn00034.blue.ygrid.kks.yahoo.co.jp/100.97.64.62:8020 after 1 fail over
attempts. Trying to fail over after sleeping for 760ms.
org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.ipc.StandbyException):
Operation category READ is not supported in state standby
{code}
was:
When using AMS in distributed mode, we went to issue a stop-all in order to
perform a Namenode move. Since AMS in distributed mode depends on HDFS, the AMS
stop hung since HDFS was unreachable. We had to manually kill AMS and “retry”
the NN move wizard.
{code}
stderr:
2015-05-25 14:35:54,589 - Error while executing command 'stop':
Traceback (most recent call last):
File
"/usr/lib/python2.6/site-packages/resource_management/libraries/script/script.py",
line 214, in execute
method(env)
File
"/var/lib/ambari-agent/cache/common-services/AMBARI_METRICS/0.1.0/package/scripts/metrics_collector.py",
line 55, in stop
self.configure(env) # for security
File
"/var/lib/ambari-agent/cache/common-services/AMBARI_METRICS/0.1.0/package/scripts/metrics_collector.py",
line 39, in configure
hbase('master')
File "/usr/lib/python2.6/site-packages/ambari_commons/os_family_impl.py", line
89, in thunk
return fn(*args, **kwargs)
File
"/var/lib/ambari-agent/cache/common-services/AMBARI_METRICS/0.1.0/package/scripts/hbase.py",
line 197, in hbase
params.HdfsDirectory(None, action="create")
File "/usr/lib/python2.6/site-packages/resource_management/core/base.py", line
148, in __init__
self.env.run()
File
"/usr/lib/python2.6/site-packages/resource_management/core/environment.py",
line 152, in run
self.run_action(resource, action)
File
"/usr/lib/python2.6/site-packages/resource_management/core/environment.py",
line 118, in run_action
provider_action()
File
"/usr/lib/python2.6/site-packages/resource_management/libraries/providers/hdfs_directory.py",
line 107, in action_create
not_if=as_user(format("hadoop --config
{hdp_conf_dir}
fs -ls
{dir_list_str}
"), hdp_hdfs_user)
File "/usr/lib/python2.6/site-packages/resource_management/core/base.py", line
148, in __init__
self.env.run()
File
"/usr/lib/python2.6/site-packages/resource_management/core/environment.py",
line 152, in run
self.run_action(resource, action)
File
"/usr/lib/python2.6/site-packages/resource_management/core/environment.py",
line 118, in run_action
provider_action()
File
"/usr/lib/python2.6/site-packages/resource_management/core/providers/system.py",
line 274, in action_run
raise ex
Fail: Execution of 'hadoop --config /etc/hadoop/conf fs -mkdir -p
hdfs://muonblue:8020/ams-hbase /apps/hbase/staging && hadoop --config
/etc/hadoop/conf fs -chmod 775 hdfs://muonblue:8020/ams-hbase && hadoop
--config /etc/hadoop/conf fs -chmod 711 /apps/hbase/staging && hadoop --config
/etc/hadoop/conf fs -chown ams hdfs://muonblue:8020/ams-hbase
/apps/hbase/staging' returned 1. 15/05/25 14:29:32 INFO
retry.RetryInvocationHandler: Exception while invoking getFileInfo of class
ClientNamenodeProtocolTranslatorPB over
nn00034.blue.ygrid.kks.yahoo.co.jp/100.97.64.62:8020 after 1 fail over
attempts. Trying to fail over after sleeping for 760ms.
org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.ipc.StandbyException):
Operation category READ is not supported in state standby
{code}
> Move NameNode with AMS in distributed mode - Stop All fails for AMS
> -------------------------------------------------------------------
>
> Key: AMBARI-11571
> URL: https://issues.apache.org/jira/browse/AMBARI-11571
> Project: Ambari
> Issue Type: Bug
> Components: ambari-server
> Affects Versions: 2.1.0
> Reporter: Siddharth Wagle
> Assignee: Siddharth Wagle
> Priority: Critical
> Fix For: 2.1.0
>
>
> When using AMS in distributed mode, we went to issue a stop-all in order to
> perform a Namenode move. Since AMS in distributed mode depends on HDFS, the
> AMS stop hung since HDFS was unreachable. We had to manually kill AMS and
> “retry” the NN move wizard.
> {code}
> stderr:
> 2015-05-25 14:35:54,589 - Error while executing command 'stop':
> Traceback (most recent call last):
> File
> "/usr/lib/python2.6/site-packages/resource_management/libraries/script/script.py",
> line 214, in execute
> method(env)
> File
> "/var/lib/ambari-agent/cache/common-services/AMBARI_METRICS/0.1.0/package/scripts/metrics_collector.py",
> line 55, in stop
> self.configure(env) # for security
> File
> "/var/lib/ambari-agent/cache/common-services/AMBARI_METRICS/0.1.0/package/scripts/metrics_collector.py",
> line 39, in configure
> hbase('master')
> File "/usr/lib/python2.6/site-packages/ambari_commons/os_family_impl.py",
> line 89, in thunk
> return fn(*args, **kwargs)
> File
> "/var/lib/ambari-agent/cache/common-services/AMBARI_METRICS/0.1.0/package/scripts/hbase.py",
> line 197, in hbase
> params.HdfsDirectory(None, action="create")
> File "/usr/lib/python2.6/site-packages/resource_management/core/base.py",
> line 148, in __init__
> self.env.run()
> File
> "/usr/lib/python2.6/site-packages/resource_management/core/environment.py",
> line 152, in run
> self.run_action(resource, action)
> File
> "/usr/lib/python2.6/site-packages/resource_management/core/environment.py",
> line 118, in run_action
> provider_action()
> File
> "/usr/lib/python2.6/site-packages/resource_management/libraries/providers/hdfs_directory.py",
> line 107, in action_create
> not_if=as_user(format("hadoop --config
> {hdp_conf_dir}
> fs -ls
> {dir_list_str}
> "), hdp_hdfs_user)
> File "/usr/lib/python2.6/site-packages/resource_management/core/base.py",
> line 148, in __init__
> self.env.run()
> File
> "/usr/lib/python2.6/site-packages/resource_management/core/environment.py",
> line 152, in run
> self.run_action(resource, action)
> File
> "/usr/lib/python2.6/site-packages/resource_management/core/environment.py",
> line 118, in run_action
> provider_action()
> File
> "/usr/lib/python2.6/site-packages/resource_management/core/providers/system.py",
> line 274, in action_run
> raise ex
> Fail: Execution of 'hadoop --config /etc/hadoop/conf fs -mkdir -p
> hdfs://cluster:8020/ams-hbase /apps/hbase/staging && hadoop --config
> /etc/hadoop/conf fs -chmod 775 hdfs://cluster:8020/ams-hbase && hadoop
> --config /etc/hadoop/conf fs -chmod 711 /apps/hbase/staging && hadoop
> --config /etc/hadoop/conf fs -chown ams hdfs://cluster:8020/ams-hbase
> /apps/hbase/staging' returned 1. 15/05/25 14:29:32 INFO
> retry.RetryInvocationHandler: Exception while invoking getFileInfo of class
> ClientNamenodeProtocolTranslatorPB over
> nn00034.blue.ygrid.kks.yahoo.co.jp/100.97.64.62:8020 after 1 fail over
> attempts. Trying to fail over after sleeping for 760ms.
> org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.ipc.StandbyException):
> Operation category READ is not supported in state standby
> {code}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)