[
https://issues.apache.org/jira/browse/AMBARI-23904?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Andrii Babiichuk updated AMBARI-23904:
--------------------------------------
Description:
STR (steps to reproduce):
- Deploy a cluster with 2 namespaces using a blueprint.
- Use the UI wizard to move the Active NameNode for namespace NS2.
- Perform the manual operations when prompted in the wizard (FormatZkfc on the
other 3 hosts, bootstrapStandby on the new NN).
- In the final step, start all services. ZKFC fails to start on the host where
we moved the NN:
{code}
Traceback (most recent call last):
File
"/var/lib/ambari-agent/cache/stacks/HDP/3.0/services/HDFS/package/scripts/zkfc_slave.py",
line 192, in <module>
ZkfcSlave().execute()
File
"/usr/lib/ambari-agent/lib/resource_management/libraries/script/script.py",
line 353, in execute
method(env)
File
"/var/lib/ambari-agent/cache/stacks/HDP/3.0/services/HDFS/package/scripts/zkfc_slave.py",
line 71, in start
ZkfcSlaveDefault.start_static(env, upgrade_type)
File
"/var/lib/ambari-agent/cache/stacks/HDP/3.0/services/HDFS/package/scripts/zkfc_slave.py",
line 96, in start_static
create_log_dir=True
File
"/var/lib/ambari-agent/cache/stacks/HDP/3.0/services/HDFS/package/scripts/utils.py",
line 258, in service
Execute(daemon_cmd, not_if=process_id_exists_command,
environment=hadoop_env_exports)
File "/usr/lib/ambari-agent/lib/resource_management/core/base.py", line 166,
in __init__
self.env.run()
File "/usr/lib/ambari-agent/lib/resource_management/core/environment.py",
line 160, in run
self.run_action(resource, action)
File "/usr/lib/ambari-agent/lib/resource_management/core/environment.py",
line 124, in run_action
provider_action()
File
"/usr/lib/ambari-agent/lib/resource_management/core/providers/system.py", line
263, in action_run
returns=self.resource.returns)
File "/usr/lib/ambari-agent/lib/resource_management/core/shell.py", line 72,
in inner
result = function(command, **kwargs)
File "/usr/lib/ambari-agent/lib/resource_management/core/shell.py", line 102,
in checked_call
tries=tries, try_sleep=try_sleep,
timeout_kill_strategy=timeout_kill_strategy, returns=returns)
File "/usr/lib/ambari-agent/lib/resource_management/core/shell.py", line 150,
in _call_wrapper
result = _call(command, **kwargs_copy)
File "/usr/lib/ambari-agent/lib/resource_management/core/shell.py", line 308,
in _call
raise ExecutionFailed(err_msg, code, out, err)
resource_management.core.exceptions.ExecutionFailed: Execution of
'ambari-sudo.sh su cstm-hdfs -l -s /bin/bash -c 'ulimit -c unlimited ;
/usr/hdp/3.0.0.0-1316/hadoop/bin/hdfs --config
/usr/hdp/3.0.0.0-1316/hadoop/conf --daemon start zkfc'' returned 1. ########
Hortonworks #############
This is MOTD message, added for testing in qe infra
WARNING: HADOOP_ZKFC_OPTS has been replaced by HDFS_ZKFC_OPTS. Using value of
HADOOP_ZKFC_OPTS.
{code}
was:
STR:
-> Deploy a cluster with 2 namespaces using blueprint.
-> Use the UI wizard to move Active Namenode for namespace NS2
-> Perform manual operations when prompted in the wizard (FormatZkfc on other 3
hosts, perform bootstrapStandby on the new NN)
-> In the final step, start all services, ZKFC fails to start on the host where
we moved the NN
{code}
Traceback (most recent call last):
File
"/var/lib/ambari-agent/cache/stacks/HDP/3.0/services/HDFS/package/scripts/zkfc_slave.py",
line 192, in <module>
ZkfcSlave().execute()
File
"/usr/lib/ambari-agent/lib/resource_management/libraries/script/script.py",
line 353, in execute
method(env)
File
"/var/lib/ambari-agent/cache/stacks/HDP/3.0/services/HDFS/package/scripts/zkfc_slave.py",
line 71, in start
ZkfcSlaveDefault.start_static(env, upgrade_type)
File
"/var/lib/ambari-agent/cache/stacks/HDP/3.0/services/HDFS/package/scripts/zkfc_slave.py",
line 96, in start_static
create_log_dir=True
File
"/var/lib/ambari-agent/cache/stacks/HDP/3.0/services/HDFS/package/scripts/utils.py",
line 258, in service
Execute(daemon_cmd, not_if=process_id_exists_command,
environment=hadoop_env_exports)
File "/usr/lib/ambari-agent/lib/resource_management/core/base.py", line 166,
in __init__
self.env.run()
File "/usr/lib/ambari-agent/lib/resource_management/core/environment.py",
line 160, in run
self.run_action(resource, action)
File "/usr/lib/ambari-agent/lib/resource_management/core/environment.py",
line 124, in run_action
provider_action()
File
"/usr/lib/ambari-agent/lib/resource_management/core/providers/system.py", line
263, in action_run
returns=self.resource.returns)
File "/usr/lib/ambari-agent/lib/resource_management/core/shell.py", line 72,
in inner
result = function(command, **kwargs)
File "/usr/lib/ambari-agent/lib/resource_management/core/shell.py", line 102,
in checked_call
tries=tries, try_sleep=try_sleep,
timeout_kill_strategy=timeout_kill_strategy, returns=returns)
File "/usr/lib/ambari-agent/lib/resource_management/core/shell.py", line 150,
in _call_wrapper
result = _call(command, **kwargs_copy)
File "/usr/lib/ambari-agent/lib/resource_management/core/shell.py", line 308,
in _call
raise ExecutionFailed(err_msg, code, out, err)
resource_management.core.exceptions.ExecutionFailed: Execution of
'ambari-sudo.sh su cstm-hdfs -l -s /bin/bash -c 'ulimit -c unlimited ;
/usr/hdp/3.0.0.0-1316/hadoop/bin/hdfs --config
/usr/hdp/3.0.0.0-1316/hadoop/conf --daemon start zkfc'' returned 1. ########
Hortonworks #############
This is MOTD message, added for testing in qe infra
WARNING: HADOOP_ZKFC_OPTS has been replaced by HDFS_ZKFC_OPTS. Using value of
HADOOP_ZKFC_OPTS.
{code}
> ZKFC fails to start while moving Namenode on a cluster with multiple
> namespaces
> -------------------------------------------------------------------------------
>
> Key: AMBARI-23904
> URL: https://issues.apache.org/jira/browse/AMBARI-23904
> Project: Ambari
> Issue Type: Bug
> Components: ambari-web
> Affects Versions: 2.7.0
> Reporter: Andrii Babiichuk
> Assignee: Andrii Babiichuk
> Priority: Blocker
> Fix For: 2.7.0
>
>
> STR (steps to reproduce):
> - Deploy a cluster with 2 namespaces using a blueprint.
> - Use the UI wizard to move the Active NameNode for namespace NS2.
> - Perform the manual operations when prompted in the wizard (FormatZkfc on the
> other 3 hosts, bootstrapStandby on the new NN).
> - In the final step, start all services. ZKFC fails to start on the host
> where we moved the NN:
> {code}
> Traceback (most recent call last):
> File
> "/var/lib/ambari-agent/cache/stacks/HDP/3.0/services/HDFS/package/scripts/zkfc_slave.py",
> line 192, in <module>
> ZkfcSlave().execute()
> File
> "/usr/lib/ambari-agent/lib/resource_management/libraries/script/script.py",
> line 353, in execute
> method(env)
> File
> "/var/lib/ambari-agent/cache/stacks/HDP/3.0/services/HDFS/package/scripts/zkfc_slave.py",
> line 71, in start
> ZkfcSlaveDefault.start_static(env, upgrade_type)
> File
> "/var/lib/ambari-agent/cache/stacks/HDP/3.0/services/HDFS/package/scripts/zkfc_slave.py",
> line 96, in start_static
> create_log_dir=True
> File
> "/var/lib/ambari-agent/cache/stacks/HDP/3.0/services/HDFS/package/scripts/utils.py",
> line 258, in service
> Execute(daemon_cmd, not_if=process_id_exists_command,
> environment=hadoop_env_exports)
> File "/usr/lib/ambari-agent/lib/resource_management/core/base.py", line
> 166, in __init__
> self.env.run()
> File "/usr/lib/ambari-agent/lib/resource_management/core/environment.py",
> line 160, in run
> self.run_action(resource, action)
> File "/usr/lib/ambari-agent/lib/resource_management/core/environment.py",
> line 124, in run_action
> provider_action()
> File
> "/usr/lib/ambari-agent/lib/resource_management/core/providers/system.py",
> line 263, in action_run
> returns=self.resource.returns)
> File "/usr/lib/ambari-agent/lib/resource_management/core/shell.py", line
> 72, in inner
> result = function(command, **kwargs)
> File "/usr/lib/ambari-agent/lib/resource_management/core/shell.py", line
> 102, in checked_call
> tries=tries, try_sleep=try_sleep,
> timeout_kill_strategy=timeout_kill_strategy, returns=returns)
> File "/usr/lib/ambari-agent/lib/resource_management/core/shell.py", line
> 150, in _call_wrapper
> result = _call(command, **kwargs_copy)
> File "/usr/lib/ambari-agent/lib/resource_management/core/shell.py", line
> 308, in _call
> raise ExecutionFailed(err_msg, code, out, err)
> resource_management.core.exceptions.ExecutionFailed: Execution of
> 'ambari-sudo.sh su cstm-hdfs -l -s /bin/bash -c 'ulimit -c unlimited ;
> /usr/hdp/3.0.0.0-1316/hadoop/bin/hdfs --config
> /usr/hdp/3.0.0.0-1316/hadoop/conf --daemon start zkfc'' returned 1. ########
> Hortonworks #############
> This is MOTD message, added for testing in qe infra
> WARNING: HADOOP_ZKFC_OPTS has been replaced by HDFS_ZKFC_OPTS. Using value of
> HADOOP_ZKFC_OPTS.
> {code}
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)