[
https://issues.apache.org/jira/browse/AMBARI-12374?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14652913#comment-14652913
]
Vincent.He commented on AMBARI-12374:
-------------------------------------
I did some more testing.
1. It seems the error is not related to the patch, since when I make these
calls I can get the data (I had a typo earlier when I executed them):
[root@h02 patch]# curl -s
http://h03.bigdata.com:50070/jmx?qry=Hadoop:service=NameNode,name=NameNodeStatus
{
"beans" : [ {
"name" : "Hadoop:service=NameNode,name=NameNodeStatus",
"modelerType" : "org.apache.hadoop.hdfs.server.namenode.NameNode",
"State" : "standby",
"NNRole" : "NameNode",
"HostAndPort" : "h03.bigdata.com:8020",
"SecurityEnabled" : false,
"LastHATransitionTime" : 1438594046119
} ]
}
[root@h02 patch]# curl -s
http://h02.bigdata.com:50070/jmx?qry=Hadoop:service=NameNode,name=NameNodeStatus
{
"beans" : [ {
"name" : "Hadoop:service=NameNode,name=NameNodeStatus",
"modelerType" : "org.apache.hadoop.hdfs.server.namenode.NameNode",
"State" : "active",
"NNRole" : "NameNode",
"HostAndPort" : "h02.bigdata.com:8020",
"SecurityEnabled" : false,
"LastHATransitionTime" : 1438594046591
} ]
}
[root@h02 patch]# curl -s
http://h03.bigdata.com:50070/jmx?qry=Hadoop:service=NameNode,name=FSNamesystem
{
"beans" : [ {
"name" : "Hadoop:service=NameNode,name=FSNamesystem",
"modelerType" : "FSNamesystem",
"tag.Context" : "dfs",
"tag.HAState" : "standby",
"tag.Hostname" : "h03.bigdata.com",
"MissingBlocks" : 0,
"MissingReplOneBlocks" : 0,
"ExpiredHeartbeats" : 0,
"TransactionsSinceLastCheckpoint" : -756,
"TransactionsSinceLastLogRoll" : 0,
"LastWrittenTransactionId" : 5760,
"LastCheckpointTime" : 1438637246806,
"CapacityTotal" : 377945479446528,
"CapacityTotalGB" : 351989.0,
"CapacityUsed" : 2162847744,
"CapacityUsedGB" : 2.0,
"CapacityRemaining" : 374078076620800,
"CapacityRemainingGB" : 348387.0,
"CapacityUsedNonDFS" : 3865239977984,
"TotalLoad" : 16,
"SnapshottableDirectories" : 0,
"Snapshots" : 0,
"BlocksTotal" : 588,
"FilesTotal" : 825,
"PendingReplicationBlocks" : 0,
"UnderReplicatedBlocks" : 0,
"CorruptBlocks" : 0,
"ScheduledReplicationBlocks" : 0,
"PendingDeletionBlocks" : 0,
"ExcessBlocks" : 0,
"PostponedMisreplicatedBlocks" : 0,
"PendingDataNodeMessageCount" : 0,
"MillisSinceLastLoadedEdits" : 49071,
"BlockCapacity" : 2097152,
"StaleDataNodes" : 0,
"TotalFiles" : 825
} ]
}
[root@h02 patch]# curl -s
http://h02.bigdata.com:50070/jmx?qry=Hadoop:service=NameNode,name=FSNamesystem
{
"beans" : [ {
"name" : "Hadoop:service=NameNode,name=FSNamesystem",
"modelerType" : "FSNamesystem",
"tag.Context" : "dfs",
"tag.HAState" : "active",
"tag.Hostname" : "h02.bigdata.com",
"MissingBlocks" : 0,
"MissingReplOneBlocks" : 0,
"ExpiredHeartbeats" : 0,
"TransactionsSinceLastCheckpoint" : 227,
"TransactionsSinceLastLogRoll" : 1,
"LastWrittenTransactionId" : 6743,
"LastCheckpointTime" : 1438637246983,
"CapacityTotal" : 377945479446528,
"CapacityTotalGB" : 351989.0,
"CapacityUsed" : 2162847744,
"CapacityUsedGB" : 2.0,
"CapacityRemaining" : 374078076620800,
"CapacityRemainingGB" : 348387.0,
"CapacityUsedNonDFS" : 3865239977984,
"TotalLoad" : 16,
"SnapshottableDirectories" : 0,
"Snapshots" : 0,
"BlocksTotal" : 588,
"FilesTotal" : 825,
"PendingReplicationBlocks" : 0,
"UnderReplicatedBlocks" : 0,
"CorruptBlocks" : 0,
"ScheduledReplicationBlocks" : 0,
"PendingDeletionBlocks" : 0,
"ExcessBlocks" : 0,
"PostponedMisreplicatedBlocks" : 0,
"PendingDataNodeMessageCount" : 0,
"MillisSinceLastLoadedEdits" : 0,
"BlockCapacity" : 2097152,
"StaleDataNodes" : 0,
"TotalFiles" : 825
} ]
}
2. I checked carefully and found that the exception I hit is different from the
one reported; my exception indicates a decode error.
File
"/usr/lib/python2.6/site-packages/resource_management/libraries/functions/jmx.py",
line 40, in get_value_from_jmx
data_dict = json.loads(data)
File "/usr/lib/python2.6/site-packages/ambari_simplejson/__init__.py", line 307,
in loads
return _default_decoder.decode(s)
File "/usr/lib/python2.6/site-packages/ambari_simplejson/decoder.py", line
335, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "/usr/lib/python2.6/site-packages/ambari_simplejson/decoder.py", line
353, in raw_decode
raise ValueError("No JSON object could be decoded")
ValueError: No JSON object could be decoded
This seems to be a new issue. Given the JSON objects I got above, does anyone
know what the error is? How can I work around it? Thanks.
> Unable to Start NameNode in HA Mode On HDP 2.0
> ----------------------------------------------
>
> Key: AMBARI-12374
> URL: https://issues.apache.org/jira/browse/AMBARI-12374
> Project: Ambari
> Issue Type: Bug
> Components: ambari-server
> Reporter: Dmitry Lysnichenko
> Assignee: Dmitry Lysnichenko
> Fix For: 2.1.1
>
> Attachments: AMBARI-12374.patch
>
>
> When starting an HA NameNode cluster on HDP 2.0, the following error is seen:
> {code}
> 2015-07-07 16:02:56,371 - Getting jmx metrics from NN failed. URL:
> http://c6401.ambari.apache.org:50070/jmx?qry=Hadoop:service=NameNode,name=NameNodeStatus
> Traceback (most recent call last):
> File
> "/usr/lib/python2.6/site-packages/resource_management/libraries/functions/jmx.py",
> line 41, in get_value_from_jmx
> return data_dict["beans"][0][property]
> IndexError: list index out of range
> 2015-07-07 16:02:56,396 - Getting jmx metrics from NN failed. URL:
> http://c6402.ambari.apache.org:50070/jmx?qry=Hadoop:service=NameNode,name=NameNodeStatus
> Traceback (most recent call last):
> File
> "/usr/lib/python2.6/site-packages/resource_management/libraries/functions/jmx.py",
> line 41, in get_value_from_jmx
> return data_dict["beans"][0][property]
> IndexError: list index out of range
> Traceback (most recent call last):
> File
> "/var/lib/ambari-agent/cache/common-services/HDFS/2.1.0.2.0/package/scripts/namenode.py",
> line 316, in <module>
> NameNode().execute()
> File
> "/usr/lib/python2.6/site-packages/resource_management/libraries/script/script.py",
> line 216, in execute
> method(env)
> File
> "/var/lib/ambari-agent/cache/common-services/HDFS/2.1.0.2.0/package/scripts/namenode.py",
> line 81, in start
> namenode(action="start", rolling_restart=rolling_restart, env=env)
> File "/usr/lib/python2.6/site-packages/ambari_commons/os_family_impl.py",
> line 89, in thunk
> return fn(*args, **kwargs)
> File
> "/var/lib/ambari-agent/cache/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py",
> line 141, in namenode
> create_hdfs_directories(is_active_namenode_cmd)
> File
> "/var/lib/ambari-agent/cache/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py",
> line 198, in create_hdfs_directories
> only_if=check
> File "/usr/lib/python2.6/site-packages/resource_management/core/base.py",
> line 157, in __init__
> self.env.run()
> File
> "/usr/lib/python2.6/site-packages/resource_management/core/environment.py",
> line 152, in run
> self.run_action(resource, action)
> File
> "/usr/lib/python2.6/site-packages/resource_management/core/environment.py",
> line 118, in run_action
> provider_action()
> File
> "/usr/lib/python2.6/site-packages/resource_management/libraries/providers/hdfs_resource.py",
> line 390, in action_create_on_execute
> self.action_delayed("create")
> File
> "/usr/lib/python2.6/site-packages/resource_management/libraries/providers/hdfs_resource.py",
> line 387, in action_delayed
> self.get_hdfs_resource_executor().action_delayed(action_name, self)
> File
> "/usr/lib/python2.6/site-packages/resource_management/libraries/providers/hdfs_resource.py",
> line 239, in action_delayed
> main_resource.resource.security_enabled, main_resource.resource.logoutput)
> File
> "/usr/lib/python2.6/site-packages/resource_management/libraries/providers/hdfs_resource.py",
> line 126, in __init__
> security_enabled, run_user)
> File
> "/usr/lib/python2.6/site-packages/resource_management/libraries/functions/namenode_ha_utils.py",
> line 113, in get_property_for_active_namenode
> raise Fail("There is no active namenodes.")
> resource_management.core.exceptions.Fail: There is no active namenodes.
> {code}
> Although the NameNode does actually start, a failure is recorded in the
> request, stopping the rest of the cluster from coming up. This probably
> because the JMX properties for Active and Standby NameNode are different in
> HDP 2.0 vs HDP 2.1+:
> {code:title=active jmx}
> {
> "name" : "Hadoop:service=NameNode,name=FSNamesystem",
> "modelerType" : "FSNamesystem",
> "tag.Context" : "dfs",
> "tag.HAState" : "active",
> {code}
> {code:title=standby jmx}
> {
> "name" : "Hadoop:service=NameNode,name=FSNamesystem",
> "modelerType" : "FSNamesystem",
> "tag.Context" : "dfs",
> "tag.HAState" : "standby",
> {code}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)