On some error conditions related to interface removal, the PIM callbacks would
not handle the next task, and so nothing would ever look at the task queue
again, effectively hanging the multicast routing daemon.
The attached patch fixes it. I'll open a bugzilla entry as well.
Thanks,
Ben
--
Ben Greear <[email protected]>
Candela Technologies Inc http://www.candelatech.com
diff --git a/fea/xrl_fea_target.cc b/fea/xrl_fea_target.cc
index dc7f663..310ba54 100644
--- a/fea/xrl_fea_target.cc
+++ b/fea/xrl_fea_target.cc
@@ -3514,6 +3514,8 @@ XrlFeaTarget::raw_packet4_0_1_send(
payload, error_msg)
!= XORP_OK) {
assert(error_msg.size()); // don't allow empty error messages.
+ //XLOG_ERROR("Failed raw_packet4_0_1_send, iface: %s/%s error_msg:
%s\n",
+ // if_name.c_str(), vif_name.c_str(), error_msg.c_str());
return XrlCmdError::COMMAND_FAILED(error_msg);
}
diff --git a/pim/pim_node.cc b/pim/pim_node.cc
index 3d1d233..765b5e8 100644
--- a/pim/pim_node.cc
+++ b/pim/pim_node.cc
@@ -1521,7 +1521,8 @@ PimNode::pim_send(const string& if_name,
string& error_msg)
{
if (! (is_up() || is_pending_down())) {
- error_msg = c_format("MLD/IGMP node is not UP");
+ error_msg = c_format("PimNode::pim_send MLD/IGMP node is not UP");
+ XLOG_ERROR("%s", error_msg.c_str());
return (XORP_ERROR);
}
diff --git a/pim/pim_proto_hello.cc b/pim/pim_proto_hello.cc
index f5bbd19..537b545 100644
--- a/pim/pim_proto_hello.cc
+++ b/pim/pim_proto_hello.cc
@@ -119,6 +119,10 @@ PimVif::pim_hello_recv(PimNbr *pim_nbr,
list<IPvX> secondary_addr_list;
list<IPvX>::iterator addr_list_iter;
int rcvd_family;
+
+ //XLOG_WARNING("pim_hello_recv: RX %s from %s to %s: ",
+ // PIMTYPE2ASCII(PIM_HELLO),
+ // cstring(src), cstring(dst));
//
// Parse the message
@@ -669,8 +673,8 @@ PimVif::pim_hello_send(string& error_msg)
}
}
- return (pim_send(primary_addr(), IPvX::PIM_ROUTERS(family()),
- PIM_HELLO, buffer, error_msg));
+ return pim_send(primary_addr(), IPvX::PIM_ROUTERS(family()),
+ PIM_HELLO, buffer, error_msg);
invalid_addr_family_error:
XLOG_UNREACHABLE();
diff --git a/pim/pim_vif.cc b/pim/pim_vif.cc
index 0719452..7737bdb 100644
--- a/pim/pim_vif.cc
+++ b/pim/pim_vif.cc
@@ -577,8 +577,11 @@ PimVif::pim_send(const IPvX& src, const IPvX& dst,
int ttl = MINTTL;
bool ip_internet_control = true; // XXX: might be overwritten below
- if (! (is_up() || is_pending_down()))
+ if (! (is_up() || is_pending_down())) {
+ error_msg += "Interface: " + name() + " is down or pending down when
trying pim_send\n";
+ XLOG_WARNING("%s", error_msg.c_str());
return (XORP_ERROR);
+ }
//
// Some of the messages should never be send via the PIM Register vif
@@ -591,6 +594,7 @@ PimVif::pim_send(const IPvX& src, const IPvX& dst,
case PIM_ASSERT:
case PIM_GRAFT:
case PIM_GRAFT_ACK:
+ error_msg += "Invalid message_type, is_pim_register == true\n";
return (XORP_ERROR); // Those messages are not allowed
case PIM_REGISTER:
case PIM_REGISTER_STOP:
@@ -730,12 +734,12 @@ PimVif::pim_send(const IPvX& src, const IPvX& dst,
BUFFER_COPYPUT_INET_CKSUM(cksum, buffer, 2); // XXX: the checksum
XLOG_TRACE(pim_node().is_log_trace(),
- "pim_send: TX %s from %s to %s on vif %s",
- PIMTYPE2ASCII(message_type),
- cstring(src),
- cstring(dst),
- name().c_str());
-
+ "pim_send: TX %s from %s to %s on vif %s",
+ PIMTYPE2ASCII(message_type),
+ cstring(src),
+ cstring(dst),
+ name().c_str());
+
//
// Send the message
//
diff --git a/pim/xrl_pim_node.cc b/pim/xrl_pim_node.cc
index ff8f584..daf789e 100644
--- a/pim/xrl_pim_node.cc
+++ b/pim/xrl_pim_node.cc
@@ -487,8 +487,6 @@
XrlPimNode::finder_send_register_unregister_interest_cb(const XrlError& xrl_erro
PimNode::decr_shutdown_requests_n(); // XXX: for the MFEA
}
}
- pop_xrl_task();
- send_xrl_task();
break;
case COMMAND_FAILED:
@@ -518,8 +516,6 @@
XrlPimNode::finder_send_register_unregister_interest_cb(const XrlError& xrl_erro
if (entry->target_name() == _mfea_target) {
_is_mfea_registered = false;
}
- pop_xrl_task();
- send_xrl_task();
}
break;
@@ -543,8 +539,11 @@
XrlPimNode::finder_send_register_unregister_interest_cb(const XrlError& xrl_erro
"Will try again.",
entry->operation_name(), xrl_error.str().c_str());
retry_xrl_task();
- break;
+ return;
}
+
+ pop_xrl_task();
+ send_xrl_task();
}
//
@@ -1130,8 +1129,6 @@
XrlPimNode::fea_client_send_register_unregister_receiver_cb(
else {
PimNode::decr_shutdown_requests_n(); // XXX: for FEA-non-receiver
}
- pop_xrl_task();
- send_xrl_task();
break;
case COMMAND_FAILED:
@@ -1156,8 +1153,6 @@
XrlPimNode::fea_client_send_register_unregister_receiver_cb(
XLOG_ERROR("XRL communication error: %s", xrl_error.str().c_str());
} else {
PimNode::decr_shutdown_requests_n(); // XXX: for FEA-non-receiver
- pop_xrl_task();
- send_xrl_task();
}
break;
@@ -1181,8 +1176,10 @@
XrlPimNode::fea_client_send_register_unregister_receiver_cb(
"Will try again.",
entry->operation_name(), xrl_error.str().c_str());
retry_xrl_task();
- break;
+ return;
}
+ pop_xrl_task();
+ send_xrl_task();
}
int
@@ -1319,8 +1316,6 @@
XrlPimNode::mfea_client_send_register_unregister_protocol_cb(
PimNode::decr_startup_requests_n(); // XXX: for MFEA-protocol
else
PimNode::decr_shutdown_requests_n(); // XXX: for MFEA-non-protocol
- pop_xrl_task();
- send_xrl_task();
break;
case COMMAND_FAILED:
@@ -1346,8 +1341,6 @@
XrlPimNode::mfea_client_send_register_unregister_protocol_cb(
XLOG_ERROR("XRL communication error: %s", xrl_error.str().c_str());
} else {
PimNode::decr_shutdown_requests_n(); // XXX: for MFEA-non-protocol
- pop_xrl_task();
- send_xrl_task();
}
break;
@@ -1371,8 +1364,10 @@
XrlPimNode::mfea_client_send_register_unregister_protocol_cb(
"Will try again.",
entry->operation_name(), xrl_error.str().c_str());
retry_xrl_task();
- break;
+ return;
}
+ pop_xrl_task();
+ send_xrl_task();
}
int
@@ -1515,8 +1510,6 @@ XrlPimNode::fea_client_send_join_leave_multicast_group_cb(
PimNode::decr_startup_requests_n(); // XXX: for FEA-join
else
PimNode::decr_shutdown_requests_n(); // XXX: for FEA-leave
- pop_xrl_task();
- send_xrl_task();
break;
case COMMAND_FAILED:
@@ -1524,7 +1517,7 @@ XrlPimNode::fea_client_send_join_leave_multicast_group_cb(
// If a command failed because the other side rejected it, this is
// fatal.
//
- XLOG_FATAL("Cannot %s a multicast group with the FEA: %s",
+ XLOG_WARNING("Cannot %s a multicast group with the FEA: %s",
entry->operation_name(),
xrl_error.str().c_str());
break;
@@ -1542,8 +1535,6 @@ XrlPimNode::fea_client_send_join_leave_multicast_group_cb(
XLOG_ERROR("XRL communication error: %s", xrl_error.str().c_str());
} else {
PimNode::decr_shutdown_requests_n(); // XXX: for FEA-leave
- pop_xrl_task();
- send_xrl_task();
}
break;
@@ -1572,8 +1563,10 @@
XrlPimNode::fea_client_send_join_leave_multicast_group_cb(
entry->vif_name().c_str(),
xrl_error.str().c_str());
retry_xrl_task();
- break;
+ return;
}
+ pop_xrl_task();
+ send_xrl_task();
}
int
@@ -1714,8 +1707,6 @@ XrlPimNode::mfea_client_send_add_delete_mfc_cb(
//
// If success, then schedule the next task
//
- pop_xrl_task();
- send_xrl_task();
break;
case COMMAND_FAILED:
@@ -1726,8 +1717,6 @@ XrlPimNode::mfea_client_send_add_delete_mfc_cb(
XLOG_ERROR("Cannot %s a multicast forwarding entry with the MFEA: %s",
entry->operation_name(),
xrl_error.str().c_str());
- pop_xrl_task();
- send_xrl_task();
break;
case NO_FINDER:
@@ -1763,8 +1752,10 @@ XrlPimNode::mfea_client_send_add_delete_mfc_cb(
"Will try again.",
xrl_error.str().c_str());
retry_xrl_task();
- break;
+ return;
}
+ pop_xrl_task();
+ send_xrl_task();
}
int
@@ -1990,16 +1981,12 @@
XrlPimNode::mfea_client_send_add_delete_dataflow_monitor_cb(
//
// If success, then schedule the next task
//
- pop_xrl_task();
- send_xrl_task();
break;
case COMMAND_FAILED:
XLOG_ERROR("Cannot %s dataflow monitor entry with the MFEA: %s",
entry->operation_name(),
xrl_error.str().c_str());
- pop_xrl_task();
- send_xrl_task();
break;
case NO_FINDER:
@@ -2035,8 +2022,10 @@
XrlPimNode::mfea_client_send_add_delete_dataflow_monitor_cb(
entry->operation_name(),
xrl_error.str().c_str());
retry_xrl_task();
- break;
+ return;
}
+ pop_xrl_task();
+ send_xrl_task();
}
int
@@ -2344,8 +2333,10 @@ XrlPimNode::send_protocol_message()
{
bool success = true;
- if (! _is_finder_alive)
+ if (! _is_finder_alive) {
+ XLOG_ERROR("ERROR: XrlPimNode::send_protocol_message, finder is NOT
alive!\n");
return; // The Finder is dead
+ }
XLOG_ASSERT(! _xrl_tasks_queue.empty());
XrlTaskBase* xrl_task_base = _xrl_tasks_queue.front();
@@ -2354,10 +2345,15 @@ XrlPimNode::send_protocol_message()
entry = dynamic_cast<SendProtocolMessage*>(xrl_task_base);
XLOG_ASSERT(entry != NULL);
+ //XLOG_ERROR("XrlPimNode::send_protocol_message interface/vif %s/%s. ",
+ // entry->if_name().c_str(),
+ // entry->vif_name().c_str());
+
//
// Check whether we have already registered with the FEA
//
if (! _is_fea_registered) {
+ XLOG_ERROR("ERROR: XrlPimNode::send_protocol_message, finder is NOT
registered!\n");
retry_xrl_task();
return;
}
@@ -2441,8 +2437,6 @@ XrlPimNode::fea_client_send_protocol_message_cb(const
XrlError& xrl_error)
//
// If success, then schedule the next task
//
- pop_xrl_task();
- send_xrl_task();
break;
case COMMAND_FAILED:
@@ -2460,8 +2454,6 @@ XrlPimNode::fea_client_send_protocol_message_cb(const
XrlError& xrl_error)
//
XLOG_ERROR("Cannot send a protocol message: %s",
xrl_error.str().c_str());
- pop_xrl_task();
- send_xrl_task();
break;
case NO_FINDER:
@@ -2475,8 +2467,6 @@ XrlPimNode::fea_client_send_protocol_message_cb(const
XrlError& xrl_error)
//
XLOG_ERROR("Cannot send a protocol message: %s",
xrl_error.str().c_str());
- pop_xrl_task();
- send_xrl_task();
break;
case BAD_ARGS:
@@ -2500,10 +2490,12 @@ XrlPimNode::fea_client_send_protocol_message_cb(const
XrlError& xrl_error)
//
XLOG_ERROR("Failed to send a protocol message: %s",
xrl_error.str().c_str());
- pop_xrl_task();
- send_xrl_task();
break;
}
+
+ // In all cases, do the next task.
+ pop_xrl_task();
+ send_xrl_task();
}
//
@@ -3646,8 +3638,8 @@ XrlPimNode::mld6igmp_client_0_1_delete_membership4(
if (PimNode::delete_membership(vif_index, IPvX(source), IPvX(group))
!= XORP_OK) {
- error_msg = c_format("Failed to delete membership for (%s, %s)",
- cstring(source), cstring(group));
+ error_msg = c_format("Failed to delete membership for (src: %s, group:
%s vif_idx: %i)",
+ cstring(source), cstring(group), vif_index);
return XrlCmdError::COMMAND_FAILED(error_msg);
}
_______________________________________________
Xorp-hackers mailing list
[email protected]
http://mailman.ICSI.Berkeley.EDU/mailman/listinfo/xorp-hackers