lib: Allow zclient do-over of connect on initial attempt
When a protocol attempts to connect to the zebra daemon
through its socket and the initial attempt fails, give it a
few more attempts before giving up and leaving the daemon in
a bizarre state.
This problem was found by Ashley Penney, and Ashley was of
immense help in debugging and testing the fix for this issue.
Signed-off-by: Donald Sharp <sharpd@cumulusnetworks.com>
Tested-by: Ashley Penney <apenney@ntoggle.com>
Tested-by: NetDEF CI System <cisystem@netdef.org>
diff --git a/lib/zclient.c b/lib/zclient.c
index 9d50ebc..208ea5c 100644
--- a/lib/zclient.c
+++ b/lib/zclient.c
@@ -181,6 +181,7 @@
ret = connect (sock, (struct sockaddr *) &serv, sizeof (serv));
if (ret < 0)
{
+ zlog_warn ("%s connect failure: %d", __PRETTY_FUNCTION__, errno);
close (sock);
return -1;
}
@@ -216,6 +217,7 @@
ret = connect (sock, (struct sockaddr *) &addr, len);
if (ret < 0)
{
+ zlog_warn ("%s connect failure: %d", __PRETTY_FUNCTION__, errno);
close (sock);
return -1;
}
@@ -429,11 +431,23 @@
if (zclient->t_connect)
return 0;
- if (zclient_socket_connect(zclient) < 0)
+ /*
+ * If we fail to connect to the socket on initialization,
+ * Let's wait a second and see if we can reconnect.
+ * Cause if we don't connect, we never attempt to
+ * reconnect. On startup if zebra is slow we
+ * can get into this situation.
+ */
+ while (zclient_socket_connect(zclient) < 0 && zclient->fail < 5)
{
if (zclient_debug)
zlog_debug ("zclient connection fail");
zclient->fail++;
+ sleep (1);
+ }
+
+ if (zclient->sock < 0)
+ {
zclient_event (ZCLIENT_CONNECT, zclient);
return -1;
}