[cmaster-next] [PATCH] lib: Partial Revert of 4ecc09d and modify zclient connect behavior

Donald Sharp sharpd at cumulusnetworks.com
Fri Dec 9 08:55:00 EST 2016


Commit 43cc09d has been shown to cause several issues with clients
connecting.

Partial revert, since I wanted to keep the debug logs added
for that commit, as well remove the piece of code that
stops attempting to connect to zebra.  If we've failed
a bunch of times, there is nothing wrong with continuing
to do so once every 60 seconds.  I've debug guarded
the connect failure for those people running bgp
without zebra.

Signed-off-by: Donald Sharp <sharpd at cumulusnetworks.com>
---
 lib/zclient.c | 25 ++++++-------------------
 1 file changed, 6 insertions(+), 19 deletions(-)

diff --git a/lib/zclient.c b/lib/zclient.c
index 894e0d1..c8a8d58 100644
--- a/lib/zclient.c
+++ b/lib/zclient.c
@@ -216,7 +216,7 @@ zclient_socket(void)
   ret = connect (sock, (struct sockaddr *) &serv, sizeof (serv));
   if (ret < 0)
     {
-      zlog_warn ("%s connect failure: %d", __PRETTY_FUNCTION__, errno);
+      zlog_warn ("%s connect failure: %d(%s)", __PRETTY_FUNCTION__, errno, safe_strerror);
       close (sock);
       return -1;
     }
@@ -252,7 +252,8 @@ zclient_socket_un (const char *path)
   ret = connect (sock, (struct sockaddr *) &addr, len);
   if (ret < 0)
     {
-      zlog_warn ("%s connect failure: %d", __PRETTY_FUNCTION__, errno);
+      if (zclient_debug)
+        zlog_warn ("%s connect failure: %d", __PRETTY_FUNCTION__, errno);
       close (sock);
       return -1;
     }
@@ -572,23 +573,11 @@ zclient_start (struct zclient *zclient)
   if (zclient->t_connect)
     return 0;
 
-  /*
-   * If we fail to connect to the socket on initialization,
-   * Let's wait a second and see if we can reconnect.
-   * Cause if we don't connect, we never attempt to
-   * reconnect.  On startup if zebra is slow we
-   * can get into this situation.
-   */
-  while (zclient_socket_connect(zclient) < 0 && zclient->fail < 5)
+  if (zclient_socket_connect(zclient) < 0)
     {
       if (zclient_debug)
 	zlog_debug ("zclient connection fail");
       zclient->fail++;
-      sleep (1);
-    }
-
-  if (zclient->sock < 0)
-    {
       zclient_event (ZCLIENT_CONNECT, zclient);
       return -1;
     }
@@ -1727,11 +1716,9 @@ zclient_event (enum event event, struct zclient *zclient)
 	  thread_add_event (zclient->master, zclient_connect, zclient, 0);
       break;
     case ZCLIENT_CONNECT:
-      if (zclient->fail >= 10)
-	return;
       if (zclient_debug)
-	zlog_debug ("zclient connect schedule interval is %d", 
-		   zclient->fail < 3 ? 10 : 60);
+	zlog_debug ("zclient connect failures: %d schedule interval is now %d",
+		    zclient->fail, zclient->fail < 3 ? 10 : 60);
       if (! zclient->t_connect)
 	zclient->t_connect = 
 	  thread_add_timer (zclient->master, zclient_connect, zclient,
-- 
2.5.5





More information about the dev mailing list