[cmaster-next] [PATCH] lib: Partial Revert of 4ecc09d and modify zclient connect behavior
Renato Westphal
renato at opensourcerouting.org
Mon Dec 12 11:08:57 EST 2016
On Mon, Dec 12, 2016 at 10:59 AM, Donald Sharp
<sharpd at cumulusnetworks.com> wrote:
> Commit 4ecc09d has been shown to cause several issues with clients
> connecting.
>
> Partial revert, since I wanted to keep the debug logs added
> for that commit, as well as remove the piece of code that
> stops attempting to connect to zebra. If we've failed
> a bunch of times, there is nothing wrong with continuing
> to do so once every 60 seconds. I've debug guarded
> the connect failure for those people running bgp
> without zebra.
>
> Signed-off-by: Donald Sharp <sharpd at cumulusnetworks.com>
> ---
> lib/zclient.c | 28 +++++++++-------------------
> 1 file changed, 9 insertions(+), 19 deletions(-)
>
> diff --git a/lib/zclient.c b/lib/zclient.c
> index 894e0d1..440de36 100644
> --- a/lib/zclient.c
> +++ b/lib/zclient.c
> @@ -216,7 +216,9 @@ zclient_socket(void)
> ret = connect (sock, (struct sockaddr *) &serv, sizeof (serv));
> if (ret < 0)
> {
> - zlog_warn ("%s connect failure: %d", __PRETTY_FUNCTION__, errno);
> + if (zclient_debug)
> + zlog_warn ("%s connect failure: %d(%s)", __PRETTY_FUNCTION__,
> + errno, safe_strerror (errno));
> close (sock);
> return -1;
> }
> @@ -252,7 +254,9 @@ zclient_socket_un (const char *path)
> ret = connect (sock, (struct sockaddr *) &addr, len);
> if (ret < 0)
> {
> - zlog_warn ("%s connect failure: %d", __PRETTY_FUNCTION__, errno);
> + if (zclient_debug)
> + zlog_warn ("%s connect failure: %d(%s)", __PRETTY_FUNCTION__,
> + errno, safe_strerror (errno));
> close (sock);
> return -1;
> }
> @@ -572,23 +576,11 @@ zclient_start (struct zclient *zclient)
> if (zclient->t_connect)
> return 0;
>
> - /*
> - * If we fail to connect to the socket on initialization,
> - * Let's wait a second and see if we can reconnect.
> - * Cause if we don't connect, we never attempt to
> - * reconnect. On startup if zebra is slow we
> - * can get into this situation.
> - */
> - while (zclient_socket_connect(zclient) < 0 && zclient->fail < 5)
> + if (zclient_socket_connect(zclient) < 0)
> {
> if (zclient_debug)
> zlog_debug ("zclient connection fail");
> zclient->fail++;
> - sleep (1);
> - }
> -
> - if (zclient->sock < 0)
> - {
> zclient_event (ZCLIENT_CONNECT, zclient);
> return -1;
> }
> @@ -1727,11 +1719,9 @@ zclient_event (enum event event, struct zclient *zclient)
> thread_add_event (zclient->master, zclient_connect, zclient, 0);
> break;
> case ZCLIENT_CONNECT:
> - if (zclient->fail >= 10)
> - return;
> if (zclient_debug)
> - zlog_debug ("zclient connect schedule interval is %d",
> - zclient->fail < 3 ? 10 : 60);
> + zlog_debug ("zclient connect failures: %d schedule interval is now %d",
> + zclient->fail, zclient->fail < 3 ? 10 : 60);
> if (! zclient->t_connect)
> zclient->t_connect =
> thread_add_timer (zclient->master, zclient_connect, zclient,
> --
Acked-by: Renato Westphal <renato at opensourcerouting.org>
--
Renato Westphal
More information about the dev mailing list