[cmaster-next] [PATCH] lib: Partial Revert of 4ecc09d and modify zclient connect behavior

Renato Westphal renato at opensourcerouting.org
Mon Dec 12 11:08:57 EST 2016


On Mon, Dec 12, 2016 at 10:59 AM, Donald Sharp
<sharpd at cumulusnetworks.com> wrote:
> Commit 4ecc09d has been shown to cause several issues with clients
> connecting.
>
> Partial revert, since I wanted to keep the debug logs added
> for that commit, as well as remove the piece of code that
> stops attempting to connect to zebra.  If we've failed
> a bunch of times, there is nothing wrong with continuing
> to retry once every 60 seconds.  I've debug guarded
> the connect failure for those people running bgp
> without zebra.
>
> Signed-off-by: Donald Sharp <sharpd at cumulusnetworks.com>
> ---
>  lib/zclient.c | 28 +++++++++-------------------
>  1 file changed, 9 insertions(+), 19 deletions(-)
>
> diff --git a/lib/zclient.c b/lib/zclient.c
> index 894e0d1..440de36 100644
> --- a/lib/zclient.c
> +++ b/lib/zclient.c
> @@ -216,7 +216,9 @@ zclient_socket(void)
>    ret = connect (sock, (struct sockaddr *) &serv, sizeof (serv));
>    if (ret < 0)
>      {
> -      zlog_warn ("%s connect failure: %d", __PRETTY_FUNCTION__, errno);
> +      if (zclient_debug)
> +       zlog_warn ("%s connect failure: %d(%s)", __PRETTY_FUNCTION__,
> +                  errno, safe_strerror (errno));
>        close (sock);
>        return -1;
>      }
> @@ -252,7 +254,9 @@ zclient_socket_un (const char *path)
>    ret = connect (sock, (struct sockaddr *) &addr, len);
>    if (ret < 0)
>      {
> -      zlog_warn ("%s connect failure: %d", __PRETTY_FUNCTION__, errno);
> +      if (zclient_debug)
> +        zlog_warn ("%s connect failure: %d(%s)", __PRETTY_FUNCTION__,
> +                  errno, safe_strerror (errno));
>        close (sock);
>        return -1;
>      }
> @@ -572,23 +576,11 @@ zclient_start (struct zclient *zclient)
>    if (zclient->t_connect)
>      return 0;
>
> -  /*
> -   * If we fail to connect to the socket on initialization,
> -   * Let's wait a second and see if we can reconnect.
> -   * Cause if we don't connect, we never attempt to
> -   * reconnect.  On startup if zebra is slow we
> -   * can get into this situation.
> -   */
> -  while (zclient_socket_connect(zclient) < 0 && zclient->fail < 5)
> +  if (zclient_socket_connect(zclient) < 0)
>      {
>        if (zclient_debug)
>         zlog_debug ("zclient connection fail");
>        zclient->fail++;
> -      sleep (1);
> -    }
> -
> -  if (zclient->sock < 0)
> -    {
>        zclient_event (ZCLIENT_CONNECT, zclient);
>        return -1;
>      }
> @@ -1727,11 +1719,9 @@ zclient_event (enum event event, struct zclient *zclient)
>           thread_add_event (zclient->master, zclient_connect, zclient, 0);
>        break;
>      case ZCLIENT_CONNECT:
> -      if (zclient->fail >= 10)
> -       return;
>        if (zclient_debug)
> -       zlog_debug ("zclient connect schedule interval is %d",
> -                  zclient->fail < 3 ? 10 : 60);
> +       zlog_debug ("zclient connect failures: %d schedule interval is now %d",
> +                   zclient->fail, zclient->fail < 3 ? 10 : 60);
>        if (! zclient->t_connect)
>         zclient->t_connect =
>           thread_add_timer (zclient->master, zclient_connect, zclient,
> --

Acked-by: Renato Westphal <renato at opensourcerouting.org>

-- 
Renato Westphal




More information about the dev mailing list