isisd: make send_lsp more robust

Signed-off-by: Christian Franke <chris@opensourcerouting.org>
Tested-by: NetDEF CI System <cisystem@netdef.org>
diff --git a/isisd/isis_bpf.c b/isisd/isis_bpf.c
index f6176ef..fd65608 100644
--- a/isisd/isis_bpf.c
+++ b/isisd/isis_bpf.c
@@ -29,6 +29,7 @@
 #include <net/bpf.h>
 
 #include "log.h"
+#include "network.h"
 #include "stream.h"
 #include "if.h"
 
@@ -339,8 +340,20 @@
 
   /* now we can send this */
   written = write (circuit->fd, sock_buff, buflen);
+  if (written < 0)
+    {
+      /*
+       * Report the failure to the caller: ISIS_WARNING for errno values
+       * that ERRNO_IO_RETRY() accepts, ISIS_ERROR for everything else.
+       */
+      zlog_warn("IS-IS bpf: could not transmit packet on %s: %s",
+                circuit->interface->name, safe_strerror(errno));
+      if (ERRNO_IO_RETRY(errno))
+        return ISIS_WARNING;
+      return ISIS_ERROR;
+    }
 
   return ISIS_OK;
 }
 
 int
diff --git a/isisd/isis_dlpi.c b/isisd/isis_dlpi.c
index b583d10..7c7e090 100644
--- a/isisd/isis_dlpi.c
+++ b/isisd/isis_dlpi.c
@@ -33,6 +33,7 @@
 #include <sys/pfmod.h>
 
 #include "log.h"
+#include "network.h"
 #include "stream.h"
 #include "if.h"
 
@@ -90,13 +91,14 @@
  * interfaces plus the (optional; not needed) Solaris packet filter module.
  */
 
-static void
+static int
 dlpisend (int fd, const void *cbuf, size_t cbuflen,
   const void *dbuf, size_t dbuflen, int flags)
 {
   const struct strbuf *ctlptr = NULL;
   const struct strbuf *dataptr = NULL;
   struct strbuf ctlbuf, databuf;
+  int rv;
 
   if (cbuf != NULL)
     {
@@ -115,8 +117,16 @@
     }
 
   /* We assume this doesn't happen often and isn't operationally significant */
-  if (putmsg (fd, ctlptr, dataptr, flags) == -1)
-    zlog_debug ("%s: putmsg: %s", __func__, safe_strerror (errno));
+  rv = putmsg(fd, ctlptr, dataptr, flags);
+  if (rv == -1 && dbuf == NULL)
+    {
+      /*
+       * For actual PDU transmission - recognizable by dbuf != NULL,
+       * the error is passed upwards and should not be printed here.
+       */
+      zlog_debug ("%s: putmsg: %s", __func__, safe_strerror (errno));
+    }
+  return rv;
 }
 
 static ssize_t
@@ -587,6 +597,7 @@
   char *dstaddr;
   u_short *dstsap;
   int buflen;
+  int rv;
 
   buflen = stream_get_endp (circuit->snd_stream) + LLC_LEN;
   if ((size_t)buflen > sizeof (sock_buff))
@@ -626,8 +637,17 @@
   sock_buff[2] = 0x03;
   memcpy (sock_buff + LLC_LEN, circuit->snd_stream->data,
 	  stream_get_endp (circuit->snd_stream));
-  dlpisend (circuit->fd, dur, sizeof (*dur) + dur->dl_dest_addr_length,
-	    sock_buff, buflen, 0);
+  rv = dlpisend(circuit->fd, dur, sizeof (*dur) + dur->dl_dest_addr_length,
+                sock_buff, buflen, 0);
+  if (rv < 0)
+    {
+      zlog_warn("IS-IS dlpi: could not transmit packet on %s: %s",
+                circuit->interface->name, safe_strerror(errno));
+      if (ERRNO_IO_RETRY(errno))
+        return ISIS_WARNING;
+      return ISIS_ERROR;
+    }
+
   return ISIS_OK;
 }
 
diff --git a/isisd/isis_pdu.c b/isisd/isis_pdu.c
index 4c420f3..0401e44 100644
--- a/isisd/isis_pdu.c
+++ b/isisd/isis_pdu.c
@@ -3087,15 +3087,14 @@
   struct isis_circuit *circuit;
   struct isis_lsp *lsp;
   struct listnode *node;
+  int clear_srm = 1;
   int retval = ISIS_OK;
 
   circuit = THREAD_ARG (thread);
   assert (circuit);
 
-  if (circuit->state != C_STATE_UP || circuit->is_passive == 1)
-  {
-    return retval;
-  }
+  if (!circuit->lsp_queue)
+    return ISIS_OK;
 
   node = listhead (circuit->lsp_queue);
 
@@ -3105,28 +3104,56 @@
    * thread gets a chance to run.
    */
   if (!node)
-    {
-      return retval;
-    }
+    return ISIS_OK;
 
+  /*
+   * Delete LSP from lsp_queue. If it's still in queue, it is assumed
+   * as 'transmit pending', but send_lsp may never be called again.
+   * Retry will happen because SRM flag will not be cleared.
+   */
   lsp = listgetdata(node);
+  list_delete_node (circuit->lsp_queue, node);
+
+  /* Set the last-cleared time if the queue is empty. */
+  /* TODO: Is it possible that new lsps keep being added to the queue
+   * that the queue is never empty? */
+  if (list_isempty (circuit->lsp_queue))
+    circuit->lsp_queue_last_cleared = time (NULL);
+
+  if (circuit->state != C_STATE_UP || circuit->is_passive == 1)
+    goto out;
 
   /*
    * Do not send if levels do not match
    */
   if (!(lsp->level & circuit->is_type))
-    {
-      list_delete_node (circuit->lsp_queue, node);
-      return retval;
-    }
+    goto out;
 
   /*
    * Do not send if we do not have adjacencies in state up on the circuit
    */
   if (circuit->upadjcount[lsp->level - 1] == 0)
+    goto out;
+
+  /* stream_copy will assert and stop program execution if LSP is larger than
+   * the circuit's MTU. So handle and log this case here. */
+  if (stream_get_endp(lsp->pdu) > stream_get_size(circuit->snd_stream))
     {
-      list_delete_node (circuit->lsp_queue, node);
-      return retval;
+      zlog_err("ISIS-Upd (%s): Can't send L%d LSP %s, seq 0x%08x,"
+               " cksum 0x%04x, lifetime %us on %s. LSP Size is %zu"
+               " while interface stream size is %zu.",
+               circuit->area->area_tag, lsp->level,
+               rawlspid_print(lsp->lsp_header->lsp_id),
+               ntohl(lsp->lsp_header->seq_num),
+               ntohs(lsp->lsp_header->checksum),
+               ntohs(lsp->lsp_header->rem_lifetime),
+               circuit->interface->name,
+               stream_get_endp(lsp->pdu),
+               stream_get_size(circuit->snd_stream));
+      if (isis->debugs & DEBUG_PACKET_DUMP)
+        zlog_dump_data(STREAM_DATA(lsp->pdu), stream_get_endp(lsp->pdu));
+      retval = ISIS_ERROR;
+      goto out;
     }
 
   /* copy our lsp to the send buffer */
@@ -3147,32 +3174,29 @@
                         stream_get_endp (circuit->snd_stream));
     }
 
+  clear_srm = 0;
   retval = circuit->tx (circuit, lsp->level);
   if (retval != ISIS_OK)
     {
-      zlog_err ("ISIS-Upd (%s): Send L%d LSP on %s failed",
+      zlog_err ("ISIS-Upd (%s): Send L%d LSP on %s failed %s",
                 circuit->area->area_tag, lsp->level,
-                circuit->interface->name);
-      return retval;
+                circuit->interface->name,
+                (retval == ISIS_WARNING) ? "temporarily" : "permanently");
     }
 
-  /*
-   * If the sending succeeded, we can del the lsp from circuits
-   * lsp_queue
-   */
-  list_delete_node (circuit->lsp_queue, node);
-
-  /* Set the last-cleared time if the queue is empty. */
-  /* TODO: Is is possible that new lsps keep being added to the queue
-   * that the queue is never empty? */
-  if (list_isempty (circuit->lsp_queue))
-    circuit->lsp_queue_last_cleared = time (NULL);
-
-  /*
-   * On broadcast circuits also the SRMflag can be cleared
-   */
-  if (circuit->circ_type == CIRCUIT_T_BROADCAST)
-    ISIS_CLEAR_FLAG (lsp->SRMflags, circuit);
+out:
+  if (clear_srm
+      || (retval == ISIS_OK && circuit->circ_type == CIRCUIT_T_BROADCAST)
+      || (retval != ISIS_OK && retval != ISIS_WARNING))
+    {
+      /* SRM flag will trigger retransmission. We will not retransmit if we
+       * encountered a fatal error.
+       * On success, it should only be cleared if it's a broadcast circuit.
+       * On a P2P circuit, we will wait for the ack from the neighbor to clear
+       * the flag.
+       */
+      ISIS_CLEAR_FLAG (lsp->SRMflags, circuit);
+    }
 
   return retval;
 }
diff --git a/isisd/isis_pfpacket.c b/isisd/isis_pfpacket.c
index a9ecd40..2427047 100644
--- a/isisd/isis_pfpacket.c
+++ b/isisd/isis_pfpacket.c
@@ -26,6 +26,7 @@
 #include <netpacket/packet.h>
 
 #include "log.h"
+#include "network.h"
 #include "stream.h"
 #include "if.h"
 
@@ -367,8 +368,14 @@
   iov[1].iov_base = circuit->snd_stream->data;
   iov[1].iov_len = stream_get_endp (circuit->snd_stream);
 
-  sendmsg (circuit->fd, &msg, 0);
-
+  if (sendmsg(circuit->fd, &msg, 0) < 0)
+    {
+      zlog_warn("IS-IS pfpacket: could not transmit packet on %s: %s",
+                circuit->interface->name, safe_strerror(errno));
+      if (ERRNO_IO_RETRY(errno))
+        return ISIS_WARNING;
+      return ISIS_ERROR;
+    }
   return ISIS_OK;
 }
 
@@ -376,6 +383,7 @@
 isis_send_pdu_p2p (struct isis_circuit *circuit, int level)
 {
   struct sockaddr_ll sa;
+  ssize_t rv;
 
   stream_set_getp (circuit->snd_stream, 0);
   memset (&sa, 0, sizeof (struct sockaddr_ll));
@@ -391,11 +399,18 @@
 
   /* lets try correcting the protocol */
   sa.sll_protocol = htons (0x00FE);
-  sendto (circuit->fd, circuit->snd_stream->data,
-		    stream_get_endp (circuit->snd_stream), 0, 
-		    (struct sockaddr *) &sa,
-		    sizeof (struct sockaddr_ll));
-
+  rv = sendto(circuit->fd, circuit->snd_stream->data,
+	      stream_get_endp (circuit->snd_stream), 0,
+	      (struct sockaddr *) &sa,
+	      sizeof (struct sockaddr_ll));
+  if (rv < 0)
+    {
+      zlog_warn("IS-IS pfpacket: could not transmit packet on %s: %s",
+                circuit->interface->name, safe_strerror(errno));
+      if (ERRNO_IO_RETRY(errno))
+        return ISIS_WARNING;
+      return ISIS_ERROR;
+    }
   return ISIS_OK;
 }