zebra: rework recursive route resolution

Change the datastructure for recursive routes. This brings the following
benefits:

By using struct nexthop also to store nexthops obtained by recursive
resolution, we can get rid of quite a bit of code duplication in the fib
management. (rt_netlink, rt_socket, ...)

With the new datastructure we can make use of all available paths when
recursive routes are resolved with multipath routes.

Signed-off-by: Christian Franke <chris@opensourcerouting.org>
Signed-off-by: David Lamparter <equinox@opensourcerouting.org>
diff --git a/zebra/zebra_rib.c b/zebra/zebra_rib.c
index 4dd8551..e39976e 100644
--- a/zebra/zebra_rib.c
+++ b/zebra/zebra_rib.c
@@ -202,20 +202,26 @@
   return desc[nh_type];
 }
 
-/* Add nexthop to the end of the list.  */
+/* Add nexthop to the end of a nexthop list.  */
 static void
-nexthop_add (struct rib *rib, struct nexthop *nexthop)
+_nexthop_add (struct nexthop **target, struct nexthop *nexthop)
 {
   struct nexthop *last;
 
-  for (last = rib->nexthop; last && last->next; last = last->next)
+  for (last = *target; last && last->next; last = last->next)
     ;
   if (last)
     last->next = nexthop;
   else
-    rib->nexthop = nexthop;
+    *target = nexthop;
   nexthop->prev = last;
+}
 
+/* Add nexthop to the end of a rib node's nexthop list */
+static void
+nexthop_add (struct rib *rib, struct nexthop *nexthop)
+{
+  _nexthop_add(&rib->nexthop, nexthop);
   rib->nexthop_num++;
 }
 
@@ -232,15 +238,32 @@
   rib->nexthop_num--;
 }
 
+static void nexthops_free(struct nexthop *nexthop);
+
 /* Free nexthop. */
 static void
 nexthop_free (struct nexthop *nexthop)
 {
   if (nexthop->ifname)
     XFREE (0, nexthop->ifname);
+  if (nexthop->resolved)
+    nexthops_free(nexthop->resolved);
   XFREE (MTYPE_NEXTHOP, nexthop);
 }
 
+/* Frees a list of nexthops */
+static void
+nexthops_free (struct nexthop *nexthop)
+{
+  struct nexthop *nh, *next;
+
+  for (nh = nexthop; nh; nh = next)
+    {
+      next = nh->next;
+      nexthop_free (nh);
+    }
+}
+
 struct nexthop *
 nexthop_ifindex_add (struct rib *rib, unsigned int ifindex)
 {
@@ -365,6 +388,24 @@
   return nexthop;
 }
 
+/* This method checks whether a recursive nexthop has at
+ * least one resolved nexthop in the fib.
+ */
+int
+nexthop_has_fib_child(struct nexthop *nexthop)
+{
+  struct nexthop *nh;
+
+  if (! CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_RECURSIVE))
+    return 0;
+
+  for (nh = nexthop->resolved; nh; nh = nh->next)
+    if (CHECK_FLAG (nh->flags, NEXTHOP_FLAG_FIB))
+      return 1;
+
+  return 0;
+}
+
 /* If force flag is not set, do not modify falgs at all for uninstall
    the route from FIB. */
 static int
@@ -375,13 +416,19 @@
   struct route_table *table;
   struct route_node *rn;
   struct rib *match;
+  int resolved;
   struct nexthop *newhop;
+  struct nexthop *resolved_hop;
 
   if (nexthop->type == NEXTHOP_TYPE_IPV4)
     nexthop->ifindex = 0;
 
   if (set)
-    UNSET_FLAG (nexthop->flags, NEXTHOP_FLAG_RECURSIVE);
+    {
+      UNSET_FLAG (nexthop->flags, NEXTHOP_FLAG_RECURSIVE);
+      nexthops_free(nexthop->resolved);
+      nexthop->resolved = NULL;
+    }
 
   /* Make lookup prefix. */
   memset (&p, 0, sizeof (struct prefix_ipv4));
@@ -436,6 +483,7 @@
 	    }
 	  else if (CHECK_FLAG (rib->flags, ZEBRA_FLAG_INTERNAL))
 	    {
+	      resolved = 0;
 	      for (newhop = match->nexthop; newhop; newhop = newhop->next)
 		if (CHECK_FLAG (newhop->flags, NEXTHOP_FLAG_FIB)
 		    && ! CHECK_FLAG (newhop->flags, NEXTHOP_FLAG_RECURSIVE))
@@ -443,18 +491,25 @@
 		    if (set)
 		      {
 			SET_FLAG (nexthop->flags, NEXTHOP_FLAG_RECURSIVE);
-			nexthop->rtype = newhop->type;
+
+			resolved_hop = XCALLOC(MTYPE_NEXTHOP, sizeof (struct nexthop));
+			SET_FLAG (resolved_hop->flags, NEXTHOP_FLAG_ACTIVE);
+
+			resolved_hop->type = newhop->type;
 			if (newhop->type == NEXTHOP_TYPE_IPV4 ||
 			    newhop->type == NEXTHOP_TYPE_IPV4_IFINDEX)
-			  nexthop->rgate.ipv4 = newhop->gate.ipv4;
+			  resolved_hop->gate.ipv4 = newhop->gate.ipv4;
+
 			if (newhop->type == NEXTHOP_TYPE_IFINDEX
 			    || newhop->type == NEXTHOP_TYPE_IFNAME
 			    || newhop->type == NEXTHOP_TYPE_IPV4_IFINDEX)
-			  nexthop->rifindex = newhop->ifindex;
+			  resolved_hop->ifindex = newhop->ifindex;
+
+			_nexthop_add(&nexthop->resolved, resolved_hop);
 		      }
-		    return 1;
+		    resolved = 1;
 		  }
-	      return 0;
+	      return resolved;
 	    }
 	  else
 	    {
@@ -476,13 +531,19 @@
   struct route_table *table;
   struct route_node *rn;
   struct rib *match;
+  int resolved;
   struct nexthop *newhop;
+  struct nexthop *resolved_hop;
 
   if (nexthop->type == NEXTHOP_TYPE_IPV6)
     nexthop->ifindex = 0;
 
   if (set)
-    UNSET_FLAG (nexthop->flags, NEXTHOP_FLAG_RECURSIVE);
+    {
+      UNSET_FLAG (nexthop->flags, NEXTHOP_FLAG_RECURSIVE);
+      nexthops_free(nexthop->resolved);
+      nexthop->resolved = NULL;
+    }
 
   /* Make lookup prefix. */
   memset (&p, 0, sizeof (struct prefix_ipv6));
@@ -538,6 +599,7 @@
 	    }
 	  else if (CHECK_FLAG (rib->flags, ZEBRA_FLAG_INTERNAL))
 	    {
+	      resolved = 0;
 	      for (newhop = match->nexthop; newhop; newhop = newhop->next)
 		if (CHECK_FLAG (newhop->flags, NEXTHOP_FLAG_FIB)
 		    && ! CHECK_FLAG (newhop->flags, NEXTHOP_FLAG_RECURSIVE))
@@ -545,20 +607,27 @@
 		    if (set)
 		      {
 			SET_FLAG (nexthop->flags, NEXTHOP_FLAG_RECURSIVE);
-			nexthop->rtype = newhop->type;
+
+			resolved_hop = XCALLOC(MTYPE_NEXTHOP, sizeof (struct nexthop));
+			SET_FLAG (resolved_hop->flags, NEXTHOP_FLAG_ACTIVE);
+
+			resolved_hop->type = newhop->type;
 			if (newhop->type == NEXTHOP_TYPE_IPV6
 			    || newhop->type == NEXTHOP_TYPE_IPV6_IFINDEX
 			    || newhop->type == NEXTHOP_TYPE_IPV6_IFNAME)
-			  nexthop->rgate.ipv6 = newhop->gate.ipv6;
+			  resolved_hop->gate.ipv6 = newhop->gate.ipv6;
+
 			if (newhop->type == NEXTHOP_TYPE_IFINDEX
 			    || newhop->type == NEXTHOP_TYPE_IFNAME
 			    || newhop->type == NEXTHOP_TYPE_IPV6_IFINDEX
 			    || newhop->type == NEXTHOP_TYPE_IPV6_IFNAME)
-			  nexthop->rifindex = newhop->ifindex;
+			  resolved_hop->ifindex = newhop->ifindex;
+
+			_nexthop_add(&nexthop->resolved, resolved_hop);
 		      }
-		    return 1;
+		    resolved = 1;
 		  }
-	      return 0;
+	      return resolved;
 	    }
 	  else
 	    {
@@ -577,7 +646,8 @@
   struct route_table *table;
   struct route_node *rn;
   struct rib *match;
-  struct nexthop *newhop;
+  struct nexthop *newhop, *tnewhop;
+  int recursing;
 
   /* Lookup table.  */
   table = vrf_table (AFI_IP, SAFI_UNICAST, 0);
@@ -622,7 +692,7 @@
 	    return match;
 	  else
 	    {
-	      for (newhop = match->nexthop; newhop; newhop = newhop->next)
+	      for (ALL_NEXTHOPS_RO(match->nexthop, newhop, tnewhop, recursing))
 		if (CHECK_FLAG (newhop->flags, NEXTHOP_FLAG_FIB))
 		  return match;
 	      return NULL;
@@ -638,7 +708,8 @@
   struct route_table *table;
   struct route_node *rn;
   struct rib *match;
-  struct nexthop *nexthop;
+  struct nexthop *nexthop, *tnexthop;
+  int recursing;
 
   /* Lookup table.  */
   table = vrf_table (AFI_IP, SAFI_UNICAST, 0);
@@ -668,7 +739,7 @@
   if (match->type == ZEBRA_ROUTE_CONNECT)
     return match;
   
-  for (nexthop = match->nexthop; nexthop; nexthop = nexthop->next)
+  for (ALL_NEXTHOPS_RO(match->nexthop, nexthop, tnexthop, recursing))
     if (CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB))
       return match;
 
@@ -693,7 +764,9 @@
   struct route_table *table;
   struct route_node *rn;
   struct rib *match;
-  struct nexthop *nexthop;
+  struct nexthop *nexthop, *tnexthop;
+  int recursing;
+  int nexthops_active;
 
   /* Lookup table.  */
   table = vrf_table (AFI_IP, SAFI_UNICAST, 0);
@@ -727,26 +800,25 @@
     return ZEBRA_RIB_FOUND_CONNECTED;
   
   /* Ok, we have a cood candidate, let's check it's nexthop list... */
-  for (nexthop = match->nexthop; nexthop; nexthop = nexthop->next)
+  nexthops_active = 0;
+  for (ALL_NEXTHOPS_RO(match->nexthop, nexthop, tnexthop, recursing))
     if (CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB))
-    {
-      /* We are happy with either direct or recursive hexthop */
-      if (nexthop->gate.ipv4.s_addr == sockunion2ip (qgate) ||
-          nexthop->rgate.ipv4.s_addr == sockunion2ip (qgate))
-        return ZEBRA_RIB_FOUND_EXACT;
-      else
       {
+        nexthops_active = 1;
+        if (nexthop->gate.ipv4.s_addr == sockunion2ip (qgate))
+          return ZEBRA_RIB_FOUND_EXACT;
         if (IS_ZEBRA_DEBUG_RIB)
-        {
-          char gate_buf[INET_ADDRSTRLEN], rgate_buf[INET_ADDRSTRLEN], qgate_buf[INET_ADDRSTRLEN];
-          inet_ntop (AF_INET, &nexthop->gate.ipv4.s_addr, gate_buf, INET_ADDRSTRLEN);
-          inet_ntop (AF_INET, &nexthop->rgate.ipv4.s_addr, rgate_buf, INET_ADDRSTRLEN);
-          inet_ntop (AF_INET, &sockunion2ip (qgate), qgate_buf, INET_ADDRSTRLEN);
-          zlog_debug ("%s: qgate == %s, gate == %s, rgate == %s", __func__, qgate_buf, gate_buf, rgate_buf);
-        }
-        return ZEBRA_RIB_FOUND_NOGATE;
+          {
+            char gate_buf[INET_ADDRSTRLEN], qgate_buf[INET_ADDRSTRLEN];
+            inet_ntop (AF_INET, &nexthop->gate.ipv4.s_addr, gate_buf, INET_ADDRSTRLEN);
+            inet_ntop (AF_INET, &sockunion2ip(qgate), qgate_buf, INET_ADDRSTRLEN);
+            zlog_debug ("%s: qgate == %s, %s == %s", __func__,
+                        qgate_buf, recursing ? "rgate" : "gate", gate_buf);
+          }
       }
-    }
+
+  if (nexthops_active)
+    return ZEBRA_RIB_FOUND_NOGATE;
 
   return ZEBRA_RIB_NOTFOUND;
 }
@@ -759,7 +831,8 @@
   struct route_table *table;
   struct route_node *rn;
   struct rib *match;
-  struct nexthop *newhop;
+  struct nexthop *newhop, *tnewhop;
+  int recursing;
 
   /* Lookup table.  */
   table = vrf_table (AFI_IP6, SAFI_UNICAST, 0);
@@ -804,7 +877,7 @@
 	    return match;
 	  else
 	    {
-	      for (newhop = match->nexthop; newhop; newhop = newhop->next)
+	      for (ALL_NEXTHOPS_RO(match->nexthop, newhop, tnewhop, recursing))
 		if (CHECK_FLAG (newhop->flags, NEXTHOP_FLAG_FIB))
 		  return match;
 	      return NULL;
@@ -966,7 +1039,8 @@
 rib_install_kernel (struct route_node *rn, struct rib *rib)
 {
   int ret = 0;
-  struct nexthop *nexthop;
+  struct nexthop *nexthop, *tnexthop;
+  int recursing;
 
   /*
    * Make sure we update the FPM any time we send new information to
@@ -988,7 +1062,7 @@
   /* This condition is never met, if we are using rt_socket.c */
   if (ret < 0)
     {
-      for (nexthop = rib->nexthop; nexthop; nexthop = nexthop->next)
+      for (ALL_NEXTHOPS_RO(rib->nexthop, nexthop, tnexthop, recursing))
 	UNSET_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB);
     }
 }
@@ -998,7 +1072,8 @@
 rib_uninstall_kernel (struct route_node *rn, struct rib *rib)
 {
   int ret = 0;
-  struct nexthop *nexthop;
+  struct nexthop *nexthop, *tnexthop;
+  int recursing;
 
   /*
    * Make sure we update the FPM any time we send new information to
@@ -1018,7 +1093,7 @@
 #endif /* HAVE_IPV6 */
     }
 
-  for (nexthop = rib->nexthop; nexthop; nexthop = nexthop->next)
+  for (ALL_NEXTHOPS_RO(rib->nexthop, nexthop, tnexthop, recursing))
     UNSET_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB);
 
   return ret;
@@ -1114,7 +1189,8 @@
   struct rib *select = NULL;
   struct rib *del = NULL;
   int installed = 0;
-  struct nexthop *nexthop = NULL;
+  struct nexthop *nexthop = NULL, *tnexthop;
+  int recursing;
   char buf[INET6_ADDRSTRLEN];
   
   assert (rn);
@@ -1237,7 +1313,7 @@
              This makes sure the routes are IN the kernel.
            */
 
-          for (nexthop = select->nexthop; nexthop; nexthop = nexthop->next)
+          for (ALL_NEXTHOPS_RO(select->nexthop, nexthop, tnexthop, recursing))
             if (CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB))
             {
               installed = 1;
@@ -1626,7 +1702,6 @@
 static void
 rib_unlink (struct route_node *rn, struct rib *rib)
 {
-  struct nexthop *nexthop, *next;
   char buf[INET6_ADDRSTRLEN];
   rib_dest_t *dest;
 
@@ -1652,11 +1727,7 @@
     }
 
   /* free RIB and nexthops */
-  for (nexthop = rib->nexthop; nexthop; nexthop = next)
-    {
-      next = nexthop->next;
-      nexthop_free (nexthop);
-    }
+  nexthops_free(rib->nexthop);
   XFREE (MTYPE_RIB, rib);
 
 }
@@ -1786,11 +1857,12 @@
 
 void rib_dump (const char * func, const struct prefix_ipv4 * p, const struct rib * rib)
 {
-  char straddr1[INET_ADDRSTRLEN], straddr2[INET_ADDRSTRLEN];
-  struct nexthop *nexthop;
+  char straddr[INET_ADDRSTRLEN];
+  struct nexthop *nexthop, *tnexthop;
+  int recursing;
 
-  inet_ntop (AF_INET, &p->prefix, straddr1, INET_ADDRSTRLEN);
-  zlog_debug ("%s: dumping RIB entry %p for %s/%d", func, rib, straddr1, p->prefixlen);
+  inet_ntop (AF_INET, &p->prefix, straddr, INET_ADDRSTRLEN);
+  zlog_debug ("%s: dumping RIB entry %p for %s/%d", func, rib, straddr, p->prefixlen);
   zlog_debug
   (
     "%s: refcnt == %lu, uptime == %lu, type == %u, table == %d",
@@ -1817,21 +1889,20 @@
     rib->nexthop_active_num,
     rib->nexthop_fib_num
   );
-  for (nexthop = rib->nexthop; nexthop; nexthop = nexthop->next)
-  {
-    inet_ntop (AF_INET, &nexthop->gate.ipv4.s_addr, straddr1, INET_ADDRSTRLEN);
-    inet_ntop (AF_INET, &nexthop->rgate.ipv4.s_addr, straddr2, INET_ADDRSTRLEN);
-    zlog_debug
-    (
-      "%s: NH %s (%s) with flags %s%s%s",
-      func,
-      straddr1,
-      straddr2,
-      (CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_ACTIVE) ? "ACTIVE " : ""),
-      (CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB) ? "FIB " : ""),
-      (CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_RECURSIVE) ? "RECURSIVE" : "")
-    );
-  }
+  for (ALL_NEXTHOPS_RO(rib->nexthop, nexthop, tnexthop, recursing))
+    {
+      inet_ntop (AF_INET, &nexthop->gate.ipv4.s_addr, straddr, INET_ADDRSTRLEN);
+      zlog_debug
+      (
+        "%s: %s %s with flags %s%s%s",
+        func,
+        (recursing ? "  NH" : "NH"),
+        straddr,
+        (CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_ACTIVE) ? "ACTIVE " : ""),
+        (CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB) ? "FIB " : ""),
+        (CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_RECURSIVE) ? "RECURSIVE" : "")
+      );
+    }
   zlog_debug ("%s: dump complete", func);
 }
 
@@ -2018,7 +2089,8 @@
   struct rib *rib;
   struct rib *fib = NULL;
   struct rib *same = NULL;
-  struct nexthop *nexthop;
+  struct nexthop *nexthop, *tnexthop;
+  int recursing;
   char buf1[INET_ADDRSTRLEN];
   char buf2[INET_ADDRSTRLEN];
 
@@ -2085,16 +2157,23 @@
 	  break;
 	}
       /* Make sure that the route found has the same gateway. */
-      else if (gate == NULL ||
-	       ((nexthop = rib->nexthop) &&
-	        (IPV4_ADDR_SAME (&nexthop->gate.ipv4, gate) ||
-		 IPV4_ADDR_SAME (&nexthop->rgate.ipv4, gate)))) 
+      else
         {
-	  same = rib;
-	  break;
-	}
+          if (gate == NULL)
+            {
+              same = rib;
+              break;
+            }
+          for (ALL_NEXTHOPS_RO(rib->nexthop, nexthop, tnexthop, recursing))
+            if (IPV4_ADDR_SAME (&nexthop->gate.ipv4, gate))
+              {
+                same = rib;
+                break;
+              }
+          if (same)
+            break;
+        }
     }
-
   /* If same type of route can't be found and this message is from
      kernel. */
   if (! same)
@@ -2576,7 +2655,8 @@
   struct rib *rib;
   struct rib *fib = NULL;
   struct rib *same = NULL;
-  struct nexthop *nexthop;
+  struct nexthop *nexthop, *tnexthop;
+  int recursing;
   char buf1[INET6_ADDRSTRLEN];
   char buf2[INET6_ADDRSTRLEN];
 
@@ -2636,14 +2716,22 @@
 	  break;
 	}
       /* Make sure that the route found has the same gateway. */
-      else if (gate == NULL ||
-	       ((nexthop = rib->nexthop) &&
-	        (IPV6_ADDR_SAME (&nexthop->gate.ipv6, gate) ||
-		 IPV6_ADDR_SAME (&nexthop->rgate.ipv6, gate))))
-	{
-	  same = rib;
-	  break;
-	}
+      else
+        {
+          if (gate == NULL)
+            {
+              same = rib;
+              break;
+            }
+          for (ALL_NEXTHOPS_RO(rib->nexthop, nexthop, tnexthop, recursing))
+            if (IPV6_ADDR_SAME (&nexthop->gate.ipv6, gate))
+              {
+                same = rib;
+                break;
+              }
+          if (same)
+            break;
+        }
     }
 
   /* If same type of route can't be found and this message is from