zebra: ipv6 multipath support

This patch enables multipath support for IPv6. The nexthop information
sent by the protocols carries ifindices and nexthop addresses in two
different structures. This patch combines them so that the correct APIs
can be called. Also, given that the Linux IPv6 implementation does not
support the rta_XXX APIs for multipath, communication with the kernel is
in terms of a single nexthop/ifindex pair.

Signed-off-by: Ayan Banerjee <ayan@cumulusnetworks.com>
Signed-off-by: Dinesh Dutt <ddutt@cumulusnetworks.com>
Signed-off-by: Donald Sharp <sharpd@cumulusnetworks.com>
diff --git a/zebra/rib.h b/zebra/rib.h
index 1dacc7f..677e395 100644
--- a/zebra/rib.h
+++ b/zebra/rib.h
@@ -449,6 +449,9 @@
 #define ZEBRA_RIB_NOTFOUND 3
 
 extern struct nexthop *nexthop_ipv6_add (struct rib *, struct in6_addr *);
+extern struct nexthop *nexthop_ipv6_ifindex_add (struct rib *,
+                                                 struct in6_addr *,
+                                                 ifindex_t);
 
 extern struct zebra_vrf *zebra_vrf_alloc (vrf_id_t);
 extern struct route_table *zebra_vrf_table (afi_t, safi_t, vrf_id_t);
@@ -515,6 +518,9 @@
 		 vrf_id_t vrf_id);
 
 extern int
+rib_add_ipv6_multipath (struct prefix_ipv6 *, struct rib *, safi_t);
+
+extern int
 static_delete_ipv6 (struct prefix *p, u_char type, struct in6_addr *gate,
 		    const char *ifname, u_char distance, vrf_id_t vrf_id);
 
diff --git a/zebra/zebra_rib.c b/zebra/zebra_rib.c
index 2e15f99..eec2976 100644
--- a/zebra/zebra_rib.c
+++ b/zebra/zebra_rib.c
@@ -289,7 +289,7 @@
   return nexthop;
 }
 
-static struct nexthop *
+struct nexthop *
 nexthop_ipv6_ifindex_add (struct rib *rib, struct in6_addr *ipv6,
 			  ifindex_t ifindex)
 {
@@ -2669,6 +2669,86 @@
   return 0;
 }
 
+int
+rib_add_ipv6_multipath (struct prefix_ipv6 *p, struct rib *rib, safi_t safi)
+{ /* Links rib into the AFI_IP6 table for (safi, rib->vrf_id) at prefix p. */
+  struct route_table *table;
+  struct route_node *rn;
+  struct rib *same = NULL;  /* existing entry of the same type/table, if any */
+  struct nexthop *nexthop;
+  int ret = 0;  /* 0: nothing added, 1: added, -1: added and old entry removed */
+
+  if (!rib)
+    return 0;			/* Unexpected: there is nothing to add without a rib. */
+
+  /* Look up the IPv6 table for this SAFI in the rib's VRF.  */
+  table = zebra_vrf_table (AFI_IP6, safi, rib->vrf_id);
+
+  if (! table)
+    return 0;  /* NOTE(review): rib is not freed here; confirm who owns it. */
+
+  /* Make sure the mask is applied to p. */
+  apply_mask_ipv6 (p);
+
+  /* If no distance was given (0), use the default for the route type. */
+  if (rib->distance == 0)
+    {
+      rib->distance = route_info[rib->type].distance;
+
+      /* iBGP routes get distance 200. */
+      if (rib->type == ZEBRA_ROUTE_BGP
+	  && CHECK_FLAG (rib->flags, ZEBRA_FLAG_IBGP))
+	rib->distance = 200;
+    }
+
+  /* Get the route node for p; it is unlocked again before returning. */
+  rn = route_node_get (table, (struct prefix *) p);
+
+  /* If a route of the same type is already installed, treat the new one
+     as an implicit withdraw of it. */
+  RNODE_FOREACH_RIB (rn, same) {
+     if (CHECK_FLAG (same->status, RIB_ENTRY_REMOVED)) {
+       continue;  /* already marked removed */
+     }
+     if (same->type != rib->type) {
+       continue;  /* different route type */
+     }
+
+     if (same->table != rib->table) {
+       continue;  /* different table */
+     }
+     if (same->type != ZEBRA_ROUTE_CONNECT) {
+       break;  /* match found; connected routes never stop the scan */
+     }
+  }
+
+  /* Kernel and connected routes get the FIB flag set on every nexthop. */
+  if (rib->type == ZEBRA_ROUTE_KERNEL || rib->type == ZEBRA_ROUTE_CONNECT) {
+    for (nexthop = rib->nexthop; nexthop; nexthop = nexthop->next) {
+      SET_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB);
+    }
+  }
+
+  /* Link new rib to node. */
+  rib_addnode (rn, rib);
+  ret = 1;
+  /* Remove the matched entry that the new rib implicitly replaces. */
+  if (same)
+  {
+    if (IS_ZEBRA_DEBUG_RIB)
+    {
+      zlog_debug ("%s: calling rib_delnode (%p, %p) on existing RIB entry",
+        __func__, rn, same);
+      rib_dump ((struct prefix *)p, same);
+    }
+    rib_delnode (rn, same);
+    ret = -1;  /* reports that an existing entry was replaced */
+  }
+
+  route_unlock_node (rn);  /* presumably balances route_node_get() above */
+  return ret;
+}
+
 /* XXX factor with rib_delete_ipv6 */
 int
 rib_delete_ipv6 (int type, int flags, struct prefix_ipv6 *p,
diff --git a/zebra/zserv.c b/zebra/zserv.c
index d4c5db9..86f141b 100644
--- a/zebra/zserv.c
+++ b/zebra/zserv.c
@@ -1080,34 +1080,50 @@
 {
   int i;
   struct stream *s;
-  struct zapi_ipv6 api;
   struct in6_addr nexthop;
-  unsigned long ifindex;
+  struct rib *rib;
+  u_char message;
+  u_char gateway_num;
+  u_char nexthop_type;
   struct prefix_ipv6 p;
-  
+  safi_t safi;
+  static struct in6_addr nexthops[MULTIPATH_NUM];
+  static unsigned int ifindices[MULTIPATH_NUM];
+
+  /* Get input stream.  */
   s = client->ibuf;
-  ifindex = 0;
+
   memset (&nexthop, 0, sizeof (struct in6_addr));
 
-  /* Type, flags, message. */
-  api.type = stream_getc (s);
-  api.flags = stream_getc (s);
-  api.message = stream_getc (s);
-  api.safi = stream_getw (s);
+  /* Allocate new rib. */
+  rib = XCALLOC (MTYPE_RIB, sizeof (struct rib));
 
-  /* IPv4 prefix. */
+  /* Type, flags, message. */
+  rib->type = stream_getc (s);
+  rib->flags = stream_getc (s);
+  message = stream_getc (s);
+  safi = stream_getw (s);
+  rib->uptime = time (NULL);
+
+  /* IPv6 prefix. */
   memset (&p, 0, sizeof (struct prefix_ipv6));
   p.family = AF_INET6;
   p.prefixlen = stream_getc (s);
   stream_get (&p.prefix, s, PSIZE (p.prefixlen));
 
-  /* Nexthop, ifindex, distance, metric. */
-  if (CHECK_FLAG (api.message, ZAPI_MESSAGE_NEXTHOP))
+  /* We need to give nh-addr, nh-ifindex with the same next-hop object
+   * to the rib to ensure that IPv6 multipathing works; need to coalesce
+   * these. Clients should send the same number of paired set of
+   * next-hop-addr/next-hop-ifindices. */
+  if (CHECK_FLAG (message, ZAPI_MESSAGE_NEXTHOP))
     {
-      u_char nexthop_type;
+      int nh_count = 0;
+      int if_count = 0;
+      int max_nh_if = 0;
+      unsigned int ifindex;
 
-      api.nexthop_num = stream_getc (s);
-      for (i = 0; i < api.nexthop_num; i++)
+      gateway_num = stream_getc (s);
+      for (i = 0; i < gateway_num; i++)
 	{
 	  nexthop_type = stream_getc (s);
 
@@ -1115,37 +1131,51 @@
 	    {
 	    case ZEBRA_NEXTHOP_IPV6:
 	      stream_get (&nexthop, s, 16);
+              if (nh_count < MULTIPATH_NUM) {
+	        nexthops[nh_count++] = nexthop;
+              }
 	      break;
 	    case ZEBRA_NEXTHOP_IFINDEX:
 	      ifindex = stream_getl (s);
+              if (if_count < MULTIPATH_NUM) {
+	        ifindices[if_count++] = ifindex;
+              }
 	      break;
 	    }
 	}
+
+      max_nh_if = (nh_count > if_count) ? nh_count : if_count;
+      for (i = 0; i < max_nh_if; i++)
+        {
+	  if ((i < nh_count) && !IN6_IS_ADDR_UNSPECIFIED (&nexthops[i]))
+	    {
+	      if ((i < if_count) && ifindices[i])
+		nexthop_ipv6_ifindex_add (rib, &nexthops[i], ifindices[i]);
+	      else
+		nexthop_ipv6_add (rib, &nexthops[i]);
+	    }
+          else
+	    {
+	      if ((i < if_count) && ifindices[i])
+		nexthop_ifindex_add (rib, ifindices[i]);
+	    }
+	}
     }
 
-  if (CHECK_FLAG (api.message, ZAPI_MESSAGE_DISTANCE))
-    api.distance = stream_getc (s);
-  else
-    api.distance = 0;
+  /* Distance. */
+  if (CHECK_FLAG (message, ZAPI_MESSAGE_DISTANCE))
+    rib->distance = stream_getc (s);
 
-  if (CHECK_FLAG (api.message, ZAPI_MESSAGE_METRIC))
-    api.metric = stream_getl (s);
-  else
-    api.metric = 0;
+  /* Metric. */
+  if (CHECK_FLAG (message, ZAPI_MESSAGE_METRIC))
+    rib->metric = stream_getl (s);
 
-  if (CHECK_FLAG (api.message, ZAPI_MESSAGE_MTU))
-    api.mtu = stream_getl (s);
-  else
-    api.mtu = 0;
-    
-  if (IN6_IS_ADDR_UNSPECIFIED (&nexthop))
-    rib_add_ipv6 (api.type, api.flags, &p, NULL, ifindex,
-                  vrf_id, zebrad.rtm_table_default, api.metric,
-                  api.mtu, api.distance, api.safi);
-  else
-    rib_add_ipv6 (api.type, api.flags, &p, &nexthop, ifindex,
-                  vrf_id, zebrad.rtm_table_default, api.metric,
-                  api.mtu, api.distance, api.safi);
+  if (CHECK_FLAG (message, ZAPI_MESSAGE_MTU))
+    rib->mtu = stream_getl (s);
+
+  /* Table */
+  rib->table=zebrad.rtm_table_default;
+  rib_add_ipv6_multipath (&p, rib, safi);
   return 0;
 }