zebra: add structure to hold per-prefix state in RIB

Add the rib_dest_t structure to hold per-prefix state in the routing
information base. This gives us an appropriate place to maintain the
queueing state of a route_node. Queuing state was previously being
stored on the first rib in the list of ribs hanging off the
route_node.

  * zebra/rib.h

    - Add new structure rib_dest_t.

    - Remove the rn_status field from 'struct rib', it is no longer
      required.

    - Add macros (RNODE_FOREACH_RIB, RNODE_FOREACH_RIB_SAFE) for
      walking all 'struct ribs' corresponding to a route_node. These
      hide the fact that there is an intermediate rib_dest_t
      structure.

    - Add a few utility inlines to go between a rib_dest_t and
      associated structures.

  * zebra/zebra_rib.c

    - rib_link()/rib_unlink()

      Tweak for new behavior, where the 'info' pointer of a route_node
      points to a rib_dest_t. The list of ribs for a prefix now hangs
      off of the dest.

      Change the way we ref count route_nodes. We now hold a single
      ref count on a route_node if there is a corresponding
      rib_dest_t.

    - Maintain the queuing state of a route_node on the flags field of
      the rib_dest_t.

    - Add the rib_gc_dest() function, which deletes a rib_dest_t if it
      is no longer required. A rib_dest_t can be deleted iff there are
      no struct ribs hanging off of it.

    - Call rib_gc_dest() any time we unlink a rib from the
      rib_dest_t. Currently we only need to call it once, just before
      we return from rib_process().

  * zebra/{redistribute,zebra_rib,zebra_snmp,zebra_vty}.c

    Use new macros to walk over route_node ribs.

  * lib/memtypes.c

    Add memory type for rib_dest_t.

Signed-off-by: Avneesh Sachdev <avneesh@opensourcerouting.org>
Signed-off-by: David Lamparter <equinox@opensourcerouting.org>
diff --git a/zebra/zebra_rib.c b/zebra/zebra_rib.c
index 0e29e61..f0d5c9d 100644
--- a/zebra/zebra_rib.c
+++ b/zebra/zebra_rib.c
@@ -351,7 +351,7 @@
 	return 0;
 
       /* Pick up selected route. */
-      for (match = rn->info; match; match = match->next)
+      RNODE_FOREACH_RIB (rn, match)
 	{
 	  if (CHECK_FLAG (match->status, RIB_ENTRY_REMOVED))
 	    continue;
@@ -452,7 +452,7 @@
 	return 0;
 
       /* Pick up selected route. */
-      for (match = rn->info; match; match = match->next)
+      RNODE_FOREACH_RIB (rn, match)
 	{
 	  if (CHECK_FLAG (match->status, RIB_ENTRY_REMOVED))
 	    continue;
@@ -543,7 +543,7 @@
       route_unlock_node (rn);
       
       /* Pick up selected route. */
-      for (match = rn->info; match; match = match->next)
+      RNODE_FOREACH_RIB (rn, match)
 	{
 	  if (CHECK_FLAG (match->status, RIB_ENTRY_REMOVED))
 	    continue;
@@ -601,7 +601,7 @@
   /* Unlock node. */
   route_unlock_node (rn);
 
-  for (match = rn->info; match; match = match->next)
+  RNODE_FOREACH_RIB (rn, match)
     {
       if (CHECK_FLAG (match->status, RIB_ENTRY_REMOVED))
 	continue;
@@ -658,7 +658,7 @@
   route_unlock_node (rn);
 
   /* Find out if a "selected" RR for the discovered RIB entry exists ever. */
-  for (match = rn->info; match; match = match->next)
+  RNODE_FOREACH_RIB (rn, match)
     {
       if (CHECK_FLAG (match->status, RIB_ENTRY_REMOVED))
 	continue;
@@ -725,7 +725,7 @@
       route_unlock_node (rn);
       
       /* Pick up selected route. */
-      for (match = rn->info; match; match = match->next)
+      RNODE_FOREACH_RIB (rn, match)
 	{
 	  if (CHECK_FLAG (match->status, RIB_ENTRY_REMOVED))
 	    continue;
@@ -975,6 +975,61 @@
 
 static void rib_unlink (struct route_node *, struct rib *);
 
+/*
+ * rib_can_delete_dest
+ *
+ * Returns TRUE if the given dest can be deleted from the table.
+ */
+static int
+rib_can_delete_dest (rib_dest_t *dest)
+{
+  if (dest->routes)
+    {
+      return 0;
+    }
+
+  return 1;
+}
+
+/*
+ * rib_gc_dest
+ *
+ * Garbage collect the rib dest corresponding to the given route node
+ * if appropriate.
+ *
+ * Returns TRUE if the dest was deleted, FALSE otherwise.
+ */
+int
+rib_gc_dest (struct route_node *rn)
+{
+  rib_dest_t *dest;
+  char buf[INET6_ADDRSTRLEN];
+
+  dest = rib_dest_from_rnode (rn);
+  if (!dest)
+    return 0;
+
+  if (!rib_can_delete_dest (dest))
+    return 0;
+
+  if (IS_ZEBRA_DEBUG_RIB)
+    {
+      inet_ntop (rn->p.family, &rn->p.u.prefix, buf, sizeof (buf));
+      zlog_debug ("%s: %s/%d: removing dest from table", __func__,
+		  buf, rn->p.prefixlen);
+    }
+
+  dest->rnode = NULL;
+  XFREE (MTYPE_RIB_DEST, dest);
+  rn->info = NULL;
+
+  /*
+   * Release the one reference that we keep on the route node.
+   */
+  route_unlock_node (rn);
+  return 1;
+}
+
 /* Core function for processing routing information base. */
 static void
 rib_process (struct route_node *rn)
@@ -993,13 +1048,8 @@
   if (IS_ZEBRA_DEBUG_RIB || IS_ZEBRA_DEBUG_RIB_Q)
     inet_ntop (rn->p.family, &rn->p.u.prefix, buf, INET6_ADDRSTRLEN);
 
-  for (rib = rn->info; rib; rib = next)
+  RNODE_FOREACH_RIB_SAFE (rn, rib, next)
     {
-      /* The next pointer is saved, because current pointer
-       * may be passed to rib_unlink() in the middle of iteration.
-       */
-      next = rib->next;
-      
       /* Currently installed rib. */
       if (CHECK_FLAG (rib->flags, ZEBRA_FLAG_SELECTED))
         {
@@ -1017,7 +1067,7 @@
               if (IS_ZEBRA_DEBUG_RIB)
                 zlog_debug ("%s: %s/%d: rn %p, removing rib %p", __func__,
                   buf, rn->p.prefixlen, rn, rib);
-                rib_unlink (rn, rib);
+	      rib_unlink (rn, rib);
             }
           else
             del = rib;
@@ -1074,7 +1124,7 @@
       /* metric tie-breaks equal distance */
       if (rib->metric <= select->metric)
         select = rib;
-    } /* for (rib = rn->info; rib; rib = next) */
+    } /* RNODE_FOREACH_RIB_SAFE */
 
   /* After the cycle is finished, the following pointers will be set:
    * select --- the winner RIB entry, if any was found, otherwise NULL
@@ -1171,6 +1221,11 @@
 end:
   if (IS_ZEBRA_DEBUG_RIB_Q)
     zlog_debug ("%s: %s/%d: rn %p dequeued", __func__, buf, rn->p.prefixlen, rn);
+
+  /*
+   * Check if the dest can be deleted now.
+   */
+  rib_gc_dest (rn);
 }
 
 /* Take a list of route_node structs and return 1, if there was a record
@@ -1189,8 +1244,9 @@
   rnode = listgetdata (lnode);
   rib_process (rnode);
 
-  if (rnode->info) /* The first RIB record is holding the flags bitmask. */
-    UNSET_FLAG (((struct rib *)rnode->info)->rn_status, RIB_ROUTE_QUEUED(qindex));
+  if (rnode->info)
+    UNSET_FLAG (rib_dest_from_rnode (rnode)->flags, RIB_ROUTE_QUEUED (qindex));
+
 #if 0
   else
     {
@@ -1223,7 +1279,9 @@
   return mq->size ? WQ_REQUEUE : WQ_SUCCESS;
 }
 
-/* Map from rib types to queue type (priority) in meta queue */
+/*
+ * Map from rib types to queue type (priority) in meta queue
+ */
 static const u_char meta_queue_map[ZEBRA_ROUTE_MAX] = {
   [ZEBRA_ROUTE_SYSTEM]  = 4,
   [ZEBRA_ROUTE_KERNEL]  = 0,
@@ -1251,12 +1309,13 @@
   if (IS_ZEBRA_DEBUG_RIB_Q)
     inet_ntop (rn->p.family, &rn->p.u.prefix, buf, INET6_ADDRSTRLEN);
 
-  for (rib = rn->info; rib; rib = rib->next)
+  RNODE_FOREACH_RIB (rn, rib)
     {
       u_char qindex = meta_queue_map[rib->type];
 
       /* Invariant: at this point we always have rn->info set. */
-      if (CHECK_FLAG (((struct rib *)rn->info)->rn_status, RIB_ROUTE_QUEUED(qindex)))
+      if (CHECK_FLAG (rib_dest_from_rnode (rn)->flags,
+		      RIB_ROUTE_QUEUED (qindex)))
 	{
 	  if (IS_ZEBRA_DEBUG_RIB_Q)
 	    zlog_debug ("%s: %s/%d: rn %p is already queued in sub-queue %u",
@@ -1264,7 +1323,7 @@
 	  continue;
 	}
 
-      SET_FLAG (((struct rib *)rn->info)->rn_status, RIB_ROUTE_QUEUED(qindex));
+      SET_FLAG (rib_dest_from_rnode (rn)->flags, RIB_ROUTE_QUEUED (qindex));
       listnode_add (mq->subq[qindex], rn);
       route_lock_node (rn);
       mq->size++;
@@ -1286,7 +1345,7 @@
     inet_ntop (AF_INET, &rn->p.u.prefix, buf, INET_ADDRSTRLEN);
 
   /* Pointless to queue a route_node with no RIB entries to add or remove */
-  if (!rn->info)
+  if (!rnode_to_ribs (rn))
     {
       zlog_debug ("%s: called for route_node (%p, %d) with no ribs",
                   __func__, rn, rn->lock);
@@ -1395,17 +1454,16 @@
  * |-> set RIB_ENTRY_REMOVE                           |
  * rib_delnode                                  (RIB freed)
  *
- *
- * Queueing state for a route_node is kept in the head RIB entry, this
- * state must be preserved as and when the head RIB entry of a
- * route_node is changed by rib_unlink / rib_link. A small complication,
- * but saves having to allocate a dedicated object for this.
+ * The 'info' pointer of a route_node points to a rib_dest_t
+ * ('dest'). Queueing state for a route_node is kept on the dest. The
+ * dest is created on-demand by rib_link() and is kept around at least
+ * as long as there are ribs hanging off it (@see rib_gc_dest()).
  * 
  * Refcounting (aka "locking" throughout the GNU Zebra and Quagga code):
  *
  * - route_nodes: refcounted by:
- *   - RIBs attached to route_node:
- *     - managed by: rib_link/unlink
+ *   - dest attached to route_node:
+ *     - managed by: rib_link/rib_gc_dest
  *   - route_node processing queue
  *     - managed by: rib_addqueue, rib_process.
  *
@@ -1416,12 +1474,11 @@
 rib_link (struct route_node *rn, struct rib *rib)
 {
   struct rib *head;
+  rib_dest_t *dest;
   char buf[INET6_ADDRSTRLEN];
-  
+
   assert (rib && rn);
   
-  route_lock_node (rn); /* rn route table reference */
-
   if (IS_ZEBRA_DEBUG_RIB)
   {
     inet_ntop (rn->p.family, &rn->p.u.prefix, buf, INET6_ADDRSTRLEN);
@@ -1429,18 +1486,28 @@
       buf, rn->p.prefixlen, rn, rib);
   }
 
-  head = rn->info;
-  if (head)
+  dest = rib_dest_from_rnode (rn);
+  if (!dest)
     {
       if (IS_ZEBRA_DEBUG_RIB)
-        zlog_debug ("%s: %s/%d: new head, rn_status copied over", __func__,
-          buf, rn->p.prefixlen);
+	{
+	  zlog_debug ("%s: %s/%d: adding dest to table", __func__,
+		      buf, rn->p.prefixlen);
+	}
+
+      dest = XCALLOC (MTYPE_RIB_DEST, sizeof (rib_dest_t));
+      route_lock_node (rn); /* rn route table reference */
+      rn->info = dest;
+      dest->rnode = rn;
+    }
+
+  head = dest->routes;
+  if (head)
+    {
       head->prev = rib;
-      /* Transfer the rn status flags to the new head RIB */
-      rib->rn_status = head->rn_status;
     }
   rib->next = head;
-  rn->info = rib;
+  dest->routes = rib;
   rib_queue_add (&zebrad, rn);
 }
 
@@ -1465,11 +1532,21 @@
   rib_link (rn, rib);
 }
 
+/*
+ * rib_unlink
+ *
+ * Detach a rib structure from a route_node.
+ *
+ * Note that a call to rib_unlink() should be followed by a call to
+ * rib_gc_dest() at some point. This allows a rib_dest_t that is no
+ * longer required to be deleted.
+ */
 static void
 rib_unlink (struct route_node *rn, struct rib *rib)
 {
   struct nexthop *nexthop, *next;
   char buf[INET6_ADDRSTRLEN];
+  rib_dest_t *dest;
 
   assert (rn && rib);
 
@@ -1480,6 +1557,8 @@
                 __func__, buf, rn->p.prefixlen, rn, rib);
   }
 
+  dest = rib_dest_from_rnode (rn);
+
   if (rib->next)
     rib->next->prev = rib->prev;
 
@@ -1487,15 +1566,7 @@
     rib->prev->next = rib->next;
   else
     {
-      rn->info = rib->next;
-      
-      if (rn->info)
-        {
-          if (IS_ZEBRA_DEBUG_RIB)
-            zlog_debug ("%s: %s/%d: rn %p, rib %p, new head copy",
-                        __func__, buf, rn->p.prefixlen, rn, rib);
-          rib->next->rn_status = rib->rn_status;
-        }
+      dest->routes = rib->next;
     }
 
   /* free RIB and nexthops */
@@ -1506,7 +1577,6 @@
     }
   XFREE (MTYPE_RIB, rib);
 
-  route_unlock_node (rn); /* rn route table reference */
 }
 
 static void
@@ -1561,7 +1631,7 @@
 
   /* If same type of route are installed, treat it as a implicit
      withdraw. */
-  for (rib = rn->info; rib; rib = rib->next)
+  RNODE_FOREACH_RIB (rn, rib)
     {
       if (CHECK_FLAG (rib->status, RIB_ENTRY_REMOVED))
         continue;
@@ -1717,7 +1787,7 @@
   route_unlock_node (rn);
 
   /* let's go */
-  for (rib = rn->info; rib; rib = rib->next)
+  RNODE_FOREACH_RIB (rn, rib)
   {
     zlog_debug
     (
@@ -1764,7 +1834,7 @@
    * RIBQ record already on head. This is necessary for proper revalidation
    * of the rest of the RIB.
    */
-  for (rib = rn->info; rib; rib = rib->next)
+  RNODE_FOREACH_RIB (rn, rib)
   {
     if (CHECK_FLAG (rib->flags, ZEBRA_FLAG_SELECTED) &&
       ! RIB_SYSTEM_ROUTE (rib))
@@ -1816,7 +1886,7 @@
 
   /* If same type of route are installed, treat it as a implicit
      withdraw. */
-  for (same = rn->info; same; same = same->next)
+  RNODE_FOREACH_RIB (rn, same)
     {
       if (CHECK_FLAG (same->status, RIB_ENTRY_REMOVED))
         continue;
@@ -1907,7 +1977,7 @@
     }
 
   /* Lookup same type route. */
-  for (rib = rn->info; rib; rib = rib->next)
+  RNODE_FOREACH_RIB (rn, rib)
     {
       if (CHECK_FLAG (rib->status, RIB_ENTRY_REMOVED))
         continue;
@@ -2000,7 +2070,7 @@
 
   /* Lookup existing route */
   rn = route_node_get (table, p);
-  for (rib = rn->info; rib; rib = rib->next)
+  RNODE_FOREACH_RIB (rn, rib)
     {
        if (CHECK_FLAG (rib->status, RIB_ENTRY_REMOVED))
          continue;
@@ -2096,7 +2166,7 @@
   if (! rn)
     return;
 
-  for (rib = rn->info; rib; rib = rib->next)
+  RNODE_FOREACH_RIB (rn, rib)
     {
       if (CHECK_FLAG (rib->status, RIB_ENTRY_REMOVED))
         continue;
@@ -2355,7 +2425,7 @@
 
   /* If same type of route are installed, treat it as a implicit
      withdraw. */
-  for (rib = rn->info; rib; rib = rib->next)
+  RNODE_FOREACH_RIB (rn, rib)
     {
       if (CHECK_FLAG (rib->status, RIB_ENTRY_REMOVED))
         continue;
@@ -2458,7 +2528,7 @@
     }
 
   /* Lookup same type route. */
-  for (rib = rn->info; rib; rib = rib->next)
+  RNODE_FOREACH_RIB (rn, rib)
     {
       if (CHECK_FLAG(rib->status, RIB_ENTRY_REMOVED))
         continue;
@@ -2551,7 +2621,7 @@
 
   /* Lookup existing route */
   rn = route_node_get (table, p);
-  for (rib = rn->info; rib; rib = rib->next)
+  RNODE_FOREACH_RIB (rn, rib)
     {
       if (CHECK_FLAG(rib->status, RIB_ENTRY_REMOVED))
         continue;
@@ -2648,7 +2718,7 @@
   if (! rn)
     return;
 
-  for (rib = rn->info; rib; rib = rib->next)
+  RNODE_FOREACH_RIB (rn, rib)
     {
       if (CHECK_FLAG (rib->status, RIB_ENTRY_REMOVED))
         continue;
@@ -2844,13 +2914,13 @@
   table = vrf_table (AFI_IP, SAFI_UNICAST, 0);
   if (table)
     for (rn = route_top (table); rn; rn = route_next (rn))
-      if (rn->info)
+      if (rnode_to_ribs (rn))
         rib_queue_add (&zebrad, rn);
 
   table = vrf_table (AFI_IP6, SAFI_UNICAST, 0);
   if (table)
     for (rn = route_top (table); rn; rn = route_next (rn))
-      if (rn->info)
+      if (rnode_to_ribs (rn))
         rib_queue_add (&zebrad, rn);
 }
 
@@ -2865,10 +2935,8 @@
 
   if (table)
     for (rn = route_top (table); rn; rn = route_next (rn))
-      for (rib = rn->info; rib; rib = next)
+      RNODE_FOREACH_RIB_SAFE (rn, rib, next)
 	{
-	  next = rib->next;
-
 	  if (CHECK_FLAG (rib->status, RIB_ENTRY_REMOVED))
 	    continue;
 
@@ -2897,10 +2965,8 @@
 
   if (table)
     for (rn = route_top (table); rn; rn = route_next (rn))
-      for (rib = rn->info; rib; rib = next)
+      RNODE_FOREACH_RIB_SAFE (rn, rib, next)
 	{
-	  next = rib->next;
-
 	  if (CHECK_FLAG (rib->status, RIB_ENTRY_REMOVED))
 	    continue;
 
@@ -2933,9 +2999,8 @@
 
   if (table)
     for (rn = route_top (table); rn; rn = route_next (rn))
-      for (rib = rn->info; rib; rib = next)
+      RNODE_FOREACH_RIB_SAFE (rn, rib, next)
         {
-          next = rib->next;
           if (CHECK_FLAG (rib->status, RIB_ENTRY_REMOVED))
             continue;
           if (rib->type == proto)
@@ -2965,11 +3030,13 @@
 
   if (table)
     for (rn = route_top (table); rn; rn = route_next (rn))
-      for (rib = rn->info; rib; rib = rib->next)
+      RNODE_FOREACH_RIB (rn, rib)
         {
-          if (! RIB_SYSTEM_ROUTE (rib)
-	      && CHECK_FLAG (rib->flags, ZEBRA_FLAG_SELECTED))
-            rib_uninstall_kernel (rn, rib);
+          if (!CHECK_FLAG (rib->flags, ZEBRA_FLAG_SELECTED))
+	    continue;
+
+	  if (! RIB_SYSTEM_ROUTE (rib))
+	    rib_uninstall_kernel (rn, rib);
         }
 }