ospf: Convert MAX_AGE LSA list to tree

Store the MaxAge LSA list in a tree instead of a linked list for efficient access.
Walking the list can be quite inefficient in some large systems and under certain tests.

ospfd keeps the LSAs that have reached MaxAge on a separate linked list for
removal by a remover/walker thread. When a new LSA is to be installed, the old
LSA is ejected, and as part of that ejection the MaxAge LSA list is traversed
to ensure that the old LSA is also removed from this list if it is on it.

When a large number (> 5K) of MaxAge LSAs are bombarding the system, walking
this list takes a significant amount of time, causing timers to fire and
actions to be taken, such as expiring neighbors due to expiry of DeadInterval
(especially when the timer is really low, <= 12s), creating a spiral of
instability.

Making the MaxAge LSA list a tree mitigates this problem.

Signed-off-by: Dinesh Dutt <ddutt@cumulusnetworks.com>
Reviewed-by: Ayan Banerjee <ayan@cumulusnetworks.com>
Reviewed-by: Scott Feldman <sfeldma@cumulusnetworks.com>
Reviewed-by: Shrijeet Mukherjee <shm@cumulusnetworks.com>
Signed-off-by: Scott Feldman <sfeldma@cumulusnetworks.com>
diff --git a/ospfd/ospf_lsa.c b/ospfd/ospf_lsa.c
index e778251..66c7e1c 100644
--- a/ospfd/ospf_lsa.c
+++ b/ospfd/ospf_lsa.c
@@ -2828,7 +2828,7 @@
 {
   struct ospf *ospf = THREAD_ARG (thread);
   struct ospf_lsa *lsa;
-  struct listnode *node, *nnode;
+  struct route_node *rn;
   int reschedule = 0;
 
   ospf->t_maxage = NULL;
@@ -2839,8 +2839,13 @@
   reschedule = !ospf_check_nbr_status (ospf);
 
   if (!reschedule)
-    for (ALL_LIST_ELEMENTS (ospf->maxage_lsa, node, nnode, lsa))
+    for (rn = route_top(ospf->maxage_lsa); rn; rn = route_next(rn))
       {
+	if ((lsa = rn->info) == NULL)
+	  {
+	    continue;
+	  }
+
         if (lsa->retransmit_counter > 0)
           {
             reschedule = 1;
@@ -2893,13 +2898,22 @@
 void
 ospf_lsa_maxage_delete (struct ospf *ospf, struct ospf_lsa *lsa)
 {
-  struct listnode *n;
+  struct route_node *rn;
+  struct prefix_ls lsa_prefix;
 
-  if ((n = listnode_lookup (ospf->maxage_lsa, lsa)))
+  ls_prefix_set (&lsa_prefix, lsa);
+
+  if ((rn = route_node_lookup(ospf->maxage_lsa,
+			      (struct prefix *)&lsa_prefix)))
     {
-      list_delete_node (ospf->maxage_lsa, n);
-      UNSET_FLAG(lsa->flags, OSPF_LSA_IN_MAXAGE);
-      ospf_lsa_unlock (&lsa); /* maxage_lsa */
+      if (rn->info == lsa)
+	{
+	  UNSET_FLAG(lsa->flags, OSPF_LSA_IN_MAXAGE);
+	  ospf_lsa_unlock (&lsa); /* maxage_lsa */
+	  rn->info = NULL;
+	  route_unlock_node (rn); /* route_node_get in ospf_lsa_maxage */
+	}
+	  route_unlock_node (rn); /* route_node_lookup */
     }
 }
 
@@ -2911,6 +2925,9 @@
 void
 ospf_lsa_maxage (struct ospf *ospf, struct ospf_lsa *lsa)
 {
+  struct prefix_ls lsa_prefix;
+  struct route_node *rn;
+
   /* When we saw a MaxAge LSA flooded to us, we put it on the list
      and schedule the MaxAge LSA remover. */
   if (CHECK_FLAG(lsa->flags, OSPF_LSA_IN_MAXAGE))
@@ -2921,8 +2938,25 @@
       return;
     }
 
-  listnode_add (ospf->maxage_lsa, ospf_lsa_lock (lsa));
-  SET_FLAG(lsa->flags, OSPF_LSA_IN_MAXAGE);
+  ls_prefix_set (&lsa_prefix, lsa);
+  if ((rn = route_node_get (ospf->maxage_lsa,
+			    (struct prefix *)&lsa_prefix)) != NULL)
+    {
+      if (rn->info != NULL)
+	{
+	  route_unlock_node (rn);
+	}
+      else
+	{
+	  rn->info = ospf_lsa_lock(lsa);
+	  SET_FLAG(lsa->flags, OSPF_LSA_IN_MAXAGE);
+	}
+    }
+  else
+    {
+      zlog_err("Unable to allocate memory for maxage lsa\n");
+      assert(0);
+    }
 
   if (IS_DEBUG_OSPF (lsa, LSA_FLOODING))
     zlog_debug ("LSA[%s]: MaxAge LSA remover scheduled.", dump_lsa_key (lsa));