zebra: atomic FIB updates
This commit updates the kernel API so that route changes are
applied atomically using change/replace messages instead
of first sending a withdraw followed by an update.
The same applies to zclient updates: changes are sent as a single ADD
instead of DELETE + ADD.
Signed-off-by: Timo Teräs <timo.teras@iki.fi>
diff --git a/zebra/zebra_rib.c b/zebra/zebra_rib.c
index 38357ff..7cce13f 100644
--- a/zebra/zebra_rib.c
+++ b/zebra/zebra_rib.c
@@ -1103,8 +1103,8 @@
-static void
-rib_install_kernel (struct route_node *rn, struct rib *rib)
+static int
+rib_update_kernel (struct route_node *rn, struct rib *old, struct rib *new)
{
int ret = 0;
struct nexthop *nexthop, *tnexthop;
@@ -1113,72 +1113,31 @@
if (info->safi != SAFI_UNICAST)
{
- for (ALL_NEXTHOPS_RO(rib->nexthop, nexthop, tnexthop, recursing))
- SET_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB);
- return;
+ if (new)
+ for (ALL_NEXTHOPS_RO(new->nexthop, nexthop, tnexthop, recursing))
+ SET_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB);
+ if (old)
+ for (ALL_NEXTHOPS_RO(old->nexthop, nexthop, tnexthop, recursing))
+ UNSET_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB);
+ return 0;
}
/*
* Make sure we update the FPM any time we send new information to
* the kernel.
*/
- zfpm_trigger_update (rn, "installing in kernel");
- switch (PREFIX_FAMILY (&rn->p))
- {
- case AF_INET:
- ret = kernel_add_ipv4 (&rn->p, rib);
- break;
-#ifdef HAVE_IPV6
- case AF_INET6:
- ret = kernel_add_ipv6 (&rn->p, rib);
- break;
-#endif /* HAVE_IPV6 */
- }
+ zfpm_trigger_update (rn, "updating in kernel");
+
+ ret = kernel_route_rib (&rn->p, old, new);
/* This condition is never met, if we are using rt_socket.c */
- if (ret < 0)
- {
- for (ALL_NEXTHOPS_RO(rib->nexthop, nexthop, tnexthop, recursing))
- UNSET_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB);
- }
-}
+ if (ret < 0 && new)
+ for (ALL_NEXTHOPS_RO(new->nexthop, nexthop, tnexthop, recursing))
+ UNSET_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB);
-/* Uninstall the route from kernel. */
-static int
-rib_uninstall_kernel (struct route_node *rn, struct rib *rib)
-{
- int ret = 0;
- struct nexthop *nexthop, *tnexthop;
- rib_table_info_t *info = rn->table->info;
- int recursing;
-
- if (info->safi != SAFI_UNICAST)
- {
- for (ALL_NEXTHOPS_RO(rib->nexthop, nexthop, tnexthop, recursing))
- SET_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB);
- return ret;
- }
-
- /*
- * Make sure we update the FPM any time we send new information to
- * the kernel.
- */
- zfpm_trigger_update (rn, "uninstalling from kernel");
-
- switch (PREFIX_FAMILY (&rn->p))
- {
- case AF_INET:
- ret = kernel_delete_ipv4 (&rn->p, rib);
- break;
-#ifdef HAVE_IPV6
- case AF_INET6:
- ret = kernel_delete_ipv6 (&rn->p, rib);
- break;
-#endif /* HAVE_IPV6 */
- }
-
- for (ALL_NEXTHOPS_RO(rib->nexthop, nexthop, tnexthop, recursing))
- UNSET_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB);
+ if (old)
+ for (ALL_NEXTHOPS_RO(old->nexthop, nexthop, tnexthop, recursing))
+ UNSET_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB);
return ret;
}
@@ -1196,7 +1155,7 @@
redistribute_delete (&rn->p, rib);
if (! RIB_SYSTEM_ROUTE (rib))
- rib_uninstall_kernel (rn, rib);
+ rib_update_kernel (rn, rib, NULL);
UNSET_FLAG (rib->flags, ZEBRA_FLAG_SELECTED);
}
}
@@ -1261,15 +1220,56 @@
return 1;
}
+/* Pick the better RIB entry; 'current' may be NULL, 'alternate' must not be. */
+static struct rib *
+rib_choose_best (struct rib *current, struct rib *alternate)
+{
+ if (current == NULL)
+ return alternate;
+
+ /* Selection rules, applied in the following order:
+ * - connected beats other types
+ * - lower distance beats higher
+ * - lower metric beats higher for equal distance
+ * - last, hence oldest, route wins tie break ('alternate' on equal metric).
+ */
+
+ /* Connected 'alternate': it wins unless 'current' is also connected
+ * and has a strictly lower metric.
+ */
+ if (alternate->type == ZEBRA_ROUTE_CONNECT)
+ {
+ if (current->type != ZEBRA_ROUTE_CONNECT
+ || alternate->metric <= current->metric)
+ return alternate;
+
+ return current;
+ }
+
+ if (current->type == ZEBRA_ROUTE_CONNECT)
+ return current; /* connected 'current' beats non-connected 'alternate' */
+
+ /* Lower distance wins, higher distance loses. */
+ if (alternate->distance < current->distance)
+ return alternate;
+ if (current->distance < alternate->distance)
+ return current;
+
+ /* Equal distance: lower metric wins, equal metric goes to 'alternate'. */
+ if (alternate->metric <= current->metric)
+ return alternate;
+
+ return current;
+}
+
/* Core function for processing routing information base. */
static void
rib_process (struct route_node *rn)
{
struct rib *rib;
struct rib *next;
- struct rib *fib = NULL;
- struct rib *select = NULL;
- struct rib *del = NULL;
+ struct rib *old_fib = NULL;
+ struct rib *new_fib = NULL;
int installed = 0;
struct nexthop *nexthop = NULL, *tnexthop;
int recursing;
@@ -1279,32 +1279,18 @@
info = rn->table->info;
- RNODE_FOREACH_RIB_SAFE (rn, rib, next)
+ RNODE_FOREACH_RIB (rn, rib)
{
/* Currently installed rib. */
if (CHECK_FLAG (rib->flags, ZEBRA_FLAG_SELECTED))
{
- assert (fib == NULL);
- fib = rib;
+ assert (old_fib == NULL);
+ old_fib = rib;
}
-
- /* Unlock removed routes, so they'll be freed, bar the FIB entry,
- * which we need to do do further work with below.
- */
+
+ /* Skip deleted entries from selection */
if (CHECK_FLAG (rib->status, RIB_ENTRY_REMOVED))
- {
- if (rib != fib)
- {
- if (IS_ZEBRA_DEBUG_RIB)
- rnode_debug (rn, "rn %p, removing rib %p",
- (void *)rn, (void *)rib);
- rib_unlink (rn, rib);
- }
- else
- del = rib;
-
- continue;
- }
+ continue;
/* Skip unreachable nexthop. */
if (! nexthop_active_update (rn, rib, 0))
@@ -1314,150 +1300,73 @@
if (rib->distance == DISTANCE_INFINITY)
continue;
- /* Newly selected rib, the common case. */
- if (!select)
- {
- select = rib;
- continue;
- }
-
- /* filter route selection in following order:
- * - connected beats other types
- * - lower distance beats higher
- * - lower metric beats higher for equal distance
- * - last, hence oldest, route wins tie break.
- */
-
- /* Connected routes. Pick the last connected
- * route of the set of lowest metric connected routes.
- */
- if (rib->type == ZEBRA_ROUTE_CONNECT)
- {
- if (select->type != ZEBRA_ROUTE_CONNECT
- || rib->metric <= select->metric)
- select = rib;
- continue;
- }
- else if (select->type == ZEBRA_ROUTE_CONNECT)
- continue;
-
- /* higher distance loses */
- if (rib->distance > select->distance)
- continue;
-
- /* lower wins */
- if (rib->distance < select->distance)
- {
- select = rib;
- continue;
- }
-
- /* metric tie-breaks equal distance */
- if (rib->metric <= select->metric)
- select = rib;
+ new_fib = rib_choose_best(new_fib, rib);
} /* RNODE_FOREACH_RIB_SAFE */
/* After the cycle is finished, the following pointers will be set:
- * select --- the winner RIB entry, if any was found, otherwise NULL
- * fib --- the SELECTED RIB entry, if any, otherwise NULL
- * del --- equal to fib, if fib is queued for deletion, NULL otherwise
- * rib --- NULL
+ * old_fib --- RIB entry currently having SELECTED
+ * new_fib --- RIB entry that is newly SELECTED
*/
- /* Same RIB entry is selected. Update FIB and finish. */
- if (select && select == fib)
+ /* Set real nexthops. */
+ if (new_fib)
+ nexthop_active_update (rn, new_fib, 1);
+
+ /* Update kernel if FIB entry has changed */
+ if (old_fib != new_fib
+ || (new_fib && CHECK_FLAG (new_fib->status, RIB_ENTRY_CHANGED)))
{
- if (IS_ZEBRA_DEBUG_RIB)
- rnode_debug (rn, "Updating existing route, select %p, fib %p",
- (void *)select, (void *)fib);
- if (CHECK_FLAG (select->status, RIB_ENTRY_CHANGED))
+ if (old_fib && old_fib != new_fib)
+ {
+ if (! new_fib)
+ redistribute_delete (&rn->p, old_fib);
+
+ if (! RIB_SYSTEM_ROUTE (old_fib) && (! new_fib || RIB_SYSTEM_ROUTE (new_fib)))
+ rib_update_kernel (rn, old_fib, NULL);
+ UNSET_FLAG (old_fib->flags, ZEBRA_FLAG_SELECTED);
+ }
+
+ if (new_fib)
+ {
+ /* Install new or replace existing FIB entry */
+ SET_FLAG (new_fib->flags, ZEBRA_FLAG_SELECTED);
+ redistribute_add (&rn->p, new_fib);
+
+ if (! RIB_SYSTEM_ROUTE (new_fib))
+ rib_update_kernel (rn, old_fib, new_fib);
+ }
+
+ if (info->safi == SAFI_UNICAST)
+ zfpm_trigger_update (rn, "updating existing route");
+ }
+ else if (old_fib == new_fib && new_fib && ! RIB_SYSTEM_ROUTE (new_fib))
+ {
+ /* Housekeeping code to deal with race conditions in kernel with
+ * linux netlink reporting interface up before IPv4 or IPv6 protocol
+ * is ready to add routes. This makes sure routes are IN the kernel.
+ */
+ for (ALL_NEXTHOPS_RO(new_fib->nexthop, nexthop, tnexthop, recursing))
+ if (CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB))
+ {
+ installed = 1;
+ break;
+ }
+ if (! installed)
+ rib_update_kernel (rn, NULL, new_fib);
+ }
+
+ /* Remove all RIB entries queued for removal */
+ RNODE_FOREACH_RIB_SAFE (rn, rib, next)
+ {
+ if (CHECK_FLAG (rib->status, RIB_ENTRY_REMOVED))
{
- if (info->safi == SAFI_UNICAST)
- zfpm_trigger_update (rn, "updating existing route");
-
- redistribute_delete (&rn->p, select);
- if (! RIB_SYSTEM_ROUTE (select))
- rib_uninstall_kernel (rn, select);
-
- /* Set real nexthop. */
- nexthop_active_update (rn, select, 1);
-
- if (! RIB_SYSTEM_ROUTE (select))
- rib_install_kernel (rn, select);
- redistribute_add (&rn->p, select);
+ if (IS_ZEBRA_DEBUG_RIB)
+ rnode_debug (rn, "rn %p, removing rib %p",
+ (void *)rn, (void *)rib);
+ rib_unlink (rn, rib);
}
- else if (! RIB_SYSTEM_ROUTE (select))
- {
- /* Housekeeping code to deal with
- race conditions in kernel with linux
- netlink reporting interface up before IPv4 or IPv6 protocol
- is ready to add routes.
- This makes sure the routes are IN the kernel.
- */
-
- for (ALL_NEXTHOPS_RO(select->nexthop, nexthop, tnexthop, recursing))
- if (CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB))
- {
- installed = 1;
- break;
- }
- if (! installed)
- rib_install_kernel (rn, select);
- }
- goto end;
}
- /* At this point we either haven't found the best RIB entry or it is
- * different from what we currently intend to flag with SELECTED. In both
- * cases, if a RIB block is present in FIB, it should be withdrawn.
- */
- if (fib)
- {
- if (IS_ZEBRA_DEBUG_RIB)
- rnode_debug (rn, "Removing existing route, fib %p", (void *)fib);
-
- if (info->safi == SAFI_UNICAST)
- zfpm_trigger_update (rn, "removing existing route");
-
- redistribute_delete (&rn->p, fib);
- if (! RIB_SYSTEM_ROUTE (fib))
- rib_uninstall_kernel (rn, fib);
- UNSET_FLAG (fib->flags, ZEBRA_FLAG_SELECTED);
-
- /* Set real nexthop. */
- nexthop_active_update (rn, fib, 1);
- }
-
- /* Regardless of some RIB entry being SELECTED or not before, now we can
- * tell, that if a new winner exists, FIB is still not updated with this
- * data, but ready to be.
- */
- if (select)
- {
- if (IS_ZEBRA_DEBUG_RIB)
- rnode_debug (rn, "Adding route, select %p", (void *)select);
-
- if (info->safi == SAFI_UNICAST)
- zfpm_trigger_update (rn, "new route selected");
-
- /* Set real nexthop. */
- nexthop_active_update (rn, select, 1);
-
- if (! RIB_SYSTEM_ROUTE (select))
- rib_install_kernel (rn, select);
- SET_FLAG (select->flags, ZEBRA_FLAG_SELECTED);
- redistribute_add (&rn->p, select);
- }
-
- /* FIB route was removed, should be deleted */
- if (del)
- {
- if (IS_ZEBRA_DEBUG_RIB)
- rnode_debug (rn, "Deleting fib %p, rn %p", (void *)del, (void *)rn);
- rib_unlink (rn, del);
- }
-
-end:
if (IS_ZEBRA_DEBUG_RIB_Q)
rnode_debug (rn, "rn %p dequeued", (void *)rn);
@@ -3082,7 +2991,7 @@
if (rib->type == ZEBRA_ROUTE_KERNEL &&
CHECK_FLAG (rib->flags, ZEBRA_FLAG_SELFROUTE))
{
- ret = rib_uninstall_kernel (rn, rib);
+ ret = rib_update_kernel (rn, rib, NULL);
if (! ret)
rib_delnode (rn, rib);
}
@@ -3165,7 +3074,7 @@
zfpm_trigger_update (rn, NULL);
if (! RIB_SYSTEM_ROUTE (rib))
- rib_uninstall_kernel (rn, rib);
+ rib_update_kernel (rn, rib, NULL);
}
}