zebra: implement per-route mtu handling

This commits allow overriding MTU using netlink attributes on
per-route basis. This is useful for routing protocols that can
advertice prefix specific MTUs between routers (e.g. NHRP).

Signed-off-by: Timo Teräs <timo.teras@iki.fi>
diff --git a/lib/zclient.c b/lib/zclient.c
index bfff9a3..ca6a4c7 100644
--- a/lib/zclient.c
+++ b/lib/zclient.c
@@ -562,6 +562,8 @@
     stream_putc (s, api->distance);
   if (CHECK_FLAG (api->message, ZAPI_MESSAGE_METRIC))
     stream_putl (s, api->metric);
+  if (CHECK_FLAG (api->message, ZAPI_MESSAGE_MTU))
+    stream_putl (s, api->mtu);
 
   /* Put length at the first point of the stream. */
   stream_putw_at (s, 0, stream_get_endp (s));
@@ -616,6 +618,8 @@
     stream_putc (s, api->distance);
   if (CHECK_FLAG (api->message, ZAPI_MESSAGE_METRIC))
     stream_putl (s, api->metric);
+  if (CHECK_FLAG (api->message, ZAPI_MESSAGE_MTU))
+    stream_putl (s, api->mtu);
 
   /* Put length at the first point of the stream. */
   stream_putw_at (s, 0, stream_get_endp (s));
diff --git a/lib/zclient.h b/lib/zclient.h
index aa935c1..a14f599 100644
--- a/lib/zclient.h
+++ b/lib/zclient.h
@@ -96,6 +96,7 @@
 #define ZAPI_MESSAGE_IFINDEX  0x02
 #define ZAPI_MESSAGE_DISTANCE 0x04
 #define ZAPI_MESSAGE_METRIC   0x08
+#define ZAPI_MESSAGE_MTU      0x10
 
 /* Zserv protocol message header */
 struct zserv_header
@@ -131,6 +132,8 @@
 
   u_int32_t metric;
 
+  u_int32_t mtu;
+
   vrf_id_t vrf_id;
 };
 
@@ -204,6 +207,8 @@
 
   u_int32_t metric;
 
+  u_int32_t mtu;
+
   vrf_id_t vrf_id;
 };
 
diff --git a/zebra/connected.c b/zebra/connected.c
index 244f291..84b0d1c 100644
--- a/zebra/connected.c
+++ b/zebra/connected.c
@@ -192,10 +192,10 @@
     return;
 
   rib_add_ipv4 (ZEBRA_ROUTE_CONNECT, 0, &p, NULL, NULL, ifp->ifindex,
-       ifp->vrf_id, RT_TABLE_MAIN, ifp->metric, 0, SAFI_UNICAST);
+       ifp->vrf_id, RT_TABLE_MAIN, ifp->metric, 0, 0, SAFI_UNICAST);
 
   rib_add_ipv4 (ZEBRA_ROUTE_CONNECT, 0, &p, NULL, NULL, ifp->ifindex,
-       ifp->vrf_id, RT_TABLE_MAIN, ifp->metric, 0, SAFI_MULTICAST);
+       ifp->vrf_id, RT_TABLE_MAIN, ifp->metric, 0, 0, SAFI_MULTICAST);
 
   rib_update (ifp->vrf_id);
 }
@@ -356,7 +356,7 @@
 #endif
 
   rib_add_ipv6 (ZEBRA_ROUTE_CONNECT, 0, &p, NULL, ifp->ifindex, ifp->vrf_id,
-                RT_TABLE_MAIN, ifp->metric, 0, SAFI_UNICAST);
+                RT_TABLE_MAIN, ifp->metric, 0, 0, SAFI_UNICAST);
 
   rib_update (ifp->vrf_id);
 }
diff --git a/zebra/kernel_socket.c b/zebra/kernel_socket.c
index eded2bb..fe9e4ac 100644
--- a/zebra/kernel_socket.c
+++ b/zebra/kernel_socket.c
@@ -971,7 +971,7 @@
           || rtm->rtm_type == RTM_ADD
           || rtm->rtm_type == RTM_CHANGE)
         rib_add_ipv4 (ZEBRA_ROUTE_KERNEL, zebra_flags, &p, &gate.sin.sin_addr,
-                      NULL, 0, VRF_DEFAULT, 0, 0, 0, SAFI_UNICAST);
+                      NULL, 0, VRF_DEFAULT, 0, 0, 0, 0, SAFI_UNICAST);
       else
         rib_delete_ipv4 (ZEBRA_ROUTE_KERNEL, zebra_flags, &p,
                          &gate.sin.sin_addr, 0, VRF_DEFAULT, SAFI_UNICAST);
@@ -1013,7 +1013,7 @@
           || rtm->rtm_type == RTM_ADD
           || rtm->rtm_type == RTM_CHANGE)
         rib_add_ipv6 (ZEBRA_ROUTE_KERNEL, zebra_flags, &p, &gate.sin6.sin6_addr,
-                      ifindex, VRF_DEFAULT, RT_TABLE_MAIN, 0, 0, SAFI_UNICAST);
+                      ifindex, VRF_DEFAULT, RT_TABLE_MAIN, 0, 0, 0, SAFI_UNICAST);
       else
         rib_delete_ipv6 (ZEBRA_ROUTE_KERNEL, zebra_flags, &p,
                          &gate.sin6.sin6_addr, ifindex,
diff --git a/zebra/rib.h b/zebra/rib.h
index 6e0966a..fbf4fc4 100644
--- a/zebra/rib.h
+++ b/zebra/rib.h
@@ -66,6 +66,10 @@
   /* Metric */
   u_int32_t metric;
 
+  /* MTU */
+  u_int32_t mtu;
+  u_int32_t nexthop_mtu;
+
   /* Distance. */
   u_char distance;
 
@@ -494,7 +498,7 @@
 extern int rib_add_ipv4 (int type, int flags, struct prefix_ipv4 *p, 
 			 struct in_addr *gate, struct in_addr *src,
 			 unsigned int ifindex, vrf_id_t vrf_id, int table_id,
-			 u_int32_t, u_char, safi_t);
+			 u_int32_t, u_int32_t, u_char, safi_t);
 
 extern int rib_add_ipv4_multipath (struct prefix_ipv4 *, struct rib *, safi_t);
 
@@ -531,7 +535,8 @@
 extern int
 rib_add_ipv6 (int type, int flags, struct prefix_ipv6 *p,
 	      struct in6_addr *gate, unsigned int ifindex, vrf_id_t vrf_id,
-	      int table_id, u_int32_t metric, u_char distance, safi_t safi);
+	      int table_id, u_int32_t metric, u_int32_t mtu,
+	      u_char distance, safi_t safi);
 
 extern int
 rib_delete_ipv6 (int type, int flags, struct prefix_ipv6 *p,
diff --git a/zebra/rt_netlink.c b/zebra/rt_netlink.c
index cbcbe7c..614f809 100644
--- a/zebra/rt_netlink.c
+++ b/zebra/rt_netlink.c
@@ -668,6 +668,7 @@
   int index;
   int table;
   int metric;
+  u_int32_t mtu = 0;
 
   void *dest;
   void *gate;
@@ -730,6 +731,18 @@
   if (tb[RTA_PRIORITY])
     metric = *(int *) RTA_DATA(tb[RTA_PRIORITY]);
 
+  if (tb[RTA_METRICS])
+    {
+      struct rtattr *mxrta[RTAX_MAX+1];
+
+      memset (mxrta, 0, sizeof mxrta);
+      netlink_parse_rtattr (mxrta, RTAX_MAX, RTA_DATA(tb[RTA_METRICS]),
+                            RTA_PAYLOAD(tb[RTA_METRICS]));
+
+      if (mxrta[RTAX_MTU])
+        mtu = *(u_int32_t *) RTA_DATA(mxrta[RTAX_MTU]);
+    }
+
   if (rtm->rtm_family == AF_INET)
     {
       struct prefix_ipv4 p;
@@ -739,7 +752,7 @@
 
       if (!tb[RTA_MULTIPATH])
           rib_add_ipv4 (ZEBRA_ROUTE_KERNEL, flags, &p, gate, src, index,
-                        vrf_id, table, metric, 0, SAFI_UNICAST);
+                        vrf_id, table, metric, mtu, 0, SAFI_UNICAST);
       else
         {
           /* This is a multipath route */
@@ -755,6 +768,7 @@
           rib->distance = 0;
           rib->flags = flags;
           rib->metric = metric;
+          rib->mtu = mtu;
           rib->vrf_id = vrf_id;
           rib->table = table;
           rib->nexthop_num = 0;
@@ -805,7 +819,7 @@
       p.prefixlen = rtm->rtm_dst_len;
 
       rib_add_ipv6 (ZEBRA_ROUTE_KERNEL, flags, &p, gate, index, vrf_id,
-                    table, metric, 0, SAFI_UNICAST);
+                    table, metric, mtu, 0, SAFI_UNICAST);
     }
 #endif /* HAVE_IPV6 */
 
@@ -841,6 +855,7 @@
   int index;
   int table;
   int metric;
+  u_int32_t mtu = 0;
 
   void *dest;
   void *gate;
@@ -919,8 +934,23 @@
   if (tb[RTA_PREFSRC])
     src = RTA_DATA (tb[RTA_PREFSRC]);
 
-  if (h->nlmsg_type == RTM_NEWROUTE && tb[RTA_PRIORITY])
-    metric = *(int *) RTA_DATA(tb[RTA_PRIORITY]);
+  if (h->nlmsg_type == RTM_NEWROUTE)
+    {
+      if (tb[RTA_PRIORITY])
+        metric = *(int *) RTA_DATA(tb[RTA_PRIORITY]);
+
+      if (tb[RTA_METRICS])
+        {
+          struct rtattr *mxrta[RTAX_MAX+1];
+
+          memset (mxrta, 0, sizeof mxrta);
+          netlink_parse_rtattr (mxrta, RTAX_MAX, RTA_DATA(tb[RTA_METRICS]),
+                                RTA_PAYLOAD(tb[RTA_METRICS]));
+
+          if (mxrta[RTAX_MTU])
+            mtu = *(u_int32_t *) RTA_DATA(mxrta[RTAX_MTU]);
+        }
+    }
 
   if (rtm->rtm_family == AF_INET)
     {
@@ -941,7 +971,7 @@
         {
           if (!tb[RTA_MULTIPATH])
             rib_add_ipv4 (ZEBRA_ROUTE_KERNEL, 0, &p, gate, src, index, vrf_id,
-                          table, metric, 0, SAFI_UNICAST);
+                          table, metric, mtu, 0, SAFI_UNICAST);
           else
             {
               /* This is a multipath route */
@@ -957,6 +987,7 @@
               rib->distance = 0;
               rib->flags = 0;
               rib->metric = metric;
+              rib->mtu = mtu;
               rib->vrf_id = vrf_id;
               rib->table = table;
               rib->nexthop_num = 0;
@@ -1022,7 +1053,7 @@
 
       if (h->nlmsg_type == RTM_NEWROUTE)
         rib_add_ipv6 (ZEBRA_ROUTE_KERNEL, 0, &p, gate, index, vrf_id, table,
-                      metric, 0, SAFI_UNICAST);
+                      metric, mtu, 0, SAFI_UNICAST);
       else
         rib_delete_ipv6 (ZEBRA_ROUTE_KERNEL, 0, &p, gate, index, vrf_id,
                          SAFI_UNICAST);
@@ -1634,6 +1665,20 @@
   /* Metric. */
   addattr32 (&req.n, sizeof req, RTA_PRIORITY, rib->metric);
 
+  if (rib->mtu || rib->nexthop_mtu)
+    {
+      char buf[NL_PKT_BUF_SIZE];
+      struct rtattr *rta = (void *) buf;
+      u_int32_t mtu = rib->mtu;
+      if (!mtu || (rib->nexthop_mtu && rib->nexthop_mtu < mtu))
+        mtu = rib->nexthop_mtu;
+      rta->rta_type = RTA_METRICS;
+      rta->rta_len = RTA_LENGTH(0);
+      rta_addattr_l (rta, NL_PKT_BUF_SIZE, RTAX_MTU, &mtu, sizeof mtu);
+      addattr_l (&req.n, NL_PKT_BUF_SIZE, RTA_METRICS, RTA_DATA (rta),
+                 RTA_PAYLOAD (rta));
+    }
+
   if (discard)
     {
       if (cmd == RTM_NEWROUTE)
diff --git a/zebra/zebra_rib.c b/zebra/zebra_rib.c
index c202c5f..1d09815 100644
--- a/zebra/zebra_rib.c
+++ b/zebra/zebra_rib.c
@@ -361,6 +361,7 @@
       UNSET_FLAG (nexthop->flags, NEXTHOP_FLAG_RECURSIVE);
       nexthops_free(nexthop->resolved);
       nexthop->resolved = NULL;
+      rib->nexthop_mtu = 0;
     }
 
   /* Make lookup prefix. */
@@ -469,6 +470,8 @@
 		      }
 		    resolved = 1;
 		  }
+              if (resolved && set)
+                rib->nexthop_mtu = match->mtu;
 	      return resolved;
 	    }
 	  else
@@ -1806,7 +1809,7 @@
 rib_add_ipv4 (int type, int flags, struct prefix_ipv4 *p, 
 	      struct in_addr *gate, struct in_addr *src,
 	      unsigned int ifindex, vrf_id_t vrf_id, int table_id,
-	      u_int32_t metric, u_char distance, safi_t safi)
+	      u_int32_t metric, u_int32_t mtu, u_char distance, safi_t safi)
 {
   struct rib *rib;
   struct rib *same = NULL;
@@ -1869,6 +1872,7 @@
   rib->distance = distance;
   rib->flags = flags;
   rib->metric = metric;
+  rib->mtu = mtu;
   rib->vrf_id = vrf_id;
   rib->table = table_id;
   rib->nexthop_num = 0;
@@ -2598,7 +2602,7 @@
 rib_add_ipv6 (int type, int flags, struct prefix_ipv6 *p,
 	      struct in6_addr *gate, unsigned int ifindex,
 	      vrf_id_t vrf_id, int table_id,
-	      u_int32_t metric, u_char distance, safi_t safi)
+	      u_int32_t metric, u_int32_t mtu, u_char distance, safi_t safi)
 {
   struct rib *rib;
   struct rib *same = NULL;
@@ -2654,6 +2658,7 @@
   rib->distance = distance;
   rib->flags = flags;
   rib->metric = metric;
+  rib->mtu = mtu;
   rib->vrf_id = vrf_id;
   rib->table = table_id;
   rib->nexthop_num = 0;
diff --git a/zebra/zebra_vty.c b/zebra/zebra_vty.c
index 6ad286f..b37b901 100644
--- a/zebra/zebra_vty.c
+++ b/zebra/zebra_vty.c
@@ -1290,6 +1290,8 @@
               VTY_NEWLINE);
       vty_out (vty, "  Known via \"%s\"", zebra_route_string (rib->type));
       vty_out (vty, ", distance %u, metric %u", rib->distance, rib->metric);
+      if (rib->mtu)
+        vty_out (vty, ", mtu %u", rib->mtu);
       vty_out (vty, ", vrf %u", rib->vrf_id);
       if (CHECK_FLAG (rib->flags, ZEBRA_FLAG_SELECTED))
         vty_out (vty, ", best");
diff --git a/zebra/zserv.c b/zebra/zserv.c
index e26c8ca..7a75ed4 100644
--- a/zebra/zserv.c
+++ b/zebra/zserv.c
@@ -455,6 +455,8 @@
       stream_putc (s, rib->distance);
       SET_FLAG (zapi_flags, ZAPI_MESSAGE_METRIC);
       stream_putl (s, rib->metric);
+      SET_FLAG (zapi_flags, ZAPI_MESSAGE_MTU);
+      stream_putl (s, rib->mtu);
     }
   
   /* write real message flags value */
@@ -903,6 +905,9 @@
   if (CHECK_FLAG (message, ZAPI_MESSAGE_METRIC))
     rib->metric = stream_getl (s);
     
+  if (CHECK_FLAG (message, ZAPI_MESSAGE_MTU))
+    rib->mtu = stream_getl (s);
+
   /* Table */
   rib->table=zebrad.rtm_table_default;
   rib_add_ipv4_multipath (&p, rib, safi);
@@ -1092,15 +1097,20 @@
     api.metric = stream_getl (s);
   else
     api.metric = 0;
+
+  if (CHECK_FLAG (api.message, ZAPI_MESSAGE_MTU))
+    api.mtu = stream_getl (s);
+  else
+    api.mtu = 0;
     
   if (IN6_IS_ADDR_UNSPECIFIED (&nexthop))
     rib_add_ipv6 (api.type, api.flags, &p, NULL, ifindex,
                   vrf_id, zebrad.rtm_table_default, api.metric,
-                  api.distance, api.safi);
+                  api.mtu, api.distance, api.safi);
   else
     rib_add_ipv6 (api.type, api.flags, &p, &nexthop, ifindex,
                   vrf_id, zebrad.rtm_table_default, api.metric,
-                  api.distance, api.safi);
+                  api.mtu, api.distance, api.safi);
   return 0;
 }