zebra: make MRIB lookup behaviour switchable

Depending on the usage scenario (and on the availability of multi-topology
IGP protocols, of which Quagga currently has none), different approaches to
Multicast RPF lookups are useful.

Reference behaviours from commercial vendors are urib-only/mrib-only
(Juniper, depending on inet.2 availability) and lowest-distance (Cisco).
As we are currently without MT IGP support, mrib-first seems the most
useful default for Quagga.

Cc: Everton Marques <everton.marques@gmail.com>
Cc: Balaji G <balajig81@gmail.com>
Signed-off-by: David Lamparter <equinox@opensourcerouting.org>
diff --git a/zebra/rib.h b/zebra/rib.h
index 347fadb..94a7419 100644
--- a/zebra/rib.h
+++ b/zebra/rib.h
@@ -373,6 +373,21 @@
   rib_tables_iter_state_t state;
 } rib_tables_iter_t;
 
+/* Multicast RPF lookup behaviour: which RIB(s) to consult and how to choose */
+enum multicast_mode
+{
+  MCAST_NO_CONFIG = 0,	/* acts as MIX_MRIB_FIRST; not emitted on config write */
+  MCAST_MRIB_ONLY,	/* MRIB only */
+  MCAST_URIB_ONLY,	/* URIB only */
+  MCAST_MIX_MRIB_FIRST,	/* MRIB; URIB only if the MRIB has no match */
+  MCAST_MIX_DISTANCE,	/* MRIB & URIB, lower distance wins */
+  MCAST_MIX_PFXLEN,	/* MRIB & URIB, longer prefix wins */
+			/* on a tie, MRIB wins in the last two modes */
+};
+
+extern void multicast_mode_ipv4_set (enum multicast_mode mode);
+extern enum multicast_mode multicast_mode_ipv4_get (void);
+
 extern const char *nexthop_type_to_str (enum nexthop_types_t nh_type);
 extern struct nexthop *nexthop_ifindex_add (struct rib *, unsigned int);
 extern struct nexthop *nexthop_ifname_add (struct rib *, char *);
@@ -420,6 +435,8 @@
 
 extern struct rib *rib_match_ipv4_safi (struct in_addr addr, safi_t safi,
 					int skip_bgp, struct route_node **rn_out);
+extern struct rib *rib_match_ipv4_multicast (struct in_addr addr,
+					     struct route_node **rn_out);
 
 extern struct rib *rib_lookup_ipv4 (struct prefix_ipv4 *);
 
diff --git a/zebra/zebra_rib.c b/zebra/zebra_rib.c
index abef90f..effe233 100644
--- a/zebra/zebra_rib.c
+++ b/zebra/zebra_rib.c
@@ -75,6 +75,9 @@
 /* Vector for routing table.  */
 static vector vrf_vector;
 
+/* Current IPv4 RPF lookup mode, see multicast_mode_ipv4_set () */
+static enum multicast_mode ipv4_multicast_mode = MCAST_NO_CONFIG;
+
 static void
 _rnode_zlog(const char *_func, struct route_node *rn, int priority,
 	    const char *msgfmt, ...)
@@ -782,6 +785,78 @@
 }
 
 struct rib *
+rib_match_ipv4_multicast (struct in_addr addr, struct route_node **rn_out)
+{
+  struct rib *rib = NULL, *mrib = NULL, *urib = NULL;
+  struct route_node *m_rn = NULL, *u_rn = NULL;
+  int skip_bgp = 0; /* bool */
+
+  switch (ipv4_multicast_mode)
+    {
+    case MCAST_MRIB_ONLY:
+      return rib_match_ipv4_safi (addr, SAFI_MULTICAST, skip_bgp, rn_out);
+    case MCAST_URIB_ONLY:
+      return rib_match_ipv4_safi (addr, SAFI_UNICAST, skip_bgp, rn_out);
+    case MCAST_NO_CONFIG:
+    case MCAST_MIX_MRIB_FIRST:
+      rib = mrib = rib_match_ipv4_safi (addr, SAFI_MULTICAST, skip_bgp, &m_rn);
+      if (!mrib)
+	rib = urib = rib_match_ipv4_safi (addr, SAFI_UNICAST, skip_bgp, &u_rn);
+      break;
+    case MCAST_MIX_DISTANCE:
+      mrib = rib_match_ipv4_safi (addr, SAFI_MULTICAST, skip_bgp, &m_rn);
+      urib = rib_match_ipv4_safi (addr, SAFI_UNICAST, skip_bgp, &u_rn);
+      if (mrib && urib)
+	rib = urib->distance < mrib->distance ? urib : mrib;
+      else if (mrib)
+	rib = mrib;
+      else if (urib)
+	rib = urib;
+      break;
+    case MCAST_MIX_PFXLEN:
+      mrib = rib_match_ipv4_safi (addr, SAFI_MULTICAST, skip_bgp, &m_rn);
+      urib = rib_match_ipv4_safi (addr, SAFI_UNICAST, skip_bgp, &u_rn);
+      if (mrib && urib)
+	rib = u_rn->p.prefixlen > m_rn->p.prefixlen ? urib : mrib;
+      else if (mrib)
+	rib = mrib;
+      else if (urib)
+	rib = urib;
+      break;
+  }
+
+  if (rn_out)
+    *rn_out = (rib == mrib) ? m_rn : u_rn;
+
+  if (IS_ZEBRA_DEBUG_RIB)
+    {
+      char buf[BUFSIZ];
+      inet_ntop (AF_INET, &addr, buf, BUFSIZ);
+
+      zlog_debug("%s: %s: found %s, using %s",
+		 __func__, buf,
+                 mrib ? (urib ? "MRIB+URIB" : "MRIB") :
+                         urib ? "URIB" : "nothing",
+		 rib == urib ? "URIB" : rib == mrib ? "MRIB" : "none");
+    }
+  return rib;
+}
+
+void
+multicast_mode_ipv4_set (enum multicast_mode mode)
+{
+  if (IS_ZEBRA_DEBUG_RIB)
+    zlog_debug("%s: multicast lookup mode set (%d)", __func__, mode);
+  ipv4_multicast_mode = mode;	/* read by rib_match_ipv4_multicast () */
+}
+
+enum multicast_mode
+multicast_mode_ipv4_get (void)
+{
+  return ipv4_multicast_mode;	/* MCAST_NO_CONFIG when nothing is configured */
+}
+
+struct rib *
 rib_lookup_ipv4 (struct prefix_ipv4 *p)
 {
   struct route_table *table;
diff --git a/zebra/zebra_vty.c b/zebra/zebra_vty.c
index 69245a5..f00e35e 100644
--- a/zebra/zebra_vty.c
+++ b/zebra/zebra_vty.c
@@ -189,6 +189,62 @@
        "Nexthop address\n"
        "Nexthop interface name\n")
 
+DEFUN (ip_multicast_mode,
+       ip_multicast_mode_cmd,
+       "ip multicast rpf-lookup-mode (urib-only|mrib-only|mrib-then-urib|lower-distance|longer-prefix)",
+       IP_STR
+       "Multicast options\n"
+       "RPF lookup behavior\n"
+       "Lookup in unicast RIB only\n"
+       "Lookup in multicast RIB only\n"
+       "Try multicast RIB first, fall back to unicast RIB\n"
+       "Lookup both, use entry with lower distance\n"
+       "Lookup both, use entry with longer prefix\n")
+{
+  if (!strncmp (argv[0], "u", 1))	/* each test: shortest unique keyword prefix */
+    multicast_mode_ipv4_set (MCAST_URIB_ONLY);
+  else if (!strncmp (argv[0], "mrib-o", 6))
+    multicast_mode_ipv4_set (MCAST_MRIB_ONLY);
+  else if (!strncmp (argv[0], "mrib-t", 6))
+    multicast_mode_ipv4_set (MCAST_MIX_MRIB_FIRST);
+  else if (!strncmp (argv[0], "low", 3))
+    multicast_mode_ipv4_set (MCAST_MIX_DISTANCE);
+  else if (!strncmp (argv[0], "lon", 3))
+    multicast_mode_ipv4_set (MCAST_MIX_PFXLEN);
+  else
+    {
+      vty_out (vty, "Invalid mode specified%s", VTY_NEWLINE);
+      return CMD_WARNING;
+    }
+
+  return CMD_SUCCESS;
+}
+
+DEFUN (no_ip_multicast_mode,
+       no_ip_multicast_mode_cmd,
+       "no ip multicast rpf-lookup-mode (urib-only|mrib-only|mrib-then-urib|lower-distance|longer-prefix)",
+       NO_STR
+       IP_STR
+       "Multicast options\n"
+       "RPF lookup behavior\n"
+       "Lookup in unicast RIB only\n"
+       "Lookup in multicast RIB only\n"
+       "Try multicast RIB first, fall back to unicast RIB\n"
+       "Lookup both, use entry with lower distance\n"
+       "Lookup both, use entry with longer prefix\n")
+{
+  multicast_mode_ipv4_set (MCAST_NO_CONFIG);	/* reset; mode argument is unused */
+  return CMD_SUCCESS;
+}
+
+ALIAS (no_ip_multicast_mode,
+       no_ip_multicast_mode_noarg_cmd,
+       "no ip multicast rpf-lookup-mode",
+       NO_STR
+       IP_STR
+       "Multicast options\n"
+       "RPF lookup behavior\n")
+
 DEFUN (show_ip_rpf,
        show_ip_rpf_cmd,
        "show ip rpf",
@@ -2228,10 +2284,19 @@
   return write;
 }
 
-/* ip protocol configuration write function */
-static int config_write_protocol(struct vty *vty)
-{  
+static int config_write_vty(struct vty *vty)
+{
   int i;
+  enum multicast_mode ipv4_multicast_mode = multicast_mode_ipv4_get ();
+
+  if (ipv4_multicast_mode != MCAST_NO_CONFIG)
+    vty_out (vty, "ip multicast rpf-lookup-mode %s%s",
+             ipv4_multicast_mode == MCAST_URIB_ONLY ? "urib-only" :
+             ipv4_multicast_mode == MCAST_MRIB_ONLY ? "mrib-only" :
+             ipv4_multicast_mode == MCAST_MIX_MRIB_FIRST ? "mrib-then-urib" :
+             ipv4_multicast_mode == MCAST_MIX_DISTANCE ? "lower-distance" :
+             "longer-prefix",
+             VTY_NEWLINE);
 
   for (i=0;i<ZEBRA_ROUTE_MAX;i++)
     {
@@ -2257,12 +2322,15 @@
 zebra_vty_init (void)
 {
   install_node (&ip_node, zebra_ip_config);
-  install_node (&protocol_node, config_write_protocol);
+  install_node (&protocol_node, config_write_vty);
 
   install_element (CONFIG_NODE, &ip_mroute_cmd);
   install_element (CONFIG_NODE, &ip_mroute_dist_cmd);
   install_element (CONFIG_NODE, &no_ip_mroute_cmd);
   install_element (CONFIG_NODE, &no_ip_mroute_dist_cmd);
+  install_element (CONFIG_NODE, &ip_multicast_mode_cmd);
+  install_element (CONFIG_NODE, &no_ip_multicast_mode_cmd);
+  install_element (CONFIG_NODE, &no_ip_multicast_mode_noarg_cmd);
   install_element (CONFIG_NODE, &ip_protocol_cmd);
   install_element (CONFIG_NODE, &no_ip_protocol_cmd);
   install_element (VIEW_NODE, &show_ip_protocol_cmd);
diff --git a/zebra/zserv.c b/zebra/zserv.c
index e678f3a..afd722a 100644
--- a/zebra/zserv.c
+++ b/zebra/zserv.c
@@ -605,27 +605,13 @@
   Returns both route metric and protocol distance.
 */
 static int
-zsend_ipv4_nexthop_lookup_mrib (struct zserv *client, struct in_addr addr)
+zsend_ipv4_nexthop_lookup_mrib (struct zserv *client, struct in_addr addr,
+				struct rib *rib)
 {
   struct stream *s;
-  struct rib *rib;
   unsigned long nump;
   u_char num;
   struct nexthop *nexthop;
-  int skip_bgp = 0; /* bool */
-
-  /* Lookup nexthop. */
-  rib = rib_match_ipv4_safi (addr, SAFI_MULTICAST, skip_bgp, NULL);
-
-  if (IS_ZEBRA_DEBUG_PACKET && IS_ZEBRA_DEBUG_RECV)
-    zlog_debug("%s: %s mrib entry found.", __func__, rib ? "Matching" : "No matching");
-
-  if (!rib) {
-    /* Retry lookup with unicast rib */
-    rib = rib_match_ipv4_safi (addr, SAFI_UNICAST, skip_bgp);
-    if (IS_ZEBRA_DEBUG_PACKET && IS_ZEBRA_DEBUG_RECV)
-      zlog_debug("%s: %s rib entry found.", __func__, rib ? "Matching" : "No matching");
-  }
 
   /* Get output stream. */
   s = client->obuf;
@@ -1009,9 +995,11 @@
 zread_ipv4_nexthop_lookup_mrib (struct zserv *client, u_short length)
 {
   struct in_addr addr;
+  struct rib *rib;
 
   addr.s_addr = stream_get_ipv4 (client->ibuf);
-  return zsend_ipv4_nexthop_lookup_mrib (client, addr);
+  rib = rib_match_ipv4_multicast (addr, NULL);
+  return zsend_ipv4_nexthop_lookup_mrib (client, addr, rib);
 }
 
 /* Nexthop lookup for IPv4. */