isisd: merge osr/google-is-is

this is essentially half of a rewrite of isisd. please note that a lot
of things are still broken and isisd is not ready for production use.
diff --git a/bgpd/Makefile.am b/bgpd/Makefile.am
index 1b17d38..3051555 100644
--- a/bgpd/Makefile.am
+++ b/bgpd/Makefile.am
@@ -15,14 +15,14 @@
 	bgp_debug.c bgp_route.c bgp_zebra.c bgp_open.c bgp_routemap.c \
 	bgp_packet.c bgp_network.c bgp_filter.c bgp_regex.c bgp_clist.c \
 	bgp_dump.c bgp_snmp.c bgp_ecommunity.c bgp_mplsvpn.c bgp_nexthop.c \
-	bgp_damp.c bgp_table.c bgp_advertise.c bgp_vty.c
+	bgp_damp.c bgp_table.c bgp_advertise.c bgp_vty.c bgp_mpath.c
 
 noinst_HEADERS = \
 	bgp_aspath.h bgp_attr.h bgp_community.h bgp_debug.h bgp_fsm.h \
 	bgp_network.h bgp_open.h bgp_packet.h bgp_regex.h bgp_route.h \
 	bgpd.h bgp_filter.h bgp_clist.h bgp_dump.h bgp_zebra.h \
 	bgp_ecommunity.h bgp_mplsvpn.h bgp_nexthop.h bgp_damp.h bgp_table.h \
-	bgp_advertise.h bgp_snmp.h bgp_vty.h
+	bgp_advertise.h bgp_snmp.h bgp_vty.h bgp_mpath.h
 
 bgpd_SOURCES = bgp_main.c
 bgpd_LDADD = libbgp.a ../lib/libzebra.la @LIBCAP@ @LIBM@
diff --git a/bgpd/bgp_aspath.c b/bgpd/bgp_aspath.c
index 776c712..64b3677 100644
--- a/bgpd/bgp_aspath.c
+++ b/bgpd/bgp_aspath.c
@@ -1819,7 +1819,7 @@
 }
 
 /* If two aspath have same value then return 1 else return 0 */
-static int
+int
 aspath_cmp (const void *arg1, const void *arg2)
 {
   const struct assegment *seg1 = ((const struct aspath *)arg1)->segments;
diff --git a/bgpd/bgp_aspath.h b/bgpd/bgp_aspath.h
index d55f9ce..fc4dd6b 100644
--- a/bgpd/bgp_aspath.h
+++ b/bgpd/bgp_aspath.h
@@ -72,6 +72,7 @@
 extern struct aspath *aspath_filter_exclude (struct aspath *, struct aspath *);
 extern struct aspath *aspath_add_seq (struct aspath *, as_t);
 extern struct aspath *aspath_add_confed_seq (struct aspath *, as_t);
+extern int aspath_cmp (const void *, const void *);
 extern int aspath_cmp_left (const struct aspath *, const struct aspath *);
 extern int aspath_cmp_left_confed (const struct aspath *, const struct aspath *);
 extern struct aspath *aspath_delete_confed_seq (struct aspath *);
diff --git a/bgpd/bgp_ecommunity.c b/bgpd/bgp_ecommunity.c
index 440c15a..9f4aaa4 100644
--- a/bgpd/bgp_ecommunity.c
+++ b/bgpd/bgp_ecommunity.c
@@ -99,7 +99,7 @@
 /* This function takes pointer to Extended Communites strucutre then
    create a new Extended Communities structure by uniq and sort each
    Extended Communities value.  */
-static struct ecommunity *
+struct ecommunity *
 ecommunity_uniq_sort (struct ecommunity *ecom)
 {
   int i;
diff --git a/bgpd/bgp_ecommunity.h b/bgpd/bgp_ecommunity.h
index 2f59dc4..92affcc 100644
--- a/bgpd/bgp_ecommunity.h
+++ b/bgpd/bgp_ecommunity.h
@@ -71,6 +71,7 @@
 extern struct ecommunity *ecommunity_parse (u_int8_t *, u_short);
 extern struct ecommunity *ecommunity_dup (struct ecommunity *);
 extern struct ecommunity *ecommunity_merge (struct ecommunity *, struct ecommunity *);
+extern struct ecommunity *ecommunity_uniq_sort (struct ecommunity *);
 extern struct ecommunity *ecommunity_intern (struct ecommunity *);
 extern int ecommunity_cmp (const void *, const void *);
 extern void ecommunity_unintern (struct ecommunity **);
diff --git a/bgpd/bgp_main.c b/bgpd/bgp_main.c
index 8dede58..0f1d482 100644
--- a/bgpd/bgp_main.c
+++ b/bgpd/bgp_main.c
@@ -35,6 +35,7 @@
 #include "routemap.h"
 #include "filter.h"
 #include "plist.h"
+#include "stream.h"
 
 #include "bgpd/bgpd.h"
 #include "bgpd/bgp_attr.h"
@@ -47,6 +48,7 @@
 #include "bgpd/bgp_clist.h"
 #include "bgpd/bgp_debug.h"
 #include "bgpd/bgp_filter.h"
+#include "bgpd/bgp_zebra.h"
 
 /* bgpd options, we use GNU getopt library. */
 static const struct option longopts[] = 
@@ -297,6 +299,8 @@
     zclient_free (zclient);
   if (zlookup)
     zclient_free (zlookup);
+  if (bgp_nexthop_buf)
+    stream_free (bgp_nexthop_buf);
 
   /* reverse bgp_master_init */
   if (master)
diff --git a/bgpd/bgp_mpath.c b/bgpd/bgp_mpath.c
new file mode 100644
index 0000000..d07830d
--- /dev/null
+++ b/bgpd/bgp_mpath.c
@@ -0,0 +1,737 @@
+/* $QuaggaId: Format:%an, %ai, %h$ $
+ *
+ * BGP Multipath
+ * Copyright (C) 2010 Google Inc.
+ *
+ * This file is part of Quagga
+ *
+ * Quagga is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * Quagga is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Quagga; see the file COPYING.  If not, write to the Free
+ * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ */
+
+#include <zebra.h>
+
+#include "command.h"
+#include "prefix.h"
+#include "linklist.h"
+#include "sockunion.h"
+#include "memory.h"
+
+#include "bgpd/bgpd.h"
+#include "bgpd/bgp_table.h"
+#include "bgpd/bgp_route.h"
+#include "bgpd/bgp_attr.h"
+#include "bgpd/bgp_debug.h"
+#include "bgpd/bgp_aspath.h"
+#include "bgpd/bgp_community.h"
+#include "bgpd/bgp_ecommunity.h"
+#include "bgpd/bgp_mpath.h"
+
+/*
+ * bgp_maximum_paths_set
+ *
+ * Record maximum-paths configuration for BGP instance
+ */
+int
+bgp_maximum_paths_set (struct bgp *bgp, afi_t afi, safi_t safi,
+                       int peertype, u_int16_t maxpaths)
+{
+  if (!bgp || (afi >= AFI_MAX) || (safi >= SAFI_MAX))
+    return -1;
+
+  switch (peertype)
+    {
+    case BGP_PEER_IBGP:
+      bgp->maxpaths[afi][safi].maxpaths_ibgp = maxpaths;
+      break;
+    case BGP_PEER_EBGP:
+      bgp->maxpaths[afi][safi].maxpaths_ebgp = maxpaths;
+      break;
+    default:
+      return -1;
+    }
+
+  return 0;
+}
+
+/*
+ * bgp_maximum_paths_unset
+ *
+ * Remove maximum-paths configuration from BGP instance
+ */
+int
+bgp_maximum_paths_unset (struct bgp *bgp, afi_t afi, safi_t safi,
+                         int peertype)
+{
+  if (!bgp || (afi >= AFI_MAX) || (safi >= SAFI_MAX))
+    return -1;
+
+  switch (peertype)
+    {
+    case BGP_PEER_IBGP:
+      bgp->maxpaths[afi][safi].maxpaths_ibgp = BGP_DEFAULT_MAXPATHS;
+      break;
+    case BGP_PEER_EBGP:
+      bgp->maxpaths[afi][safi].maxpaths_ebgp = BGP_DEFAULT_MAXPATHS;
+      break;
+    default:
+      return -1;
+    }
+
+  return 0;
+}
+
+/*
+ * bgp_info_nexthop_cmp
+ *
+ * Compare the nexthops of two paths. Return value is less than, equal to,
+ * or greater than zero if bi1 is respectively less than, equal to,
+ * or greater than bi2.
+ */
+static int
+bgp_info_nexthop_cmp (struct bgp_info *bi1, struct bgp_info *bi2)
+{
+  struct attr_extra *ae1, *ae2;
+  int compare;
+
+  ae1 = bi1->attr->extra;
+  ae2 = bi2->attr->extra;
+
+  compare = IPV4_ADDR_CMP (&bi1->attr->nexthop, &bi2->attr->nexthop);
+
+  if (!compare && ae1 && ae2 && (ae1->mp_nexthop_len == ae2->mp_nexthop_len))
+    {
+      switch (ae1->mp_nexthop_len)
+        {
+        case 4:
+        case 12:
+          compare = IPV4_ADDR_CMP (&ae1->mp_nexthop_global_in,
+                                   &ae2->mp_nexthop_global_in);
+          break;
+#ifdef HAVE_IPV6
+        case 16:
+          compare = IPV6_ADDR_CMP (&ae1->mp_nexthop_global,
+                                   &ae2->mp_nexthop_global);
+          break;
+        case 32:
+          compare = IPV6_ADDR_CMP (&ae1->mp_nexthop_global,
+                                   &ae2->mp_nexthop_global);
+          if (!compare)
+            compare = IPV6_ADDR_CMP (&ae1->mp_nexthop_local,
+                                     &ae2->mp_nexthop_local);
+          break;
+#endif /* HAVE_IPV6 */
+        }
+    }
+
+  return compare;
+}
+
+/*
+ * bgp_info_mpath_cmp
+ *
+ * This function determines our multipath list ordering. By ordering
+ * the list we can deterministically select which paths are included
+ * in the multipath set. The ordering also helps in detecting changes
+ * in the multipath selection so we can detect whether to send an
+ * update to zebra.
+ *
+ * The order of paths is determined first by received nexthop, and then
+ * by peer address if the nexthops are the same.
+ */
+static int
+bgp_info_mpath_cmp (void *val1, void *val2)
+{
+  struct bgp_info *bi1, *bi2;
+  int compare;
+
+  bi1 = val1;
+  bi2 = val2;
+
+  compare = bgp_info_nexthop_cmp (bi1, bi2);
+
+  if (!compare)
+    compare = sockunion_cmp (bi1->peer->su_remote, bi2->peer->su_remote);
+
+  return compare;
+}
+
+/*
+ * bgp_mp_list_init
+ *
+ * Initialize the mp_list, which holds the list of multipaths
+ * selected by bgp_best_selection
+ */
+void
+bgp_mp_list_init (struct list *mp_list)
+{
+  assert (mp_list);
+  memset (mp_list, 0, sizeof (struct list));
+  mp_list->cmp = bgp_info_mpath_cmp;
+}
+
+/*
+ * bgp_mp_list_clear
+ *
+ * Clears all entries out of the mp_list
+ */
+void
+bgp_mp_list_clear (struct list *mp_list)
+{
+  assert (mp_list);
+  list_delete_all_node (mp_list);
+}
+
+/*
+ * bgp_mp_list_add
+ *
+ * Adds a multipath entry to the mp_list
+ */
+void
+bgp_mp_list_add (struct list *mp_list, struct bgp_info *mpinfo)
+{
+  assert (mp_list && mpinfo);
+  listnode_add_sort (mp_list, mpinfo);
+}
+
+/*
+ * bgp_info_mpath_new
+ *
+ * Allocate and zero memory for a new bgp_info_mpath element
+ */
+static struct bgp_info_mpath *
+bgp_info_mpath_new (void)
+{
+  struct bgp_info_mpath *new_mpath;
+  new_mpath = XCALLOC (MTYPE_BGP_MPATH_INFO, sizeof (struct bgp_info_mpath));
+  return new_mpath;
+}
+
+/*
+ * bgp_info_mpath_free
+ *
+ * Release resources for a bgp_info_mpath element and zero out pointer
+ */
+void
+bgp_info_mpath_free (struct bgp_info_mpath **mpath)
+{
+  if (mpath && *mpath)
+    {
+      if ((*mpath)->mp_attr)
+        bgp_attr_unintern ((*mpath)->mp_attr);
+      XFREE (MTYPE_BGP_MPATH_INFO, *mpath);
+      *mpath = NULL;
+    }
+}
+
+/*
+ * bgp_info_mpath_get
+ *
+ * Fetch the mpath element for the given bgp_info. Used for
+ * doing lazy allocation.
+ */
+static struct bgp_info_mpath *
+bgp_info_mpath_get (struct bgp_info *binfo)
+{
+  struct bgp_info_mpath *mpath;
+  if (!binfo->mpath)
+    {
+      mpath = bgp_info_mpath_new();
+      if (!mpath)
+        return NULL;
+      binfo->mpath = mpath;
+      mpath->mp_info = binfo;
+    }
+  return binfo->mpath;
+}
+
+/*
+ * bgp_info_mpath_enqueue
+ *
+ * Enqueue a path onto the multipath list given the previous multipath
+ * list entry
+ */
+static void
+bgp_info_mpath_enqueue (struct bgp_info *prev_info, struct bgp_info *binfo)
+{
+  struct bgp_info_mpath *prev, *mpath;
+
+  prev = bgp_info_mpath_get (prev_info);
+  mpath = bgp_info_mpath_get (binfo);
+  if (!prev || !mpath)
+    return;
+
+  mpath->mp_next = prev->mp_next;
+  mpath->mp_prev = prev;
+  if (prev->mp_next)
+    prev->mp_next->mp_prev = mpath;
+  prev->mp_next = mpath;
+
+  SET_FLAG (binfo->flags, BGP_INFO_MULTIPATH);
+}
+
+/*
+ * bgp_info_mpath_dequeue
+ *
+ * Remove a path from the multipath list
+ */
+void
+bgp_info_mpath_dequeue (struct bgp_info *binfo)
+{
+  struct bgp_info_mpath *mpath = binfo->mpath;
+  if (!mpath)
+    return;
+  if (mpath->mp_prev)
+    mpath->mp_prev->mp_next = mpath->mp_next;
+  if (mpath->mp_next)
+    mpath->mp_next->mp_prev = mpath->mp_prev;
+  mpath->mp_next = mpath->mp_prev = NULL;
+  UNSET_FLAG (binfo->flags, BGP_INFO_MULTIPATH);
+}
+
+/*
+ * bgp_info_mpath_next
+ *
+ * Given a bgp_info, return the next multipath entry
+ */
+struct bgp_info *
+bgp_info_mpath_next (struct bgp_info *binfo)
+{
+  if (!binfo->mpath || !binfo->mpath->mp_next)
+    return NULL;
+  return binfo->mpath->mp_next->mp_info;
+}
+
+/*
+ * bgp_info_mpath_first
+ *
+ * Given bestpath bgp_info, return the first multipath entry.
+ */
+struct bgp_info *
+bgp_info_mpath_first (struct bgp_info *binfo)
+{
+  return bgp_info_mpath_next (binfo);
+}
+
+/*
+ * bgp_info_mpath_count
+ *
+ * Given the bestpath bgp_info, return the number of multipath entries
+ */
+u_int32_t
+bgp_info_mpath_count (struct bgp_info *binfo)
+{
+  if (!binfo->mpath)
+    return 0;
+  return binfo->mpath->mp_count;
+}
+
+/*
+ * bgp_info_mpath_count_set
+ *
+ * Sets the count of multipaths into bestpath's mpath element
+ */
+static void
+bgp_info_mpath_count_set (struct bgp_info *binfo, u_int32_t count)
+{
+  struct bgp_info_mpath *mpath;
+  if (!count && !binfo->mpath)
+    return;
+  mpath = bgp_info_mpath_get (binfo);
+  if (!mpath)
+    return;
+  mpath->mp_count = count;
+}
+
+/*
+ * bgp_info_mpath_attr
+ *
+ * Given bestpath bgp_info, return aggregated attribute set used
+ * for advertising the multipath route
+ */
+struct attr *
+bgp_info_mpath_attr (struct bgp_info *binfo)
+{
+  if (!binfo->mpath)
+    return NULL;
+  return binfo->mpath->mp_attr;
+}
+
+/*
+ * bgp_info_mpath_attr_set
+ *
+ * Sets the aggregated attribute into bestpath's mpath element
+ */
+static void
+bgp_info_mpath_attr_set (struct bgp_info *binfo, struct attr *attr)
+{
+  struct bgp_info_mpath *mpath;
+  if (!attr && !binfo->mpath)
+    return;
+  mpath = bgp_info_mpath_get (binfo);
+  if (!mpath)
+    return;
+  mpath->mp_attr = attr;
+}
+
+/*
+ * bgp_info_mpath_update
+ *
+ * Compare and sync up the multipath list with the mp_list generated by
+ * bgp_best_selection
+ */
+void
+bgp_info_mpath_update (struct bgp_node *rn, struct bgp_info *new_best,
+                       struct bgp_info *old_best, struct list *mp_list,
+                       struct bgp_maxpaths_cfg *mpath_cfg)
+{
+  u_int16_t maxpaths, mpath_count, old_mpath_count;
+  struct listnode *mp_node, *mp_next_node;
+  struct bgp_info *cur_mpath, *new_mpath, *next_mpath, *prev_mpath;
+  int mpath_changed, debug;
+  char pfx_buf[INET_ADDRSTRLEN], nh_buf[2][INET_ADDRSTRLEN];
+
+  mpath_changed = 0;
+  maxpaths = BGP_DEFAULT_MAXPATHS;
+  mpath_count = 0;
+  cur_mpath = NULL;
+  old_mpath_count = 0;
+  prev_mpath = new_best;
+  mp_node = listhead (mp_list);
+  debug = BGP_DEBUG (events, EVENTS);
+
+  if (debug)
+    prefix2str (&rn->p, pfx_buf, sizeof (pfx_buf));
+
+  if (new_best)
+    {
+      mpath_count++;
+      if (new_best != old_best)
+        bgp_info_mpath_dequeue (new_best);
+      maxpaths = (peer_sort (new_best->peer) == BGP_PEER_IBGP) ?
+        mpath_cfg->maxpaths_ibgp : mpath_cfg->maxpaths_ebgp;
+    }
+
+  if (old_best)
+    {
+      cur_mpath = bgp_info_mpath_first (old_best);
+      old_mpath_count = bgp_info_mpath_count (old_best);
+      bgp_info_mpath_count_set (old_best, 0);
+      bgp_info_mpath_dequeue (old_best);
+    }
+
+  /*
+   * We perform an ordered walk through both lists in parallel.
+   * The reason for the ordered walk is that if there are paths
+   * that were previously multipaths and are still multipaths, the walk
+   * should encounter them in both lists at the same time. Otherwise
+   * there will be paths that are in one list or another, and we
+   * will deal with these separately.
+   *
+   * Note that new_best might be somewhere in the mp_list, so we need
+   * to skip over it
+   */
+  while (mp_node || cur_mpath)
+    {
+      /*
+       * We can bail out of this loop if all existing paths on the
+       * multipath list have been visited (for cleanup purposes) and
+       * the maxpath requirement is fulfulled
+       */
+      if (!cur_mpath && (mpath_count >= maxpaths))
+        break;
+
+      mp_next_node = mp_node ? listnextnode (mp_node) : NULL;
+      next_mpath = cur_mpath ? bgp_info_mpath_next (cur_mpath) : NULL;
+
+      /*
+       * If equal, the path was a multipath and is still a multipath.
+       * Insert onto new multipath list if maxpaths allows.
+       */
+      if (mp_node && (listgetdata (mp_node) == cur_mpath))
+        {
+          list_delete_node (mp_list, mp_node);
+          bgp_info_mpath_dequeue (cur_mpath);
+          if ((mpath_count < maxpaths) &&
+              bgp_info_nexthop_cmp (prev_mpath, cur_mpath))
+            {
+              bgp_info_mpath_enqueue (prev_mpath, cur_mpath);
+              prev_mpath = cur_mpath;
+              mpath_count++;
+            }
+          else
+            {
+              mpath_changed = 1;
+              if (debug)
+                zlog_debug ("%s remove mpath nexthop %s peer %s", pfx_buf,
+                            inet_ntop (AF_INET, &cur_mpath->attr->nexthop,
+                                       nh_buf[0], sizeof (nh_buf[0])),
+                            sockunion2str (cur_mpath->peer->su_remote,
+                                           nh_buf[1], sizeof (nh_buf[1])));
+            }
+          mp_node = mp_next_node;
+          cur_mpath = next_mpath;
+          continue;
+        }
+
+      if (cur_mpath && (!mp_node ||
+                        (bgp_info_mpath_cmp (cur_mpath,
+                                             listgetdata (mp_node)) < 0)))
+        {
+          /*
+           * If here, we have an old multipath and either the mp_list
+           * is finished or the next mp_node points to a later
+           * multipath, so we need to purge this path from the
+           * multipath list
+           */
+          bgp_info_mpath_dequeue (cur_mpath);
+          mpath_changed = 1;
+          if (debug)
+            zlog_debug ("%s remove mpath nexthop %s peer %s", pfx_buf,
+                        inet_ntop (AF_INET, &cur_mpath->attr->nexthop,
+                                   nh_buf[0], sizeof (nh_buf[0])),
+                        sockunion2str (cur_mpath->peer->su_remote,
+                                       nh_buf[1], sizeof (nh_buf[1])));
+          cur_mpath = next_mpath;
+        }
+      else
+        {
+          /*
+           * If here, we have a path on the mp_list that was not previously
+           * a multipath (due to non-equivalance or maxpaths exceeded),
+           * or the matching multipath is sorted later in the multipath
+           * list. Before we enqueue the path on the new multipath list,
+           * make sure its not on the old_best multipath list or referenced
+           * via next_mpath:
+           * - If next_mpath points to this new path, update next_mpath to
+           *   point to the multipath after this one
+           * - Dequeue the path from the multipath list just to make sure
+           */
+          new_mpath = listgetdata (mp_node);
+          list_delete_node (mp_list, mp_node);
+          if ((mpath_count < maxpaths) && (new_mpath != new_best) &&
+              bgp_info_nexthop_cmp (prev_mpath, new_mpath))
+            {
+              if (new_mpath == next_mpath)
+                next_mpath = bgp_info_mpath_next (new_mpath);
+              bgp_info_mpath_dequeue (new_mpath);
+
+              bgp_info_mpath_enqueue (prev_mpath, new_mpath);
+              prev_mpath = new_mpath;
+              mpath_changed = 1;
+              mpath_count++;
+              if (debug)
+                zlog_debug ("%s add mpath nexthop %s peer %s", pfx_buf,
+                            inet_ntop (AF_INET, &new_mpath->attr->nexthop,
+                                       nh_buf[0], sizeof (nh_buf[0])),
+                            sockunion2str (new_mpath->peer->su_remote,
+                                           nh_buf[1], sizeof (nh_buf[1])));
+            }
+          mp_node = mp_next_node;
+        }
+    }
+
+  if (new_best)
+    {
+      bgp_info_mpath_count_set (new_best, mpath_count-1);
+      if (mpath_changed || (bgp_info_mpath_count (new_best) != old_mpath_count))
+        SET_FLAG (new_best->flags, BGP_INFO_MULTIPATH_CHG);
+    }
+}
+
+/*
+ * bgp_mp_dmed_deselect
+ *
+ * Clean up multipath information for BGP_INFO_DMED_SELECTED path that
+ * is not selected as best path
+ */
+void
+bgp_mp_dmed_deselect (struct bgp_info *dmed_best)
+{
+  struct bgp_info *mpinfo, *mpnext;
+
+  if (!dmed_best)
+    return;
+
+  for (mpinfo = bgp_info_mpath_first (dmed_best); mpinfo; mpinfo = mpnext)
+    {
+      mpnext = bgp_info_mpath_next (mpinfo);
+      bgp_info_mpath_dequeue (mpinfo);
+    }
+
+  bgp_info_mpath_count_set (dmed_best, 0);
+  UNSET_FLAG (dmed_best->flags, BGP_INFO_MULTIPATH_CHG);
+  assert (bgp_info_mpath_first (dmed_best) == 0);
+}
+
+/*
+ * bgp_info_mpath_aggregate_update
+ *
+ * Set the multipath aggregate attribute. We need to see if the
+ * aggregate has changed and then set the ATTR_CHANGED flag on the
+ * bestpath info so that a peer update will be generated. The
+ * change is detected by generating the current attribute,
+ * interning it, and then comparing the interned pointer with the
+ * current value. We can skip this generate/compare step if there
+ * is no change in multipath selection and no attribute change in
+ * any multipath.
+ */
+void
+bgp_info_mpath_aggregate_update (struct bgp_info *new_best,
+                                 struct bgp_info *old_best)
+{
+  struct bgp_info *mpinfo;
+  struct aspath *aspath;
+  struct aspath *asmerge;
+  struct attr *new_attr, *old_attr;
+  u_char origin, attr_chg;
+  struct community *community, *commerge;
+  struct ecommunity *ecomm, *ecommerge;
+  struct attr_extra *ae;
+  struct attr attr = { 0 };
+
+  if (old_best && (old_best != new_best) &&
+      (old_attr = bgp_info_mpath_attr (old_best)))
+    {
+      bgp_attr_unintern (old_attr);
+      bgp_info_mpath_attr_set (old_best, NULL);
+    }
+
+  if (!new_best)
+    return;
+
+  if (!bgp_info_mpath_count (new_best))
+    {
+      if ((new_attr = bgp_info_mpath_attr (new_best)))
+        {
+          bgp_attr_unintern (new_attr);
+          bgp_info_mpath_attr_set (new_best, NULL);
+          SET_FLAG (new_best->flags, BGP_INFO_ATTR_CHANGED);
+        }
+      return;
+    }
+
+  /*
+   * Bail out here if the following is true:
+   * - MULTIPATH_CHG bit is not set on new_best, and
+   * - No change in bestpath, and
+   * - ATTR_CHANGED bit is not set on new_best or any of the multipaths
+   */
+  if (!CHECK_FLAG (new_best->flags, BGP_INFO_MULTIPATH_CHG) &&
+      (old_best == new_best))
+    {
+      attr_chg = 0;
+
+      if (CHECK_FLAG (new_best->flags, BGP_INFO_ATTR_CHANGED))
+        attr_chg = 1;
+      else
+        for (mpinfo = bgp_info_mpath_first (new_best); mpinfo;
+             mpinfo = bgp_info_mpath_next (mpinfo))
+          {
+            if (CHECK_FLAG (mpinfo->flags, BGP_INFO_ATTR_CHANGED))
+              {
+                attr_chg = 1;
+                break;
+              }
+          }
+
+      if (!attr_chg)
+        {
+          assert (bgp_info_mpath_attr (new_best));
+          return;
+        }
+    }
+
+  bgp_attr_dup (&attr, new_best->attr);
+
+  /* aggregate attribute from multipath constituents */
+  aspath = aspath_dup (attr.aspath);
+  origin = attr.origin;
+  community = attr.community ? community_dup (attr.community) : NULL;
+  ae = attr.extra;
+  ecomm = (ae && ae->ecommunity) ? ecommunity_dup (ae->ecommunity) : NULL;
+
+  for (mpinfo = bgp_info_mpath_first (new_best); mpinfo;
+       mpinfo = bgp_info_mpath_next (mpinfo))
+    {
+      asmerge = aspath_aggregate (aspath, mpinfo->attr->aspath);
+      aspath_free (aspath);
+      aspath = asmerge;
+
+      if (origin < mpinfo->attr->origin)
+        origin = mpinfo->attr->origin;
+
+      if (mpinfo->attr->community)
+        {
+          if (community)
+            {
+              commerge = community_merge (community, mpinfo->attr->community);
+              community = community_uniq_sort (commerge);
+              community_free (commerge);
+            }
+          else
+            community = community_dup (mpinfo->attr->community);
+        }
+
+      ae = mpinfo->attr->extra;
+      if (ae && ae->ecommunity)
+        {
+          if (ecomm)
+            {
+              ecommerge = ecommunity_merge (ecomm, ae->ecommunity);
+              ecomm = ecommunity_uniq_sort (ecommerge);
+              ecommunity_free (ecommerge);
+            }
+          else
+            ecomm = ecommunity_dup (ae->ecommunity);
+        }
+    }
+
+  attr.aspath = aspath;
+  attr.origin = origin;
+  if (community)
+    {
+      attr.community = community;
+      attr.flag |= ATTR_FLAG_BIT (BGP_ATTR_COMMUNITIES);
+    }
+  if (ecomm)
+    {
+      ae = bgp_attr_extra_get (&attr);
+      ae->ecommunity = ecomm;
+      attr.flag |= ATTR_FLAG_BIT (BGP_ATTR_EXT_COMMUNITIES);
+    }
+
+  /* Zap multipath attr nexthop so we set nexthop to self */
+  attr.nexthop.s_addr = 0;
+#ifdef HAVE_IPV6
+  if (attr.extra)
+    memset (&attr.extra->mp_nexthop_global, 0, sizeof (struct in6_addr));
+#endif /* HAVE_IPV6 */
+
+  /* TODO: should we set ATOMIC_AGGREGATE and AGGREGATOR? */
+
+  new_attr = bgp_attr_intern (&attr);
+  bgp_attr_extra_free (&attr);
+
+  if (new_attr != bgp_info_mpath_attr (new_best))
+    {
+      if ((old_attr = bgp_info_mpath_attr (new_best)))
+        bgp_attr_unintern (old_attr);
+      bgp_info_mpath_attr_set (new_best, new_attr);
+      SET_FLAG (new_best->flags, BGP_INFO_ATTR_CHANGED);
+    }
+  else
+    bgp_attr_unintern (new_attr);
+}
diff --git a/bgpd/bgp_mpath.h b/bgpd/bgp_mpath.h
new file mode 100644
index 0000000..37b9ac8
--- /dev/null
+++ b/bgpd/bgp_mpath.h
@@ -0,0 +1,80 @@
+/* $QuaggaId: Format:%an, %ai, %h$ $
+ *
+ * BGP Multipath
+ * Copyright (C) 2010 Google Inc.
+ *
+ * This file is part of Quagga
+ *
+ * Quagga is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * Quagga is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Quagga; see the file COPYING.  If not, write to the Free
+ * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ */
+
+#ifndef _QUAGGA_BGP_MPATH_H
+#define _QUAGGA_BGP_MPATH_H
+
+/* BGP default maximum-paths */
+#define BGP_DEFAULT_MAXPATHS 1
+
+/* Supplemental information linked to bgp_info for keeping track of
+ * multipath selections, lazily allocated to save memory
+ */
+struct bgp_info_mpath
+{
+  /* Points to the first multipath (on bestpath) or the next multipath */
+  struct bgp_info_mpath *mp_next;
+
+  /* Points to the previous multipath or NULL on bestpath */
+  struct bgp_info_mpath *mp_prev;
+
+  /* Points to bgp_info associated with this multipath info */
+  struct bgp_info *mp_info;
+
+  /* When attached to best path, the number of selected multipaths */
+  u_int32_t mp_count;
+
+  /* Aggregated attribute for advertising multipath route */
+  struct attr *mp_attr;
+};
+
+/* Functions to support maximum-paths configuration */
+extern int bgp_maximum_paths_set (struct bgp *, afi_t, safi_t, int, u_int16_t);
+extern int bgp_maximum_paths_unset (struct bgp *, afi_t, safi_t, int);
+
+/* Functions used by bgp_best_selection to record current
+ * multipath selections
+ */
+extern void bgp_mp_list_init (struct list *);
+extern void bgp_mp_list_clear (struct list *);
+extern void bgp_mp_list_add (struct list *, struct bgp_info *);
+extern void bgp_mp_dmed_deselect (struct bgp_info *);
+extern void bgp_info_mpath_update (struct bgp_node *, struct bgp_info *,
+                                   struct bgp_info *, struct list *,
+                                   struct bgp_maxpaths_cfg *);
+extern void bgp_info_mpath_aggregate_update (struct bgp_info *,
+                                             struct bgp_info *);
+
+/* Unlink and free multipath information associated with a bgp_info */
+extern void bgp_info_mpath_dequeue (struct bgp_info *);
+extern void bgp_info_mpath_free (struct bgp_info_mpath **);
+
+/* Walk list of multipaths associated with a best path */
+extern struct bgp_info *bgp_info_mpath_first (struct bgp_info *);
+extern struct bgp_info *bgp_info_mpath_next (struct bgp_info *);
+
+/* Accessors for multipath information */
+extern u_int32_t bgp_info_mpath_count (struct bgp_info *);
+extern struct attr *bgp_info_mpath_attr (struct bgp_info *);
+
+#endif /* _QUAGGA_BGP_MPATH_H */
diff --git a/bgpd/bgp_open.c b/bgpd/bgp_open.c
index b5b50bb..0326d01 100644
--- a/bgpd/bgp_open.c
+++ b/bgpd/bgp_open.c
@@ -464,11 +464,16 @@
   [CAPABILITY_CODE_ORF_OLD]	= sizeof (struct capability_orf_entry),
 };
 
-/* Parse given capability.
+/**
+ * Parse given capability.
  * XXX: This is reading into a stream, but not using stream API
+ *
+ * @param[out] mp_capability Set to 1 on return iff one or more Multiprotocol
+ *                           capabilities were encountered.
  */
 static int
-bgp_capability_parse (struct peer *peer, size_t length, u_char **error)
+bgp_capability_parse (struct peer *peer, size_t length, int *mp_capability,
+		      u_char **error)
 {
   int ret;
   struct stream *s = BGP_INPUT (peer);
@@ -540,6 +545,8 @@
         {
           case CAPABILITY_CODE_MP:
             {
+	      *mp_capability = 1;
+
               /* Ignore capability when override-capability is set. */
               if (! CHECK_FLAG (peer->flags, PEER_FLAG_OVERRIDE_CAPABILITY))
                 {
@@ -712,9 +719,13 @@
   return as4;
 }
 
-/* Parse open option */
+/**
+ * Parse open option.
+ *
+ * @param[out] mp_capability @see bgp_capability_parse() for semantics.
+ */
 int
-bgp_open_option_parse (struct peer *peer, u_char length, int *capability)
+bgp_open_option_parse (struct peer *peer, u_char length, int *mp_capability)
 {
   int ret;
   u_char *error;
@@ -767,8 +778,7 @@
 	  ret = bgp_auth_parse (peer, opt_length);
 	  break;
 	case BGP_OPEN_OPT_CAP:
-	  ret = bgp_capability_parse (peer, opt_length, &error);
-	  *capability = 1;
+	  ret = bgp_capability_parse (peer, opt_length, mp_capability, &error);
 	  break;
 	default:
 	  bgp_notify_send (peer, 
@@ -811,9 +821,10 @@
 	}
     }
 
-  /* Check there is no common capability send Unsupported Capability
+  /* Check there are no common AFI/SAFIs and send Unsupported Capability
      error. */
-  if (*capability && ! CHECK_FLAG (peer->flags, PEER_FLAG_OVERRIDE_CAPABILITY))
+  if (*mp_capability &&
+      ! CHECK_FLAG (peer->flags, PEER_FLAG_OVERRIDE_CAPABILITY))
     {
       if (! peer->afc_nego[AFI_IP][SAFI_UNICAST] 
 	  && ! peer->afc_nego[AFI_IP][SAFI_MULTICAST]
@@ -821,7 +832,9 @@
 	  && ! peer->afc_nego[AFI_IP6][SAFI_UNICAST]
 	  && ! peer->afc_nego[AFI_IP6][SAFI_MULTICAST])
 	{
-	  plog_err (peer->log, "%s [Error] No common capability", peer->host);
+	  plog_err (peer->log, "%s [Error] Configured AFI/SAFIs do not "
+		    "overlap with received MP capabilities",
+		    peer->host);
 
 	  if (error != error_data)
 
diff --git a/bgpd/bgp_packet.c b/bgpd/bgp_packet.c
index 5d8087a..390b556 100644
--- a/bgpd/bgp_packet.c
+++ b/bgpd/bgp_packet.c
@@ -1152,7 +1152,7 @@
   as_t as4 = 0;
   struct peer *realpeer;
   struct in_addr remote_id;
-  int capability;
+  int mp_capability;
   u_int8_t notify_data_remote_as[2];
   u_int8_t notify_data_remote_id[4];
 
@@ -1174,7 +1174,7 @@
 	        inet_ntoa (remote_id));
   
   /* BEGIN to read the capability here, but dont do it yet */
-  capability = 0;
+  mp_capability = 0;
   optlen = stream_getc (peer->ibuf);
   
   if (optlen != 0)
@@ -1459,7 +1459,7 @@
   /* Open option part parse. */
   if (optlen != 0) 
     {
-      if ((ret = bgp_open_option_parse (peer, optlen, &capability)) < 0)
+      if ((ret = bgp_open_option_parse (peer, optlen, &mp_capability)) < 0)
         {
           bgp_notify_send (peer,
                  BGP_NOTIFY_OPEN_ERR,
@@ -1474,8 +1474,13 @@
 		   peer->host);
     }
 
-  /* Override capability. */
-  if (! capability || CHECK_FLAG (peer->flags, PEER_FLAG_OVERRIDE_CAPABILITY))
+  /* 
+   * Assume that the peer supports the locally configured set of
+   * AFI/SAFIs if the peer did not send us any Mulitiprotocol
+   * capabilities, or if 'override-capability' is configured.
+   */
+  if (! mp_capability ||
+      CHECK_FLAG (peer->flags, PEER_FLAG_OVERRIDE_CAPABILITY))
     {
       peer->afc_nego[AFI_IP][SAFI_UNICAST] = peer->afc[AFI_IP][SAFI_UNICAST];
       peer->afc_nego[AFI_IP][SAFI_MULTICAST] = peer->afc[AFI_IP][SAFI_MULTICAST];
diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c
index ba53032..087f839 100644
--- a/bgpd/bgp_route.c
+++ b/bgpd/bgp_route.c
@@ -54,6 +54,7 @@
 #include "bgpd/bgp_advertise.h"
 #include "bgpd/bgp_zebra.h"
 #include "bgpd/bgp_vty.h"
+#include "bgpd/bgp_mpath.h"
 
 /* Extern from bgp_dump.c */
 extern const char *bgp_origin_str[];
@@ -140,6 +141,7 @@
     bgp_attr_unintern (&binfo->attr);
   
   bgp_info_extra_free (&binfo->extra);
+  bgp_info_mpath_free (&binfo->mpath);
 
   peer_unlock (binfo->peer); /* bgp_info peer reference */
 
@@ -210,6 +212,7 @@
   else
     rn->info = ri->next;
   
+  bgp_info_mpath_dequeue (ri);
   bgp_info_unlock (ri);
   bgp_unlock_node (rn);
 }
@@ -316,7 +319,8 @@
 
 /* Compare two bgp route entity.  br is preferable then return 1. */
 static int
-bgp_info_cmp (struct bgp *bgp, struct bgp_info *new, struct bgp_info *exist)
+bgp_info_cmp (struct bgp *bgp, struct bgp_info *new, struct bgp_info *exist,
+	      int *paths_eq)
 {
   u_int32_t new_pref;
   u_int32_t exist_pref;
@@ -331,6 +335,9 @@
   int internal_as_route = 0;
   int confed_as_route = 0;
   int ret;
+  uint32_t newm, existm;
+
+  *paths_eq = 0;
 
   /* 0. Null check. */
   if (new == NULL)
@@ -454,18 +461,32 @@
     return 0;
 
   /* 8. IGP metric check. */
-  if (new->extra || exist->extra)
-    {
-      uint32_t newm = (new->extra ? new->extra->igpmetric : 0);
-      uint32_t existm = (exist->extra ? exist->extra->igpmetric : 0);
-      
-      if (newm < existm)
-        return 1;
-      if (newm > existm)
-        return 0;
-    }
+  newm = (new->extra ? new->extra->igpmetric : 0);
+  existm = (exist->extra ? exist->extra->igpmetric : 0);
+  if (newm < existm)
+    ret = 1;
+  if (newm > existm)
+    ret = 0;
 
   /* 9. Maximum path check. */
+  if (newm == existm)
+    {
+      if ((peer_sort (new->peer) == BGP_PEER_IBGP))
+	{
+	  if (aspath_cmp (new->attr->aspath, exist->attr->aspath))
+	    *paths_eq = 1;
+	}
+      else if (new->peer->as == exist->peer->as)
+	*paths_eq = 1;
+    }
+  else
+    {
+      /*
+       * TODO: If unequal cost ibgp multipath is enabled we can
+       * mark the paths as equal here instead of returning
+       */
+      return ret;
+    }
 
   /* 10. If both paths are external, prefer the path that was received
      first (the oldest one).  This step minimizes route-flap, since a
@@ -764,10 +785,12 @@
   struct bgp *bgp;
   int transparent;
   int reflect;
+  struct attr *riattr;
 
   from = ri->peer;
   filter = &peer->filter[afi][safi];
   bgp = peer->bgp;
+  riattr = bgp_info_mpath_count (ri) ? bgp_info_mpath_attr (ri) : ri->attr;
   
   if (DISABLE_BGP_ANNOUNCE)
     return 0;
@@ -782,11 +805,11 @@
 
   /* If peer's id and route's nexthop are same. draft-ietf-idr-bgp4-23 5.1.3 */
   if (p->family == AF_INET
-      && IPV4_ADDR_SAME(&peer->remote_id, &ri->attr->nexthop))
+      && IPV4_ADDR_SAME(&peer->remote_id, &riattr->nexthop))
     return 0;
 #ifdef HAVE_IPV6
   if (p->family == AF_INET6
-     && IPV6_ADDR_SAME(&peer->remote_id, &ri->attr->nexthop))
+     && IPV6_ADDR_SAME(&peer->remote_id, &riattr->nexthop))
     return 0;
 #endif
 
@@ -814,14 +837,14 @@
     transparent = 0;
 
   /* If community is not disabled check the no-export and local. */
-  if (! transparent && bgp_community_filter (peer, ri->attr)) 
+  if (! transparent && bgp_community_filter (peer, riattr))
     return 0;
 
   /* If the attribute has originator-id and it is same as remote
      peer's id. */
-  if (ri->attr->flag & ATTR_FLAG_BIT (BGP_ATTR_ORIGINATOR_ID))
+  if (riattr->flag & ATTR_FLAG_BIT (BGP_ATTR_ORIGINATOR_ID))
     {
-      if (IPV4_ADDR_SAME (&peer->remote_id, &ri->attr->extra->originator_id))
+      if (IPV4_ADDR_SAME (&peer->remote_id, &riattr->extra->originator_id))
 	{
 	  if (BGP_DEBUG (filter, FILTER))  
 	    zlog (peer->log, LOG_DEBUG,
@@ -844,7 +867,7 @@
       }
 
   /* Output filter check. */
-  if (bgp_output_filter (peer, p, ri->attr, afi, safi) == FILTER_DENY)
+  if (bgp_output_filter (peer, p, riattr, afi, safi) == FILTER_DENY)
     {
       if (BGP_DEBUG (filter, FILTER))
 	zlog (peer->log, LOG_DEBUG,
@@ -857,7 +880,7 @@
 
 #ifdef BGP_SEND_ASPATH_CHECK
   /* AS path loop check. */
-  if (aspath_loop_check (ri->attr->aspath, peer->as))
+  if (aspath_loop_check (riattr->aspath, peer->as))
     {
       if (BGP_DEBUG (filter, FILTER))  
         zlog (peer->log, LOG_DEBUG, 
@@ -870,7 +893,7 @@
   /* If we're a CONFED we need to loop check the CONFED ID too */
   if (CHECK_FLAG(bgp->config, BGP_CONFIG_CONFEDERATION))
     {
-      if (aspath_loop_check(ri->attr->aspath, bgp->confed_id))
+      if (aspath_loop_check(riattr->aspath, bgp->confed_id))
 	{
 	  if (BGP_DEBUG (filter, FILTER))  
 	    zlog (peer->log, LOG_DEBUG, 
@@ -911,7 +934,7 @@
     }
   
   /* For modify attribute, copy it to temporary structure. */
-  bgp_attr_dup (attr, ri->attr);
+  bgp_attr_dup (attr, riattr);
   
   /* If local-preference is not set. */
   if ((peer_sort (peer) == BGP_PEER_IBGP 
@@ -1069,9 +1092,11 @@
   struct bgp_filter *filter;
   struct bgp_info info;
   struct peer *from;
+  struct attr *riattr;
 
   from = ri->peer;
   filter = &rsclient->filter[afi][safi];
+  riattr = bgp_info_mpath_count (ri) ? bgp_info_mpath_attr (ri) : ri->attr;
 
   if (DISABLE_BGP_ANNOUNCE)
     return 0;
@@ -1099,10 +1124,10 @@
 
   /* If the attribute has originator-id and it is same as remote
      peer's id. */
-  if (ri->attr->flag & ATTR_FLAG_BIT (BGP_ATTR_ORIGINATOR_ID))
+  if (riattr->flag & ATTR_FLAG_BIT (BGP_ATTR_ORIGINATOR_ID))
     {
       if (IPV4_ADDR_SAME (&rsclient->remote_id, 
-                          &ri->attr->extra->originator_id))
+                          &riattr->extra->originator_id))
         {
          if (BGP_DEBUG (filter, FILTER))
            zlog (rsclient->log, LOG_DEBUG,
@@ -1125,7 +1150,7 @@
       }
 
   /* Output filter check. */
-  if (bgp_output_filter (rsclient, p, ri->attr, afi, safi) == FILTER_DENY)
+  if (bgp_output_filter (rsclient, p, riattr, afi, safi) == FILTER_DENY)
     {
       if (BGP_DEBUG (filter, FILTER))
        zlog (rsclient->log, LOG_DEBUG,
@@ -1138,7 +1163,7 @@
 
 #ifdef BGP_SEND_ASPATH_CHECK
   /* AS path loop check. */
-  if (aspath_loop_check (ri->attr->aspath, rsclient->as))
+  if (aspath_loop_check (riattr->aspath, rsclient->as))
     {
       if (BGP_DEBUG (filter, FILTER))
         zlog (rsclient->log, LOG_DEBUG,
@@ -1149,7 +1174,7 @@
 #endif /* BGP_SEND_ASPATH_CHECK */
 
   /* For modify attribute, copy it to temporary structure. */
-  bgp_attr_dup (attr, ri->attr);
+  bgp_attr_dup (attr, riattr);
 
   /* next-hop-set */
   if ((p->family == AF_INET && attr->nexthop.s_addr == 0)
@@ -1265,7 +1290,9 @@
 };
 
 static void
-bgp_best_selection (struct bgp *bgp, struct bgp_node *rn, struct bgp_info_pair *result)
+bgp_best_selection (struct bgp *bgp, struct bgp_node *rn,
+		    struct bgp_maxpaths_cfg *mpath_cfg,
+		    struct bgp_info_pair *result)
 {
   struct bgp_info *new_select;
   struct bgp_info *old_select;
@@ -1273,7 +1300,13 @@
   struct bgp_info *ri1;
   struct bgp_info *ri2;
   struct bgp_info *nextri = NULL;
-  
+  int paths_eq, do_mpath;
+  struct list mp_list;
+
+  bgp_mp_list_init (&mp_list);
+  do_mpath = (mpath_cfg->maxpaths_ebgp != BGP_DEFAULT_MAXPATHS ||
+	      mpath_cfg->maxpaths_ibgp != BGP_DEFAULT_MAXPATHS);
+
   /* bgp deterministic-med */
   new_select = NULL;
   if (bgp_flag_check (bgp, BGP_FLAG_DETERMINISTIC_MED))
@@ -1285,6 +1318,9 @@
 	  continue;
 
 	new_select = ri1;
+	if (do_mpath)
+	  bgp_mp_list_add (&mp_list, ri1);
+	old_select = CHECK_FLAG (ri1->flags, BGP_INFO_SELECTED) ? ri1 : NULL;
 	if (ri1->next)
 	  for (ri2 = ri1->next; ri2; ri2 = ri2->next)
 	    {
@@ -1297,17 +1333,30 @@
 		  || aspath_cmp_left_confed (ri1->attr->aspath,
 					     ri2->attr->aspath))
 		{
-		  if (bgp_info_cmp (bgp, ri2, new_select))
+		  if (CHECK_FLAG (ri2->flags, BGP_INFO_SELECTED))
+		    old_select = ri2;
+		  if (bgp_info_cmp (bgp, ri2, new_select, &paths_eq))
 		    {
 		      bgp_info_unset_flag (rn, new_select, BGP_INFO_DMED_SELECTED);
 		      new_select = ri2;
+		      if (do_mpath && !paths_eq)
+			{
+			  bgp_mp_list_clear (&mp_list);
+			  bgp_mp_list_add (&mp_list, ri2);
+			}
 		    }
 
+		  if (do_mpath && paths_eq)
+		    bgp_mp_list_add (&mp_list, ri2);
+
 		  bgp_info_set_flag (rn, ri2, BGP_INFO_DMED_CHECK);
 		}
 	    }
 	bgp_info_set_flag (rn, new_select, BGP_INFO_DMED_CHECK);
 	bgp_info_set_flag (rn, new_select, BGP_INFO_DMED_SELECTED);
+
+	bgp_info_mpath_update (rn, new_select, old_select, &mp_list, mpath_cfg);
+	bgp_mp_list_clear (&mp_list);
       }
 
   /* Check old selected route and new selected route. */
@@ -1339,14 +1388,37 @@
       bgp_info_unset_flag (rn, ri, BGP_INFO_DMED_CHECK);
       bgp_info_unset_flag (rn, ri, BGP_INFO_DMED_SELECTED);
 
-      if (bgp_info_cmp (bgp, ri, new_select))
-	new_select = ri;
+      if (bgp_info_cmp (bgp, ri, new_select, &paths_eq))
+	{
+	  if (do_mpath && bgp_flag_check (bgp, BGP_FLAG_DETERMINISTIC_MED))
+	    bgp_mp_dmed_deselect (new_select);
+
+	  new_select = ri;
+
+	  if (do_mpath && !paths_eq)
+	    {
+	      bgp_mp_list_clear (&mp_list);
+	      bgp_mp_list_add (&mp_list, ri);
+	    }
+	}
+      else if (do_mpath && bgp_flag_check (bgp, BGP_FLAG_DETERMINISTIC_MED))
+	bgp_mp_dmed_deselect (ri);
+
+      if (do_mpath && paths_eq)
+	bgp_mp_list_add (&mp_list, ri);
     }
     
-    result->old = old_select;
-    result->new = new_select;
 
-    return;
+  if (!bgp_flag_check (bgp, BGP_FLAG_DETERMINISTIC_MED))
+    bgp_info_mpath_update (rn, new_select, old_select, &mp_list, mpath_cfg);
+
+  bgp_info_mpath_aggregate_update (new_select, old_select);
+  bgp_mp_list_clear (&mp_list);
+
+  result->old = old_select;
+  result->new = new_select;
+
+  return;
 }
 
 static int
@@ -1420,7 +1492,7 @@
   struct peer *rsclient = rn->table->owner;
   
   /* Best path selection. */
-  bgp_best_selection (bgp, rn, &old_and_new);
+  bgp_best_selection (bgp, rn, &bgp->maxpaths[afi][safi], &old_and_new);
   new_select = old_and_new.new;
   old_select = old_and_new.old;
 
@@ -1440,7 +1512,8 @@
               {
                 bgp_info_set_flag (rn, new_select, BGP_INFO_SELECTED);
                 bgp_info_unset_flag (rn, new_select, BGP_INFO_ATTR_CHANGED);
-              }
+		UNSET_FLAG (new_select->flags, BGP_INFO_MULTIPATH_CHG);
+             }
 
             bgp_process_announce_selected (rsclient, new_select, rn,
                                            afi, safi);
@@ -1454,6 +1527,7 @@
 	{
 	  bgp_info_set_flag (rn, new_select, BGP_INFO_SELECTED);
 	  bgp_info_unset_flag (rn, new_select, BGP_INFO_ATTR_CHANGED);
+	  UNSET_FLAG (new_select->flags, BGP_INFO_MULTIPATH_CHG);
 	}
       bgp_process_announce_selected (rsclient, new_select, rn, afi, safi);
     }
@@ -1481,7 +1555,7 @@
   struct peer *peer;
   
   /* Best path selection. */
-  bgp_best_selection (bgp, rn, &old_and_new);
+  bgp_best_selection (bgp, rn, &bgp->maxpaths[afi][safi], &old_and_new);
   old_select = old_and_new.old;
   new_select = old_and_new.new;
 
@@ -1490,9 +1564,11 @@
     {
       if (! CHECK_FLAG (old_select->flags, BGP_INFO_ATTR_CHANGED))
         {
-          if (CHECK_FLAG (old_select->flags, BGP_INFO_IGP_CHANGED))
+          if (CHECK_FLAG (old_select->flags, BGP_INFO_IGP_CHANGED) ||
+	      CHECK_FLAG (old_select->flags, BGP_INFO_MULTIPATH_CHG))
             bgp_zebra_announce (p, old_select, bgp, safi);
           
+	  UNSET_FLAG (old_select->flags, BGP_INFO_MULTIPATH_CHG);
           UNSET_FLAG (rn->flags, BGP_NODE_PROCESS_SCHEDULED);
           return WQ_SUCCESS;
         }
@@ -1504,6 +1580,7 @@
     {
       bgp_info_set_flag (rn, new_select, BGP_INFO_SELECTED);
       bgp_info_unset_flag (rn, new_select, BGP_INFO_ATTR_CHANGED);
+      UNSET_FLAG (new_select->flags, BGP_INFO_MULTIPATH_CHG);
     }
 
 
@@ -5932,6 +6009,11 @@
       if (attr->flag & ATTR_FLAG_BIT(BGP_ATTR_ATOMIC_AGGREGATE))
 	vty_out (vty, ", atomic-aggregate");
 	  
+      if (CHECK_FLAG (binfo->flags, BGP_INFO_MULTIPATH) ||
+	  (CHECK_FLAG (binfo->flags, BGP_INFO_SELECTED) &&
+	   bgp_info_mpath_count (binfo)))
+	vty_out (vty, ", multipath");
+
       if (CHECK_FLAG (binfo->flags, BGP_INFO_SELECTED))
 	vty_out (vty, ", best");
 
diff --git a/bgpd/bgp_route.h b/bgpd/bgp_route.h
index 7adc573..3d2eea5 100644
--- a/bgpd/bgp_route.h
+++ b/bgpd/bgp_route.h
@@ -57,6 +57,10 @@
   /* Extra information */
   struct bgp_info_extra *extra;
   
+
+  /* Multipath information */
+  struct bgp_info_mpath *mpath;
+
   /* Uptime.  */
   time_t uptime;
 
@@ -76,6 +80,8 @@
 #define BGP_INFO_STALE          (1 << 8)
 #define BGP_INFO_REMOVED        (1 << 9)
 #define BGP_INFO_COUNTED	(1 << 10)
+#define BGP_INFO_MULTIPATH      (1 << 11)
+#define BGP_INFO_MULTIPATH_CHG  (1 << 12)
 
   /* BGP route type.  This can be static, RIP, OSPF, BGP etc.  */
   u_char type;
diff --git a/bgpd/bgp_vty.c b/bgpd/bgp_vty.c
index f65bb15..cbe0b44 100644
--- a/bgpd/bgp_vty.c
+++ b/bgpd/bgp_vty.c
@@ -48,6 +48,7 @@
 #include "bgpd/bgp_zebra.h"
 #include "bgpd/bgp_table.h"
 #include "bgpd/bgp_vty.h"
+#include "bgpd/bgp_mpath.h"
 
 extern struct in_addr router_id_zebra;
 
@@ -650,6 +651,149 @@
   return CMD_SUCCESS;
 }
 
+/* Maximum-paths configuration */
+DEFUN (bgp_maxpaths,
+       bgp_maxpaths_cmd,
+       "maximum-paths <1-255>",
+       "Forward packets over multiple paths\n"
+       "Number of paths\n")
+{
+  struct bgp *bgp;
+  u_int16_t maxpaths;
+  int ret;
+
+  bgp = vty->index;
+
+  VTY_GET_INTEGER_RANGE ("maximum-paths", maxpaths, argv[0], 1, 255);
+
+  ret = bgp_maximum_paths_set (bgp, bgp_node_afi (vty), bgp_node_safi(vty),
+			       BGP_PEER_EBGP, maxpaths);
+  if (ret < 0)
+    {
+      vty_out (vty,
+	       "%% Failed to set maximum-paths %u for afi %u, safi %u%s",
+	       maxpaths, bgp_node_afi (vty), bgp_node_safi(vty), VTY_NEWLINE);
+      return CMD_WARNING;
+    }
+
+  return CMD_SUCCESS;
+}
+
+DEFUN (bgp_maxpaths_ibgp,
+       bgp_maxpaths_ibgp_cmd,
+       "maximum-paths ibgp <1-255>",
+       "Forward packets over multiple paths\n"
+       "iBGP-multipath\n"
+       "Number of paths\n")
+{
+  struct bgp *bgp;
+  u_int16_t maxpaths;
+  int ret;
+
+  bgp = vty->index;
+
+  VTY_GET_INTEGER_RANGE ("maximum-paths", maxpaths, argv[0], 1, 255);
+
+  ret = bgp_maximum_paths_set (bgp, bgp_node_afi (vty), bgp_node_safi(vty),
+			       BGP_PEER_IBGP, maxpaths);
+  if (ret < 0)
+    {
+      vty_out (vty,
+	       "%% Failed to set maximum-paths ibgp %u for afi %u, safi %u%s",
+	       maxpaths, bgp_node_afi (vty), bgp_node_safi(vty), VTY_NEWLINE);
+      return CMD_WARNING;
+    }
+
+  return CMD_SUCCESS;
+}
+
+DEFUN (no_bgp_maxpaths,
+       no_bgp_maxpaths_cmd,
+       "no maximum-paths",
+       NO_STR
+       "Forward packets over multiple paths\n"
+       "Number of paths\n")
+{
+  struct bgp *bgp;
+  int ret;
+
+  bgp = vty->index;
+
+  ret = bgp_maximum_paths_unset (bgp, bgp_node_afi (vty), bgp_node_safi(vty),
+				 BGP_PEER_EBGP);
+  if (ret < 0)
+    {
+      vty_out (vty,
+	       "%% Failed to unset maximum-paths for afi %u, safi %u%s",
+	       bgp_node_afi (vty), bgp_node_safi(vty), VTY_NEWLINE);
+      return CMD_WARNING;
+    }
+
+  return CMD_SUCCESS;
+}
+
+ALIAS (no_bgp_maxpaths,
+       no_bgp_maxpaths_arg_cmd,
+       "no maximum-paths <1-255>",
+       NO_STR
+       "Forward packets over multiple paths\n"
+       "Number of paths\n")
+
+DEFUN (no_bgp_maxpaths_ibgp,
+       no_bgp_maxpaths_ibgp_cmd,
+       "no maximum-paths ibgp",
+       NO_STR
+       "Forward packets over multiple paths\n"
+       "iBGP-multipath\n"
+       "Number of paths\n")
+{
+  struct bgp *bgp;
+  int ret;
+
+  bgp = vty->index;
+
+  ret = bgp_maximum_paths_unset (bgp, bgp_node_afi (vty), bgp_node_safi(vty),
+				 BGP_PEER_IBGP);
+  if (ret < 0)
+    {
+      vty_out (vty,
+	       "%% Failed to unset maximum-paths ibgp for afi %u, safi %u%s",
+	       bgp_node_afi (vty), bgp_node_safi(vty), VTY_NEWLINE);
+      return CMD_WARNING;
+    }
+
+  return CMD_SUCCESS;
+}
+
+ALIAS (no_bgp_maxpaths_ibgp,
+       no_bgp_maxpaths_ibgp_arg_cmd,
+       "no maximum-paths ibgp <1-255>",
+       NO_STR
+       "Forward packets over multiple paths\n"
+       "iBGP-multipath\n"
+       "Number of paths\n")
+
+int
+bgp_config_write_maxpaths (struct vty *vty, struct bgp *bgp, afi_t afi,
+			   safi_t safi, int *write)
+{
+  if (bgp->maxpaths[afi][safi].maxpaths_ebgp != BGP_DEFAULT_MAXPATHS)
+    {
+      bgp_config_write_family_header (vty, afi, safi, write);
+      vty_out (vty, " maximum-paths %d%s",
+	       bgp->maxpaths[afi][safi].maxpaths_ebgp, VTY_NEWLINE);
+    }
+
+  if (bgp->maxpaths[afi][safi].maxpaths_ibgp != BGP_DEFAULT_MAXPATHS)
+    {
+      bgp_config_write_family_header (vty, afi, safi, write);
+      vty_out (vty, " maximum-paths ibgp %d%s",
+	       bgp->maxpaths[afi][safi].maxpaths_ibgp, VTY_NEWLINE);
+    }
+
+  return 0;
+}
+
 /* BGP timers.  */
 
 DEFUN (bgp_timers,
@@ -8937,6 +9081,20 @@
   install_element (BGP_NODE, &bgp_confederation_peers_cmd);
   install_element (BGP_NODE, &no_bgp_confederation_peers_cmd);
 
+  /* "maximum-paths" commands. */
+  install_element (BGP_NODE, &bgp_maxpaths_cmd);
+  install_element (BGP_NODE, &no_bgp_maxpaths_cmd);
+  install_element (BGP_NODE, &no_bgp_maxpaths_arg_cmd);
+  install_element (BGP_IPV4_NODE, &bgp_maxpaths_cmd);
+  install_element (BGP_IPV4_NODE, &no_bgp_maxpaths_cmd);
+  install_element (BGP_IPV4_NODE, &no_bgp_maxpaths_arg_cmd);
+  install_element (BGP_NODE, &bgp_maxpaths_ibgp_cmd);
+  install_element (BGP_NODE, &no_bgp_maxpaths_ibgp_cmd);
+  install_element (BGP_NODE, &no_bgp_maxpaths_ibgp_arg_cmd);
+  install_element (BGP_IPV4_NODE, &bgp_maxpaths_ibgp_cmd);
+  install_element (BGP_IPV4_NODE, &no_bgp_maxpaths_ibgp_cmd);
+  install_element (BGP_IPV4_NODE, &no_bgp_maxpaths_ibgp_arg_cmd);
+
   /* "timers bgp" commands. */
   install_element (BGP_NODE, &bgp_timers_cmd);
   install_element (BGP_NODE, &no_bgp_timers_cmd);
diff --git a/bgpd/bgp_zebra.c b/bgpd/bgp_zebra.c
index 20feba0..5c0dbb8 100644
--- a/bgpd/bgp_zebra.c
+++ b/bgpd/bgp_zebra.c
@@ -37,11 +37,15 @@
 #include "bgpd/bgp_zebra.h"
 #include "bgpd/bgp_fsm.h"
 #include "bgpd/bgp_debug.h"
+#include "bgpd/bgp_mpath.h"
 
 /* All information about zebra. */
 struct zclient *zclient = NULL;
 struct in_addr router_id_zebra;
 
+/* Growable buffer for nexthops sent to zebra */
+struct stream *bgp_nexthop_buf = NULL;
+
 /* Router-id update message from zebra. */
 static int
 bgp_router_id_update (int command, struct zclient *zclient, zebra_size_t length)
@@ -648,6 +652,8 @@
   int flags;
   u_char distance;
   struct peer *peer;
+  struct bgp_info *mpinfo;
+  size_t oldsize, newsize;
 
   if (zclient->sock < 0)
     return;
@@ -668,6 +674,21 @@
       || CHECK_FLAG (peer->flags, PEER_FLAG_DISABLE_CONNECTED_CHECK))
     SET_FLAG (flags, ZEBRA_FLAG_INTERNAL);
 
+  /* resize nexthop buffer size if necessary */
+  if ((oldsize = stream_get_size (bgp_nexthop_buf)) <
+      (sizeof (struct in_addr *) * (bgp_info_mpath_count (info) + 1)))
+    {
+      newsize = (sizeof (struct in_addr *) * (bgp_info_mpath_count (info) + 1));
+      newsize = stream_resize (bgp_nexthop_buf, newsize);
+      if (newsize == oldsize)
+	{
+	  zlog_err ("can't resize nexthop buffer");
+	  return;
+	}
+    }
+
+  stream_reset (bgp_nexthop_buf);
+
   if (p->family == AF_INET)
     {
       struct zapi_ipv4 api;
@@ -675,13 +696,20 @@
 
       api.flags = flags;
       nexthop = &info->attr->nexthop;
+      stream_put (bgp_nexthop_buf, &nexthop, sizeof (struct in_addr *));
+      for (mpinfo = bgp_info_mpath_first (info); mpinfo;
+	   mpinfo = bgp_info_mpath_next (mpinfo))
+	{
+	  nexthop = &mpinfo->attr->nexthop;
+	  stream_put (bgp_nexthop_buf, &nexthop, sizeof (struct in_addr *));
+	}
 
       api.type = ZEBRA_ROUTE_BGP;
       api.message = 0;
       api.safi = safi;
       SET_FLAG (api.message, ZAPI_MESSAGE_NEXTHOP);
-      api.nexthop_num = 1;
-      api.nexthop = &nexthop;
+      api.nexthop_num = 1 + bgp_info_mpath_count (info);
+      api.nexthop = (struct in_addr **)STREAM_DATA (bgp_nexthop_buf);
       api.ifindex_num = 0;
       SET_FLAG (api.message, ZAPI_MESSAGE_METRIC);
       api.metric = info->attr->med;
@@ -696,12 +724,18 @@
 
       if (BGP_DEBUG(zebra, ZEBRA))
 	{
+	  int i;
 	  char buf[2][INET_ADDRSTRLEN];
-	  zlog_debug("Zebra send: IPv4 route add %s/%d nexthop %s metric %u",
+	  zlog_debug("Zebra send: IPv4 route add %s/%d nexthop %s metric %u"
+		     " count %d",
 		     inet_ntop(AF_INET, &p->u.prefix4, buf[0], sizeof(buf[0])),
 		     p->prefixlen,
-		     inet_ntop(AF_INET, nexthop, buf[1], sizeof(buf[1])),
-		     api.metric);
+		     inet_ntop(AF_INET, api.nexthop[0], buf[1], sizeof(buf[1])),
+		     api.metric, api.nexthop_num);
+	  for (i = 1; i < api.nexthop_num; i++)
+	    zlog_debug("Zebra send: IPv4 route add [nexthop %d] %s",
+		       i, inet_ntop(AF_INET, api.nexthop[i], buf[1],
+				    sizeof(buf[1])));
 	}
 
       zapi_ipv4_route (ZEBRA_IPV4_ROUTE_ADD, zclient, 
@@ -1050,4 +1084,6 @@
 
   /* Interface related init. */
   if_init ();
+
+  bgp_nexthop_buf = stream_new(BGP_NEXTHOP_BUF_SIZE);
 }
diff --git a/bgpd/bgp_zebra.h b/bgpd/bgp_zebra.h
index 1351333..8099193 100644
--- a/bgpd/bgp_zebra.h
+++ b/bgpd/bgp_zebra.h
@@ -21,8 +21,14 @@
 #ifndef _QUAGGA_BGP_ZEBRA_H
 #define _QUAGGA_BGP_ZEBRA_H
 
+#define BGP_NEXTHOP_BUF_SIZE (8 * sizeof (struct in_addr *))
+
+extern struct stream *bgp_nexthop_buf;
+
 extern void bgp_zebra_init (void);
 extern int bgp_if_update_all (void);
+extern int bgp_config_write_maxpaths (struct vty *, struct bgp *, afi_t,
+				      safi_t, int *);
 extern int bgp_config_write_redistribute (struct vty *, struct bgp *, afi_t, safi_t,
 				   int *);
 extern void bgp_zebra_announce (struct prefix *, struct bgp_info *, struct bgp *, safi_t);
diff --git a/bgpd/bgpd.c b/bgpd/bgpd.c
index 5dc014b..9c8eda8 100644
--- a/bgpd/bgpd.c
+++ b/bgpd/bgpd.c
@@ -57,6 +57,7 @@
 #include "bgpd/bgp_advertise.h"
 #include "bgpd/bgp_network.h"
 #include "bgpd/bgp_vty.h"
+#include "bgpd/bgp_mpath.h"
 #ifdef HAVE_SNMP
 #include "bgpd/bgp_snmp.h"
 #endif /* HAVE_SNMP */
@@ -1947,6 +1948,8 @@
 	bgp->route[afi][safi] = bgp_table_init (afi, safi);
 	bgp->aggregate[afi][safi] = bgp_table_init (afi, safi);
 	bgp->rib[afi][safi] = bgp_table_init (afi, safi);
+	bgp->maxpaths[afi][safi].maxpaths_ebgp = BGP_DEFAULT_MAXPATHS;
+	bgp->maxpaths[afi][safi].maxpaths_ibgp = BGP_DEFAULT_MAXPATHS;
       }
 
   bgp->default_local_pref = BGP_DEFAULT_LOCAL_PREF;
@@ -5121,6 +5124,9 @@
 	    }
 	}
     }
+
+  bgp_config_write_maxpaths (vty, bgp, afi, safi, &write);
+
   if (write)
     vty_out (vty, " exit-address-family%s", VTY_NEWLINE);
 
@@ -5294,6 +5300,9 @@
 	    bgp_config_write_peer (vty, bgp, peer, AFI_IP, SAFI_UNICAST);
 	}
 
+      /* maximum-paths */
+      bgp_config_write_maxpaths (vty, bgp, AFI_IP, SAFI_UNICAST, &write);
+
       /* Distance configuration. */
       bgp_config_write_distance (vty, bgp);
       
diff --git a/bgpd/bgpd.h b/bgpd/bgpd.h
index 892e7de..09a3435 100644
--- a/bgpd/bgpd.h
+++ b/bgpd/bgpd.h
@@ -162,6 +162,12 @@
   /* BGP graceful restart */
   u_int32_t restart_time;
   u_int32_t stalepath_time;
+
+  /* Maximum-paths configuration */
+  struct bgp_maxpaths_cfg {
+    u_int16_t maxpaths_ebgp;
+    u_int16_t maxpaths_ibgp;
+  } maxpaths[AFI_MAX][SAFI_MAX];
 };
 
 /* BGP peer-group support. */
diff --git a/lib/memtypes.c b/lib/memtypes.c
index 2ded1d6..acbd16b 100644
--- a/lib/memtypes.c
+++ b/lib/memtypes.c
@@ -113,6 +113,7 @@
   { MTYPE_BGP_SYNCHRONISE,	"BGP synchronise"		},
   { MTYPE_BGP_ADJ_IN,		"BGP adj in"			},
   { MTYPE_BGP_ADJ_OUT,		"BGP adj out"			},
+  { MTYPE_BGP_MPATH_INFO,	"BGP multipath info"		},
   { 0, NULL },
   { MTYPE_AS_LIST,		"BGP AS list"			},
   { MTYPE_AS_FILTER,		"BGP AS filter"			},
diff --git a/ospfd/ospf_lsa.h b/ospfd/ospf_lsa.h
index 297cd98..6c95ff1 100644
--- a/ospfd/ospf_lsa.h
+++ b/ospfd/ospf_lsa.h
@@ -153,7 +153,14 @@
 };
 
 /* OSPF Router-LSAs structure. */
-#define OSPF_ROUTER_LSA_MIN_SIZE                  16U /* w/1 link descriptor */
+#define OSPF_ROUTER_LSA_MIN_SIZE                   4U /* w/0 link descriptors */
+/* There is an edge case, when number of links in a Router-LSA may be 0 without
+   breaking the specification. A router, which has no other links to backbone
+   area besides one virtual link, will not put any VL descriptor blocks into
+   the Router-LSA generated for area 0 until a full adjacency over the VL is
+   reached (RFC2328 12.4.1.3). In this case the Router-LSA initially received
+   by the other end of the VL will have 0 link descriptor blocks, but soon will
+   be replaced with the next revision having 1 descriptor block. */
 struct router_lsa
 {
   struct lsa_header header;
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 4ab507b..2e98ff7 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -6,7 +6,7 @@
 
 noinst_PROGRAMS = testsig testbuffer testmemory heavy heavywq heavythread \
 		aspathtest testprivs teststream testbgpcap ecommtest \
-		testbgpmpattr testchecksum
+		testbgpmpattr testchecksum testbgpmpath
 
 testsig_SOURCES = test-sig.c
 testbuffer_SOURCES = test-buffer.c
@@ -21,6 +21,7 @@
 ecommtest_SOURCES = ecommunity_test.c
 testbgpmpattr_SOURCES =  bgp_mp_attr_test.c
 testchecksum_SOURCES = test-checksum.c
+testbgpmpath_SOURCES = bgp_mpath_test.c
 
 testsig_LDADD = ../lib/libzebra.la @LIBCAP@
 testbuffer_LDADD = ../lib/libzebra.la @LIBCAP@
@@ -35,3 +36,4 @@
 ecommtest_LDADD = ../lib/libzebra.la @LIBCAP@ -lm ../bgpd/libbgp.a
 testbgpmpattr_LDADD = ../lib/libzebra.la @LIBCAP@ -lm ../bgpd/libbgp.a
 testchecksum_LDADD = ../lib/libzebra.la @LIBCAP@ 
+testbgpmpath_LDADD = ../lib/libzebra.la @LIBCAP@ -lm ../bgpd/libbgp.a
diff --git a/tests/bgp_mpath_test.c b/tests/bgp_mpath_test.c
new file mode 100644
index 0000000..15e450a
--- /dev/null
+++ b/tests/bgp_mpath_test.c
@@ -0,0 +1,479 @@
+/* $QuaggaId: Format:%an, %ai, %h$ $
+ *
+ * BGP Multipath Unit Test
+ * Copyright (C) 2010 Google Inc.
+ *
+ * This file is part of Quagga
+ *
+ * Quagga is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * Quagga is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Quagga; see the file COPYING.  If not, write to the Free
+ * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ */
+
+#include <zebra.h>
+
+#include "vty.h"
+#include "stream.h"
+#include "privs.h"
+#include "linklist.h"
+#include "memory.h"
+#include "zclient.h"
+
+#include "bgpd/bgpd.h"
+#include "bgpd/bgp_table.h"
+#include "bgpd/bgp_route.h"
+#include "bgpd/bgp_attr.h"
+#include "bgpd/bgp_mpath.h"
+
+#define VT100_RESET "\x1b[0m"
+#define VT100_RED "\x1b[31m"
+#define VT100_GREEN "\x1b[32m"
+#define VT100_YELLOW "\x1b[33m"
+#define OK VT100_GREEN "OK" VT100_RESET
+#define FAILED VT100_RED "failed" VT100_RESET
+
+#define TEST_PASSED 0
+#define TEST_FAILED -1
+
+#define EXPECT_TRUE(expr, res)                                          \
+  if (!(expr))                                                          \
+    {                                                                   \
+      printf ("Test failure in %s line %u: %s\n",                       \
+              __FUNCTION__, __LINE__, #expr);                           \
+      (res) = TEST_FAILED;                                              \
+    }
+
+typedef struct testcase_t__ testcase_t;
+
+typedef int (*test_setup_func)(testcase_t *);
+typedef int (*test_run_func)(testcase_t *);
+typedef int (*test_cleanup_func)(testcase_t *);
+
+struct testcase_t__ {
+  const char *desc;
+  void *test_data;
+  void *verify_data;
+  void *tmp_data;
+  test_setup_func setup;
+  test_run_func run;
+  test_cleanup_func cleanup;
+};
+
+/* need these to link in libbgp */
+struct thread_master *master = NULL;
+struct zclient *zclient;
+struct zebra_privs_t bgpd_privs =
+{
+  .user = NULL,
+  .group = NULL,
+  .vty_group = NULL,
+};
+
+static int tty = 0;
+
+/* Create fake bgp instance */
+static struct bgp *
+bgp_create_fake (as_t *as, const char *name)
+{
+  struct bgp *bgp;
+  afi_t afi;
+  safi_t safi;
+
+  if ( (bgp = XCALLOC (MTYPE_BGP, sizeof (struct bgp))) == NULL)
+    return NULL;
+
+  bgp_lock (bgp);
+  //bgp->peer_self = peer_new (bgp);
+  //bgp->peer_self->host = XSTRDUP (MTYPE_BGP_PEER_HOST, "Static announcement");
+
+  bgp->peer = list_new ();
+  //bgp->peer->cmp = (int (*)(void *, void *)) peer_cmp;
+
+  bgp->group = list_new ();
+  //bgp->group->cmp = (int (*)(void *, void *)) peer_group_cmp;
+
+  bgp->rsclient = list_new ();
+  //bgp->rsclient->cmp = (int (*)(void*, void*)) peer_cmp;
+
+  for (afi = AFI_IP; afi < AFI_MAX; afi++)
+    for (safi = SAFI_UNICAST; safi < SAFI_MAX; safi++)
+      {
+        bgp->route[afi][safi] = bgp_table_init (afi, safi);
+        bgp->aggregate[afi][safi] = bgp_table_init (afi, safi);
+        bgp->rib[afi][safi] = bgp_table_init (afi, safi);
+        bgp->maxpaths[afi][safi].maxpaths_ebgp = BGP_DEFAULT_MAXPATHS;
+        bgp->maxpaths[afi][safi].maxpaths_ibgp = BGP_DEFAULT_MAXPATHS;
+      }
+
+  bgp->default_local_pref = BGP_DEFAULT_LOCAL_PREF;
+  bgp->default_holdtime = BGP_DEFAULT_HOLDTIME;
+  bgp->default_keepalive = BGP_DEFAULT_KEEPALIVE;
+  bgp->restart_time = BGP_DEFAULT_RESTART_TIME;
+  bgp->stalepath_time = BGP_DEFAULT_STALEPATH_TIME;
+
+  bgp->as = *as;
+
+  if (name)
+    bgp->name = strdup (name);
+
+  return bgp;
+}
+
+/*=========================================================
+ * Testcase for maximum-paths configuration
+ */
+static int
+setup_bgp_cfg_maximum_paths (testcase_t *t)
+{
+  as_t asn = 1;
+  t->tmp_data = bgp_create_fake (&asn, NULL);
+  if (!t->tmp_data)
+    return -1;
+  return 0;
+}
+
+static int
+run_bgp_cfg_maximum_paths (testcase_t *t)
+{
+  afi_t afi;
+  safi_t safi;
+  struct bgp *bgp;
+  int api_result;
+  int test_result = TEST_PASSED;
+
+  bgp = t->tmp_data;
+  for (afi = AFI_IP; afi < AFI_MAX; afi++)
+    for (safi = SAFI_UNICAST; safi < SAFI_MAX; safi++)
+      {
+        /* test bgp_maximum_paths_set */
+        api_result = bgp_maximum_paths_set (bgp, afi, safi, BGP_PEER_EBGP, 10);
+        EXPECT_TRUE (api_result == 0, test_result);
+        api_result = bgp_maximum_paths_set (bgp, afi, safi, BGP_PEER_IBGP, 10);
+        EXPECT_TRUE (api_result == 0, test_result);
+        EXPECT_TRUE (bgp->maxpaths[afi][safi].maxpaths_ebgp == 10, test_result);
+        EXPECT_TRUE (bgp->maxpaths[afi][safi].maxpaths_ibgp == 10, test_result);
+
+        /* test bgp_maximum_paths_unset */
+        api_result = bgp_maximum_paths_unset (bgp, afi, safi, BGP_PEER_EBGP);
+        EXPECT_TRUE (api_result == 0, test_result);
+        api_result = bgp_maximum_paths_unset (bgp, afi, safi, BGP_PEER_IBGP);
+        EXPECT_TRUE (api_result == 0, test_result);
+        EXPECT_TRUE ((bgp->maxpaths[afi][safi].maxpaths_ebgp ==
+                      BGP_DEFAULT_MAXPATHS), test_result);
+        EXPECT_TRUE ((bgp->maxpaths[afi][safi].maxpaths_ibgp ==
+                      BGP_DEFAULT_MAXPATHS), test_result);
+      }
+
+  return test_result;
+}
+
+static int
+cleanup_bgp_cfg_maximum_paths (testcase_t *t)
+{
+  return bgp_delete ((struct bgp *)t->tmp_data);
+}
+
+testcase_t test_bgp_cfg_maximum_paths = {
+  .desc = "Test bgp maximum-paths config",
+  .setup = setup_bgp_cfg_maximum_paths,
+  .run = run_bgp_cfg_maximum_paths,
+  .cleanup = cleanup_bgp_cfg_maximum_paths,
+};
+
+/*=========================================================
+ * Testcase for bgp_mp_list
+ */
+struct peer test_mp_list_peer[] = {
+  { .local_as = 1, .as = 2 },
+  { .local_as = 1, .as = 2 },
+  { .local_as = 1, .as = 2 },
+  { .local_as = 1, .as = 2 },
+  { .local_as = 1, .as = 2 },
+};
+int test_mp_list_peer_count = sizeof (test_mp_list_peer)/ sizeof (struct peer);
+struct attr test_mp_list_attr[4];
+struct bgp_info test_mp_list_info[] = {
+  { .peer = &test_mp_list_peer[0], .attr = &test_mp_list_attr[0] },
+  { .peer = &test_mp_list_peer[1], .attr = &test_mp_list_attr[1] },
+  { .peer = &test_mp_list_peer[2], .attr = &test_mp_list_attr[1] },
+  { .peer = &test_mp_list_peer[3], .attr = &test_mp_list_attr[2] },
+  { .peer = &test_mp_list_peer[4], .attr = &test_mp_list_attr[3] },
+};
+int test_mp_list_info_count =
+  sizeof (test_mp_list_info)/sizeof (struct bgp_info);
+
+static int
+setup_bgp_mp_list (testcase_t *t)
+{
+  test_mp_list_attr[0].nexthop.s_addr = 0x01010101;
+  test_mp_list_attr[1].nexthop.s_addr = 0x02020202;
+  test_mp_list_attr[2].nexthop.s_addr = 0x03030303;
+  test_mp_list_attr[3].nexthop.s_addr = 0x04040404;
+
+  if ((test_mp_list_peer[0].su_remote = sockunion_str2su ("1.1.1.1")) == NULL)
+    return -1;
+  if ((test_mp_list_peer[1].su_remote = sockunion_str2su ("2.2.2.2")) == NULL)
+    return -1;
+  if ((test_mp_list_peer[2].su_remote = sockunion_str2su ("3.3.3.3")) == NULL)
+    return -1;
+  if ((test_mp_list_peer[3].su_remote = sockunion_str2su ("4.4.4.4")) == NULL)
+    return -1;
+  if ((test_mp_list_peer[4].su_remote = sockunion_str2su ("5.5.5.5")) == NULL)
+    return -1;
+
+  return 0;
+}
+
+static int
+run_bgp_mp_list (testcase_t *t)
+{
+  struct list mp_list;
+  struct listnode *mp_node;
+  struct bgp_info *info;
+  int i;
+  int test_result = TEST_PASSED;
+  bgp_mp_list_init (&mp_list);
+  EXPECT_TRUE (listcount(&mp_list) == 0, test_result);
+
+  bgp_mp_list_add (&mp_list, &test_mp_list_info[1]);
+  bgp_mp_list_add (&mp_list, &test_mp_list_info[4]);
+  bgp_mp_list_add (&mp_list, &test_mp_list_info[2]);
+  bgp_mp_list_add (&mp_list, &test_mp_list_info[3]);
+  bgp_mp_list_add (&mp_list, &test_mp_list_info[0]);
+
+  for (i = 0, mp_node = listhead(&mp_list); i < test_mp_list_info_count;
+       i++, mp_node = listnextnode(mp_node))
+    {
+      info = listgetdata(mp_node);
+      EXPECT_TRUE (info == &test_mp_list_info[i], test_result);
+    }
+
+  bgp_mp_list_clear (&mp_list);
+  EXPECT_TRUE (listcount(&mp_list) == 0, test_result);
+
+  return test_result;
+}
+
+static int
+cleanup_bgp_mp_list (testcase_t *t)
+{
+  int i;
+
+  for (i = 0; i < test_mp_list_peer_count; i++)
+    sockunion_free (test_mp_list_peer[i].su_remote);
+
+  return 0;
+}
+
+testcase_t test_bgp_mp_list = {
+  .desc = "Test bgp_mp_list",
+  .setup = setup_bgp_mp_list,
+  .run = run_bgp_mp_list,
+  .cleanup = cleanup_bgp_mp_list,
+};
+
+/*=========================================================
+ * Testcase for bgp_info_mpath_update
+ */
+
+struct bgp_node test_rn;
+
+static int
+setup_bgp_info_mpath_update (testcase_t *t)
+{
+  int i;
+  str2prefix ("42.1.1.0/24", &test_rn.p);
+  setup_bgp_mp_list (t);
+  for (i = 0; i < test_mp_list_info_count; i++)
+    bgp_info_add (&test_rn, &test_mp_list_info[i]);
+  return 0;
+}
+
+static int
+run_bgp_info_mpath_update (testcase_t *t)
+{
+  struct bgp_info *new_best, *old_best, *mpath;
+  struct list mp_list;
+  struct bgp_maxpaths_cfg mp_cfg = { 3, 3 };
+  int test_result = TEST_PASSED;
+  bgp_mp_list_init (&mp_list);
+  bgp_mp_list_add (&mp_list, &test_mp_list_info[4]);
+  bgp_mp_list_add (&mp_list, &test_mp_list_info[3]);
+  bgp_mp_list_add (&mp_list, &test_mp_list_info[0]);
+  bgp_mp_list_add (&mp_list, &test_mp_list_info[1]);
+  new_best = &test_mp_list_info[3];
+  old_best = NULL;
+  bgp_info_mpath_update (&test_rn, new_best, old_best, &mp_list, &mp_cfg);
+  bgp_mp_list_clear (&mp_list);
+  EXPECT_TRUE (bgp_info_mpath_count (new_best) == 2, test_result);
+  mpath = bgp_info_mpath_first (new_best);
+  EXPECT_TRUE (mpath == &test_mp_list_info[0], test_result);
+  EXPECT_TRUE (CHECK_FLAG (mpath->flags, BGP_INFO_MULTIPATH), test_result);
+  mpath = bgp_info_mpath_next (mpath);
+  EXPECT_TRUE (mpath == &test_mp_list_info[1], test_result);
+  EXPECT_TRUE (CHECK_FLAG (mpath->flags, BGP_INFO_MULTIPATH), test_result);
+
+  bgp_mp_list_add (&mp_list, &test_mp_list_info[0]);
+  bgp_mp_list_add (&mp_list, &test_mp_list_info[1]);
+  new_best = &test_mp_list_info[0];
+  old_best = &test_mp_list_info[3];
+  bgp_info_mpath_update (&test_rn, new_best, old_best, &mp_list, &mp_cfg);
+  bgp_mp_list_clear (&mp_list);
+  EXPECT_TRUE (bgp_info_mpath_count (new_best) == 1, test_result);
+  mpath = bgp_info_mpath_first (new_best);
+  EXPECT_TRUE (mpath == &test_mp_list_info[1], test_result);
+  EXPECT_TRUE (CHECK_FLAG (mpath->flags, BGP_INFO_MULTIPATH), test_result);
+  EXPECT_TRUE (!CHECK_FLAG (test_mp_list_info[0].flags, BGP_INFO_MULTIPATH),
+               test_result);
+
+  return test_result;
+}
+
+static int
+cleanup_bgp_info_mpath_update (testcase_t *t)
+{
+  int i;
+
+  for (i = 0; i < test_mp_list_peer_count; i++)
+    sockunion_free (test_mp_list_peer[i].su_remote);
+
+  return 0;
+}
+
+testcase_t test_bgp_info_mpath_update = {
+  .desc = "Test bgp_info_mpath_update",
+  .setup = setup_bgp_info_mpath_update,
+  .run = run_bgp_info_mpath_update,
+  .cleanup = cleanup_bgp_info_mpath_update,
+};
+
+/*=========================================================
+ * Set up testcase vector
+ */
+testcase_t *all_tests[] = {
+  &test_bgp_cfg_maximum_paths,
+  &test_bgp_mp_list,
+  &test_bgp_info_mpath_update,
+};
+
+int all_tests_count = (sizeof(all_tests)/sizeof(testcase_t *));
+
+/*=========================================================
+ * Test Driver Functions
+ */
+static int
+global_test_init (void)
+{
+  master = thread_master_create ();
+  zclient = zclient_new ();
+  bgp_master_init ();
+
+  if (fileno (stdout) >= 0)
+    tty = isatty (fileno (stdout));
+  return 0;
+}
+
+static int
+global_test_cleanup (void)
+{
+  zclient_free (zclient);
+  thread_master_free (master);
+  return 0;
+}
+
+static void
+display_result (testcase_t *test, int result)
+{
+  if (tty)
+    printf ("%s: %s\n", test->desc, result == TEST_PASSED ? OK : FAILED);
+  else
+    printf ("%s: %s\n", test->desc, result == TEST_PASSED ? "OK" : "FAILED");
+}
+
+static int
+setup_test (testcase_t *t)
+{
+  int res = 0;
+  if (t->setup)
+    res = t->setup (t);
+  return res;
+}
+
+static int
+cleanup_test (testcase_t *t)
+{
+  int res = 0;
+  if (t->cleanup)
+    res = t->cleanup (t);
+  return res;
+}
+
+static void
+run_tests (testcase_t *tests[], int num_tests, int *pass_count, int *fail_count)
+{
+  int test_index, result;
+  testcase_t *cur_test;
+
+  *pass_count = *fail_count = 0;
+
+  for (test_index = 0; test_index < num_tests; test_index++)
+    {
+      cur_test = tests[test_index];
+      if (!cur_test->desc)
+        {
+          printf ("error: test %d has no description!\n", test_index);
+          continue;
+        }
+      if (!cur_test->run)
+        {
+          printf ("error: test %s has no run function!\n", cur_test->desc);
+          continue;
+        }
+      if (setup_test (cur_test) != 0)
+        {
+          printf ("error: setup failed for test %s\n", cur_test->desc);
+          continue;
+        }
+      result = cur_test->run (cur_test);
+      if (result == TEST_PASSED)
+        *pass_count += 1;
+      else
+        *fail_count += 1;
+      display_result (cur_test, result);
+      if (cleanup_test (cur_test) != 0)
+        {
+          printf ("error: cleanup failed for test %s\n", cur_test->desc);
+          continue;
+        }
+    }
+}
+
+int
+main (void)
+{
+  int pass_count, fail_count;
+  time_t cur_time;
+
+  time (&cur_time);
+  printf("BGP Multipath Tests Run at %s", ctime(&cur_time));
+  if (global_test_init () != 0)
+    {
+      printf("Global init failed. Terminating.\n");
+      exit(1);
+    }
+  run_tests (all_tests, all_tests_count, &pass_count, &fail_count);
+  global_test_cleanup ();
+  printf("Total pass/fail: %d/%d\n", pass_count, fail_count);
+  return fail_count;
+}