bgpd: efficient NLRI packing for AFs != ipv4-unicast

ISSUE:

  Currently, for non-ipv4-unicast address families where prefixes are
  encoded in MP_REACH/MP_UNREACH attributes, BGP ends up sending one
  prefix per UPDATE message. This is quite inefficient. The patch
  addresses the issue.

PATCH:

  We introduce a scratch buffer in the peer structure that stores the
  MP_REACH/MP_UNREACH attributes for non-ipv4-unicast families. This
  enables us to encode multiple prefixes. In the end, the two buffers
  are merged to create the UPDATE packet.

Signed-off-by: Pradosh Mohapatra <pmohapat@cumulusnetworks.com>
Reviewed-by: Daniel Walton <dwalton@cumulusnetworks.com>
[DL: removed no longer existing bgp_packet_withdraw prototype]
Signed-off-by: David Lamparter <equinox@opensourcerouting.org>
diff --git a/bgpd/bgp_attr.c b/bgpd/bgp_attr.c
index a0dfc65..f284758 100644
--- a/bgpd/bgp_attr.c
+++ b/bgpd/bgp_attr.c
@@ -2058,12 +2058,106 @@
 
 int stream_put_prefix (struct stream *, struct prefix *);
 
+size_t
+bgp_packet_mpattr_start (struct stream *s, afi_t afi, safi_t safi,
+			 struct attr *attr)
+{
+  size_t sizep;
+
+  /* Set extended bit always to encode the attribute length as 2 bytes */
+  stream_putc (s, BGP_ATTR_FLAG_OPTIONAL|BGP_ATTR_FLAG_EXTLEN);
+  stream_putc (s, BGP_ATTR_MP_REACH_NLRI);
+  sizep = stream_get_endp (s);
+  stream_putw (s, 0);	/* Marker: Attribute length. */
+  stream_putw (s, afi);	/* AFI */
+  stream_putc (s, safi);	/* SAFI */
+
+  /* Nexthop */
+  switch (afi)
+    {
+    case AFI_IP:
+      switch (safi)
+	{
+	case SAFI_UNICAST:
+	case SAFI_MULTICAST:
+	  stream_putc (s, 4);
+	  stream_put_ipv4 (s, attr->nexthop.s_addr);
+	  break;
+	case SAFI_MPLS_VPN:
+	  stream_putc (s, 12);
+	  stream_putl (s, 0);
+	  stream_putl (s, 0);
+	  stream_put (s, &attr->extra->mp_nexthop_global_in, 4);
+	  break;
+	default:
+	  break;
+	}
+      break;
+#ifdef HAVE_IPV6
+    case AFI_IP6:
+      switch (safi)
+      {
+      case SAFI_UNICAST:
+      case SAFI_MULTICAST:
+	{
+	  unsigned long sizep;
+	  struct attr_extra *attre = attr->extra;
+
+	  assert (attr->extra);
+	  stream_putc (s, attre->mp_nexthop_len);
+	  stream_put (s, &attre->mp_nexthop_global, 16);
+	  if (attre->mp_nexthop_len == 32)
+	    stream_put (s, &attre->mp_nexthop_local, 16);
+	}
+      default:
+	break;
+      }
+      break;
+#endif /*HAVE_IPV6*/
+    default:
+      break;
+    }
+
+  /* SNPA */
+  stream_putc (s, 0);
+  return sizep;
+}
+
+void
+bgp_packet_mpattr_prefix (struct stream *s, afi_t afi, safi_t safi,
+			  struct prefix *p, struct prefix_rd *prd,
+			  u_char *tag)
+{
+  switch (safi)
+    {
+    case SAFI_MPLS_VPN:
+      /* Tag, RD, Prefix write. */
+      stream_putc (s, p->prefixlen + 88);
+      stream_put (s, tag, 3);
+      stream_put (s, prd->val, 8);
+      stream_put (s, &p->u.prefix, PSIZE (p->prefixlen));
+      break;
+    default:
+      /* Prefix write. */
+      stream_put_prefix (s, p);
+      break;
+    }
+}
+
+void
+bgp_packet_mpattr_end (struct stream *s, size_t sizep)
+{
+  /* Set MP attribute length. Don't count the (2) bytes used to encode
+     the attr length */
+  stream_putw_at (s, sizep, (stream_get_endp (s) - sizep) - 2);
+}
+
 /* Make attribute packet. */
 bgp_size_t
 bgp_packet_attribute (struct bgp *bgp, struct peer *peer,
-		      struct stream *s, struct attr *attr, struct prefix *p,
-		      afi_t afi, safi_t safi, struct peer *from,
-		      struct prefix_rd *prd, u_char *tag)
+		      struct stream *s, struct attr *attr,
+		      struct prefix *p, afi_t afi, safi_t safi,
+		      struct peer *from, struct prefix_rd *prd, u_char *tag)
 {
   size_t cp;
   size_t aspath_sizep;
@@ -2071,6 +2165,7 @@
   int send_as4_path = 0;
   int send_as4_aggregator = 0;
   int use32bit = (CHECK_FLAG (peer->cap, PEER_CAP_AS4_RCV)) ? 1 : 0;
+  size_t mpattrlen_pos = 0;
 
   if (! bgp)
     bgp = bgp_get_default ();
@@ -2078,6 +2173,13 @@
   /* Remember current pointer. */
   cp = stream_get_endp (s);
 
+  if (p && !(afi == AFI_IP && safi == SAFI_UNICAST))
+    {
+      mpattrlen_pos = bgp_packet_mpattr_start(s, afi, safi, attr);
+      bgp_packet_mpattr_prefix(s, afi, safi, p, prd, tag);
+      bgp_packet_mpattr_end(s, mpattrlen_pos);
+    }
+
   /* Origin attribute. */
   stream_putc (s, BGP_ATTR_FLAG_TRANS);
   stream_putc (s, BGP_ATTR_ORIGIN);
@@ -2286,96 +2388,6 @@
 	}
     }
 
-#ifdef HAVE_IPV6
-  /* If p is IPv6 address put it into attribute. */
-  if (p->family == AF_INET6)
-    {
-      unsigned long sizep;
-      struct attr_extra *attre = attr->extra;
-      
-      assert (attr->extra);
-      
-      stream_putc (s, BGP_ATTR_FLAG_OPTIONAL);
-      stream_putc (s, BGP_ATTR_MP_REACH_NLRI);
-      sizep = stream_get_endp (s);
-      stream_putc (s, 0);	/* Marker: Attribute length. */
-      stream_putw (s, AFI_IP6);	/* AFI */
-      stream_putc (s, safi);	/* SAFI */
-
-      stream_putc (s, attre->mp_nexthop_len);
-
-      if (attre->mp_nexthop_len == 16)
-	stream_put (s, &attre->mp_nexthop_global, 16);
-      else if (attre->mp_nexthop_len == 32)
-	{
-	  stream_put (s, &attre->mp_nexthop_global, 16);
-	  stream_put (s, &attre->mp_nexthop_local, 16);
-	}
-      
-      /* SNPA */
-      stream_putc (s, 0);
-
-      /* Prefix write. */
-      stream_put_prefix (s, p);
-
-      /* Set MP attribute length. */
-      stream_putc_at (s, sizep, (stream_get_endp (s) - sizep) - 1);
-    }
-#endif /* HAVE_IPV6 */
-
-  if (p->family == AF_INET && safi == SAFI_MULTICAST)
-    {
-      unsigned long sizep;
-
-      stream_putc (s, BGP_ATTR_FLAG_OPTIONAL);
-      stream_putc (s, BGP_ATTR_MP_REACH_NLRI);
-      sizep = stream_get_endp (s);
-      stream_putc (s, 0);	/* Marker: Attribute Length. */
-      stream_putw (s, AFI_IP);	/* AFI */
-      stream_putc (s, SAFI_MULTICAST);	/* SAFI */
-
-      stream_putc (s, 4);
-      stream_put_ipv4 (s, attr->nexthop.s_addr);
-
-      /* SNPA */
-      stream_putc (s, 0);
-
-      /* Prefix write. */
-      stream_put_prefix (s, p);
-
-      /* Set MP attribute length. */
-      stream_putc_at (s, sizep, (stream_get_endp (s) - sizep) - 1);
-    }
-
-  if (p->family == AF_INET && safi == SAFI_MPLS_VPN)
-    {
-      unsigned long sizep;
-
-      stream_putc (s, BGP_ATTR_FLAG_OPTIONAL);
-      stream_putc (s, BGP_ATTR_MP_REACH_NLRI);
-      sizep = stream_get_endp (s);
-      stream_putc (s, 0);	/* Length of this attribute. */
-      stream_putw (s, AFI_IP);	/* AFI */
-      stream_putc (s, SAFI_MPLS_LABELED_VPN);	/* SAFI */
-
-      stream_putc (s, 12);
-      stream_putl (s, 0);
-      stream_putl (s, 0);
-      stream_put (s, &attr->extra->mp_nexthop_global_in, 4);
-
-      /* SNPA */
-      stream_putc (s, 0);
-
-      /* Tag, RD, Prefix write. */
-      stream_putc (s, p->prefixlen + 88);
-      stream_put (s, tag, 3);
-      stream_put (s, prd->val, 8);
-      stream_put (s, &p->u.prefix, PSIZE (p->prefixlen));
-
-      /* Set MP attribute length. */
-      stream_putc_at (s, sizep, (stream_get_endp (s) - sizep) - 1);
-    }
-
   /* Extended Communities attribute. */
   if (CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_SEND_EXT_COMMUNITY) 
       && (attr->flag & ATTR_FLAG_BIT (BGP_ATTR_EXT_COMMUNITIES)))
@@ -2497,50 +2509,49 @@
   return stream_get_endp (s) - cp;
 }
 
-bgp_size_t
-bgp_packet_withdraw (struct peer *peer, struct stream *s, struct prefix *p,
-		     afi_t afi, safi_t safi, struct prefix_rd *prd,
-		     u_char *tag)
+size_t
+bgp_packet_mpunreach_start (struct stream *s, afi_t afi, safi_t safi)
 {
-  unsigned long cp;
   unsigned long attrlen_pnt;
-  bgp_size_t size;
 
-  cp = stream_get_endp (s);
-
-  stream_putc (s, BGP_ATTR_FLAG_OPTIONAL);
+  /* Set extended bit always to encode the attribute length as 2 bytes */
+  stream_putc (s, BGP_ATTR_FLAG_OPTIONAL|BGP_ATTR_FLAG_EXTLEN);
   stream_putc (s, BGP_ATTR_MP_UNREACH_NLRI);
 
   attrlen_pnt = stream_get_endp (s);
-  stream_putc (s, 0);		/* Length of this attribute. */
+  stream_putw (s, 0);		/* Length of this attribute. */
 
-  stream_putw (s, family2afi (p->family));
+  stream_putw (s, afi);
+  safi = (safi == SAFI_MPLS_VPN) ? SAFI_MPLS_LABELED_VPN : safi;
+  stream_putc (s, safi);
+  return attrlen_pnt;
+}
 
+void
+bgp_packet_mpunreach_prefix (struct stream *s, struct prefix *p,
+			     afi_t afi, safi_t safi, struct prefix_rd *prd,
+			     u_char *tag)
+{
   if (safi == SAFI_MPLS_VPN)
     {
-      /* SAFI */
-      stream_putc (s, SAFI_MPLS_LABELED_VPN);
-
-      /* prefix. */
       stream_putc (s, p->prefixlen + 88);
       stream_put (s, tag, 3);
       stream_put (s, prd->val, 8);
       stream_put (s, &p->u.prefix, PSIZE (p->prefixlen));
     }
   else
-    {
-      /* SAFI */
-      stream_putc (s, safi);
+    stream_put_prefix (s, p);
+}
 
-      /* prefix */
-      stream_put_prefix (s, p);
-    }
+void
+bgp_packet_mpunreach_end (struct stream *s, size_t attrlen_pnt)
+{
+  bgp_size_t size;
 
-  /* Set MP attribute length. */
-  size = stream_get_endp (s) - attrlen_pnt - 1;
-  stream_putc_at (s, attrlen_pnt, size);
-
-  return stream_get_endp (s) - cp;
+  /* Set MP attribute length. Don't count the (2) bytes used to encode
+     the attr length */
+  size = stream_get_endp (s) - attrlen_pnt - 2;
+  stream_putw_at (s, attrlen_pnt, size);
 }
 
 /* Initialization of attribute. */
diff --git a/bgpd/bgp_attr.h b/bgpd/bgp_attr.h
index df87c86..cdd5467 100644
--- a/bgpd/bgp_attr.h
+++ b/bgpd/bgp_attr.h
@@ -157,13 +157,11 @@
 extern struct attr *bgp_attr_aggregate_intern (struct bgp *, u_char,
                                         struct aspath *, 
                                         struct community *, int as_set);
-extern bgp_size_t bgp_packet_attribute (struct bgp *bgp, struct peer *, 
-                                 struct stream *, struct attr *, 
-                                 struct prefix *, afi_t, safi_t, 
-                                 struct peer *, struct prefix_rd *, u_char *);
-extern bgp_size_t bgp_packet_withdraw (struct peer *peer, struct stream *s, 
-                                struct prefix *p, afi_t, safi_t, 
-                                struct prefix_rd *, u_char *);
+extern bgp_size_t bgp_packet_attribute (struct bgp *bgp, struct peer *,
+					struct stream *, struct attr *,
+					struct prefix *, afi_t, safi_t,
+					struct peer *, struct prefix_rd *,
+					u_char *);
 extern void bgp_dump_routes_attr (struct stream *, struct attr *,
 				  struct prefix *);
 extern int attrhash_cmp (const void *, const void *);
@@ -194,4 +192,24 @@
 extern int bgp_mp_unreach_parse (struct bgp_attr_parser_args *args,
                                  struct bgp_nlri *);
 
+/**
+ * Set of functions to encode MP_REACH_NLRI and MP_UNREACH_NLRI attributes.
+ * Typical call sequence is to call _start(), followed by multiple _prefix(),
+ * one for each NLRI that needs to be encoded into the UPDATE message, and
+ * finally the _end() function.
+ */
+extern size_t bgp_packet_mpattr_start(struct stream *s, afi_t afi, safi_t safi,
+				      struct attr *attr);
+extern void bgp_packet_mpattr_prefix(struct stream *s, afi_t afi, safi_t safi,
+				     struct prefix *p, struct prefix_rd *prd,
+				     u_char *tag);
+extern void bgp_packet_mpattr_end(struct stream *s, size_t sizep);
+
+extern size_t bgp_packet_mpunreach_start (struct stream *s, afi_t afi,
+					  safi_t safi);
+extern void bgp_packet_mpunreach_prefix (struct stream *s, struct prefix *p,
+			     afi_t afi, safi_t safi, struct prefix_rd *prd,
+			     u_char *tag);
+extern void bgp_packet_mpunreach_end (struct stream *s, size_t attrlen_pnt);
+
 #endif /* _QUAGGA_BGP_ATTR_H */
diff --git a/bgpd/bgp_packet.c b/bgpd/bgp_packet.c
index d71df08..d5f2417 100644
--- a/bgpd/bgp_packet.c
+++ b/bgpd/bgp_packet.c
@@ -142,16 +142,21 @@
 bgp_update_packet (struct peer *peer, afi_t afi, safi_t safi)
 {
   struct stream *s;
+  struct stream *snlri;
   struct bgp_adj_out *adj;
   struct bgp_advertise *adv;
   struct stream *packet;
   struct bgp_node *rn = NULL;
   struct bgp_info *binfo = NULL;
   bgp_size_t total_attr_len = 0;
-  unsigned long pos;
+  unsigned long attrlen_pos = 0;
+  size_t mpattrlen_pos = 0;
+  size_t mpattr_pos = 0;
 
   s = peer->work;
   stream_reset (s);
+  snlri = peer->scratch;
+  stream_reset (snlri);
 
   adv = FIFO_HEAD (&peer->sync[afi][safi]->update);
 
@@ -164,39 +169,61 @@
         binfo = adv->binfo;
 
       /* When remaining space can't include NLRI and it's length.  */
-      if (STREAM_REMAIN (s) <= BGP_NLRI_LENGTH + PSIZE (rn->p.prefixlen))
+      if (STREAM_CONCAT_REMAIN (s, snlri, STREAM_SIZE(s)) <=
+	  (BGP_NLRI_LENGTH + PSIZE (rn->p.prefixlen)))
 	break;
 
       /* If packet is empty, set attribute. */
       if (stream_empty (s))
 	{
-	  struct prefix_rd *prd = NULL;
-	  u_char *tag = NULL;
 	  struct peer *from = NULL;
-	  
-	  if (rn->prn)
-	    prd = (struct prefix_rd *) &rn->prn->p;
+
           if (binfo)
-            {
-              from = binfo->peer;
-              if (binfo->extra)
-                tag = binfo->extra->tag;
-            }
-          
+	    from = binfo->peer;
+
+	  /* 1: Write the BGP message header - 16 bytes marker, 2 bytes length,
+	   * one byte message type.
+	   */
 	  bgp_packet_set_marker (s, BGP_MSG_UPDATE);
-	  stream_putw (s, 0);		
-	  pos = stream_get_endp (s);
+
+	  /* 2: withdrawn routes length */
 	  stream_putw (s, 0);
-	  total_attr_len = bgp_packet_attribute (NULL, peer, s, 
+
+	  /* 3: total attributes length - attrlen_pos stores the position */
+	  attrlen_pos = stream_get_endp (s);
+	  stream_putw (s, 0);
+
+	  /* 4: if there is MP_REACH_NLRI attribute, that should be the first
+	   * attribute, according to draft-ietf-idr-error-handling. Save the
+	   * position.
+	   */
+	  mpattr_pos = stream_get_endp(s);
+
+	  /* 5: Encode all the attributes, except MP_REACH_NLRI attr. */
+	  total_attr_len = bgp_packet_attribute (NULL, peer, s,
 	                                         adv->baa->attr,
-	                                         &rn->p, afi, safi, 
-	                                         from, prd, tag);
-	  stream_putw_at (s, pos, total_attr_len);
+	                                         NULL, afi, safi,
+	                                         from, NULL, NULL);
 	}
 
       if (afi == AFI_IP && safi == SAFI_UNICAST)
 	stream_put_prefix (s, &rn->p);
-      
+      else
+	{
+	  /* Encode the prefix in MP_REACH_NLRI attribute */
+	  struct prefix_rd *prd = NULL;
+	  u_char *tag = NULL;
+
+	  if (rn->prn)
+	    prd = (struct prefix_rd *) &rn->prn->p;
+	  if (binfo && binfo->extra)
+	    tag = binfo->extra->tag;
+
+	  if (stream_empty(snlri))
+	    mpattrlen_pos = bgp_packet_mpattr_start(snlri, afi, safi,
+						    adv->baa->attr);
+	  bgp_packet_mpattr_prefix(snlri, afi, safi, &rn->p, prd, tag);
+	}
       if (BGP_DEBUG (update, UPDATE_OUT))
         {
           char buf[INET6_BUFSIZ];
@@ -216,18 +243,28 @@
       adj->attr = bgp_attr_intern (adv->baa->attr);
 
       adv = bgp_advertise_clean (peer, adj, afi, safi);
-
-      if (! (afi == AFI_IP && safi == SAFI_UNICAST))
-	break;
     }
-	 
+
   if (! stream_empty (s))
     {
-      bgp_packet_set_size (s);
-      packet = stream_dup (s);
+      if (!stream_empty(snlri))
+	{
+	  bgp_packet_mpattr_end(snlri, mpattrlen_pos);
+	  total_attr_len += stream_get_endp(snlri);
+	}
+
+      /* set the total attribute length correctly */
+      stream_putw_at (s, attrlen_pos, total_attr_len);
+
+      if (!stream_empty(snlri))
+	packet = stream_dupcat(s, snlri, mpattr_pos);
+      else
+	packet = stream_dup (s);
+      bgp_packet_set_size (packet);
       bgp_packet_add (peer, packet);
       BGP_WRITE_ON (peer->t_write, bgp_write, peer->fd);
       stream_reset (s);
+      stream_reset (snlri);
       return packet;
     }
   return NULL;
@@ -277,6 +314,15 @@
 }
 
 /* Make BGP withdraw packet.  */
+/* For ipv4 unicast:
+   16-octet marker | 2-octet length | 1-octet type |
+    2-octet withdrawn route length | withdrawn prefixes | 2-octet attrlen (=0)
+*/
+/* For other afi/safis:
+   16-octet marker | 2-octet length | 1-octet type |
+    2-octet withdrawn route length (=0) | 2-octet attrlen |
+     mp_unreach attr type | attr len | afi | safi | withdrawn prefixes
+*/
 static struct stream *
 bgp_withdraw_packet (struct peer *peer, afi_t afi, safi_t safi)
 {
@@ -288,6 +334,10 @@
   unsigned long pos;
   bgp_size_t unfeasible_len;
   bgp_size_t total_attr_len;
+  size_t mp_start = 0;
+  size_t attrlen_pos = 0;
+  size_t mplen_pos = 0;
+  u_char first_time = 1;
 
   s = peer->work;
   stream_reset (s);
@@ -298,31 +348,38 @@
       adj = adv->adj;
       rn = adv->rn;
 
-      if (STREAM_REMAIN (s) 
+      if (STREAM_REMAIN (s)
 	  < (BGP_NLRI_LENGTH + BGP_TOTAL_ATTR_LEN + PSIZE (rn->p.prefixlen)))
 	break;
 
       if (stream_empty (s))
 	{
 	  bgp_packet_set_marker (s, BGP_MSG_UPDATE);
-	  stream_putw (s, 0);
+	  stream_putw (s, 0); /* unfeasible routes length */
 	}
+      else
+	first_time = 0;
 
       if (afi == AFI_IP && safi == SAFI_UNICAST)
 	stream_put_prefix (s, &rn->p);
       else
 	{
 	  struct prefix_rd *prd = NULL;
-	  
+
 	  if (rn->prn)
 	    prd = (struct prefix_rd *) &rn->prn->p;
-	  pos = stream_get_endp (s);
-	  stream_putw (s, 0);
-	  total_attr_len
-	    = bgp_packet_withdraw (peer, s, &rn->p, afi, safi, prd, NULL);
-      
-	  /* Set total path attribute length. */
-	  stream_putw_at (s, pos, total_attr_len);
+
+	  /* If first time, format the MP_UNREACH header */
+	  if (first_time)
+	    {
+	      attrlen_pos = stream_get_endp (s);
+	      /* total attr length = 0 for now. reevaluate later */
+	      stream_putw (s, 0);
+	      mp_start = stream_get_endp (s);
+	      mplen_pos = bgp_packet_mpunreach_start(s, afi, safi);
+	    }
+
+	  bgp_packet_mpunreach_prefix(s, &rn->p, afi, safi, prd, NULL);
 	}
 
       if (BGP_DEBUG (update, UPDATE_OUT))
@@ -339,20 +396,26 @@
 
       bgp_adj_out_remove (rn, adj, peer, afi, safi);
       bgp_unlock_node (rn);
-
-      if (! (afi == AFI_IP && safi == SAFI_UNICAST))
-	break;
     }
 
   if (! stream_empty (s))
     {
       if (afi == AFI_IP && safi == SAFI_UNICAST)
 	{
-	  unfeasible_len 
+	  unfeasible_len
 	    = stream_get_endp (s) - BGP_HEADER_SIZE - BGP_UNFEASIBLE_LEN;
 	  stream_putw_at (s, BGP_HEADER_SIZE, unfeasible_len);
 	  stream_putw (s, 0);
 	}
+      else
+	{
+	  /* Set the mp_unreach attr's length */
+	  bgp_packet_mpunreach_end(s, mplen_pos);
+
+	  /* Set total path attribute length. */
+	  total_attr_len = stream_get_endp(s) - mp_start;
+	  stream_putw_at (s, attrlen_pos, total_attr_len);
+	}
       bgp_packet_set_size (s);
       packet = stream_dup (s);
       bgp_packet_add (peer, packet);
@@ -439,10 +502,12 @@
   struct stream *s;
   struct stream *packet;
   struct prefix p;
-  unsigned long pos;
+  unsigned long attrlen_pos = 0;
   unsigned long cp;
   bgp_size_t unfeasible_len;
   bgp_size_t total_attr_len;
+  size_t mp_start = 0;
+  size_t mplen_pos = 0;
 
   if (DISABLE_BGP_ANNOUNCE)
     return;
@@ -455,7 +520,6 @@
 #endif /* HAVE_IPV6 */
 
   total_attr_len = 0;
-  pos = 0;
 
   if (BGP_DEBUG (update, UPDATE_OUT))
     {
@@ -490,12 +554,18 @@
     }
   else
     {
-      pos = stream_get_endp (s);
+      attrlen_pos = stream_get_endp (s);
       stream_putw (s, 0);
-      total_attr_len = bgp_packet_withdraw (peer, s, &p, afi, safi, NULL, NULL);
+      mp_start = stream_get_endp (s);
+      mplen_pos = bgp_packet_mpunreach_start(s, afi, safi);
+      bgp_packet_mpunreach_prefix(s, &p, afi, safi, NULL, NULL);
+
+      /* Set the mp_unreach attr's length */
+      bgp_packet_mpunreach_end(s, mplen_pos);
 
       /* Set total path attribute length. */
-      stream_putw_at (s, pos, total_attr_len);
+      total_attr_len = stream_get_endp(s) - mp_start;
+      stream_putw_at (s, attrlen_pos, total_attr_len);
     }
 
   bgp_packet_set_size (s);
diff --git a/bgpd/bgpd.c b/bgpd/bgpd.c
index 88d13ed..6a21b11 100644
--- a/bgpd/bgpd.c
+++ b/bgpd/bgpd.c
@@ -832,6 +832,7 @@
   peer->ibuf = stream_new (BGP_MAX_PACKET_SIZE);
   peer->obuf = stream_fifo_new ();
   peer->work = stream_new (BGP_MAX_PACKET_SIZE);
+  peer->scratch = stream_new (BGP_MAX_PACKET_SIZE);
 
   bgp_sync_init (peer);
 
@@ -1272,8 +1273,10 @@
     stream_fifo_free (peer->obuf);
   if (peer->work)
     stream_free (peer->work);
+  if (peer->scratch)
+    stream_free(peer->scratch);
   peer->obuf = NULL;
-  peer->work = peer->ibuf = NULL;
+  peer->work = peer->scratch = peer->ibuf = NULL;
 
   /* Local and remote addresses. */
   if (peer->su_local)
diff --git a/bgpd/bgpd.h b/bgpd/bgpd.h
index 3d516d3..688f459 100644
--- a/bgpd/bgpd.h
+++ b/bgpd/bgpd.h
@@ -313,6 +313,12 @@
   struct stream_fifo *obuf;
   struct stream *work;
 
+  /* We use a separate stream to encode MP_REACH_NLRI for efficient
+   * NLRI packing. peer->work stores all the other attributes. The
+   * actual packet is then constructed by concatenating the two.
+   */
+  struct stream *scratch;
+
   /* Status of the peer. */
   int status;
   int ostatus;