bgpd: efficient NLRI packing for AFs != ipv4-unicast
ISSUE:
Currently, for non-ipv4-unicast address families where prefixes are
encoded in MP_REACH/MP_UNREACH attributes, BGP ends up sending one
prefix per UPDATE message. This is quite inefficient. The patch
addresses the issue.
PATCH:
We introduce a scratch buffer in the peer structure that stores the
MP_REACH/MP_UNREACH attributes for non-ipv4-unicast families. This
enables us to encode multiple prefixes. In the end, the two buffers
are merged to create the UPDATE packet.
Signed-off-by: Pradosh Mohapatra <pmohapat@cumulusnetworks.com>
Reviewed-by: Daniel Walton <dwalton@cumulusnetworks.com>
[DL: removed no longer existing bgp_packet_withdraw prototype]
Signed-off-by: David Lamparter <equinox@opensourcerouting.org>
diff --git a/bgpd/bgp_attr.c b/bgpd/bgp_attr.c
index a0dfc65..f284758 100644
--- a/bgpd/bgp_attr.c
+++ b/bgpd/bgp_attr.c
@@ -2058,12 +2058,106 @@
int stream_put_prefix (struct stream *, struct prefix *);
+size_t
+bgp_packet_mpattr_start (struct stream *s, afi_t afi, safi_t safi,
+ struct attr *attr)
+{
+ size_t sizep;
+
+ /* Set extended bit always to encode the attribute length as 2 bytes */
+ stream_putc (s, BGP_ATTR_FLAG_OPTIONAL|BGP_ATTR_FLAG_EXTLEN);
+ stream_putc (s, BGP_ATTR_MP_REACH_NLRI);
+ sizep = stream_get_endp (s);
+ stream_putw (s, 0); /* Marker: Attribute length. */
+ stream_putw (s, afi); /* AFI */
+ stream_putc (s, safi); /* SAFI */
+
+ /* Nexthop */
+ switch (afi)
+ {
+ case AFI_IP:
+ switch (safi)
+ {
+ case SAFI_UNICAST:
+ case SAFI_MULTICAST:
+ stream_putc (s, 4);
+ stream_put_ipv4 (s, attr->nexthop.s_addr);
+ break;
+ case SAFI_MPLS_VPN:
+ stream_putc (s, 12);
+ stream_putl (s, 0);
+ stream_putl (s, 0);
+ stream_put (s, &attr->extra->mp_nexthop_global_in, 4);
+ break;
+ default:
+ break;
+ }
+ break;
+#ifdef HAVE_IPV6
+ case AFI_IP6:
+ switch (safi)
+ {
+ case SAFI_UNICAST:
+ case SAFI_MULTICAST:
+ {
+ unsigned long sizep;
+ struct attr_extra *attre = attr->extra;
+
+ assert (attr->extra);
+ stream_putc (s, attre->mp_nexthop_len);
+ stream_put (s, &attre->mp_nexthop_global, 16);
+ if (attre->mp_nexthop_len == 32)
+ stream_put (s, &attre->mp_nexthop_local, 16);
+ }
+ default:
+ break;
+ }
+ break;
+#endif /*HAVE_IPV6*/
+ default:
+ break;
+ }
+
+ /* SNPA */
+ stream_putc (s, 0);
+ return sizep;
+}
+
+void
+bgp_packet_mpattr_prefix (struct stream *s, afi_t afi, safi_t safi,
+ struct prefix *p, struct prefix_rd *prd,
+ u_char *tag)
+{
+ switch (safi)
+ {
+ case SAFI_MPLS_VPN:
+ /* Tag, RD, Prefix write. */
+ stream_putc (s, p->prefixlen + 88);
+ stream_put (s, tag, 3);
+ stream_put (s, prd->val, 8);
+ stream_put (s, &p->u.prefix, PSIZE (p->prefixlen));
+ break;
+ default:
+ /* Prefix write. */
+ stream_put_prefix (s, p);
+ break;
+ }
+}
+
+void
+bgp_packet_mpattr_end (struct stream *s, size_t sizep)
+{
+ /* Set MP attribute length. Don't count the (2) bytes used to encode
+ the attr length */
+ stream_putw_at (s, sizep, (stream_get_endp (s) - sizep) - 2);
+}
+
/* Make attribute packet. */
bgp_size_t
bgp_packet_attribute (struct bgp *bgp, struct peer *peer,
- struct stream *s, struct attr *attr, struct prefix *p,
- afi_t afi, safi_t safi, struct peer *from,
- struct prefix_rd *prd, u_char *tag)
+ struct stream *s, struct attr *attr,
+ struct prefix *p, afi_t afi, safi_t safi,
+ struct peer *from, struct prefix_rd *prd, u_char *tag)
{
size_t cp;
size_t aspath_sizep;
@@ -2071,6 +2165,7 @@
int send_as4_path = 0;
int send_as4_aggregator = 0;
int use32bit = (CHECK_FLAG (peer->cap, PEER_CAP_AS4_RCV)) ? 1 : 0;
+ size_t mpattrlen_pos = 0;
if (! bgp)
bgp = bgp_get_default ();
@@ -2078,6 +2173,13 @@
/* Remember current pointer. */
cp = stream_get_endp (s);
+ if (p && !(afi == AFI_IP && safi == SAFI_UNICAST))
+ {
+ mpattrlen_pos = bgp_packet_mpattr_start(s, afi, safi, attr);
+ bgp_packet_mpattr_prefix(s, afi, safi, p, prd, tag);
+ bgp_packet_mpattr_end(s, mpattrlen_pos);
+ }
+
/* Origin attribute. */
stream_putc (s, BGP_ATTR_FLAG_TRANS);
stream_putc (s, BGP_ATTR_ORIGIN);
@@ -2286,96 +2388,6 @@
}
}
-#ifdef HAVE_IPV6
- /* If p is IPv6 address put it into attribute. */
- if (p->family == AF_INET6)
- {
- unsigned long sizep;
- struct attr_extra *attre = attr->extra;
-
- assert (attr->extra);
-
- stream_putc (s, BGP_ATTR_FLAG_OPTIONAL);
- stream_putc (s, BGP_ATTR_MP_REACH_NLRI);
- sizep = stream_get_endp (s);
- stream_putc (s, 0); /* Marker: Attribute length. */
- stream_putw (s, AFI_IP6); /* AFI */
- stream_putc (s, safi); /* SAFI */
-
- stream_putc (s, attre->mp_nexthop_len);
-
- if (attre->mp_nexthop_len == 16)
- stream_put (s, &attre->mp_nexthop_global, 16);
- else if (attre->mp_nexthop_len == 32)
- {
- stream_put (s, &attre->mp_nexthop_global, 16);
- stream_put (s, &attre->mp_nexthop_local, 16);
- }
-
- /* SNPA */
- stream_putc (s, 0);
-
- /* Prefix write. */
- stream_put_prefix (s, p);
-
- /* Set MP attribute length. */
- stream_putc_at (s, sizep, (stream_get_endp (s) - sizep) - 1);
- }
-#endif /* HAVE_IPV6 */
-
- if (p->family == AF_INET && safi == SAFI_MULTICAST)
- {
- unsigned long sizep;
-
- stream_putc (s, BGP_ATTR_FLAG_OPTIONAL);
- stream_putc (s, BGP_ATTR_MP_REACH_NLRI);
- sizep = stream_get_endp (s);
- stream_putc (s, 0); /* Marker: Attribute Length. */
- stream_putw (s, AFI_IP); /* AFI */
- stream_putc (s, SAFI_MULTICAST); /* SAFI */
-
- stream_putc (s, 4);
- stream_put_ipv4 (s, attr->nexthop.s_addr);
-
- /* SNPA */
- stream_putc (s, 0);
-
- /* Prefix write. */
- stream_put_prefix (s, p);
-
- /* Set MP attribute length. */
- stream_putc_at (s, sizep, (stream_get_endp (s) - sizep) - 1);
- }
-
- if (p->family == AF_INET && safi == SAFI_MPLS_VPN)
- {
- unsigned long sizep;
-
- stream_putc (s, BGP_ATTR_FLAG_OPTIONAL);
- stream_putc (s, BGP_ATTR_MP_REACH_NLRI);
- sizep = stream_get_endp (s);
- stream_putc (s, 0); /* Length of this attribute. */
- stream_putw (s, AFI_IP); /* AFI */
- stream_putc (s, SAFI_MPLS_LABELED_VPN); /* SAFI */
-
- stream_putc (s, 12);
- stream_putl (s, 0);
- stream_putl (s, 0);
- stream_put (s, &attr->extra->mp_nexthop_global_in, 4);
-
- /* SNPA */
- stream_putc (s, 0);
-
- /* Tag, RD, Prefix write. */
- stream_putc (s, p->prefixlen + 88);
- stream_put (s, tag, 3);
- stream_put (s, prd->val, 8);
- stream_put (s, &p->u.prefix, PSIZE (p->prefixlen));
-
- /* Set MP attribute length. */
- stream_putc_at (s, sizep, (stream_get_endp (s) - sizep) - 1);
- }
-
/* Extended Communities attribute. */
if (CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_SEND_EXT_COMMUNITY)
&& (attr->flag & ATTR_FLAG_BIT (BGP_ATTR_EXT_COMMUNITIES)))
@@ -2497,50 +2509,49 @@
return stream_get_endp (s) - cp;
}
-bgp_size_t
-bgp_packet_withdraw (struct peer *peer, struct stream *s, struct prefix *p,
- afi_t afi, safi_t safi, struct prefix_rd *prd,
- u_char *tag)
+size_t
+bgp_packet_mpunreach_start (struct stream *s, afi_t afi, safi_t safi)
{
- unsigned long cp;
unsigned long attrlen_pnt;
- bgp_size_t size;
- cp = stream_get_endp (s);
-
- stream_putc (s, BGP_ATTR_FLAG_OPTIONAL);
+ /* Set extended bit always to encode the attribute length as 2 bytes */
+ stream_putc (s, BGP_ATTR_FLAG_OPTIONAL|BGP_ATTR_FLAG_EXTLEN);
stream_putc (s, BGP_ATTR_MP_UNREACH_NLRI);
attrlen_pnt = stream_get_endp (s);
- stream_putc (s, 0); /* Length of this attribute. */
+ stream_putw (s, 0); /* Length of this attribute. */
- stream_putw (s, family2afi (p->family));
+ stream_putw (s, afi);
+ safi = (safi == SAFI_MPLS_VPN) ? SAFI_MPLS_LABELED_VPN : safi;
+ stream_putc (s, safi);
+ return attrlen_pnt;
+}
+void
+bgp_packet_mpunreach_prefix (struct stream *s, struct prefix *p,
+ afi_t afi, safi_t safi, struct prefix_rd *prd,
+ u_char *tag)
+{
if (safi == SAFI_MPLS_VPN)
{
- /* SAFI */
- stream_putc (s, SAFI_MPLS_LABELED_VPN);
-
- /* prefix. */
stream_putc (s, p->prefixlen + 88);
stream_put (s, tag, 3);
stream_put (s, prd->val, 8);
stream_put (s, &p->u.prefix, PSIZE (p->prefixlen));
}
else
- {
- /* SAFI */
- stream_putc (s, safi);
+ stream_put_prefix (s, p);
+}
- /* prefix */
- stream_put_prefix (s, p);
- }
+void
+bgp_packet_mpunreach_end (struct stream *s, size_t attrlen_pnt)
+{
+ bgp_size_t size;
- /* Set MP attribute length. */
- size = stream_get_endp (s) - attrlen_pnt - 1;
- stream_putc_at (s, attrlen_pnt, size);
-
- return stream_get_endp (s) - cp;
+ /* Set MP attribute length. Don't count the (2) bytes used to encode
+ the attr length */
+ size = stream_get_endp (s) - attrlen_pnt - 2;
+ stream_putw_at (s, attrlen_pnt, size);
}
/* Initialization of attribute. */