blob: 4625ceabbc21ffe948b87cbd99b3ed04dab4a02c [file] [log] [blame]
/* Kernel routing table updates using netlink over GNU/Linux system.
* Copyright (C) 1997, 98, 99 Kunihiro Ishiguro
*
* This file is part of GNU Zebra.
*
* GNU Zebra is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2, or (at your option) any
* later version.
*
* GNU Zebra is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Zebra; see the file COPYING. If not, write to the Free
* Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
* 02111-1307, USA.
*/
#include <zebra.h>
/* Hack for GNU libc version 2. */
#ifndef MSG_TRUNC
#define MSG_TRUNC 0x20
#endif /* MSG_TRUNC */
#include "linklist.h"
#include "if.h"
#include "log.h"
#include "prefix.h"
#include "connected.h"
#include "table.h"
#include "memory.h"
#include "rib.h"
#include "thread.h"
#include "privs.h"
#include "vrf.h"
#include "zebra/zserv.h"
#include "zebra/rt.h"
#include "zebra/redistribute.h"
#include "zebra/interface.h"
#include "zebra/debug.h"
#include "rt_netlink.h"
static const struct message nlmsg_str[] = {
{RTM_NEWROUTE, "RTM_NEWROUTE"},
{RTM_DELROUTE, "RTM_DELROUTE"},
{RTM_GETROUTE, "RTM_GETROUTE"},
{RTM_NEWLINK, "RTM_NEWLINK"},
{RTM_DELLINK, "RTM_DELLINK"},
{RTM_GETLINK, "RTM_GETLINK"},
{RTM_NEWADDR, "RTM_NEWADDR"},
{RTM_DELADDR, "RTM_DELADDR"},
{RTM_GETADDR, "RTM_GETADDR"},
{0, NULL}
};
extern struct zebra_t zebrad;
extern struct zebra_privs_t zserv_privs;
extern u_int32_t nl_rcvbufsize;
/* Note: on netlink systems, there should be a 1-to-1 mapping between interface
names and ifindex values. */
static void
set_ifindex(struct interface *ifp, unsigned int ifi_index)
{
struct interface *oifp;
if (((oifp = if_lookup_by_index(ifi_index)) != NULL) && (oifp != ifp))
{
if (ifi_index == IFINDEX_INTERNAL)
zlog_err("Netlink is setting interface %s ifindex to reserved "
"internal value %u", ifp->name, ifi_index);
else
{
if (IS_ZEBRA_DEBUG_KERNEL)
zlog_debug("interface index %d was renamed from %s to %s",
ifi_index, oifp->name, ifp->name);
if (if_is_up(oifp))
zlog_err("interface rename detected on up interface: index %d "
"was renamed from %s to %s, results are uncertain!",
ifi_index, oifp->name, ifp->name);
if_delete_update(oifp);
}
}
ifp->ifindex = ifi_index;
}
#ifndef SO_RCVBUFFORCE
#define SO_RCVBUFFORCE (33)
#endif
static int
netlink_recvbuf (struct nlsock *nl, uint32_t newsize)
{
u_int32_t oldsize;
socklen_t newlen = sizeof(newsize);
socklen_t oldlen = sizeof(oldsize);
int ret;
ret = getsockopt(nl->sock, SOL_SOCKET, SO_RCVBUF, &oldsize, &oldlen);
if (ret < 0)
{
zlog (NULL, LOG_ERR, "Can't get %s receive buffer size: %s", nl->name,
safe_strerror (errno));
return -1;
}
/* Try force option (linux >= 2.6.14) and fall back to normal set */
if ( zserv_privs.change (ZPRIVS_RAISE) )
zlog_err ("routing_socket: Can't raise privileges");
ret = setsockopt(nl->sock, SOL_SOCKET, SO_RCVBUFFORCE, &nl_rcvbufsize,
sizeof(nl_rcvbufsize));
if ( zserv_privs.change (ZPRIVS_LOWER) )
zlog_err ("routing_socket: Can't lower privileges");
if (ret < 0)
ret = setsockopt(nl->sock, SOL_SOCKET, SO_RCVBUF, &nl_rcvbufsize,
sizeof(nl_rcvbufsize));
if (ret < 0)
{
zlog (NULL, LOG_ERR, "Can't set %s receive buffer size: %s", nl->name,
safe_strerror (errno));
return -1;
}
ret = getsockopt(nl->sock, SOL_SOCKET, SO_RCVBUF, &newsize, &newlen);
if (ret < 0)
{
zlog (NULL, LOG_ERR, "Can't get %s receive buffer size: %s", nl->name,
safe_strerror (errno));
return -1;
}
zlog (NULL, LOG_INFO,
"Setting netlink socket receive buffer size: %u -> %u",
oldsize, newsize);
return 0;
}
/* Make socket for Linux netlink interface. */
static int
netlink_socket (struct nlsock *nl, unsigned long groups, vrf_id_t vrf_id)
{
int ret;
struct sockaddr_nl snl;
int sock;
int namelen;
int save_errno;
if (zserv_privs.change (ZPRIVS_RAISE))
{
zlog (NULL, LOG_ERR, "Can't raise privileges");
return -1;
}
sock = vrf_socket (AF_NETLINK, SOCK_RAW, NETLINK_ROUTE, vrf_id);
if (sock < 0)
{
zlog (NULL, LOG_ERR, "Can't open %s socket: %s", nl->name,
safe_strerror (errno));
return -1;
}
memset (&snl, 0, sizeof snl);
snl.nl_family = AF_NETLINK;
snl.nl_groups = groups;
/* Bind the socket to the netlink structure for anything. */
ret = bind (sock, (struct sockaddr *) &snl, sizeof snl);
save_errno = errno;
if (zserv_privs.change (ZPRIVS_LOWER))
zlog (NULL, LOG_ERR, "Can't lower privileges");
if (ret < 0)
{
zlog (NULL, LOG_ERR, "Can't bind %s socket to group 0x%x: %s",
nl->name, snl.nl_groups, safe_strerror (save_errno));
close (sock);
return -1;
}
/* multiple netlink sockets will have different nl_pid */
namelen = sizeof snl;
ret = getsockname (sock, (struct sockaddr *) &snl, (socklen_t *) &namelen);
if (ret < 0 || namelen != sizeof snl)
{
zlog (NULL, LOG_ERR, "Can't get %s socket name: %s", nl->name,
safe_strerror (errno));
close (sock);
return -1;
}
nl->snl = snl;
nl->sock = sock;
return ret;
}
/* Get type specified information from netlink. */
static int
netlink_request (int family, int type, struct nlsock *nl)
{
int ret;
struct sockaddr_nl snl;
int save_errno;
struct
{
struct nlmsghdr nlh;
struct rtgenmsg g;
} req;
/* Check netlink socket. */
if (nl->sock < 0)
{
zlog (NULL, LOG_ERR, "%s socket isn't active.", nl->name);
return -1;
}
memset (&snl, 0, sizeof snl);
snl.nl_family = AF_NETLINK;
memset (&req, 0, sizeof req);
req.nlh.nlmsg_len = sizeof req;
req.nlh.nlmsg_type = type;
req.nlh.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
req.nlh.nlmsg_pid = nl->snl.nl_pid;
req.nlh.nlmsg_seq = ++nl->seq;
req.g.rtgen_family = family;
/* linux appears to check capabilities on every message
* have to raise caps for every message sent
*/
if (zserv_privs.change (ZPRIVS_RAISE))
{
zlog (NULL, LOG_ERR, "Can't raise privileges");
return -1;
}
ret = sendto (nl->sock, (void *) &req, sizeof req, 0,
(struct sockaddr *) &snl, sizeof snl);
save_errno = errno;
if (zserv_privs.change (ZPRIVS_LOWER))
zlog (NULL, LOG_ERR, "Can't lower privileges");
if (ret < 0)
{
zlog (NULL, LOG_ERR, "%s sendto failed: %s", nl->name,
safe_strerror (save_errno));
return -1;
}
return 0;
}
/* Receive message from netlink interface and pass those information
to the given function. */
static int
netlink_parse_info (int (*filter) (struct sockaddr_nl *, struct nlmsghdr *,
vrf_id_t),
struct nlsock *nl, struct zebra_vrf *zvrf)
{
int status;
int ret = 0;
int error;
while (1)
{
char buf[NL_PKT_BUF_SIZE];
struct iovec iov = {
.iov_base = buf,
.iov_len = sizeof buf
};
struct sockaddr_nl snl;
struct msghdr msg = {
.msg_name = (void *) &snl,
.msg_namelen = sizeof snl,
.msg_iov = &iov,
.msg_iovlen = 1
};
struct nlmsghdr *h;
status = recvmsg (nl->sock, &msg, 0);
if (status < 0)
{
if (errno == EINTR)
continue;
if (errno == EWOULDBLOCK || errno == EAGAIN)
break;
zlog (NULL, LOG_ERR, "%s recvmsg overrun: %s",
nl->name, safe_strerror(errno));
continue;
}
if (status == 0)
{
zlog (NULL, LOG_ERR, "%s EOF", nl->name);
return -1;
}
if (msg.msg_namelen != sizeof snl)
{
zlog (NULL, LOG_ERR, "%s sender address length error: length %d",
nl->name, msg.msg_namelen);
return -1;
}
for (h = (struct nlmsghdr *) buf; NLMSG_OK (h, (unsigned int) status);
h = NLMSG_NEXT (h, status))
{
/* Finish of reading. */
if (h->nlmsg_type == NLMSG_DONE)
return ret;
/* Error handling. */
if (h->nlmsg_type == NLMSG_ERROR)
{
struct nlmsgerr *err = (struct nlmsgerr *) NLMSG_DATA (h);
int errnum = err->error;
int msg_type = err->msg.nlmsg_type;
/* If the error field is zero, then this is an ACK */
if (err->error == 0)
{
if (IS_ZEBRA_DEBUG_KERNEL)
{
zlog_debug ("%s: %s ACK: type=%s(%u), seq=%u, pid=%u",
__FUNCTION__, nl->name,
lookup (nlmsg_str, err->msg.nlmsg_type),
err->msg.nlmsg_type, err->msg.nlmsg_seq,
err->msg.nlmsg_pid);
}
/* return if not a multipart message, otherwise continue */
if (!(h->nlmsg_flags & NLM_F_MULTI))
{
return 0;
}
continue;
}
if (h->nlmsg_len < NLMSG_LENGTH (sizeof (struct nlmsgerr)))
{
zlog (NULL, LOG_ERR, "%s error: message truncated",
nl->name);
return -1;
}
/* Deal with errors that occur because of races in link handling */
if (nl == &zvrf->netlink_cmd
&& ((msg_type == RTM_DELROUTE &&
(-errnum == ENODEV || -errnum == ESRCH))
|| (msg_type == RTM_NEWROUTE && -errnum == EEXIST)))
{
if (IS_ZEBRA_DEBUG_KERNEL)
zlog_debug ("%s: error: %s type=%s(%u), seq=%u, pid=%u",
nl->name, safe_strerror (-errnum),
lookup (nlmsg_str, msg_type),
msg_type, err->msg.nlmsg_seq, err->msg.nlmsg_pid);
return 0;
}
zlog_err ("%s error: %s, type=%s(%u), seq=%u, pid=%u",
nl->name, safe_strerror (-errnum),
lookup (nlmsg_str, msg_type),
msg_type, err->msg.nlmsg_seq, err->msg.nlmsg_pid);
return -1;
}
/* OK we got netlink message. */
if (IS_ZEBRA_DEBUG_KERNEL)
zlog_debug ("netlink_parse_info: %s type %s(%u), seq=%u, pid=%u",
nl->name,
lookup (nlmsg_str, h->nlmsg_type), h->nlmsg_type,
h->nlmsg_seq, h->nlmsg_pid);
/* skip unsolicited messages originating from command socket
* linux sets the originators port-id for {NEW|DEL}ADDR messages,
* so this has to be checked here. */
if (nl != &zvrf->netlink_cmd
&& h->nlmsg_pid == zvrf->netlink_cmd.snl.nl_pid
&& (h->nlmsg_type != RTM_NEWADDR && h->nlmsg_type != RTM_DELADDR))
{
if (IS_ZEBRA_DEBUG_KERNEL)
zlog_debug ("netlink_parse_info: %s packet comes from %s",
zvrf->netlink_cmd.name, nl->name);
continue;
}
error = (*filter) (&snl, h, zvrf->vrf_id);
if (error < 0)
{
zlog (NULL, LOG_ERR, "%s filter function error", nl->name);
ret = error;
}
}
/* After error care. */
if (msg.msg_flags & MSG_TRUNC)
{
zlog (NULL, LOG_ERR, "%s error: message truncated", nl->name);
continue;
}
if (status)
{
zlog (NULL, LOG_ERR, "%s error: data remnant size %d", nl->name,
status);
return -1;
}
}
return ret;
}
/* Utility function for parse rtattr. */
static void
netlink_parse_rtattr (struct rtattr **tb, int max, struct rtattr *rta,
int len)
{
while (RTA_OK (rta, len))
{
if (rta->rta_type <= max)
tb[rta->rta_type] = rta;
rta = RTA_NEXT (rta, len);
}
}
/* Utility function to parse hardware link-layer address and update ifp */
static void
netlink_interface_update_hw_addr (struct rtattr **tb, struct interface *ifp)
{
int i;
if (tb[IFLA_ADDRESS])
{
int hw_addr_len;
hw_addr_len = RTA_PAYLOAD (tb[IFLA_ADDRESS]);
if (hw_addr_len > INTERFACE_HWADDR_MAX)
zlog_warn ("Hardware address is too large: %d", hw_addr_len);
else
{
ifp->hw_addr_len = hw_addr_len;
memcpy (ifp->hw_addr, RTA_DATA (tb[IFLA_ADDRESS]), hw_addr_len);
for (i = 0; i < hw_addr_len; i++)
if (ifp->hw_addr[i] != 0)
break;
if (i == hw_addr_len)
ifp->hw_addr_len = 0;
else
ifp->hw_addr_len = hw_addr_len;
}
}
}
/* Called from interface_lookup_netlink(). This function is only used
during bootstrap. */
static int
netlink_interface (struct sockaddr_nl *snl, struct nlmsghdr *h,
vrf_id_t vrf_id)
{
int len;
struct ifinfomsg *ifi;
struct rtattr *tb[IFLA_MAX + 1];
struct interface *ifp;
char *name;
ifi = NLMSG_DATA (h);
if (h->nlmsg_type != RTM_NEWLINK)
return 0;
len = h->nlmsg_len - NLMSG_LENGTH (sizeof (struct ifinfomsg));
if (len < 0)
return -1;
/* Looking up interface name. */
memset (tb, 0, sizeof tb);
netlink_parse_rtattr (tb, IFLA_MAX, IFLA_RTA (ifi), len);
#ifdef IFLA_WIRELESS
/* check for wireless messages to ignore */
if ((tb[IFLA_WIRELESS] != NULL) && (ifi->ifi_change == 0))
{
if (IS_ZEBRA_DEBUG_KERNEL)
zlog_debug ("%s: ignoring IFLA_WIRELESS message", __func__);
return 0;
}
#endif /* IFLA_WIRELESS */
if (tb[IFLA_IFNAME] == NULL)
return -1;
name = (char *) RTA_DATA (tb[IFLA_IFNAME]);
/* Add interface. */
ifp = if_get_by_name_vrf (name, vrf_id);
set_ifindex(ifp, ifi->ifi_index);
ifp->flags = ifi->ifi_flags & 0x0000fffff;
ifp->mtu6 = ifp->mtu = *(uint32_t *) RTA_DATA (tb[IFLA_MTU]);
ifp->metric = 0;
/* Hardware type and address. */
ifp->hw_type = ifi->ifi_type;
netlink_interface_update_hw_addr (tb, ifp);
if_add_update (ifp);
return 0;
}
/* Lookup interface IPv4/IPv6 address. */
static int
netlink_interface_addr (struct sockaddr_nl *snl, struct nlmsghdr *h,
vrf_id_t vrf_id)
{
int len;
struct ifaddrmsg *ifa;
struct rtattr *tb[IFA_MAX + 1];
struct interface *ifp;
void *addr;
void *broad;
u_char flags = 0;
char *label = NULL;
ifa = NLMSG_DATA (h);
if (ifa->ifa_family != AF_INET
#ifdef HAVE_IPV6
&& ifa->ifa_family != AF_INET6
#endif /* HAVE_IPV6 */
)
return 0;
if (h->nlmsg_type != RTM_NEWADDR && h->nlmsg_type != RTM_DELADDR)
return 0;
len = h->nlmsg_len - NLMSG_LENGTH (sizeof (struct ifaddrmsg));
if (len < 0)
return -1;
memset (tb, 0, sizeof tb);
netlink_parse_rtattr (tb, IFA_MAX, IFA_RTA (ifa), len);
ifp = if_lookup_by_index_vrf (ifa->ifa_index, vrf_id);
if (ifp == NULL)
{
zlog_err ("netlink_interface_addr can't find interface by index %d vrf %u",
ifa->ifa_index, vrf_id);
return -1;
}
if (IS_ZEBRA_DEBUG_KERNEL) /* remove this line to see initial ifcfg */
{
char buf[BUFSIZ];
zlog_debug ("netlink_interface_addr %s %s vrf %u:",
lookup (nlmsg_str, h->nlmsg_type), ifp->name, vrf_id);
if (tb[IFA_LOCAL])
zlog_debug (" IFA_LOCAL %s/%d",
inet_ntop (ifa->ifa_family, RTA_DATA (tb[IFA_LOCAL]),
buf, BUFSIZ), ifa->ifa_prefixlen);
if (tb[IFA_ADDRESS])
zlog_debug (" IFA_ADDRESS %s/%d",
inet_ntop (ifa->ifa_family, RTA_DATA (tb[IFA_ADDRESS]),
buf, BUFSIZ), ifa->ifa_prefixlen);
if (tb[IFA_BROADCAST])
zlog_debug (" IFA_BROADCAST %s/%d",
inet_ntop (ifa->ifa_family, RTA_DATA (tb[IFA_BROADCAST]),
buf, BUFSIZ), ifa->ifa_prefixlen);
if (tb[IFA_LABEL] && strcmp (ifp->name, RTA_DATA (tb[IFA_LABEL])))
zlog_debug (" IFA_LABEL %s", (char *)RTA_DATA (tb[IFA_LABEL]));
if (tb[IFA_CACHEINFO])
{
struct ifa_cacheinfo *ci = RTA_DATA (tb[IFA_CACHEINFO]);
zlog_debug (" IFA_CACHEINFO pref %d, valid %d",
ci->ifa_prefered, ci->ifa_valid);
}
}
/* logic copied from iproute2/ip/ipaddress.c:print_addrinfo() */
if (tb[IFA_LOCAL] == NULL)
tb[IFA_LOCAL] = tb[IFA_ADDRESS];
if (tb[IFA_ADDRESS] == NULL)
tb[IFA_ADDRESS] = tb[IFA_LOCAL];
/* local interface address */
addr = (tb[IFA_LOCAL] ? RTA_DATA(tb[IFA_LOCAL]) : NULL);
/* is there a peer address? */
if (tb[IFA_ADDRESS] &&
memcmp(RTA_DATA(tb[IFA_ADDRESS]), RTA_DATA(tb[IFA_LOCAL]), RTA_PAYLOAD(tb[IFA_ADDRESS])))
{
broad = RTA_DATA(tb[IFA_ADDRESS]);
SET_FLAG (flags, ZEBRA_IFA_PEER);
}
else
/* seeking a broadcast address */
broad = (tb[IFA_BROADCAST] ? RTA_DATA(tb[IFA_BROADCAST]) : NULL);
/* addr is primary key, SOL if we don't have one */
if (addr == NULL)
{
zlog_debug ("%s: NULL address", __func__);
return -1;
}
/* Flags. */
if (ifa->ifa_flags & IFA_F_SECONDARY)
SET_FLAG (flags, ZEBRA_IFA_SECONDARY);
/* Label */
if (tb[IFA_LABEL])
label = (char *) RTA_DATA (tb[IFA_LABEL]);
if (ifp && label && strcmp (ifp->name, label) == 0)
label = NULL;
/* Register interface address to the interface. */
if (ifa->ifa_family == AF_INET)
{
if (h->nlmsg_type == RTM_NEWADDR)
connected_add_ipv4 (ifp, flags,
(struct in_addr *) addr, ifa->ifa_prefixlen,
(struct in_addr *) broad, label);
else
connected_delete_ipv4 (ifp, flags,
(struct in_addr *) addr, ifa->ifa_prefixlen,
(struct in_addr *) broad);
}
#ifdef HAVE_IPV6
if (ifa->ifa_family == AF_INET6)
{
if (h->nlmsg_type == RTM_NEWADDR)
connected_add_ipv6 (ifp, flags,
(struct in6_addr *) addr, ifa->ifa_prefixlen,
(struct in6_addr *) broad, label);
else
connected_delete_ipv6 (ifp,
(struct in6_addr *) addr, ifa->ifa_prefixlen,
(struct in6_addr *) broad);
}
#endif /* HAVE_IPV6 */
return 0;
}
/* Looking up routing table by netlink interface. */
static int
netlink_routing_table (struct sockaddr_nl *snl, struct nlmsghdr *h,
vrf_id_t vrf_id)
{
int len;
struct rtmsg *rtm;
struct rtattr *tb[RTA_MAX + 1];
u_char flags = 0;
char anyaddr[16] = { 0 };
int index;
int table;
int metric;
u_int32_t mtu = 0;
void *dest;
void *gate;
void *src;
rtm = NLMSG_DATA (h);
if (h->nlmsg_type != RTM_NEWROUTE)
return 0;
if (rtm->rtm_type != RTN_UNICAST)
return 0;
table = rtm->rtm_table;
#if 0 /* we weed them out later in rib_weed_tables () */
if (table != RT_TABLE_MAIN && table != zebrad.rtm_table_default)
return 0;
#endif
len = h->nlmsg_len - NLMSG_LENGTH (sizeof (struct rtmsg));
if (len < 0)
return -1;
memset (tb, 0, sizeof tb);
netlink_parse_rtattr (tb, RTA_MAX, RTM_RTA (rtm), len);
if (rtm->rtm_flags & RTM_F_CLONED)
return 0;
if (rtm->rtm_protocol == RTPROT_REDIRECT)
return 0;
if (rtm->rtm_protocol == RTPROT_KERNEL)
return 0;
if (rtm->rtm_src_len != 0)
return 0;
/* Route which inserted by Zebra. */
if (rtm->rtm_protocol == RTPROT_ZEBRA)
flags |= ZEBRA_FLAG_SELFROUTE;
index = 0;
metric = 0;
dest = NULL;
gate = NULL;
src = NULL;
if (tb[RTA_OIF])
index = *(int *) RTA_DATA (tb[RTA_OIF]);
if (tb[RTA_DST])
dest = RTA_DATA (tb[RTA_DST]);
else
dest = anyaddr;
if (tb[RTA_PREFSRC])
src = RTA_DATA (tb[RTA_PREFSRC]);
if (tb[RTA_GATEWAY])
gate = RTA_DATA (tb[RTA_GATEWAY]);
if (tb[RTA_PRIORITY])
metric = *(int *) RTA_DATA(tb[RTA_PRIORITY]);
if (tb[RTA_METRICS])
{
struct rtattr *mxrta[RTAX_MAX+1];
memset (mxrta, 0, sizeof mxrta);
netlink_parse_rtattr (mxrta, RTAX_MAX, RTA_DATA(tb[RTA_METRICS]),
RTA_PAYLOAD(tb[RTA_METRICS]));
if (mxrta[RTAX_MTU])
mtu = *(u_int32_t *) RTA_DATA(mxrta[RTAX_MTU]);
}
if (rtm->rtm_family == AF_INET)
{
struct prefix_ipv4 p;
p.family = AF_INET;
memcpy (&p.prefix, dest, 4);
p.prefixlen = rtm->rtm_dst_len;
if (!tb[RTA_MULTIPATH])
rib_add_ipv4 (ZEBRA_ROUTE_KERNEL, flags, &p, gate, src, index,
vrf_id, table, metric, mtu, 0, SAFI_UNICAST);
else
{
/* This is a multipath route */
struct rib *rib;
struct rtnexthop *rtnh =
(struct rtnexthop *) RTA_DATA (tb[RTA_MULTIPATH]);
len = RTA_PAYLOAD (tb[RTA_MULTIPATH]);
rib = XCALLOC (MTYPE_RIB, sizeof (struct rib));
rib->type = ZEBRA_ROUTE_KERNEL;
rib->distance = 0;
rib->flags = flags;
rib->metric = metric;
rib->mtu = mtu;
rib->vrf_id = vrf_id;
rib->table = table;
rib->nexthop_num = 0;
rib->uptime = time (NULL);
for (;;)
{
if (len < (int) sizeof (*rtnh) || rtnh->rtnh_len > len)
break;
index = rtnh->rtnh_ifindex;
gate = 0;
if (rtnh->rtnh_len > sizeof (*rtnh))
{
memset (tb, 0, sizeof (tb));
netlink_parse_rtattr (tb, RTA_MAX, RTNH_DATA (rtnh),
rtnh->rtnh_len - sizeof (*rtnh));
if (tb[RTA_GATEWAY])
gate = RTA_DATA (tb[RTA_GATEWAY]);
}
if (gate)
{
if (index)
nexthop_ipv4_ifindex_add (rib, gate, src, index);
else
nexthop_ipv4_add (rib, gate, src);
}
else
nexthop_ifindex_add (rib, index);
len -= NLMSG_ALIGN(rtnh->rtnh_len);
rtnh = RTNH_NEXT(rtnh);
}
if (rib->nexthop_num == 0)
XFREE (MTYPE_RIB, rib);
else
rib_add_ipv4_multipath (&p, rib, SAFI_UNICAST);
}
}
#ifdef HAVE_IPV6
if (rtm->rtm_family == AF_INET6)
{
struct prefix_ipv6 p;
p.family = AF_INET6;
memcpy (&p.prefix, dest, 16);
p.prefixlen = rtm->rtm_dst_len;
rib_add_ipv6 (ZEBRA_ROUTE_KERNEL, flags, &p, gate, index, vrf_id,
table, metric, mtu, 0, SAFI_UNICAST);
}
#endif /* HAVE_IPV6 */
return 0;
}
static const struct message rtproto_str[] = {
{RTPROT_REDIRECT, "redirect"},
{RTPROT_KERNEL, "kernel"},
{RTPROT_BOOT, "boot"},
{RTPROT_STATIC, "static"},
{RTPROT_GATED, "GateD"},
{RTPROT_RA, "router advertisement"},
{RTPROT_MRT, "MRT"},
{RTPROT_ZEBRA, "Zebra"},
#ifdef RTPROT_BIRD
{RTPROT_BIRD, "BIRD"},
#endif /* RTPROT_BIRD */
{0, NULL}
};
/* Routing information change from the kernel. */
static int
netlink_route_change (struct sockaddr_nl *snl, struct nlmsghdr *h,
vrf_id_t vrf_id)
{
int len;
struct rtmsg *rtm;
struct rtattr *tb[RTA_MAX + 1];
char anyaddr[16] = { 0 };
int index;
int table;
int metric;
u_int32_t mtu = 0;
void *dest;
void *gate;
void *src;
rtm = NLMSG_DATA (h);
if (!(h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE))
{
/* If this is not route add/delete message print warning. */
zlog_warn ("Kernel message: %d vrf %u\n", h->nlmsg_type, vrf_id);
return 0;
}
/* Connected route. */
if (IS_ZEBRA_DEBUG_KERNEL)
zlog_debug ("%s %s %s proto %s vrf %u",
h->nlmsg_type ==
RTM_NEWROUTE ? "RTM_NEWROUTE" : "RTM_DELROUTE",
rtm->rtm_family == AF_INET ? "ipv4" : "ipv6",
rtm->rtm_type == RTN_UNICAST ? "unicast" : "multicast",
lookup (rtproto_str, rtm->rtm_protocol),
vrf_id);
if (rtm->rtm_type != RTN_UNICAST)
{
return 0;
}
table = rtm->rtm_table;
if (table != RT_TABLE_MAIN && table != zebrad.rtm_table_default)
{
return 0;
}
len = h->nlmsg_len - NLMSG_LENGTH (sizeof (struct rtmsg));
if (len < 0)
return -1;
memset (tb, 0, sizeof tb);
netlink_parse_rtattr (tb, RTA_MAX, RTM_RTA (rtm), len);
if (rtm->rtm_flags & RTM_F_CLONED)
return 0;
if (rtm->rtm_protocol == RTPROT_REDIRECT)
return 0;
if (rtm->rtm_protocol == RTPROT_KERNEL)
return 0;
if (rtm->rtm_protocol == RTPROT_ZEBRA && h->nlmsg_type == RTM_NEWROUTE)
return 0;
if (rtm->rtm_src_len != 0)
{
zlog_warn ("netlink_route_change(): no src len, vrf %u", vrf_id);
return 0;
}
index = 0;
metric = 0;
dest = NULL;
gate = NULL;
src = NULL;
if (tb[RTA_OIF])
index = *(int *) RTA_DATA (tb[RTA_OIF]);
if (tb[RTA_DST])
dest = RTA_DATA (tb[RTA_DST]);
else
dest = anyaddr;
if (tb[RTA_GATEWAY])
gate = RTA_DATA (tb[RTA_GATEWAY]);
if (tb[RTA_PREFSRC])
src = RTA_DATA (tb[RTA_PREFSRC]);
if (h->nlmsg_type == RTM_NEWROUTE)
{
if (tb[RTA_PRIORITY])
metric = *(int *) RTA_DATA(tb[RTA_PRIORITY]);
if (tb[RTA_METRICS])
{
struct rtattr *mxrta[RTAX_MAX+1];
memset (mxrta, 0, sizeof mxrta);
netlink_parse_rtattr (mxrta, RTAX_MAX, RTA_DATA(tb[RTA_METRICS]),
RTA_PAYLOAD(tb[RTA_METRICS]));
if (mxrta[RTAX_MTU])
mtu = *(u_int32_t *) RTA_DATA(mxrta[RTAX_MTU]);
}
}
if (rtm->rtm_family == AF_INET)
{
struct prefix_ipv4 p;
p.family = AF_INET;
memcpy (&p.prefix, dest, 4);
p.prefixlen = rtm->rtm_dst_len;
if (IS_ZEBRA_DEBUG_KERNEL)
{
char buf[PREFIX_STRLEN];
zlog_debug ("%s %s vrf %u",
h->nlmsg_type == RTM_NEWROUTE ? "RTM_NEWROUTE" : "RTM_DELROUTE",
prefix2str (&p, buf, sizeof(buf)), vrf_id);
}
if (h->nlmsg_type == RTM_NEWROUTE)
{
if (!tb[RTA_MULTIPATH])
rib_add_ipv4 (ZEBRA_ROUTE_KERNEL, 0, &p, gate, src, index, vrf_id,
table, metric, mtu, 0, SAFI_UNICAST);
else
{
/* This is a multipath route */
struct rib *rib;
struct rtnexthop *rtnh =
(struct rtnexthop *) RTA_DATA (tb[RTA_MULTIPATH]);
len = RTA_PAYLOAD (tb[RTA_MULTIPATH]);
rib = XCALLOC (MTYPE_RIB, sizeof (struct rib));
rib->type = ZEBRA_ROUTE_KERNEL;
rib->distance = 0;
rib->flags = 0;
rib->metric = metric;
rib->mtu = mtu;
rib->vrf_id = vrf_id;
rib->table = table;
rib->nexthop_num = 0;
rib->uptime = time (NULL);
for (;;)
{
if (len < (int) sizeof (*rtnh) || rtnh->rtnh_len > len)
break;
index = rtnh->rtnh_ifindex;
gate = 0;
if (rtnh->rtnh_len > sizeof (*rtnh))
{
memset (tb, 0, sizeof (tb));
netlink_parse_rtattr (tb, RTA_MAX, RTNH_DATA (rtnh),
rtnh->rtnh_len - sizeof (*rtnh));
if (tb[RTA_GATEWAY])
gate = RTA_DATA (tb[RTA_GATEWAY]);
}
if (gate)
{
if (index)
nexthop_ipv4_ifindex_add (rib, gate, src, index);
else
nexthop_ipv4_add (rib, gate, src);
}
else
nexthop_ifindex_add (rib, index);
len -= NLMSG_ALIGN(rtnh->rtnh_len);
rtnh = RTNH_NEXT(rtnh);
}
if (rib->nexthop_num == 0)
XFREE (MTYPE_RIB, rib);
else
rib_add_ipv4_multipath (&p, rib, SAFI_UNICAST);
}
}
else
rib_delete_ipv4 (ZEBRA_ROUTE_KERNEL, 0, &p, gate, index, vrf_id,
SAFI_UNICAST);
}
#ifdef HAVE_IPV6
if (rtm->rtm_family == AF_INET6)
{
struct prefix_ipv6 p;
p.family = AF_INET6;
memcpy (&p.prefix, dest, 16);
p.prefixlen = rtm->rtm_dst_len;
if (IS_ZEBRA_DEBUG_KERNEL)
{
char buf[PREFIX_STRLEN];
zlog_debug ("%s %s vrf %u",
h->nlmsg_type == RTM_NEWROUTE ? "RTM_NEWROUTE" : "RTM_DELROUTE",
prefix2str (&p, buf, sizeof(buf)), vrf_id);
}
if (h->nlmsg_type == RTM_NEWROUTE)
rib_add_ipv6 (ZEBRA_ROUTE_KERNEL, 0, &p, gate, index, vrf_id, table,
metric, mtu, 0, SAFI_UNICAST);
else
rib_delete_ipv6 (ZEBRA_ROUTE_KERNEL, 0, &p, gate, index, vrf_id,
SAFI_UNICAST);
}
#endif /* HAVE_IPV6 */
return 0;
}
static int
netlink_link_change (struct sockaddr_nl *snl, struct nlmsghdr *h,
vrf_id_t vrf_id)
{
int len;
struct ifinfomsg *ifi;
struct rtattr *tb[IFLA_MAX + 1];
struct interface *ifp;
char *name;
ifi = NLMSG_DATA (h);
if (!(h->nlmsg_type == RTM_NEWLINK || h->nlmsg_type == RTM_DELLINK))
{
/* If this is not link add/delete message so print warning. */
zlog_warn ("netlink_link_change: wrong kernel message %d vrf %u\n",
h->nlmsg_type, vrf_id);
return 0;
}
len = h->nlmsg_len - NLMSG_LENGTH (sizeof (struct ifinfomsg));
if (len < 0)
return -1;
/* Looking up interface name. */
memset (tb, 0, sizeof tb);
netlink_parse_rtattr (tb, IFLA_MAX, IFLA_RTA (ifi), len);
#ifdef IFLA_WIRELESS
/* check for wireless messages to ignore */
if ((tb[IFLA_WIRELESS] != NULL) && (ifi->ifi_change == 0))
{
if (IS_ZEBRA_DEBUG_KERNEL)
zlog_debug ("%s: ignoring IFLA_WIRELESS message, vrf %u", __func__,
vrf_id);
return 0;
}
#endif /* IFLA_WIRELESS */
if (tb[IFLA_IFNAME] == NULL)
return -1;
name = (char *) RTA_DATA (tb[IFLA_IFNAME]);
/* Add interface. */
if (h->nlmsg_type == RTM_NEWLINK)
{
ifp = if_lookup_by_name_vrf (name, vrf_id);
if (ifp == NULL || !CHECK_FLAG (ifp->status, ZEBRA_INTERFACE_ACTIVE))
{
if (ifp == NULL)
ifp = if_get_by_name_vrf (name, vrf_id);
set_ifindex(ifp, ifi->ifi_index);
ifp->flags = ifi->ifi_flags & 0x0000fffff;
ifp->mtu6 = ifp->mtu = *(int *) RTA_DATA (tb[IFLA_MTU]);
ifp->metric = 0;
netlink_interface_update_hw_addr (tb, ifp);
/* If new link is added. */
if_add_update (ifp);
}
else
{
/* Interface status change. */
set_ifindex(ifp, ifi->ifi_index);
ifp->mtu6 = ifp->mtu = *(int *) RTA_DATA (tb[IFLA_MTU]);
ifp->metric = 0;
netlink_interface_update_hw_addr (tb, ifp);
if (if_is_operative (ifp))
{
ifp->flags = ifi->ifi_flags & 0x0000fffff;
if (!if_is_operative (ifp))
if_down (ifp);
else
/* Must notify client daemons of new interface status. */
zebra_interface_up_update (ifp);
}
else
{
ifp->flags = ifi->ifi_flags & 0x0000fffff;
if (if_is_operative (ifp))
if_up (ifp);
}
}
}
else
{
/* RTM_DELLINK. */
ifp = if_lookup_by_name_vrf (name, vrf_id);
if (ifp == NULL)
{
zlog_warn ("interface %s vrf %u is deleted but can't find",
name, vrf_id);
return 0;
}
if_delete_update (ifp);
}
return 0;
}
static int
netlink_information_fetch (struct sockaddr_nl *snl, struct nlmsghdr *h,
vrf_id_t vrf_id)
{
/* JF: Ignore messages that aren't from the kernel */
if ( snl->nl_pid != 0 )
{
zlog ( NULL, LOG_ERR, "Ignoring message from pid %u", snl->nl_pid );
return 0;
}
switch (h->nlmsg_type)
{
case RTM_NEWROUTE:
return netlink_route_change (snl, h, vrf_id);
break;
case RTM_DELROUTE:
return netlink_route_change (snl, h, vrf_id);
break;
case RTM_NEWLINK:
return netlink_link_change (snl, h, vrf_id);
break;
case RTM_DELLINK:
return netlink_link_change (snl, h, vrf_id);
break;
case RTM_NEWADDR:
return netlink_interface_addr (snl, h, vrf_id);
break;
case RTM_DELADDR:
return netlink_interface_addr (snl, h, vrf_id);
break;
default:
zlog_warn ("Unknown netlink nlmsg_type %d vrf %u\n", h->nlmsg_type,
vrf_id);
break;
}
return 0;
}
/* Interface lookup by netlink socket. */
int
interface_lookup_netlink (struct zebra_vrf *zvrf)
{
int ret;
/* Get interface information. */
ret = netlink_request (AF_PACKET, RTM_GETLINK, &zvrf->netlink_cmd);
if (ret < 0)
return ret;
ret = netlink_parse_info (netlink_interface, &zvrf->netlink_cmd, zvrf);
if (ret < 0)
return ret;
/* Get IPv4 address of the interfaces. */
ret = netlink_request (AF_INET, RTM_GETADDR, &zvrf->netlink_cmd);
if (ret < 0)
return ret;
ret = netlink_parse_info (netlink_interface_addr, &zvrf->netlink_cmd, zvrf);
if (ret < 0)
return ret;
#ifdef HAVE_IPV6
/* Get IPv6 address of the interfaces. */
ret = netlink_request (AF_INET6, RTM_GETADDR, &zvrf->netlink_cmd);
if (ret < 0)
return ret;
ret = netlink_parse_info (netlink_interface_addr, &zvrf->netlink_cmd, zvrf);
if (ret < 0)
return ret;
#endif /* HAVE_IPV6 */
return 0;
}
/* Routing table read function using netlink interface. Only called
bootstrap time. */
int
netlink_route_read (struct zebra_vrf *zvrf)
{
int ret;
/* Get IPv4 routing table. */
ret = netlink_request (AF_INET, RTM_GETROUTE, &zvrf->netlink_cmd);
if (ret < 0)
return ret;
ret = netlink_parse_info (netlink_routing_table, &zvrf->netlink_cmd, zvrf);
if (ret < 0)
return ret;
#ifdef HAVE_IPV6
/* Get IPv6 routing table. */
ret = netlink_request (AF_INET6, RTM_GETROUTE, &zvrf->netlink_cmd);
if (ret < 0)
return ret;
ret = netlink_parse_info (netlink_routing_table, &zvrf->netlink_cmd, zvrf);
if (ret < 0)
return ret;
#endif /* HAVE_IPV6 */
return 0;
}
/* Utility function comes from iproute2.
Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> */
int
addattr_l (struct nlmsghdr *n, size_t maxlen, int type, void *data, int alen)
{
size_t len;
struct rtattr *rta;
len = RTA_LENGTH (alen);
if (NLMSG_ALIGN (n->nlmsg_len) + len > maxlen)
return -1;
rta = (struct rtattr *) (((char *) n) + NLMSG_ALIGN (n->nlmsg_len));
rta->rta_type = type;
rta->rta_len = len;
memcpy (RTA_DATA (rta), data, alen);
n->nlmsg_len = NLMSG_ALIGN (n->nlmsg_len) + len;
return 0;
}
int
rta_addattr_l (struct rtattr *rta, int maxlen, int type, void *data, int alen)
{
int len;
struct rtattr *subrta;
len = RTA_LENGTH (alen);
if (RTA_ALIGN (rta->rta_len) + len > maxlen)
return -1;
subrta = (struct rtattr *) (((char *) rta) + RTA_ALIGN (rta->rta_len));
subrta->rta_type = type;
subrta->rta_len = len;
memcpy (RTA_DATA (subrta), data, alen);
rta->rta_len = NLMSG_ALIGN (rta->rta_len) + len;
return 0;
}
/* Utility function comes from iproute2.
Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> */
int
addattr32 (struct nlmsghdr *n, size_t maxlen, int type, int data)
{
size_t len;
struct rtattr *rta;
len = RTA_LENGTH (4);
if (NLMSG_ALIGN (n->nlmsg_len) + len > maxlen)
return -1;
rta = (struct rtattr *) (((char *) n) + NLMSG_ALIGN (n->nlmsg_len));
rta->rta_type = type;
rta->rta_len = len;
memcpy (RTA_DATA (rta), &data, 4);
n->nlmsg_len = NLMSG_ALIGN (n->nlmsg_len) + len;
return 0;
}
static int
netlink_talk_filter (struct sockaddr_nl *snl, struct nlmsghdr *h,
vrf_id_t vrf_id)
{
zlog_warn ("netlink_talk: ignoring message type 0x%04x vrf %u", h->nlmsg_type,
vrf_id);
return 0;
}
/* sendmsg() to netlink socket then recvmsg(). */
static int
netlink_talk (struct nlmsghdr *n, struct nlsock *nl, struct zebra_vrf *zvrf)
{
int status;
struct sockaddr_nl snl;
struct iovec iov = {
.iov_base = (void *) n,
.iov_len = n->nlmsg_len
};
struct msghdr msg = {
.msg_name = (void *) &snl,
.msg_namelen = sizeof snl,
.msg_iov = &iov,
.msg_iovlen = 1,
};
int save_errno;
memset (&snl, 0, sizeof snl);
snl.nl_family = AF_NETLINK;
n->nlmsg_seq = ++nl->seq;
/* Request an acknowledgement by setting NLM_F_ACK */
n->nlmsg_flags |= NLM_F_ACK;
if (IS_ZEBRA_DEBUG_KERNEL)
zlog_debug ("netlink_talk: %s type %s(%u), seq=%u", nl->name,
lookup (nlmsg_str, n->nlmsg_type), n->nlmsg_type,
n->nlmsg_seq);
/* Send message to netlink interface. */
if (zserv_privs.change (ZPRIVS_RAISE))
zlog (NULL, LOG_ERR, "Can't raise privileges");
status = sendmsg (nl->sock, &msg, 0);
save_errno = errno;
if (zserv_privs.change (ZPRIVS_LOWER))
zlog (NULL, LOG_ERR, "Can't lower privileges");
if (status < 0)
{
zlog (NULL, LOG_ERR, "netlink_talk sendmsg() error: %s",
safe_strerror (save_errno));
return -1;
}
/*
* Get reply from netlink socket.
* The reply should either be an acknowlegement or an error.
*/
return netlink_parse_info (netlink_talk_filter, nl, zvrf);
}
/* This function takes a nexthop as argument and adds
* the appropriate netlink attributes to an existing
* netlink message.
*
* @param routedesc: Human readable description of route type
* (direct/recursive, single-/multipath)
* @param bytelen: Length of addresses in bytes.
* @param nexthop: Nexthop information
* @param nlmsg: nlmsghdr structure to fill in.
* @param req_size: The size allocated for the message.
*/
static void
_netlink_route_build_singlepath(
const char *routedesc,
int bytelen,
struct nexthop *nexthop,
struct nlmsghdr *nlmsg,
struct rtmsg *rtmsg,
size_t req_size)
{
if (CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_ONLINK))
rtmsg->rtm_flags |= RTNH_F_ONLINK;
if (nexthop->type == NEXTHOP_TYPE_IPV4
|| nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX)
{
addattr_l (nlmsg, req_size, RTA_GATEWAY,
&nexthop->gate.ipv4, bytelen);
if (nexthop->src.ipv4.s_addr)
addattr_l (nlmsg, req_size, RTA_PREFSRC,
&nexthop->src.ipv4, bytelen);
if (IS_ZEBRA_DEBUG_KERNEL)
zlog_debug("netlink_route_multipath() (%s): "
"nexthop via %s if %u",
routedesc,
inet_ntoa (nexthop->gate.ipv4),
nexthop->ifindex);
}
#ifdef HAVE_IPV6
if (nexthop->type == NEXTHOP_TYPE_IPV6
|| nexthop->type == NEXTHOP_TYPE_IPV6_IFNAME
|| nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX)
{
addattr_l (nlmsg, req_size, RTA_GATEWAY,
&nexthop->gate.ipv6, bytelen);
if (IS_ZEBRA_DEBUG_KERNEL)
zlog_debug("netlink_route_multipath() (%s): "
"nexthop via %s if %u",
routedesc,
inet6_ntoa (nexthop->gate.ipv6),
nexthop->ifindex);
}
#endif /* HAVE_IPV6 */
if (nexthop->type == NEXTHOP_TYPE_IFINDEX
|| nexthop->type == NEXTHOP_TYPE_IFNAME
|| nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX)
{
addattr32 (nlmsg, req_size, RTA_OIF, nexthop->ifindex);
if (nexthop->src.ipv4.s_addr)
addattr_l (nlmsg, req_size, RTA_PREFSRC,
&nexthop->src.ipv4, bytelen);
if (IS_ZEBRA_DEBUG_KERNEL)
zlog_debug("netlink_route_multipath() (%s): "
"nexthop via if %u", routedesc, nexthop->ifindex);
}
if (nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX
|| nexthop->type == NEXTHOP_TYPE_IPV6_IFNAME)
{
addattr32 (nlmsg, req_size, RTA_OIF, nexthop->ifindex);
if (IS_ZEBRA_DEBUG_KERNEL)
zlog_debug("netlink_route_multipath() (%s): "
"nexthop via if %u", routedesc, nexthop->ifindex);
}
}
/* This function takes a nexthop as argument and
* appends to the given rtattr/rtnexthop pair the
* representation of the nexthop. If the nexthop
* defines a preferred source, the src parameter
* will be modified to point to that src, otherwise
* it will be kept unmodified.
*
* @param routedesc: Human readable description of route type
* (direct/recursive, single-/multipath)
* @param bytelen: Length of addresses in bytes.
* @param nexthop: Nexthop information
* @param rta: rtnetlink attribute structure
* @param rtnh: pointer to an rtnetlink nexthop structure
* @param src: pointer pointing to a location where
* the prefsrc should be stored.
*/
static void
_netlink_route_build_multipath(
const char *routedesc,
int bytelen,
struct nexthop *nexthop,
struct rtattr *rta,
struct rtnexthop *rtnh,
union g_addr **src
)
{
rtnh->rtnh_len = sizeof (*rtnh);
rtnh->rtnh_flags = 0;
rtnh->rtnh_hops = 0;
rta->rta_len += rtnh->rtnh_len;
if (CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_ONLINK))
rtnh->rtnh_flags |= RTNH_F_ONLINK;
if (nexthop->type == NEXTHOP_TYPE_IPV4
|| nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX)
{
rta_addattr_l (rta, NL_PKT_BUF_SIZE, RTA_GATEWAY,
&nexthop->gate.ipv4, bytelen);
rtnh->rtnh_len += sizeof (struct rtattr) + bytelen;
if (nexthop->src.ipv4.s_addr)
*src = &nexthop->src;
if (IS_ZEBRA_DEBUG_KERNEL)
zlog_debug("netlink_route_multipath() (%s): "
"nexthop via %s if %u",
routedesc,
inet_ntoa (nexthop->gate.ipv4),
nexthop->ifindex);
}
#ifdef HAVE_IPV6
if (nexthop->type == NEXTHOP_TYPE_IPV6
|| nexthop->type == NEXTHOP_TYPE_IPV6_IFNAME
|| nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX)
{
rta_addattr_l (rta, NL_PKT_BUF_SIZE, RTA_GATEWAY,
&nexthop->gate.ipv6, bytelen);
rtnh->rtnh_len += sizeof (struct rtattr) + bytelen;
if (IS_ZEBRA_DEBUG_KERNEL)
zlog_debug("netlink_route_multipath() (%s): "
"nexthop via %s if %u",
routedesc,
inet6_ntoa (nexthop->gate.ipv6),
nexthop->ifindex);
}
#endif /* HAVE_IPV6 */
/* ifindex */
if (nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX
|| nexthop->type == NEXTHOP_TYPE_IFINDEX
|| nexthop->type == NEXTHOP_TYPE_IFNAME)
{
rtnh->rtnh_ifindex = nexthop->ifindex;
if (nexthop->src.ipv4.s_addr)
*src = &nexthop->src;
if (IS_ZEBRA_DEBUG_KERNEL)
zlog_debug("netlink_route_multipath() (%s): "
"nexthop via if %u", routedesc, nexthop->ifindex);
}
else if (nexthop->type == NEXTHOP_TYPE_IPV6_IFNAME
|| nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX)
{
rtnh->rtnh_ifindex = nexthop->ifindex;
if (IS_ZEBRA_DEBUG_KERNEL)
zlog_debug("netlink_route_multipath() (%s): "
"nexthop via if %u", routedesc, nexthop->ifindex);
}
else
{
rtnh->rtnh_ifindex = 0;
}
}
/* Log debug information for netlink_route_multipath
* if debug logging is enabled.
*
* @param cmd: Netlink command which is to be processed
* @param p: Prefix for which the change is due
* @param nexthop: Nexthop which is currently processed
* @param routedesc: Semantic annotation for nexthop
* (recursive, multipath, etc.)
* @param family: Address family which the change concerns
*/
static void
_netlink_route_debug(
int cmd,
struct prefix *p,
struct nexthop *nexthop,
const char *routedesc,
int family,
struct zebra_vrf *zvrf)
{
if (IS_ZEBRA_DEBUG_KERNEL)
{
char buf[PREFIX_STRLEN];
zlog_debug ("netlink_route_multipath() (%s): %s %s vrf %u type %s",
routedesc,
lookup (nlmsg_str, cmd),
prefix2str (p, buf, sizeof(buf)),
zvrf->vrf_id,
nexthop_type_to_str (nexthop->type));
}
}
/* Routing table change via netlink interface. */
static int
netlink_route_multipath (int cmd, struct prefix *p, struct rib *rib)
{
int bytelen;
struct sockaddr_nl snl;
struct nexthop *nexthop = NULL, *tnexthop;
int recursing;
int nexthop_num;
int discard;
int family = PREFIX_FAMILY(p);
const char *routedesc;
struct
{
struct nlmsghdr n;
struct rtmsg r;
char buf[NL_PKT_BUF_SIZE];
} req;
struct zebra_vrf *zvrf = vrf_info_lookup (rib->vrf_id);
memset (&req, 0, sizeof req - NL_PKT_BUF_SIZE);
bytelen = (family == AF_INET ? 4 : 16);
req.n.nlmsg_len = NLMSG_LENGTH (sizeof (struct rtmsg));
req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REPLACE | NLM_F_REQUEST;
req.n.nlmsg_type = cmd;
req.r.rtm_family = family;
req.r.rtm_table = rib->table;
req.r.rtm_dst_len = p->prefixlen;
req.r.rtm_protocol = RTPROT_ZEBRA;
req.r.rtm_scope = RT_SCOPE_UNIVERSE;
if ((rib->flags & ZEBRA_FLAG_BLACKHOLE) || (rib->flags & ZEBRA_FLAG_REJECT))
discard = 1;
else
discard = 0;
if (cmd == RTM_NEWROUTE)
{
if (discard)
{
if (rib->flags & ZEBRA_FLAG_BLACKHOLE)
req.r.rtm_type = RTN_BLACKHOLE;
else if (rib->flags & ZEBRA_FLAG_REJECT)
req.r.rtm_type = RTN_UNREACHABLE;
else
assert (RTN_BLACKHOLE != RTN_UNREACHABLE); /* false */
}
else
req.r.rtm_type = RTN_UNICAST;
}
addattr_l (&req.n, sizeof req, RTA_DST, &p->u.prefix, bytelen);
/* Metric. */
addattr32 (&req.n, sizeof req, RTA_PRIORITY, rib->metric);
if (rib->mtu || rib->nexthop_mtu)
{
char buf[NL_PKT_BUF_SIZE];
struct rtattr *rta = (void *) buf;
u_int32_t mtu = rib->mtu;
if (!mtu || (rib->nexthop_mtu && rib->nexthop_mtu < mtu))
mtu = rib->nexthop_mtu;
rta->rta_type = RTA_METRICS;
rta->rta_len = RTA_LENGTH(0);
rta_addattr_l (rta, NL_PKT_BUF_SIZE, RTAX_MTU, &mtu, sizeof mtu);
addattr_l (&req.n, NL_PKT_BUF_SIZE, RTA_METRICS, RTA_DATA (rta),
RTA_PAYLOAD (rta));
}
if (discard)
{
if (cmd == RTM_NEWROUTE)
for (ALL_NEXTHOPS_RO(rib->nexthop, nexthop, tnexthop, recursing))
{
/* We shouldn't encounter recursive nexthops on discard routes,
* but it is probably better to handle that case correctly anyway.
*/
if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE))
continue;
SET_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB);
}
goto skip;
}
/* Count overall nexthops so we can decide whether to use singlepath
* or multipath case. */
nexthop_num = 0;
for (ALL_NEXTHOPS_RO(rib->nexthop, nexthop, tnexthop, recursing))
{
if (CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_RECURSIVE))
continue;
if (cmd == RTM_NEWROUTE && !CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE))
continue;
if (cmd == RTM_DELROUTE && !CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB))
continue;
nexthop_num++;
}
/* Singlepath case. */
if (nexthop_num == 1 || MULTIPATH_NUM == 1)
{
nexthop_num = 0;
for (ALL_NEXTHOPS_RO(rib->nexthop, nexthop, tnexthop, recursing))
{
if (CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_RECURSIVE))
continue;
if ((cmd == RTM_NEWROUTE
&& CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_ACTIVE))
|| (cmd == RTM_DELROUTE
&& CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB)))
{
routedesc = recursing ? "recursive, 1 hop" : "single hop";
_netlink_route_debug(cmd, p, nexthop, routedesc, family, zvrf);
_netlink_route_build_singlepath(routedesc, bytelen,
nexthop, &req.n, &req.r,
sizeof req);
if (cmd == RTM_NEWROUTE)
SET_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB);
nexthop_num++;
break;
}
}
}
else
{
char buf[NL_PKT_BUF_SIZE];
struct rtattr *rta = (void *) buf;
struct rtnexthop *rtnh;
union g_addr *src = NULL;
rta->rta_type = RTA_MULTIPATH;
rta->rta_len = RTA_LENGTH (0);
rtnh = RTA_DATA (rta);
nexthop_num = 0;
for (ALL_NEXTHOPS_RO(rib->nexthop, nexthop, tnexthop, recursing))
{
if (MULTIPATH_NUM != 0 && nexthop_num >= MULTIPATH_NUM)
break;
if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE))
continue;
if ((cmd == RTM_NEWROUTE
&& CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_ACTIVE))
|| (cmd == RTM_DELROUTE
&& CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB)))
{
routedesc = recursing ? "recursive, multihop" : "multihop";
nexthop_num++;
_netlink_route_debug(cmd, p, nexthop,
routedesc, family, zvrf);
_netlink_route_build_multipath(routedesc, bytelen,
nexthop, rta, rtnh, &src);
rtnh = RTNH_NEXT (rtnh);
if (cmd == RTM_NEWROUTE)
SET_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB);
}
}
if (src)
addattr_l (&req.n, sizeof req, RTA_PREFSRC, &src->ipv4, bytelen);
if (rta->rta_len > RTA_LENGTH (0))
addattr_l (&req.n, NL_PKT_BUF_SIZE, RTA_MULTIPATH, RTA_DATA (rta),
RTA_PAYLOAD (rta));
}
/* If there is no useful nexthop then return. */
if (nexthop_num == 0)
{
if (IS_ZEBRA_DEBUG_KERNEL)
zlog_debug ("netlink_route_multipath(): No useful nexthop.");
return 0;
}
skip:
/* Destination netlink address. */
memset (&snl, 0, sizeof snl);
snl.nl_family = AF_NETLINK;
/* Talk to netlink socket. */
return netlink_talk (&req.n, &zvrf->netlink_cmd, zvrf);
}
int
kernel_route_rib (struct prefix *p, struct rib *old, struct rib *new)
{
int ret;
if (!old && new)
return netlink_route_multipath (RTM_NEWROUTE, p, new);
if (old && !new)
return netlink_route_multipath (RTM_DELROUTE, p, old);
/* Replace, can be done atomically if metric does not change;
* netlink uses [prefix, tos, priority] to identify prefix */
if (old->metric == new->metric)
return netlink_route_multipath (RTM_NEWROUTE, p, new);
/* Add + delete so the prefix does not disappear temporarily */
ret = netlink_route_multipath (RTM_NEWROUTE, p, new);
if (netlink_route_multipath (RTM_DELROUTE, p, old) < 0)
ret = -1;
return ret;
}
/* Interface address modification. */
static int
netlink_address (int cmd, int family, struct interface *ifp,
struct connected *ifc)
{
int bytelen;
struct prefix *p;
struct
{
struct nlmsghdr n;
struct ifaddrmsg ifa;
char buf[NL_PKT_BUF_SIZE];
} req;
struct zebra_vrf *zvrf = vrf_info_lookup (ifp->vrf_id);
p = ifc->address;
memset (&req, 0, sizeof req - NL_PKT_BUF_SIZE);
bytelen = (family == AF_INET ? 4 : 16);
req.n.nlmsg_len = NLMSG_LENGTH (sizeof (struct ifaddrmsg));
req.n.nlmsg_flags = NLM_F_REQUEST;
req.n.nlmsg_type = cmd;
req.ifa.ifa_family = family;
req.ifa.ifa_index = ifp->ifindex;
req.ifa.ifa_prefixlen = p->prefixlen;
addattr_l (&req.n, sizeof req, IFA_LOCAL, &p->u.prefix, bytelen);
if (family == AF_INET && cmd == RTM_NEWADDR)
{
if (!CONNECTED_PEER(ifc) && ifc->destination)
{
p = ifc->destination;
addattr_l (&req.n, sizeof req, IFA_BROADCAST, &p->u.prefix,
bytelen);
}
}
if (CHECK_FLAG (ifc->flags, ZEBRA_IFA_SECONDARY))
SET_FLAG (req.ifa.ifa_flags, IFA_F_SECONDARY);
if (ifc->label)
addattr_l (&req.n, sizeof req, IFA_LABEL, ifc->label,
strlen (ifc->label) + 1);
return netlink_talk (&req.n, &zvrf->netlink_cmd, zvrf);
}
int
kernel_address_add_ipv4 (struct interface *ifp, struct connected *ifc)
{
return netlink_address (RTM_NEWADDR, AF_INET, ifp, ifc);
}
int
kernel_address_delete_ipv4 (struct interface *ifp, struct connected *ifc)
{
return netlink_address (RTM_DELADDR, AF_INET, ifp, ifc);
}
extern struct thread_master *master;
/* Kernel route reflection. */
static int
kernel_read (struct thread *thread)
{
struct zebra_vrf *zvrf = (struct zebra_vrf *)THREAD_ARG (thread);
netlink_parse_info (netlink_information_fetch, &zvrf->netlink, zvrf);
zvrf->t_netlink = thread_add_read (zebrad.master, kernel_read, zvrf,
zvrf->netlink.sock);
return 0;
}
/* Filter out messages from self that occur on listener socket,
caused by our actions on the command socket
*/
static void netlink_install_filter (int sock, __u32 pid)
{
struct sock_filter filter[] = {
/* 0: ldh [4] */
BPF_STMT(BPF_LD|BPF_ABS|BPF_H, offsetof(struct nlmsghdr, nlmsg_type)),
/* 1: jeq 0x18 jt 3 jf 6 */
BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, htons(RTM_NEWROUTE), 1, 0),
/* 2: jeq 0x19 jt 3 jf 6 */
BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, htons(RTM_DELROUTE), 0, 3),
/* 3: ldw [12] */
BPF_STMT(BPF_LD|BPF_ABS|BPF_W, offsetof(struct nlmsghdr, nlmsg_pid)),
/* 4: jeq XX jt 5 jf 6 */
BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, htonl(pid), 0, 1),
/* 5: ret 0 (skip) */
BPF_STMT(BPF_RET|BPF_K, 0),
/* 6: ret 0xffff (keep) */
BPF_STMT(BPF_RET|BPF_K, 0xffff),
};
struct sock_fprog prog = {
.len = array_size(filter),
.filter = filter,
};
if (setsockopt(sock, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)) < 0)
zlog_warn ("Can't install socket filter: %s\n", safe_strerror(errno));
}
/* Exported interface function. This function simply calls
netlink_socket (). */
void
kernel_init (struct zebra_vrf *zvrf)
{
unsigned long groups;
groups = RTMGRP_LINK | RTMGRP_IPV4_ROUTE | RTMGRP_IPV4_IFADDR;
#ifdef HAVE_IPV6
groups |= RTMGRP_IPV6_ROUTE | RTMGRP_IPV6_IFADDR;
#endif /* HAVE_IPV6 */
netlink_socket (&zvrf->netlink, groups, zvrf->vrf_id);
netlink_socket (&zvrf->netlink_cmd, 0, zvrf->vrf_id);
/* Register kernel socket. */
if (zvrf->netlink.sock > 0)
{
/* Only want non-blocking on the netlink event socket */
if (fcntl (zvrf->netlink.sock, F_SETFL, O_NONBLOCK) < 0)
zlog_err ("Can't set %s socket flags: %s", zvrf->netlink.name,
safe_strerror (errno));
/* Set receive buffer size if it's set from command line */
if (nl_rcvbufsize)
netlink_recvbuf (&zvrf->netlink, nl_rcvbufsize);
netlink_install_filter (zvrf->netlink.sock, zvrf->netlink_cmd.snl.nl_pid);
zvrf->t_netlink = thread_add_read (zebrad.master, kernel_read, zvrf,
zvrf->netlink.sock);
}
}
void
kernel_terminate (struct zebra_vrf *zvrf)
{
THREAD_READ_OFF (zvrf->t_netlink);
if (zvrf->netlink.sock >= 0)
{
close (zvrf->netlink.sock);
zvrf->netlink.sock = -1;
}
if (zvrf->netlink_cmd.sock >= 0)
{
close (zvrf->netlink_cmd.sock);
zvrf->netlink_cmd.sock = -1;
}
}
/*
* nl_msg_type_to_str
*/
const char *
nl_msg_type_to_str (uint16_t msg_type)
{
return lookup (nlmsg_str, msg_type);
}
/*
* nl_rtproto_to_str
*/
const char *
nl_rtproto_to_str (u_char rtproto)
{
return lookup (rtproto_str, rtproto);
}