#!/bin/sh
# This code should (try to) follow Google's Shell Style Guide
# (https://google.github.io/styleguide/shellguide.html)
set -e

# "--wait" as the very first argument selects wait mode; WAIT is only ever
# assigned here and is tested by the later sections of the script.
if [ "$1" = "--wait" ]; then
  WAIT=1
fi
| 11 | |
# First positional argument: the host interface (or a pseudo-type such as
# "route"). In wait mode this holds the literal "--wait".
IFNAME=$1

# Optional "-i <name>": interface name to use inside the container.
# Stays empty when not given; a default value is applied further down.
CONTAINER_IFNAME=
case "$2" in
  -i)
    CONTAINER_IFNAME=$3
    shift 2
    ;;
esac

# Optional "-l <name>": name for the local (host side) interface.
case "$2" in
  -l)
    LOCAL_IFNAME=$3
    shift 2
    ;;
esac

# Whatever follows the options: guest, address specification, MAC spec.
GUESTNAME=$2
IPADDR=$3
MACADDR=$4
| 29 | |
# The MAC argument has the form [macaddr][@vlan]: split it at the first "@".
# Stripping the shortest "*@" prefix changes the value only if an "@" exists.
if [ "${MACADDR#*@}" != "$MACADDR" ]; then
  VLAN=${MACADDR#*@}
  VLAN=${VLAN%%@*}
  MACADDR=${MACADDR%%@*}
else
  VLAN=
fi
| 40 | |
# Derive a MAC address from an arbitrary string.
# Arguments: $1 - the seed string
# Outputs:   "02:" followed by the first five bytes (as colon separated hex
#            pairs) of the MD5 digest of the seed plus a trailing newline.
# The seed is emitted with printf and quoted so that whitespace runs, glob
# characters, backslashes or a leading "-n" reach md5sum untouched.
mac_from_string () {
  printf '%s\n' "$1" | md5sum |
    sed 's/^\(..\)\(..\)\(..\)\(..\)\(..\).*$/02:\1:\2:\3:\4:\5/'
}

# did they ask to generate a custom MACADDR? ("U:<unique string>")
case "$MACADDR" in
  U:*)
    MACADDR=$(mac_from_string "${MACADDR#*:}")
    ;;
esac
| 50 | |
| 51 | |
# Neither an address argument nor wait mode: show the syntax summary and
# stop with a non-zero status.
if [ -z "$IPADDR" ] && [ -z "$WAIT" ]; then
  printf '%s\n' \
    "Syntax:" \
    "pipework <hostinterface> [-i containerinterface] [-l localinterfacename] <guest> <ipaddr>/<subnet>[@default_gateway] [macaddr][@vlan]" \
    "pipework <hostinterface> [-i containerinterface] [-l localinterfacename] <guest> dhcp [macaddr][@vlan]" \
    "pipework route <guest> <route_command>" \
    "pipework --wait [-i containerinterface]"
  exit 1
fi
| 60 | |
# Tell whether a utility can be found by the shell.
# Arguments: $1 - name of the utility
# Returns:   the status of `command -v` (zero when the utility is found)
# `command -v` is used rather than `which`/`type`; background:
# http://stackoverflow.com/questions/592620/check-if-a-program-exists-from-a-bash-script/677212#677212
# (Thanks to @chenhanxiao for pointing this out!)
installed () {
  command -v "$1" 1>/dev/null 2>/dev/null
}
| 68 | |
# Google Styleguide says error messages should go to standard error.
# Arguments: the words of the message; they are joined with single spaces.
# printf is used instead of echo so that a message starting with "-n"/"-e",
# or containing backslashes (e.g. from a user supplied name), is printed
# literally regardless of which shell runs the script.
warn () {
  printf '%s\n' "$*" >&2
}
# Report an error through warn and terminate the script.
# Arguments: $1   - exit status to terminate with
#            $2.. - message words, handed to warn unchanged
die () {
  rc="$1"
  shift
  warn "$@"
  exit "$rc"
}
| 79 | |
# First step: determine type of first argument (bridge, physical interface...),
# Unless "--wait" is set (then skip the whole section)
# Sets IFTYPE (bridge, ipoib, phys, route or dummy) and, for bridges, BRTYPE
# (linux or openvswitch).
if [ -z "$WAIT" ]; then
  if [ -d "/sys/class/net/$IFNAME" ]; then
    # The interface already exists: classify it from what the system reports.
    if [ -d "/sys/class/net/$IFNAME/bridge" ]; then
      IFTYPE=bridge
      BRTYPE=linux
    # Match the bridge name as a fixed string against whole lines, so that
    # regex metacharacters in an interface name (e.g. ".") are taken literally.
    elif installed ovs-vsctl && ovs-vsctl list-br | grep -qxF -e "$IFNAME"; then
      IFTYPE=bridge
      BRTYPE=openvswitch
    elif [ "$(cat "/sys/class/net/$IFNAME/type")" -eq 32 ]; then
      # InfiniBand IPoIB interface type 32
      IFTYPE=ipoib
      # The IPoIB kernel module is fussy, set device name to ib0 if not overridden
      CONTAINER_IFNAME=${CONTAINER_IFNAME:-ib0}
      # For IPoIB the "@vlan" suffix is reused as the partition key.
      PKEY=$VLAN
    else
      IFTYPE=phys
    fi
  else
    # The interface does not exist (yet): decide from its name prefix.
    case "$IFNAME" in
      br*)
        IFTYPE=bridge
        BRTYPE=linux
        ;;
      ovs*)
        if ! installed ovs-vsctl; then
          die 1 "Need OVS installed on the system to create an ovs bridge"
        fi
        IFTYPE=bridge
        BRTYPE=openvswitch
        ;;
      route*)
        IFTYPE=route
        ;;
      dummy*)
        IFTYPE=dummy
        ;;
      *)
        die 1 "I do not know how to setup interface $IFNAME."
        ;;
    esac
  fi
fi
| 121 | |
# Fall back to eth1 as the container interface name when none was chosen.
: "${CONTAINER_IFNAME:=eth1}"

# Wait mode: poll once per second until the interface shows up, then exit.
if [ "$WAIT" ]; then
  # The carrier check works even without `ip` or `ifconfig` installed, but
  # doesn't work on older kernels (e.g. CentOS 6.X). See #128.
  # The `ip link ls` check hopefully works on those older kernels.
  until grep -q '^1$' "/sys/class/net/$CONTAINER_IFNAME/carrier" ||
        ip link ls dev "$CONTAINER_IFNAME"; do
    sleep 1
  done > /dev/null 2>&1
  exit 0
fi
| 136 | |
# A VLAN tag cannot be combined with a Linux bridge.
if [ "$IFTYPE" = bridge ] && [ "$BRTYPE" = linux ] && [ -n "$VLAN" ]; then
  die 1 "VLAN configuration currently unsupported for Linux bridge."
fi

# A MAC address cannot be set on IPoIB interfaces.
if [ "$IFTYPE" = ipoib ] && [ -n "$MACADDR" ]; then
  die 1 "MACADDR configuration unsupported for IPoIB interfaces."
fi
| 144 | |
# Second step: find the guest (for now, we only support LXC containers)
# Walk /proc/mounts looking for cgroup filesystems whose mount options
# contain the word "devices"; the last matching mount point is kept.
while read _ mount_point fs_type mount_opts _; do
  if [ "$fs_type" = "cgroup" ] && echo "$mount_opts" | grep -qw devices; then
    CGROUPMNT=$mount_point
  fi
done < /proc/mounts

if [ -z "$CGROUPMNT" ]; then
  die 1 "Could not locate cgroup mount point."
fi
| 155 | |
# Try to find a cgroup matching exactly the provided name.
# N is the number of entries below the cgroup mount named like the guest.
N=$(find "$CGROUPMNT" -name "$GUESTNAME" | wc -l)
case "$N" in
  0)
    # If we didn't find anything, try to lookup the container with Docker.
    installed docker ||
      die 1 "Container $GUESTNAME not found, and Docker not installed."

    # A PID of 0 is retried a few times, one second apart.
    RETRIES=3
    while [ "$RETRIES" -gt 0 ]; do
      # Handle a failing `docker inspect` explicitly: under `set -e` a bare
      # failing assignment would abort the script right here, before any of
      # our own error messages could be printed. Docker's own diagnostics
      # still reach stderr.
      DOCKERPID=$(docker inspect --format='{{ .State.Pid }}' "$GUESTNAME") ||
        die 1 "Container $GUESTNAME not found, and unknown to Docker."
      [ "$DOCKERPID" != 0 ] && break
      sleep 1
      RETRIES=$((RETRIES - 1))
    done

    case "$DOCKERPID" in
      0)
        die 1 "Docker inspect returned invalid PID 0"
        ;;
      ""|"<no value>")
        # An empty PID would later be mistaken for "not a Docker container".
        die 1 "Container $GUESTNAME not found, and unknown to Docker."
        ;;
    esac
    ;;
  1)
    # Exactly one matching cgroup: nothing more to do here.
    true
    ;;
  *)
    die 1 "Found more than one container matching $GUESTNAME."
    ;;
esac
| 184 | |
# only check IPADDR if we are not in a route mode
# Depending on the form of IPADDR this sets DHCP_CLIENT (and DHCP_FOREGROUND)
# or GATEWAY, and strips the "@gateway" suffix from IPADDR.
if [ "$IFTYPE" != route ]; then
  case "$IPADDR" in
    # Let's check first if the user asked for DHCP allocation.
    dhcp|dhcp:*)
      # Use Docker-specific strategy to run the DHCP client
      # from the busybox image, in the network namespace of
      # the container.
      if [ -z "$DOCKERPID" ]; then
        warn "You asked for a Docker-specific DHCP method."
        warn "However, $GUESTNAME doesn't seem to be a Docker container."
        warn "Try to replace 'dhcp' with another option?"
        die 1 "Aborting."
      fi
      DHCP_CLIENT=${IPADDR%%:*}
      ;;
    udhcpc|udhcpc:*|udhcpc-f|udhcpc-f:*|dhcpcd|dhcpcd:*|dhclient|dhclient:*|dhclient-f|dhclient-f:*)
      DHCP_CLIENT=${IPADDR%%:*}
      # did they ask for the client to remain?
      # A "-f" suffix on the client name requests foreground operation. The
      # suffix is detected with a case pattern: the substring expansion
      # ${var: -2} is a bashism and not available in every /bin/sh.
      DHCP_FOREGROUND=
      case "$DHCP_CLIENT" in
        *-f) DHCP_FOREGROUND=true ;;
      esac
      DHCP_CLIENT=${DHCP_CLIENT%-f}
      if ! installed "$DHCP_CLIENT"; then
        die 1 "You asked for DHCP client $DHCP_CLIENT, but I can't find it."
      fi
      ;;
    # Alright, no DHCP? Then let's see if we have a subnet *and* gateway.
    */*@*)
      GATEWAY=${IPADDR#*@}
      GATEWAY=${GATEWAY%%@*}
      IPADDR=${IPADDR%%@*}
      ;;
    # No gateway? We need at least a subnet, anyway!
    */*)
      :
      ;;
    # ... No? Then stop right here.
    *)
      warn "The IP address should include a netmask."
      die 1 "Maybe you meant $IPADDR/24 ?"
      ;;
  esac
fi
| 227 | |
# With a DHCP method selected, everything after the first ":" of the address
# argument is kept as extra options for the DHCP client.
# (Stripping the shortest "*:" prefix changes IPADDR only if it has a ":".)
if [ -n "$DHCP_CLIENT" ] && [ "${IPADDR#*:}" != "$IPADDR" ]; then
  DHCP_OPTIONS=${IPADDR#*:}
fi
| 234 | |
# Determine NSPID, the PID of a process inside the guest; it is used below to
# reach the guest's network namespace.
if [ "$DOCKERPID" ]; then
  NSPID=$DOCKERPID
else
  # First task listed in the guest's cgroup. A failure here must not abort
  # the script (set -e), otherwise the fallback below is never attempted.
  cgroup_dir=$(find "$CGROUPMNT" -name "$GUESTNAME" | head -n 1)
  NSPID=$(head -n 1 "$cgroup_dir/tasks") || NSPID=
  if [ -z "$NSPID" ]; then
    # it is an alternative way to get the pid
    # Again tolerate failure so that the error message below is reachable.
    NSPID=$(lxc-info -n "$GUESTNAME" | grep PID | grep -Eo '[0-9]+') || NSPID=
    if [ -z "$NSPID" ]; then
      die 1 "Could not find a process inside container $GUESTNAME."
    fi
  fi
fi
| 247 | |
# Check if an incompatible VLAN device already exists
# The device in question is "<IFNAME>.<VLAN>"; it must be a VLAN device
# carrying exactly the requested tag. The tag must be followed by a space or
# the end of the line so that e.g. tag 10 does not match "id 100".
if [ "$IFTYPE" = phys ] && [ "$VLAN" ] && [ -d "/sys/class/net/$IFNAME.$VLAN" ]; then
  ip -d link show "$IFNAME.$VLAN" | grep -Eq "vlan.* id $VLAN( |\$)" ||
    die 1 "$IFNAME.$VLAN already exists but is not a VLAN device for tag $VLAN"
fi
| 254 | |
# Make the guest's network namespace reachable by `ip netns` under the name
# $NSPID: (re)create a symlink to it in /var/run/netns.
netns_dir=/var/run/netns
if [ ! -d "$netns_dir" ]; then
  mkdir -p "$netns_dir"
fi
rm -f "$netns_dir/$NSPID"
ln -s "/proc/$NSPID/ns/net" "$netns_dir/$NSPID"
| 258 | |
# Create the bridge when one was requested but does not exist yet.
if [ "$IFTYPE" = bridge ] && [ ! -d "/sys/class/net/$IFNAME" ]; then
  case "$BRTYPE" in
    linux)
      # Prefer `ip`; fall back to brctl when `ip` cannot create the bridge.
      ip link add dev "$IFNAME" type bridge > /dev/null 2>&1 ||
        brctl addbr "$IFNAME"
      ip link set "$IFNAME" up
      ;;
    openvswitch)
      ovs-vsctl add-br "$IFNAME"
      ;;
  esac
fi
| 269 | |
# Print the MTU found in `ip link show` output read from stdin: the word
# following the first "mtu" keyword. Printing a fixed column of every line
# instead would also pick up unrelated words from continuation lines that
# happen to have enough fields (e.g. "link-netnsid"), giving a multi-line
# value.
link_mtu () {
  awk '{ for (i = 1; i < NF; i++) if ($i == "mtu") { print $(i + 1); exit } }'
}

# Remember the MTU of the host interface; route and dummy modes have none.
if [ "$IFTYPE" != "route" ] && [ "$IFTYPE" != "dummy" ]; then
  MTU=$(ip link show "$IFNAME" | link_mtu)
fi
| 271 | |
# If it's a bridge, we need to create a veth pair
# LOCAL_IFNAME stays on the host and is attached to the bridge; GUEST_IFNAME
# is the peer that is handed to the guest later on.
if [ "$IFTYPE" = bridge ]; then
  if [ -z "$LOCAL_IFNAME" ]; then
    LOCAL_IFNAME="v${CONTAINER_IFNAME}pl${NSPID}"
  fi
  GUEST_IFNAME="v${CONTAINER_IFNAME}pg${NSPID}"
  # Does the link already exist?
  if ip link show "$LOCAL_IFNAME" >/dev/null 2>&1; then
    # link exists, is it in use?
    if ip link show "$LOCAL_IFNAME" up | grep -q "UP"; then
      # Reported through die so that the message goes to standard error like
      # every other fatal error of this script.
      die 1 "Link $LOCAL_IFNAME exists and is up"
    fi
    # delete the link so we can re-add it afterwards
    ip link del "$LOCAL_IFNAME"
  fi
  ip link add name "$LOCAL_IFNAME" mtu "$MTU" type veth peer name "$GUEST_IFNAME" mtu "$MTU"
  case "$BRTYPE" in
    linux)
      # Prefer `ip`; fall back to brctl when `ip` cannot enslave the link.
      (ip link set "$LOCAL_IFNAME" master "$IFNAME" > /dev/null 2>&1) || (brctl addif "$IFNAME" "$LOCAL_IFNAME")
      ;;
    openvswitch)
      # Compare the port name as a fixed string against whole lines, so that
      # regex metacharacters in the name are taken literally.
      if ! ovs-vsctl list-ports "$IFNAME" | grep -qxF -e "$LOCAL_IFNAME"; then
        # The tag argument is only passed when a VLAN was requested.
        ovs-vsctl add-port "$IFNAME" "$LOCAL_IFNAME" ${VLAN:+tag="$VLAN"}
      fi
      ;;
  esac
  ip link set "$LOCAL_IFNAME" up
fi
| 301 | |
# Physical interface: the guest gets a macvlan subinterface (bridge mode).
if [ "$IFTYPE" = phys ]; then
  if [ "$VLAN" ]; then
    # With a VLAN tag the macvlan is stacked on "<IFNAME>.<VLAN>", which is
    # created first unless it is already present.
    vlan_dev="${IFNAME}.${VLAN}"
    if [ ! -d "/sys/class/net/$vlan_dev" ]; then
      ip link add link "$IFNAME" name "$vlan_dev" mtu "$MTU" type vlan id "$VLAN"
    fi
    ip link set "$IFNAME" up
    IFNAME=$vlan_dev
  fi
  GUEST_IFNAME="ph${NSPID}${CONTAINER_IFNAME}"
  ip link add link "$IFNAME" dev "$GUEST_IFNAME" mtu "$MTU" type macvlan mode bridge
  ip link set "$IFNAME" up
fi
| 315 | |
# IPoIB interface: create a virtual IPoIB interface (the IPoIB equivalent of
# a macvlan device).
#
# Note: no macvlan subinterface nor Ethernet bridge can be created on top of an
# IPoIB interface. InfiniBand is not Ethernet. IPoIB is an IP layer on top of
# InfiniBand, without an intermediate Ethernet layer.
if [ "$IFTYPE" = ipoib ]; then
  if [ "$PKEY" ]; then
    # A partition key was provided: make it part of the name and turn PKEY
    # into the matching `ip link` arguments.
    GUEST_IFNAME="${IFNAME}.${PKEY}.${NSPID}"
    PKEY="pkey 0x$PKEY"
  else
    GUEST_IFNAME="${IFNAME}.${NSPID}"
  fi

  # $PKEY is deliberately unquoted: it is either empty or two separate words.
  ip link add link "$IFNAME" name "$GUEST_IFNAME" type ipoib $PKEY
  ip link set "$IFNAME" up
fi
| 334 | |
# Dummy mode: the guest gets a freshly created dummy interface.
case "$IFTYPE" in
  dummy)
    GUEST_IFNAME="du${NSPID}${CONTAINER_IFNAME}"
    ip link add dev "$GUEST_IFNAME" type dummy
    ;;
esac
| 340 | |
# If the `route` command was specified ...
if [ "$IFTYPE" = route ]; then
  # ... discard the first two arguments and pass the rest to the route command.
  shift 2
  ip netns exec "$NSPID" ip route "$@"
else
  # Otherwise, run normally: move the guest side interface into the guest's
  # namespace, give it its final name and, if requested, its MAC address.
  ip link set "$GUEST_IFNAME" netns "$NSPID"
  ip netns exec "$NSPID" ip link set "$GUEST_IFNAME" name "$CONTAINER_IFNAME"
  if [ "$MACADDR" ]; then
    ip netns exec "$NSPID" ip link set dev "$CONTAINER_IFNAME" address "$MACADDR"
  fi

  # When using any of the DHCP methods, we start a DHCP client in the
  # network namespace of the container. With the 'dhcp' method, the
  # client used is taken from the Docker busybox image (therefore
  # requiring no specific client installed on the host). Other methods
  # use a locally installed client.
  #
  # $DHCP_OPTIONS is expanded unquoted on purpose below: it may hold several
  # whitespace separated options.
  case "$DHCP_CLIENT" in
    dhcp)
      docker run -d --net "container:$GUESTNAME" --cap-add NET_ADMIN \
        busybox udhcpc -i "$CONTAINER_IFNAME" -x "hostname:$GUESTNAME" \
        $DHCP_OPTIONS \
        >/dev/null
      ;;
    udhcpc)
      if [ "$DHCP_FOREGROUND" ]; then
        DHCP_OPTIONS="$DHCP_OPTIONS -f"
      fi
      ip netns exec "$NSPID" "$DHCP_CLIENT" -qi "$CONTAINER_IFNAME" \
        -x "hostname:$GUESTNAME" \
        -p "/var/run/udhcpc.$GUESTNAME.pid" \
        $DHCP_OPTIONS
      if [ -z "$DHCP_FOREGROUND" ]; then
        # -f: the client may already have removed its pid file; that must
        # not abort the script under `set -e`.
        rm -f "/var/run/udhcpc.$GUESTNAME.pid"
      fi
      ;;
    dhclient)
      ip netns exec "$NSPID" "$DHCP_CLIENT" "$CONTAINER_IFNAME" \
        -pf "/var/run/dhclient.$GUESTNAME.pid" \
        -lf "/etc/dhclient/dhclient.$GUESTNAME.leases" \
        $DHCP_OPTIONS
      # kill dhclient after get ip address to prevent device be used after container close
      if [ -z "$DHCP_FOREGROUND" ]; then
        kill "$(cat "/var/run/dhclient.$GUESTNAME.pid")"
        rm -f "/var/run/dhclient.$GUESTNAME.pid"
      fi
      ;;
    dhcpcd)
      ip netns exec "$NSPID" "$DHCP_CLIENT" -q "$CONTAINER_IFNAME" -h "$GUESTNAME"
      ;;
    "")
      # Static configuration. If an ipcalc that prints a "BROADCAST=..." line
      # is available, also set the broadcast address; with any other ipcalc
      # variant (or none) the address is added without one.
      BROADCAST=
      if installed ipcalc; then
        # NOTE(review): this evaluates output of an external tool. Only the
        # BROADCAST= line is passed to eval to keep that as narrow as possible.
        eval "$(ipcalc -b "$IPADDR" 2>/dev/null | grep '^BROADCAST=')"
      fi
      if [ "$BROADCAST" ]; then
        ip netns exec "$NSPID" ip addr add "$IPADDR" brd "$BROADCAST" dev "$CONTAINER_IFNAME"
      else
        ip netns exec "$NSPID" ip addr add "$IPADDR" dev "$CONTAINER_IFNAME"
      fi

      if [ "$GATEWAY" ]; then
        # Best effort: there may be no default route to delete.
        ip netns exec "$NSPID" ip route delete default >/dev/null 2>&1 || true
      fi
      ip netns exec "$NSPID" ip link set "$CONTAINER_IFNAME" up
      if [ "$GATEWAY" ]; then
        # Add a host route to the gateway first when it is not reachable yet.
        ip netns exec "$NSPID" ip route get "$GATEWAY" >/dev/null 2>&1 || \
          ip netns exec "$NSPID" ip route add "$GATEWAY/32" dev "$CONTAINER_IFNAME"
        ip netns exec "$NSPID" ip route replace default via "$GATEWAY"
      fi
      ;;
  esac

  # Give our ARP neighbors a nudge about the new interface
  if installed arping; then
    IPADDR=$(echo "$IPADDR" | cut -d/ -f1)
    # Best effort: a failing arping is ignored.
    ip netns exec "$NSPID" arping -c 1 -A -I "$CONTAINER_IFNAME" "$IPADDR" > /dev/null 2>&1 || true
  else
    warn "Warning: arping not found; interface may not be immediately reachable"
  fi
fi
# Remove NSPID to avoid `ip netns` catch it.
rm -f "/var/run/netns/$NSPID"
| 421 | |
| 422 | # vim: set tabstop=2 shiftwidth=2 softtabstop=2 expandtab : |