Skip to content

Commit bf837e8

Browse files
committed
Merge branch 'nexthop-refactor-and-fix-nexthop-selection-for-multipath-routes'
Benjamin Poirier says:

====================
nexthop: Refactor and fix nexthop selection for multipath routes

In order to select a nexthop for multipath routes, fib_select_multipath() is used with legacy nexthops and nexthop_select_path_hthr() is used with nexthop objects. Those two functions perform a validity test on the neighbor related to each nexthop but their logic is structured differently. This causes a divergence in behavior and nexthop_select_path_hthr() may return a nexthop that failed the neighbor validity test even if there was one that passed.

Refactor nexthop_select_path_hthr() to make it more similar to fib_select_multipath() and fix the problem mentioned above.

v1: https://lore.kernel.org/netdev/20230529201914.69828-1-bpoirier@nvidia.com/
====================

Link: https://lore.kernel.org/r/20230719-nh_select-v2-0-04383e89f868@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2 parents 022add1 + c7e95bb commit bf837e8

2 files changed

Lines changed: 170 additions & 18 deletions

File tree

net/ipv4/nexthop.c

Lines changed: 41 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1152,41 +1152,64 @@ static bool ipv4_good_nh(const struct fib_nh *nh)
11521152
return !!(state & NUD_VALID);
11531153
}
11541154

1155-
static struct nexthop *nexthop_select_path_hthr(struct nh_group *nhg, int hash)
1155+
static bool nexthop_is_good_nh(const struct nexthop *nh)
1156+
{
1157+
struct nh_info *nhi = rcu_dereference(nh->nh_info);
1158+
1159+
switch (nhi->family) {
1160+
case AF_INET:
1161+
return ipv4_good_nh(&nhi->fib_nh);
1162+
case AF_INET6:
1163+
return ipv6_good_nh(&nhi->fib6_nh);
1164+
}
1165+
1166+
return false;
1167+
}
1168+
1169+
static struct nexthop *nexthop_select_path_fdb(struct nh_group *nhg, int hash)
11561170
{
1157-
struct nexthop *rc = NULL;
11581171
int i;
11591172

1160-
for (i = 0; i < nhg->num_nh; ++i) {
1173+
for (i = 0; i < nhg->num_nh; i++) {
11611174
struct nh_grp_entry *nhge = &nhg->nh_entries[i];
1162-
struct nh_info *nhi;
11631175

11641176
if (hash > atomic_read(&nhge->hthr.upper_bound))
11651177
continue;
11661178

1167-
nhi = rcu_dereference(nhge->nh->nh_info);
1168-
if (nhi->fdb_nh)
1169-
return nhge->nh;
1179+
return nhge->nh;
1180+
}
1181+
1182+
WARN_ON_ONCE(1);
1183+
return NULL;
1184+
}
1185+
1186+
static struct nexthop *nexthop_select_path_hthr(struct nh_group *nhg, int hash)
1187+
{
1188+
struct nexthop *rc = NULL;
1189+
int i;
1190+
1191+
if (nhg->fdb_nh)
1192+
return nexthop_select_path_fdb(nhg, hash);
1193+
1194+
for (i = 0; i < nhg->num_nh; ++i) {
1195+
struct nh_grp_entry *nhge = &nhg->nh_entries[i];
11701196

11711197
/* nexthops always check if it is good and does
11721198
* not rely on a sysctl for this behavior
11731199
*/
1174-
switch (nhi->family) {
1175-
case AF_INET:
1176-
if (ipv4_good_nh(&nhi->fib_nh))
1177-
return nhge->nh;
1178-
break;
1179-
case AF_INET6:
1180-
if (ipv6_good_nh(&nhi->fib6_nh))
1181-
return nhge->nh;
1182-
break;
1183-
}
1200+
if (!nexthop_is_good_nh(nhge->nh))
1201+
continue;
11841202

11851203
if (!rc)
11861204
rc = nhge->nh;
1205+
1206+
if (hash > atomic_read(&nhge->hthr.upper_bound))
1207+
continue;
1208+
1209+
return nhge->nh;
11871210
}
11881211

1189-
return rc;
1212+
return rc ? : nhg->nh_entries[0].nh;
11901213
}
11911214

11921215
static struct nexthop *nexthop_select_path_res(struct nh_group *nhg, int hash)

tools/testing/selftests/net/fib_nexthops.sh

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ IPV4_TESTS="
2929
ipv4_large_res_grp
3030
ipv4_compat_mode
3131
ipv4_fdb_grp_fcnal
32+
ipv4_mpath_select
3233
ipv4_torture
3334
ipv4_res_torture
3435
"
@@ -42,6 +43,7 @@ IPV6_TESTS="
4243
ipv6_large_res_grp
4344
ipv6_compat_mode
4445
ipv6_fdb_grp_fcnal
46+
ipv6_mpath_select
4547
ipv6_torture
4648
ipv6_res_torture
4749
"
@@ -370,6 +372,27 @@ check_large_res_grp()
370372
log_test $? 0 "Dump large (x$buckets) nexthop buckets"
371373
}
372374

375+
get_route_dev()
376+
{
377+
local pfx="$1"
378+
local out
379+
380+
if out=$($IP -j route get "$pfx" | jq -re ".[0].dev"); then
381+
echo "$out"
382+
fi
383+
}
384+
385+
check_route_dev()
386+
{
387+
local pfx="$1"
388+
local expected="$2"
389+
local out
390+
391+
out=$(get_route_dev "$pfx")
392+
393+
check_output "$out" "$expected"
394+
}
395+
373396
start_ip_monitor()
374397
{
375398
local mtype=$1
@@ -575,6 +598,112 @@ ipv4_fdb_grp_fcnal()
575598
$IP link del dev vx10
576599
}
577600

601+
ipv4_mpath_select()
602+
{
603+
local rc dev match h addr
604+
605+
echo
606+
echo "IPv4 multipath selection"
607+
echo "------------------------"
608+
if [ ! -x "$(command -v jq)" ]; then
609+
echo "SKIP: Could not run test; need jq tool"
610+
return $ksft_skip
611+
fi
612+
613+
# Use status of existing neighbor entry when determining nexthop for
614+
# multipath routes.
615+
local -A gws
616+
gws=([veth1]=172.16.1.2 [veth3]=172.16.2.2)
617+
local -A other_dev
618+
other_dev=([veth1]=veth3 [veth3]=veth1)
619+
620+
run_cmd "$IP nexthop add id 1 via ${gws["veth1"]} dev veth1"
621+
run_cmd "$IP nexthop add id 2 via ${gws["veth3"]} dev veth3"
622+
run_cmd "$IP nexthop add id 1001 group 1/2"
623+
run_cmd "$IP ro add 172.16.101.0/24 nhid 1001"
624+
rc=0
625+
for dev in veth1 veth3; do
626+
match=0
627+
for h in {1..254}; do
628+
addr="172.16.101.$h"
629+
if [ "$(get_route_dev "$addr")" = "$dev" ]; then
630+
match=1
631+
break
632+
fi
633+
done
634+
if (( match == 0 )); then
635+
echo "SKIP: Did not find a route using device $dev"
636+
return $ksft_skip
637+
fi
638+
run_cmd "$IP neigh add ${gws[$dev]} dev $dev nud failed"
639+
if ! check_route_dev "$addr" "${other_dev[$dev]}"; then
640+
rc=1
641+
break
642+
fi
643+
run_cmd "$IP neigh del ${gws[$dev]} dev $dev"
644+
done
645+
log_test $rc 0 "Use valid neighbor during multipath selection"
646+
647+
run_cmd "$IP neigh add 172.16.1.2 dev veth1 nud incomplete"
648+
run_cmd "$IP neigh add 172.16.2.2 dev veth3 nud incomplete"
649+
run_cmd "$IP route get 172.16.101.1"
650+
# if we did not crash, success
651+
log_test $rc 0 "Multipath selection with no valid neighbor"
652+
}
653+
654+
ipv6_mpath_select()
655+
{
656+
local rc dev match h addr
657+
658+
echo
659+
echo "IPv6 multipath selection"
660+
echo "------------------------"
661+
if [ ! -x "$(command -v jq)" ]; then
662+
echo "SKIP: Could not run test; need jq tool"
663+
return $ksft_skip
664+
fi
665+
666+
# Use status of existing neighbor entry when determining nexthop for
667+
# multipath routes.
668+
local -A gws
669+
gws=([veth1]=2001:db8:91::2 [veth3]=2001:db8:92::2)
670+
local -A other_dev
671+
other_dev=([veth1]=veth3 [veth3]=veth1)
672+
673+
run_cmd "$IP nexthop add id 1 via ${gws["veth1"]} dev veth1"
674+
run_cmd "$IP nexthop add id 2 via ${gws["veth3"]} dev veth3"
675+
run_cmd "$IP nexthop add id 1001 group 1/2"
676+
run_cmd "$IP ro add 2001:db8:101::/64 nhid 1001"
677+
rc=0
678+
for dev in veth1 veth3; do
679+
match=0
680+
for h in {1..65535}; do
681+
addr=$(printf "2001:db8:101::%x" $h)
682+
if [ "$(get_route_dev "$addr")" = "$dev" ]; then
683+
match=1
684+
break
685+
fi
686+
done
687+
if (( match == 0 )); then
688+
echo "SKIP: Did not find a route using device $dev"
689+
return $ksft_skip
690+
fi
691+
run_cmd "$IP neigh add ${gws[$dev]} dev $dev nud failed"
692+
if ! check_route_dev "$addr" "${other_dev[$dev]}"; then
693+
rc=1
694+
break
695+
fi
696+
run_cmd "$IP neigh del ${gws[$dev]} dev $dev"
697+
done
698+
log_test $rc 0 "Use valid neighbor during multipath selection"
699+
700+
run_cmd "$IP neigh add 2001:db8:91::2 dev veth1 nud incomplete"
701+
run_cmd "$IP neigh add 2001:db8:92::2 dev veth3 nud incomplete"
702+
run_cmd "$IP route get 2001:db8:101::1"
703+
# if we did not crash, success
704+
log_test $rc 0 "Multipath selection with no valid neighbor"
705+
}
706+
578707
################################################################################
579708
# basic operations (add, delete, replace) on nexthops and nexthop groups
580709
#

0 commit comments

Comments
 (0)