nat: fix nat44-ed port range with multiple workers

The number of available dynamic ports is set to (0xffff - 1024) =
64511, which is not divisible by a power-of-two number of workers - its
only non-trivial integer divisors are 31 and 2081.
So the total dynamic port range across all workers ends up smaller than that:
    1 wrk: n = (port_per_thread = 64511/1)*1 = 64511 + 1025 = 65536
    2 wrk: n = (port_per_thread = 64511/2)*2 = 64510 + 1025 = 65535
    4 wrk: n = (port_per_thread = 64511/4)*4 = 64508 + 1025 = 65533
    8 wrk: n = (port_per_thread = 64511/8)*8 = 64504 + 1025 = 65529
    ...
As seen, with multiple workers there are unused trailing ports for every
nat pool address, and that is the reason for the out-of-bounds index in the
worker array on the out2in path, due to the (port - 1024) / port_per_thread math.
This was fixed in 5c9f9968de63fa627b4a72b344df36cdc686d18a, so packets
to unused ports now go to an existing worker and are dropped there.

Per RFC 6335 https://www.rfc-editor.org/rfc/rfc6335#section-6:
6.  Port Number Ranges
   o  the System Ports, also known as the Well Known Ports, from 0-1023
      (assigned by IANA)
   o  the User Ports, also known as the Registered Ports, from 1024-
      49151 (assigned by IANA)
   o  the Dynamic Ports, also known as the Private or Ephemeral Ports,
      from 49152-65535 (never assigned)

Accordingly, let's allocate dynamic ports starting from 1024 and have the full
port range available for a wide range of worker counts - 64 non-trivial integer
divisors in total, including the pow2 ones:
    1 wrk: n = (port_per_thread = 64512/1)*1 = 64512 + 1024 = 65536
    2 wrk: n = (port_per_thread = 64512/2)*2 = 64512 + 1024 = 65536
    3 wrk: n = (port_per_thread = 64512/3)*3 = 64512 + 1024 = 65536
    4 wrk: n = (port_per_thread = 64512/4)*4 = 64512 + 1024 = 65536
    5 wrk: n = (port_per_thread = 64512/5)*5 = 64510 + 1024 = 65534
    6 wrk: n = (port_per_thread = 64512/6)*6 = 64512 + 1024 = 65536
    7 wrk: n = (port_per_thread = 64512/7)*7 = 64512 + 1024 = 65536
    8 wrk: n = (port_per_thread = 64512/8)*8 = 64512 + 1024 = 65536
    ...
The modulo from 5c9f9968de63fa627b4a72b344df36cdc686d18a is still required
when the number of workers is not an integer divisor of 64512.

Type: fix
Fixes: 5c9f9968de63fa627b4a72b344df36cdc686d18a
Change-Id: I9edaea07e58ff4888812b0d86cbf41a3784b189e
Signed-off-by: Vladislav Grishenko <themiron@yandex-team.ru>
This commit is contained in:
Vladislav Grishenko 2022-08-19 20:42:22 +05:00 committed by Benoît Ganne
parent b3778cce70
commit 5f694d1ecf
5 changed files with 207 additions and 9 deletions

View File

@ -762,8 +762,8 @@ get_thread_idx_by_port (u16 e_port)
if (sm->num_workers > 1)
{
thread_idx = sm->first_worker_index +
sm->workers[(e_port - 1024) / sm->port_per_thread %
_vec_len (sm->workers)];
sm->workers[(e_port - ED_USER_PORT_OFFSET) /
sm->port_per_thread % _vec_len (sm->workers)];
}
return thread_idx;
}
@ -2133,7 +2133,7 @@ snat_set_workers (uword * bitmap)
j++;
}
sm->port_per_thread = (0xffff - 1024) / _vec_len (sm->workers);
sm->port_per_thread = (65536 - ED_USER_PORT_OFFSET) / _vec_len (sm->workers);
return 0;
}
@ -2384,7 +2384,7 @@ nat_init (vlib_main_t * vm)
}
}
num_threads = tm->n_vlib_mains - 1;
sm->port_per_thread = 0xffff - 1024;
sm->port_per_thread = 65536 - ED_USER_PORT_OFFSET;
vec_validate (sm->per_thread_data, num_threads);
/* Use all available workers by default */

View File

@ -41,6 +41,11 @@
* as if there were no free ports available to conserve resources */
#define ED_PORT_ALLOC_ATTEMPTS (10)
/* system ports range is 0-1023, first user port is 1024 per
* https://www.rfc-editor.org/rfc/rfc6335#section-6
*/
#define ED_USER_PORT_OFFSET 1024
/* NAT buffer flags */
#define SNAT_FLAG_HAIRPINNING (1 << 0)

View File

@ -102,14 +102,15 @@ nat_ed_alloc_addr_and_port_with_snat_address (
u16 port_per_thread, u32 snat_thread_index, snat_session_t *s,
ip4_address_t *outside_addr, u16 *outside_port)
{
const u16 port_thread_offset = (port_per_thread * snat_thread_index) + 1024;
const u16 port_thread_offset =
(port_per_thread * snat_thread_index) + ED_USER_PORT_OFFSET;
s->o2i.match.daddr = a->addr;
/* first try port suggested by caller */
u16 port = clib_net_to_host_u16 (*outside_port);
u16 port_offset = port - port_thread_offset;
if (port <= port_thread_offset ||
port > port_thread_offset + port_per_thread)
if (port < port_thread_offset ||
port >= port_thread_offset + port_per_thread)
{
/* need to pick a different port, suggested port doesn't fit in
* this thread's port range */

View File

@ -279,7 +279,8 @@ nat44_ed_alloc_i2o_port (snat_main_t *sm, snat_address_t *a, snat_session_t *s,
for (int i = 0; i < ED_PORT_ALLOC_ATTEMPTS; ++i)
{
portnum = (sm->port_per_thread * snat_thread_index) +
snat_random_port (0, sm->port_per_thread - 1) + 1024;
snat_random_port (0, sm->port_per_thread - 1) +
ED_USER_PORT_OFFSET;
portnum = clib_host_to_net_u16 (portnum);
nat_6t_i2o_flow_init (sm, thread_index, s, i2o_addr, i2o_port, a->addr,
portnum, i2o_fib_index, proto);

View File

@ -71,7 +71,7 @@ class TestNAT44ED(VppTestCase):
@staticmethod
def random_port():
    """Return a random port number from the user/dynamic range.

    The lower bound is 1024, the first non-system port (system ports are
    0-1023 per RFC 6335 section 6), so the very first dynamically
    allocatable NAT port can be picked as well. The upper bound 65535 is
    inclusive.
    """
    return randint(1024, 65535)
@staticmethod
def proto2layer(proto):
@ -2358,6 +2358,197 @@ class TestNAT44ED(VppTestCase):
% (p_sent[IP].src, p_recvd[IP].src, a),
)
def test_dynamic_edge_ports(self):
    """NAT44ED dynamic translation test: edge ports

    Sends TCP, UDP and ICMP traffic whose ports/ids sit on the edges of
    every worker's dynamic port slice (plus 0, the last system port and
    65535) and checks that:

    * in2out: every packet is translated and the translated source
      port / ICMP id lies inside the dynamic port range;
    * out2in: packets addressed to ports inside the dynamic range are
      forwarded unchanged, packets addressed to ports outside of it are
      counted as slowpath drops.
    """
    worker_count = self.vpp_worker_count or 1
    port_offset = 1024
    port_per_thread = (65536 - port_offset) // worker_count
    port_count = port_per_thread * worker_count
    # Half-open range of ports usable for dynamic translation. When
    # worker_count does not divide (65536 - port_offset) the trailing
    # ports up to 65535 are NOT part of it.
    dynamic_ports = range(port_offset, port_offset + port_count)

    # worker thread edge ports
    thread_edge_ports = {0, port_offset - 1, 65535}
    for i in range(0, worker_count):
        port_thread_offset = (port_per_thread * i) + port_offset
        for port_range_offset in [0, port_per_thread - 1]:
            port = port_thread_offset + port_range_offset
            thread_edge_ports.add(port)
    # edge ports that no worker owns; out2in traffic to them is dropped
    thread_drop_ports = {
        port for port in thread_edge_ports if port not in dynamic_ports
    }

    in_if = self.pg7
    out_if = self.pg8

    self.nat_add_address(self.nat_addr)

    try:
        self.configure_ip4_interface(in_if, hosts=worker_count)
        self.configure_ip4_interface(out_if)

        self.nat_add_inside_interface(in_if)
        self.nat_add_outside_interface(out_if)

        # in2out
        tc1 = self.statistics["/nat44-ed/in2out/slowpath/tcp"]
        uc1 = self.statistics["/nat44-ed/in2out/slowpath/udp"]
        ic1 = self.statistics["/nat44-ed/in2out/slowpath/icmp"]
        dc1 = self.statistics["/nat44-ed/in2out/slowpath/drops"]

        # per-protocol packet count; three protocols are sent per port
        pkt_count = worker_count * len(thread_edge_ports)

        i2o_pkts = [[] for _ in range(0, worker_count)]
        for i in range(0, worker_count):
            remote_host = in_if.remote_hosts[i]
            for port in thread_edge_ports:
                p = (
                    Ether(dst=in_if.local_mac, src=in_if.remote_mac)
                    / IP(src=remote_host.ip4, dst=out_if.remote_ip4)
                    / TCP(sport=port, dport=port)
                )
                i2o_pkts[i].append(p)

                p = (
                    Ether(dst=in_if.local_mac, src=in_if.remote_mac)
                    / IP(src=remote_host.ip4, dst=out_if.remote_ip4)
                    / UDP(sport=port, dport=port)
                )
                i2o_pkts[i].append(p)

                p = (
                    Ether(dst=in_if.local_mac, src=in_if.remote_mac)
                    / IP(src=remote_host.ip4, dst=out_if.remote_ip4)
                    / ICMP(id=port, seq=port, type="echo-request")
                )
                i2o_pkts[i].append(p)

        for i in range(0, worker_count):
            if i2o_pkts[i]:
                in_if.add_stream(i2o_pkts[i], worker=i)

        self.pg_enable_capture(self.pg_interfaces)
        self.pg_start()
        capture = out_if.get_capture(pkt_count * 3)
        for packet in capture:
            self.assert_packet_checksums_valid(packet)
            # Membership in the half-open range keeps the upper bound
            # exclusive: port_offset + port_count itself is already
            # outside of the dynamic range and must not be accepted.
            if packet.haslayer(TCP):
                self.assertIn(packet[TCP].sport, dynamic_ports, "src TCP port")
            elif packet.haslayer(UDP):
                self.assertIn(packet[UDP].sport, dynamic_ports, "src UDP port")
            elif packet.haslayer(ICMP):
                self.assertIn(packet[ICMP].id, dynamic_ports, "ICMP id")
            else:
                self.fail(
                    ppp("Unexpected or invalid packet (outside network):", packet)
                )

        if_idx = in_if.sw_if_index
        tc2 = self.statistics["/nat44-ed/in2out/slowpath/tcp"]
        uc2 = self.statistics["/nat44-ed/in2out/slowpath/udp"]
        ic2 = self.statistics["/nat44-ed/in2out/slowpath/icmp"]
        dc2 = self.statistics["/nat44-ed/in2out/slowpath/drops"]

        self.assertEqual(tc2[:, if_idx].sum() - tc1[:, if_idx].sum(), pkt_count)
        self.assertEqual(uc2[:, if_idx].sum() - uc1[:, if_idx].sum(), pkt_count)
        self.assertEqual(ic2[:, if_idx].sum() - ic1[:, if_idx].sum(), pkt_count)
        self.assertEqual(dc2[:, if_idx].sum() - dc1[:, if_idx].sum(), 0)

        # out2in
        tc1 = self.statistics["/nat44-ed/out2in/fastpath/tcp"]
        uc1 = self.statistics["/nat44-ed/out2in/fastpath/udp"]
        ic1 = self.statistics["/nat44-ed/out2in/fastpath/icmp"]
        dc1 = self.statistics["/nat44-ed/out2in/fastpath/drops"]
        dc3 = self.statistics["/nat44-ed/out2in/slowpath/drops"]

        # replies to unchanged thread ports should pass on each worker,
        # excluding packets outside dynamic port range
        drop_count = worker_count * len(thread_drop_ports)
        pass_count = worker_count * len(thread_edge_ports) - drop_count

        o2i_pkts = [[] for _ in range(0, worker_count)]
        for i in range(0, worker_count):
            for port in thread_edge_ports:
                p = (
                    Ether(dst=out_if.local_mac, src=out_if.remote_mac)
                    / IP(src=out_if.remote_ip4, dst=self.nat_addr)
                    / TCP(sport=port, dport=port)
                )
                o2i_pkts[i].append(p)

                p = (
                    Ether(dst=out_if.local_mac, src=out_if.remote_mac)
                    / IP(src=out_if.remote_ip4, dst=self.nat_addr)
                    / UDP(sport=port, dport=port)
                )
                o2i_pkts[i].append(p)

                p = (
                    Ether(dst=out_if.local_mac, src=out_if.remote_mac)
                    / IP(src=out_if.remote_ip4, dst=self.nat_addr)
                    / ICMP(id=port, seq=port, type="echo-reply")
                )
                o2i_pkts[i].append(p)

        for i in range(0, worker_count):
            if o2i_pkts[i]:
                out_if.add_stream(o2i_pkts[i], worker=i)

        self.pg_enable_capture(self.pg_interfaces)
        self.pg_start()
        capture = in_if.get_capture(pass_count * 3)
        for packet in capture:
            self.assert_packet_checksums_valid(packet)
            if packet.haslayer(TCP):
                self.assertIn(packet[TCP].dport, thread_edge_ports, "dst TCP port")
                self.assertEqual(packet[TCP].dport, packet[TCP].sport, "TCP ports")
            elif packet.haslayer(UDP):
                self.assertIn(packet[UDP].dport, thread_edge_ports, "dst UDP port")
                self.assertEqual(packet[UDP].dport, packet[UDP].sport, "UDP ports")
            elif packet.haslayer(ICMP):
                self.assertIn(packet[ICMP].id, thread_edge_ports, "ICMP id")
                self.assertEqual(packet[ICMP].id, packet[ICMP].seq, "ICMP id & seq")
            else:
                self.fail(
                    ppp("Unexpected or invalid packet (inside network):", packet)
                )

        if_idx = out_if.sw_if_index
        tc2 = self.statistics["/nat44-ed/out2in/fastpath/tcp"]
        uc2 = self.statistics["/nat44-ed/out2in/fastpath/udp"]
        ic2 = self.statistics["/nat44-ed/out2in/fastpath/icmp"]
        dc2 = self.statistics["/nat44-ed/out2in/fastpath/drops"]
        dc4 = self.statistics["/nat44-ed/out2in/slowpath/drops"]

        self.assertEqual(tc2[:, if_idx].sum() - tc1[:, if_idx].sum(), pass_count)
        self.assertEqual(uc2[:, if_idx].sum() - uc1[:, if_idx].sum(), pass_count)
        self.assertEqual(ic2[:, if_idx].sum() - ic1[:, if_idx].sum(), pass_count)
        self.assertEqual(dc2[:, if_idx].sum() - dc1[:, if_idx].sum(), 0)
        # every out-of-range port is sent as TCP, UDP and ICMP
        self.assertEqual(
            dc4[:, if_idx].sum() - dc3[:, if_idx].sum(), drop_count * 3
        )

    finally:
        # always restore the interfaces, even when an assertion fails
        in_if.unconfig()
        out_if.unconfig()
class TestNAT44EDMW(TestNAT44ED):
"""NAT44ED MW Test Case"""