vlib: don't leak node frames on refork

Free node frames in worker mains on refork. Otherwise these frames are
never returned to the free pool, which causes massive memory leaks if
the refork is performed under traffic load.

Type: fix
Signed-off-by: Dmitry Valter <d-valter@yandex-team.ru>
Change-Id: I15cbf024a3f4b4082445fd5e5aaa10bfcf77f363
This commit is contained in:
Dmitry Valter
2022-09-05 15:30:18 +00:00
committed by Ole Trøan
parent 10672be9e2
commit 9f5b36926b
10 changed files with 129 additions and 16 deletions

View File

@ -338,8 +338,7 @@ vrrp_adv_send (vrrp_vr_t * vr, int shutdown)
if (-1 == vrrp_adv_l3_build (vr, b, dst))
{
vlib_frame_free (vm, vlib_node_get_runtime (vm, node_index),
to_frame);
vlib_frame_free (vm, to_frame);
vlib_buffer_free (vm, bi, n_buffers);
return -1;
}

View File

@ -236,7 +236,7 @@ process_drop_punt (vlib_main_t * vm,
/* If there is no punt function, free the frame as well. */
if (disposition == ERROR_DISPOSITION_PUNT && !vm->os_punt_frame)
vlib_frame_free (vm, node, frame);
vlib_frame_free (vm, frame);
}
else
vm->os_punt_frame (vm, node, frame);

View File

@ -106,6 +106,7 @@ vlib_frame_alloc_to_node (vlib_main_t * vm, u32 to_node_index,
f->vector_offset = to_node->vector_offset;
f->aux_offset = to_node->aux_offset;
f->flags = 0;
f->frame_size_index = to_node->frame_size_index;
fs->n_alloc_frames += 1;
@ -186,17 +187,15 @@ vlib_put_frame_to_node (vlib_main_t * vm, u32 to_node_index, vlib_frame_t * f)
/* Free given frame. */
void
vlib_frame_free (vlib_main_t * vm, vlib_node_runtime_t * r, vlib_frame_t * f)
vlib_frame_free (vlib_main_t *vm, vlib_frame_t *f)
{
vlib_node_main_t *nm = &vm->node_main;
vlib_node_t *node;
vlib_frame_size_t *fs;
ASSERT (vm == vlib_get_main ());
ASSERT (f->frame_flags & VLIB_FRAME_IS_ALLOCATED);
node = vlib_get_node (vm, r->node_index);
fs = vec_elt_at_index (nm->frame_sizes, node->frame_size_index);
fs = vec_elt_at_index (nm->frame_sizes, f->frame_size_index);
ASSERT (f->frame_flags & VLIB_FRAME_IS_ALLOCATED);
@ -1171,7 +1170,7 @@ dispatch_pending_node (vlib_main_t * vm, uword pending_frame_index,
/* The node has gained a frame, implying packets from the current frame
were re-queued to this same node. we don't need the saved one
anymore */
vlib_frame_free (vm, n, f);
vlib_frame_free (vm, f);
}
}
else
@ -1179,7 +1178,7 @@ dispatch_pending_node (vlib_main_t * vm, uword pending_frame_index,
if (f->frame_flags & VLIB_FRAME_FREE_AFTER_DISPATCH)
{
ASSERT (!(n->flags & VLIB_NODE_FLAG_FRAME_NO_FREE_AFTER_DISPATCH));
vlib_frame_free (vm, n, f);
vlib_frame_free (vm, f);
}
}

View File

@ -577,7 +577,7 @@ null_node_fn (vlib_main_t * vm,
vlib_node_increment_counter (vm, node->node_index, 0, n_vectors);
vlib_buffer_free (vm, vlib_frame_vector_args (frame), n_vectors);
vlib_frame_free (vm, node, frame);
vlib_frame_free (vm, frame);
return n_vectors;
}

View File

@ -389,6 +389,9 @@ typedef struct vlib_frame_t
/* Number of vector elements currently in frame. */
u16 n_vectors;
/* Index of frame size corresponding to allocated node. */
u16 frame_size_index;
/* Scalar and vector arguments to next node. */
u8 arguments[0];
} vlib_frame_t;

View File

@ -1256,8 +1256,7 @@ vlib_node_vectors_per_main_loop_as_integer (vlib_main_t * vm, u32 node_index)
return v >> VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE;
}
void
vlib_frame_free (vlib_main_t * vm, vlib_node_runtime_t * r, vlib_frame_t * f);
void vlib_frame_free (vlib_main_t *vm, vlib_frame_t *f);
/* Return the edge index if present, ~0 otherwise */
uword vlib_node_get_next (vlib_main_t * vm, uword node, uword next_node);

View File

@ -913,6 +913,17 @@ vlib_worker_thread_node_refork (void)
vec_validate_aligned (old_counters_all_clear, j, CLIB_CACHE_LINE_BYTES);
vm_clone->error_main.counters_last_clear = old_counters_all_clear;
for (j = 0; j < vec_len (nm_clone->next_frames); j++)
{
vlib_next_frame_t *nf = &nm_clone->next_frames[j];
if ((nf->flags & VLIB_FRAME_IS_ALLOCATED) && nf->frame != NULL)
{
vlib_frame_t *f = nf->frame;
nf->frame = NULL;
vlib_frame_free (vm_clone, f);
}
}
vec_free (nm_clone->next_frames);
nm_clone->next_frames = vec_dup_aligned (nm->next_frames,
CLIB_CACHE_LINE_BYTES);

View File

@ -484,8 +484,7 @@ flow_report_process_send (vlib_main_t *vm, flow_report_main_t *frm,
vlib_put_frame_to_node (vm, next_node, nf);
else
{
vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, next_node);
vlib_frame_free (vm, rt, nf);
vlib_frame_free (vm, nf);
}
}
}

View File

@ -912,7 +912,7 @@ tuntap_punt_frame (vlib_main_t * vm,
vlib_node_runtime_t * node, vlib_frame_t * frame)
{
tuntap_tx (vm, node, frame);
vlib_frame_free (vm, node, frame);
vlib_frame_free (vm, frame);
}
/**
@ -930,7 +930,7 @@ tuntap_nopunt_frame (vlib_main_t * vm,
u32 *buffers = vlib_frame_vector_args (frame);
uword n_packets = frame->n_vectors;
vlib_buffer_free (vm, buffers, n_packets);
vlib_frame_free (vm, node, frame);
vlib_frame_free (vm, frame);
}
/* *INDENT-OFF* */

View File

@ -7,6 +7,9 @@ import signal
from config import config
from framework import VppTestCase, VppTestRunner
from vpp_ip_route import VppIpTable, VppIpRoute, VppRoutePath
from scapy.layers.inet import IP, ICMP
from scapy.layers.l2 import Ether
from scapy.packet import Raw
@unittest.skipUnless(config.gcov, "part of code coverage tests")
@ -220,5 +223,105 @@ class TestVlib(VppTestCase):
self.logger.info(cmd + " FAIL retval " + str(r.retval))
class TestVlibFrameLeak(VppTestCase):
"""Vlib Frame Leak Test Cases"""
vpp_worker_count = 1
@classmethod
def setUpClass(cls):
super(TestVlibFrameLeak, cls).setUpClass()
@classmethod
def tearDownClass(cls):
super(TestVlibFrameLeak, cls).tearDownClass()
def setUp(self):
super(TestVlibFrameLeak, self).setUp()
# create 1 pg interface
self.create_pg_interfaces(range(1))
for i in self.pg_interfaces:
i.admin_up()
i.config_ip4()
i.resolve_arp()
def tearDown(self):
super(TestVlibFrameLeak, self).tearDown()
for i in self.pg_interfaces:
i.unconfig_ip4()
i.admin_down()
def test_vlib_mw_refork_frame_leak(self):
"""Vlib worker thread refork leak test case"""
icmp_id = 0xB
icmp_seq = 5
icmp_load = b"\x0a" * 18
pkt = (
Ether(src=self.pg0.remote_mac, dst=self.pg0.local_mac)
/ IP(src=self.pg0.remote_ip4, dst=self.pg0.local_ip4)
/ ICMP(id=icmp_id, seq=icmp_seq)
/ Raw(load=icmp_load)
)
# Send a packet
self.pg0.add_stream(pkt)
self.pg_enable_capture(self.pg_interfaces)
self.pg_start()
rx = self.pg0.get_capture(1)
self.assertEquals(len(rx), 1)
rx = rx[0]
ether = rx[Ether]
ipv4 = rx[IP]
self.assertEqual(ether.src, self.pg0.local_mac)
self.assertEqual(ether.dst, self.pg0.remote_mac)
self.assertEqual(ipv4.src, self.pg0.local_ip4)
self.assertEqual(ipv4.dst, self.pg0.remote_ip4)
# Save allocated frame count
frame_allocated = {}
for fs in self.vapi.cli("show vlib frame-allocation").splitlines()[1:]:
spl = fs.split()
thread = int(spl[0])
size = int(spl[1])
alloc = int(spl[2])
key = (thread, size)
frame_allocated[key] = alloc
# cause reforks
_ = self.create_loopback_interfaces(1)
# send the same packet
self.pg0.add_stream(pkt)
self.pg_enable_capture(self.pg_interfaces)
self.pg_start()
rx = self.pg0.get_capture(1)
self.assertEquals(len(rx), 1)
rx = rx[0]
ether = rx[Ether]
ipv4 = rx[IP]
self.assertEqual(ether.src, self.pg0.local_mac)
self.assertEqual(ether.dst, self.pg0.remote_mac)
self.assertEqual(ipv4.src, self.pg0.local_ip4)
self.assertEqual(ipv4.dst, self.pg0.remote_ip4)
# Check that no frame were leaked during refork
for fs in self.vapi.cli("show vlib frame-allocation").splitlines()[1:]:
spl = fs.split()
thread = int(spl[0])
size = int(spl[1])
alloc = int(spl[2])
key = (thread, size)
self.assertEqual(frame_allocated[key], alloc)
if __name__ == "__main__":
unittest.main(testRunner=VppTestRunner)