dpdk: use vpp physmem allocator for dpdk buffers

This allows us to have a single contiguous allocation for DPDK buffers
with a single mmap FD, so buffer memory can be easily shared with a
different process.

As a consequence, DPDK socket-mem is no longer in charge of allocating
buffer memory, but we still need some space allocated for DPDK
structures, so the default socket-mem is reduced from 256 to 64 MB.

For a default of 16K buffers per numa node, physmem allocation is now
40MB, so basically this change reduces footprint from 256MB per socket
to 104MB (64 + 40).

Change-Id: Ic8cfe83930a18411545b37a12b14aac89affd04f
Signed-off-by: Damjan Marion <damarion@cisco.com>
Signed-off-by: Sergio Gonzalez Monroy <sergio.gonzalez.monroy@intel.com>
Signed-off-by: Damjan Marion <damarion@cisco.com>
This commit is contained in:
Damjan Marion
2017-07-20 18:10:35 +02:00
committed by Neale Ranns
parent 7b7ba572ab
commit 206243c1b7
2 changed files with 144 additions and 63 deletions
+66 -19
View File
@@ -409,13 +409,26 @@ dpdk_packet_template_init (vlib_main_t * vm,
vlib_worker_thread_barrier_release (vm);
}
/* Per-mempool private data stored inside the DPDK mempool and retrieved
 * later with rte_mempool_get_priv(). */
typedef struct
{
/* must be first */
struct rte_pktmbuf_pool_private mbp_priv;
/* index of the VPP physmem region backing this pool's buffer memory */
vlib_physmem_region_index_t region_index;
} dpdk_mempool_private_t;
clib_error_t *
dpdk_buffer_pool_create (vlib_main_t * vm, unsigned num_mbufs,
unsigned socket_id)
{
dpdk_main_t *dm = &dpdk_main;
struct rte_mempool *rmp;
int i;
dpdk_mempool_private_t priv;
vlib_physmem_region_t *pr;
vlib_physmem_region_index_t pri;
u8 *pool_name;
unsigned elt_size;
u32 size;
i32 i, ret;
vec_validate_aligned (dm->pktmbuf_pools, socket_id, CLIB_CACHE_LINE_BYTES);
@@ -423,29 +436,64 @@ dpdk_buffer_pool_create (vlib_main_t * vm, unsigned num_mbufs,
if (dm->pktmbuf_pools[socket_id])
return 0;
u8 *pool_name = format (0, "mbuf_pool_socket%u%c", socket_id, 0);
pool_name = format (0, "dpdk_mbuf_pool_socket%u%c", socket_id, 0);
rmp = rte_pktmbuf_pool_create ((char *) pool_name, /* pool name */
num_mbufs, /* number of mbufs */
512, /* cache size */
VLIB_BUFFER_HDR_SIZE, /* priv size */
VLIB_BUFFER_PRE_DATA_SIZE + VLIB_BUFFER_DATA_SIZE, /* dataroom size */
socket_id); /* cpu socket */
elt_size = sizeof (struct rte_mbuf) +
VLIB_BUFFER_HDR_SIZE /* priv size */ +
VLIB_BUFFER_PRE_DATA_SIZE + VLIB_BUFFER_DATA_SIZE; /*data room size */
size = rte_mempool_xmem_size (num_mbufs, elt_size, 21);
clib_error_t *error = 0;
error =
vlib_physmem_region_alloc (vm, (char *) pool_name, size, socket_id,
VLIB_PHYSMEM_F_HAVE_BUFFERS, &pri);
if (error)
clib_error_report (error);
pr = vlib_physmem_get_region (vm, pri);
priv.mbp_priv.mbuf_data_room_size = VLIB_BUFFER_PRE_DATA_SIZE +
VLIB_BUFFER_DATA_SIZE;
priv.mbp_priv.mbuf_priv_size = VLIB_BUFFER_HDR_SIZE;
#if 0
/* Check that pg_shift parameter is valid. */
if (pg_shift > MEMPOOL_PG_SHIFT_MAX)
{
rte_errno = EINVAL;
return NULL;
}
#endif
rmp = rte_mempool_create_empty ((char *) pool_name, /* pool name */
num_mbufs, /* number of mbufs */
elt_size, 512, /* cache size */
sizeof (dpdk_mempool_private_t), /* private data size */
socket_id, 0); /* flags */
if (rmp)
{
{
struct rte_mempool_memhdr *memhdr;
rte_mempool_set_ops_byname (rmp, RTE_MBUF_DEFAULT_MEMPOOL_OPS, NULL);
STAILQ_FOREACH (memhdr, &rmp->mem_list, next)
vlib_buffer_add_mem_range (vm, (uword) memhdr->addr, memhdr->len);
}
if (rmp)
/* call the mempool priv initializer */
rte_pktmbuf_pool_init (rmp, &priv);
ret = rte_mempool_populate_phys_tab (rmp, pr->mem, pr->page_table,
pr->n_pages, pr->log2_page_size,
NULL, NULL);
if (ret == (i32) rmp->size)
{
/* call the object initializers */
rte_mempool_obj_iter (rmp, rte_pktmbuf_init, 0);
dpdk_mempool_private_t *privp = rte_mempool_get_priv (rmp);
privp->region_index = pri;
dm->pktmbuf_pools[socket_id] = rmp;
vec_free (pool_name);
return 0;
}
rte_mempool_free (rmp);
}
vec_free (pool_name);
@@ -455,10 +503,9 @@ dpdk_buffer_pool_create (vlib_main_t * vm, unsigned num_mbufs,
{
if (dm->pktmbuf_pools[i])
{
clib_warning
("WARNING: Failed to allocate mempool for CPU socket %u. "
"Threads running on socket %u will use socket %u mempool.",
socket_id, socket_id, i);
clib_warning ("WARNING: Failed to allocate mempool for CPU socket "
"%u. Threads running on socket %u will use socket %u "
"mempool.", socket_id, socket_id, i);
dm->pktmbuf_pools[socket_id] = dm->pktmbuf_pools[i];
return 0;
}
+78 -44
View File
@@ -24,6 +24,8 @@
#include <dpdk/device/dpdk.h>
#include <vlib/pci/pci.h>
#include <rte_ring.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
@@ -137,6 +139,60 @@ dpdk_device_lock_init (dpdk_device_t * xd)
}
}
/* Look up a registered mempool ops vtable by name.
 * Returns a pointer into rte_mempool_ops_table, or 0 if no entry matches. */
static struct rte_mempool_ops *
get_ops_by_name (i8 * ops_name)
{
  u32 n_ops = rte_mempool_ops_table.num_ops;
  u32 j;

  for (j = 0; j < n_ops; j++)
    {
      struct rte_mempool_ops *candidate = &rte_mempool_ops_table.ops[j];
      if (strcmp (ops_name, candidate->name) == 0)
	return candidate;
    }

  return 0;
}
/* Custom mempool ops alloc callback: back the mempool's object ring with
 * memory from the VPP allocator instead of a DPDK memzone.
 *
 * @param mp  mempool being populated; its pool_data is set to the new ring
 * @return 0 on success, -ENAMETOOLONG if the ring name does not fit,
 *         or the error from rte_ring_init().
 */
static int
dpdk_ring_alloc (struct rte_mempool *mp)
{
  u32 rg_flags = 0, count;
  i32 ret;
  i8 rg_name[RTE_RING_NAMESIZE];
  struct rte_ring *r;

  ret = snprintf (rg_name, sizeof (rg_name), RTE_MEMPOOL_MZ_FORMAT, mp->name);
  if (ret < 0 || ret >= (i32) sizeof (rg_name))
    return -ENAMETOOLONG;

  /* translate mempool flags into the corresponding ring flags */
  if (mp->flags & MEMPOOL_F_SP_PUT)
    rg_flags |= RING_F_SP_ENQ;
  if (mp->flags & MEMPOOL_F_SC_GET)
    rg_flags |= RING_F_SC_DEQ;

  /* ring size must be a power of two and hold mp->size objects */
  count = rte_align32pow2 (mp->size + 1);

  /*
   * Allocate the ring that will be used to store objects.
   * Ring functions will return appropriate errors if we are
   * running as a secondary process etc., so no checks made
   * in this function for that condition.
   */
  /* XXX can we get memory from the right socket? */
  r = clib_mem_alloc_aligned (rte_ring_get_memsize (count),
			      CLIB_CACHE_LINE_BYTES);

  /* XXX rte_ring_lookup will not work */
  ret = rte_ring_init (r, rg_name, count, rg_flags);
  if (ret)
    {
      /* fix: original code leaked the ring memory on init failure */
      clib_mem_free (r);
      return ret;
    }

  mp->pool_data = r;
  return 0;
}
static clib_error_t *
dpdk_lib_init (dpdk_main_t * dm)
{
@@ -420,10 +476,6 @@ dpdk_lib_init (dpdk_main_t * dm)
xd->port_type = VNET_DPDK_PORT_TYPE_VIRTIO_USER;
break;
case VNET_DPDK_PMD_VHOST_ETHER:
xd->port_type = VNET_DPDK_PORT_TYPE_VHOST_ETHER;
break;
default:
xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN;
}
@@ -987,9 +1039,6 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
{
u32 x, *mem_by_socket = 0;
uword c = 0;
u8 use_1g = 1;
u8 use_2m = 1;
u8 less_than_1g = 1;
int rv;
umount ((char *) huge_dir_path);
@@ -1011,9 +1060,6 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
break;
vec_add1 (mem_by_socket, x);
if (x > 1023)
less_than_1g = 0;
}
/* Note: unformat_free vec_frees(in.buffer), aka socket_mem... */
unformat_free (&in);
@@ -1025,39 +1071,22 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
clib_bitmap_foreach (c, tm->cpu_socket_bitmap, (
{
vec_validate(mem_by_socket, c);
mem_by_socket[c] = 256; /* default per-socket mem */
mem_by_socket[c] = 64; /* default per-socket mem */
}
));
/* *INDENT-ON* */
}
/* check if available enough 1GB pages for each socket */
/* *INDENT-OFF* */
clib_bitmap_foreach (c, tm->cpu_socket_bitmap, (
{
int pages_avail, page_size, mem;
clib_error_t *e = 0;
clib_error_t *e;
vec_validate(mem_by_socket, c);
mem = mem_by_socket[c];
page_size = 1024;
e = clib_sysfs_get_free_hugepages(c, page_size * 1024, &pages_avail);
if (e != 0 || pages_avail < 0 || page_size * pages_avail < mem)
use_1g = 0;
e = clib_sysfs_prealloc_hugepages(c, 2 << 10, mem_by_socket[c] / 2);
if (e)
clib_error_free (e);
page_size = 2;
e = clib_sysfs_get_free_hugepages(c, page_size * 1024, &pages_avail);
if (e != 0 || pages_avail < 0 || page_size * pages_avail < mem)
use_2m = 0;
if (e)
clib_error_free (e);
clib_error_report (e);
}));
/* *INDENT-ON* */
@@ -1082,19 +1111,7 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
goto done;
}
if (use_1g && !(less_than_1g && use_2m))
{
rv = mount ("none", (char *) huge_dir_path, "hugetlbfs", 0,
"pagesize=1G");
}
else if (use_2m)
{
rv = mount ("none", (char *) huge_dir_path, "hugetlbfs", 0, NULL);
}
else
{
return clib_error_return (0, "not enough free huge pages");
}
rv = mount ("none", (char *) huge_dir_path, "hugetlbfs", 0, NULL);
if (rv)
{
@@ -1229,6 +1246,23 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
fprintf (stdout, "DPDK physical memory layout:\n");
rte_dump_physmem_layout (stdout);
/* set custom ring memory allocator */
{
struct rte_mempool_ops *ops = NULL;
ops = get_ops_by_name ("ring_sp_sc");
ops->alloc = dpdk_ring_alloc;
ops = get_ops_by_name ("ring_mp_sc");
ops->alloc = dpdk_ring_alloc;
ops = get_ops_by_name ("ring_sp_mc");
ops->alloc = dpdk_ring_alloc;
ops = get_ops_by_name ("ring_mp_mc");
ops->alloc = dpdk_ring_alloc;
}
/* main thread 1st */
error = dpdk_buffer_pool_create (vm, conf->num_mbufs, rte_socket_id ());
if (error)