vppinfra: support main heap with different page sizes

Type: improvement
Change-Id: I381fc3dec8580208d0e24637d791af69011aa83b
Signed-off-by: Damjan Marion <damarion@cisco.com>
This commit is contained in:
Damjan Marion
2020-09-11 22:16:53 +02:00
parent c63e2a4f98
commit 6bfd07670b
9 changed files with 501 additions and 87 deletions
+1 -1
View File
@@ -110,7 +110,7 @@ dpdk_early_init (vlib_main_t *vm)
/* check if pagemap is accessible - if we get zero result
dpdk will not be able to get physical memory address and game is over
unless we have IOMMU */
pt = clib_mem_vm_get_paddr (&pt, min_log2 (sysconf (_SC_PAGESIZE)), 1);
pt = clib_mem_vm_get_paddr (&pt, CLIB_MEM_PAGE_SZ_DEFAULT, 1);
if (pt && pt[0])
goto check_hugetlb;
+42 -3
View File
@@ -742,6 +742,7 @@ show_memory_usage (vlib_main_t * vm,
clib_mem_main_t *mm = &clib_mem_main;
int verbose __attribute__ ((unused)) = 0;
int api_segment = 0, stats_segment = 0, main_heap = 0, numa_heaps = 0;
int map = 0;
clib_error_t *error;
u32 index = 0;
int i;
@@ -761,6 +762,8 @@ show_memory_usage (vlib_main_t * vm,
main_heap = 1;
else if (unformat (input, "numa-heaps"))
numa_heaps = 1;
else if (unformat (input, "map"))
map = 1;
else
{
error = clib_error_return (0, "unknown input `%U'",
@@ -769,9 +772,10 @@ show_memory_usage (vlib_main_t * vm,
}
}
if ((api_segment + stats_segment + main_heap + numa_heaps) == 0)
if ((api_segment + stats_segment + main_heap + numa_heaps + map) == 0)
return clib_error_return
(0, "Need one of api-segment, stats-segment, main-heap or numa-heaps");
(0, "Need one of api-segment, stats-segment, main-heap, numa-heaps "
"or map");
if (api_segment)
{
@@ -871,6 +875,41 @@ show_memory_usage (vlib_main_t * vm,
mm->per_numa_mheaps[index], verbose);
}
}
if (map)
{
clib_mem_page_stats_t stats = { };
clib_mem_vm_map_hdr_t *hdr = 0;
u8 *s = 0;
int numa = -1;
s = format (s, "\n%-16s%7s%7s%7s",
"StartAddr", "size", "PageSz", "Pages");
while ((numa = vlib_mem_get_next_numa_node (numa)) != -1)
s = format (s, " Numa%u", numa);
s = format (s, " NotMap");
s = format (s, " Name");
vlib_cli_output (vm, "%v", s);
vec_reset_length (s);
while ((hdr = clib_mem_vm_get_next_map_hdr (hdr)))
{
clib_mem_get_page_stats ((void *) hdr->base_addr,
hdr->log2_page_sz, hdr->num_pages,
&stats);
s = format (s, "%016lx%7U%7U%7lu",
hdr->base_addr, format_memory_size,
hdr->num_pages << hdr->log2_page_sz,
format_log2_page_size, hdr->log2_page_sz,
hdr->num_pages);
while ((numa = vlib_mem_get_next_numa_node (numa)) != -1)
s = format (s, "%6lu", stats.per_numa[numa]);
s = format (s, "%7lu", stats.not_mapped);
s = format (s, " %s", hdr->name);
vlib_cli_output (vm, "%v", s);
vec_reset_length (s);
}
vec_free (s);
}
}
return 0;
}
@@ -879,7 +918,7 @@ show_memory_usage (vlib_main_t * vm,
VLIB_CLI_COMMAND (show_memory_usage_command, static) = {
.path = "show memory",
.short_help = "show memory [api-segment][stats-segment][verbose]\n"
" [numa-heaps]",
" [numa-heaps][map]",
.function = show_memory_usage,
};
/* *INDENT-ON* */
+5 -3
View File
@@ -1132,8 +1132,10 @@ vlib_pci_map_region_int (vlib_main_t * vm, vlib_pci_dev_handle_t h,
if (p->type == LINUX_PCI_DEVICE_TYPE_UIO && addr != 0)
flags |= MAP_FIXED;
*result = mmap (addr, size, PROT_READ | PROT_WRITE, flags, fd, offset);
if (*result == (void *) -1)
*result = clib_mem_vm_map_shared (addr, size, fd, offset,
"PCIe %U region %u", format_vlib_pci_addr,
vlib_pci_get_addr (vm, h), bar);
if (*result == CLIB_MEM_VM_MAP_FAILED)
{
error = clib_error_return_unix (0, "mmap `BAR%u'", bar);
if (p->type == LINUX_PCI_DEVICE_TYPE_UIO && (fd != -1))
@@ -1337,7 +1339,7 @@ vlib_pci_device_close (vlib_main_t * vm, vlib_pci_dev_handle_t h)
{
if (res->size == 0)
continue;
munmap (res->addr, res->size);
clib_mem_vm_unmap (res->addr);
if (res->fd != -1)
close (res->fd);
}
+250 -27
View File
File diff suppressed because it is too large Load Diff
+50
View File
@@ -21,6 +21,56 @@
clib_mem_main_t clib_mem_main;
/* Map anonymous memory with the requested page size at (or near) 'base'.
 * The printf-style arguments build a human-readable name that the
 * internal mapper records for this mapping.  Returns the mapped address,
 * or CLIB_MEM_VM_MAP_FAILED on error. */
void *
clib_mem_vm_map (void *base, uword size, clib_mem_page_sz_t log2_page_sz,
		 char *fmt, ...)
{
  u8 *name;
  void *addr;
  va_list args;

  va_start (args, fmt);
  name = va_format (0, fmt, &args);
  va_end (args);
  vec_add1 (name, 0);	/* NUL-terminate so the vector is a valid C string */

  addr = clib_mem_vm_map_internal (base, log2_page_sz, size, -1 /* fd */,
				   0 /* offset */, (char *) name);
  vec_free (name);
  return addr;
}
/* Map anonymous memory suitable for use as a stack, letting the kernel
 * choose the address (base is always 0).  The printf-style arguments
 * build the name recorded for this mapping.  Returns the mapped address,
 * or CLIB_MEM_VM_MAP_FAILED on error. */
void *
clib_mem_vm_map_stack (uword size, clib_mem_page_sz_t log2_page_sz,
		       char *fmt, ...)
{
  u8 *name;
  void *addr;
  va_list args;

  va_start (args, fmt);
  name = va_format (0, fmt, &args);
  va_end (args);
  vec_add1 (name, 0);	/* NUL-terminate so the vector is a valid C string */

  addr = clib_mem_vm_map_internal (0 /* base */, log2_page_sz, size,
				   -1 /* fd */, 0 /* offset */,
				   (char *) name);
  vec_free (name);
  return addr;
}
/* Map a shared, fd-backed region at (or near) 'base', starting at
 * 'offset' within the file.  Page size is taken from the fd (log2 page
 * size 0 is passed to the internal mapper).  The printf-style arguments
 * build the name recorded for this mapping.  Returns the mapped address,
 * or CLIB_MEM_VM_MAP_FAILED on error. */
void *
clib_mem_vm_map_shared (void *base, uword size, int fd, uword offset,
			char *fmt, ...)
{
  u8 *name;
  void *addr;
  va_list args;

  va_start (args, fmt);
  name = va_format (0, fmt, &args);
  va_end (args);
  vec_add1 (name, 0);	/* NUL-terminate so the vector is a valid C string */

  addr = clib_mem_vm_map_internal (base, 0 /* log2_page_sz */, size, fd,
				   offset, (char *) name);
  vec_free (name);
  return addr;
}
/*
* fd.io coding-style-patch-verification: ON
*
+115 -38
View File
@@ -52,7 +52,8 @@
#include <vppinfra/sanitizer.h>
#define CLIB_MAX_MHEAPS 256
#define CLIB_MAX_NUMAS 8
#define CLIB_MAX_NUMAS 16
#define CLIB_MEM_VM_MAP_FAILED ((void *) ~0)
typedef enum
{
@@ -71,6 +72,25 @@ typedef enum
CLIB_MEM_PAGE_SZ_16G = 34,
} clib_mem_page_sz_t;
/* Per-mapping bookkeeping header, kept on a doubly-linked list anchored
   at clib_mem_main.first_map / last_map. */
typedef struct _clib_mem_vm_map_hdr
{
  /* base address of the mapping */
  uword base_addr;
  /* number of pages in the mapping */
  uword num_pages;
  /* page size (log2) */
  clib_mem_page_sz_t log2_page_sz;
  /* allocation name */
#define CLIB_VM_MAP_HDR_NAME_MAX_LEN 64
  char name[CLIB_VM_MAP_HDR_NAME_MAX_LEN];
  /* linked list of all map headers */
  struct _clib_mem_vm_map_hdr *prev, *next;
} clib_mem_vm_map_hdr_t;
typedef struct
{
/* log2 system page size */
@@ -87,6 +107,9 @@ typedef struct
/* per NUMA heaps */
void *per_numa_mheaps[CLIB_MAX_NUMAS];
/* memory maps */
clib_mem_vm_map_hdr_t *first_map, *last_map;
} clib_mem_main_t;
extern clib_mem_main_t clib_mem_main;
@@ -305,14 +328,14 @@ clib_mem_set_heap (void *heap)
void clib_mem_main_init ();
void *clib_mem_init (void *heap, uword size);
void *clib_mem_init_with_page_size (uword memory_size,
clib_mem_page_sz_t log2_page_sz);
void *clib_mem_init_thread_safe (void *memory, uword memory_size);
void *clib_mem_init_thread_safe_numa (void *memory, uword memory_size,
u8 numa);
void clib_mem_exit (void);
uword clib_mem_get_page_size (void);
void clib_mem_validate (void);
void clib_mem_trace (int enable);
@@ -374,39 +397,18 @@ clib_mem_vm_free (void *addr, uword size)
munmap (addr, size);
}
always_inline void *
clib_mem_vm_unmap (void *addr, uword size)
{
void *mmap_addr;
uword flags = MAP_PRIVATE | MAP_FIXED;
void *clib_mem_vm_map_internal (void *base, clib_mem_page_sz_t log2_page_sz,
uword size, int fd, uword offset, char *name);
/* To unmap we "map" with no protection. If we actually called
munmap then other callers could steal the address space. By
changing to PROT_NONE the kernel can free up the pages which is
really what we want "unmap" to mean. */
mmap_addr = mmap (addr, size, PROT_NONE, flags, -1, 0);
if (mmap_addr == (void *) -1)
mmap_addr = 0;
else
CLIB_MEM_UNPOISON (mmap_addr, size);
return mmap_addr;
}
always_inline void *
clib_mem_vm_map (void *addr, uword size)
{
void *mmap_addr;
uword flags = MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS;
mmap_addr = mmap (addr, size, (PROT_READ | PROT_WRITE), flags, -1, 0);
if (mmap_addr == (void *) -1)
mmap_addr = 0;
else
CLIB_MEM_UNPOISON (mmap_addr, size);
return mmap_addr;
}
void *clib_mem_vm_map (void *start, uword size,
clib_mem_page_sz_t log2_page_size, char *fmt, ...);
void *clib_mem_vm_map_stack (uword size, clib_mem_page_sz_t log2_page_size,
char *fmt, ...);
void *clib_mem_vm_map_shared (void *start, uword size, int fd, uword offset,
char *fmt, ...);
int clib_mem_vm_unmap (void *base);
clib_mem_vm_map_hdr_t *clib_mem_vm_get_next_map_hdr (clib_mem_vm_map_hdr_t *
hdr);
typedef struct
{
@@ -437,16 +439,36 @@ typedef struct
uword requested_va; /**< Request fixed position mapping */
} clib_mem_vm_alloc_t;
/* Return the log2 of the system page size recorded in clib_mem_main. */
static_always_inline clib_mem_page_sz_t
clib_mem_get_log2_page_size (void)
{
  clib_mem_main_t *mm = &clib_mem_main;
  return mm->log2_page_sz;
}
/* Return the system page size in bytes (derived from the log2 value
 * recorded in clib_mem_main). */
static_always_inline uword
clib_mem_get_page_size (void)
{
  clib_mem_main_t *mm = &clib_mem_main;
  return 1ULL << mm->log2_page_sz;
}
/* Return the log2 of the default hugepage size recorded in
 * clib_mem_main.  Note: parameter list is '(void)' — an empty '()' is an
 * old-style no-prototype declarator in C and inconsistent with the
 * sibling accessors. */
static_always_inline clib_mem_page_sz_t
clib_mem_get_log2_default_hugepage_size (void)
{
  return clib_mem_main.log2_default_hugepage_sz;
}
clib_error_t *clib_mem_create_fd (char *name, int *fdp);
clib_error_t *clib_mem_create_hugetlb_fd (char *name, int *fdp);
clib_error_t *clib_mem_vm_ext_alloc (clib_mem_vm_alloc_t * a);
void clib_mem_vm_ext_free (clib_mem_vm_alloc_t * a);
u64 clib_mem_get_fd_page_size (int fd);
uword clib_mem_get_fd_page_size (int fd);
uword clib_mem_get_default_hugepage_size (void);
int clib_mem_get_fd_log2_page_size (int fd);
clib_mem_page_sz_t clib_mem_get_fd_log2_page_size (int fd);
uword clib_mem_vm_reserve (uword start, uword size,
clib_mem_page_sz_t log2_page_sz);
u64 *clib_mem_vm_get_paddr (void *mem, int log2_page_size, int n_pages);
u64 *clib_mem_vm_get_paddr (void *mem, clib_mem_page_sz_t log2_page_size,
int n_pages);
void clib_mem_destroy_mspace (void *mspace);
void clib_mem_destroy (void);
@@ -466,6 +488,61 @@ void mheap_trace (void *v, int enable);
uword clib_mem_trace_enable_disable (uword enable);
void clib_mem_trace (int enable);
/* Round 'size' up to a multiple of the given page size.  The symbolic
 * values CLIB_MEM_PAGE_SZ_DEFAULT and CLIB_MEM_PAGE_SZ_DEFAULT_HUGE are
 * resolved to the real system / default-hugepage log2 sizes first;
 * CLIB_MEM_PAGE_SZ_UNKNOWN is not a valid argument. */
always_inline uword
clib_mem_round_to_page_size (uword size, clib_mem_page_sz_t log2_page_size)
{
  clib_mem_page_sz_t log2_sz = log2_page_size;

  ASSERT (log2_sz != CLIB_MEM_PAGE_SZ_UNKNOWN);

  if (log2_sz == CLIB_MEM_PAGE_SZ_DEFAULT)
    log2_sz = clib_mem_get_log2_page_size ();
  else if (log2_sz == CLIB_MEM_PAGE_SZ_DEFAULT_HUGE)
    log2_sz = clib_mem_get_log2_default_hugepage_size ();

  return round_pow2 (size, 1ULL << log2_sz);
}
/* Residency statistics for a range of pages, filled in by
   clib_mem_get_page_stats(). */
typedef struct
{
  /* pages mapped into physical memory */
  uword mapped;
  /* pages not mapped into physical memory */
  uword not_mapped;
  /* mapped pages broken down by NUMA node */
  uword per_numa[CLIB_MAX_NUMAS];
  /* pages whose NUMA node could not be determined */
  uword unknown;
} clib_mem_page_stats_t;
void clib_mem_get_page_stats (void *start, clib_mem_page_sz_t log2_page_size,
uword n_pages, clib_mem_page_stats_t * stats);
/* Iterate over the NUMA nodes present in clib_mem_main.numa_node_bitmap.
 * Pass -1 to get the first node, then pass the previous result to get
 * the next one; returns -1 when no higher-numbered node remains. */
static_always_inline int
vlib_mem_get_next_numa_node (int numa)
{
  u32 remaining = clib_mem_main.numa_node_bitmap;

  /* drop node 'numa' and all lower-numbered nodes */
  if (numa >= 0)
    remaining &= ~pow2_mask (numa + 1);

  return remaining ? (int) count_trailing_zeros (remaining) : -1;
}
/* Resolve the symbolic page-size values to concrete log2 sizes:
 * CLIB_MEM_PAGE_SZ_DEFAULT -> system page size,
 * CLIB_MEM_PAGE_SZ_DEFAULT_HUGE -> default hugepage size;
 * any other value is returned unchanged. */
static_always_inline clib_mem_page_sz_t
clib_mem_log2_page_size_validate (clib_mem_page_sz_t log2_page_size)
{
  switch (log2_page_size)
    {
    case CLIB_MEM_PAGE_SZ_DEFAULT:
      return clib_mem_get_log2_page_size ();
    case CLIB_MEM_PAGE_SZ_DEFAULT_HUGE:
      return clib_mem_get_log2_default_hugepage_size ();
    default:
      return log2_page_size;
    }
}
static_always_inline uword
clib_mem_page_bytes (clib_mem_page_sz_t log2_page_size)
{
return 1 << clib_mem_log2_page_size_validate (log2_page_size);
}
#include <vppinfra/error.h> /* clib_panic */
#endif /* _included_clib_mem_h */
+28 -2
View File
@@ -197,7 +197,8 @@ mheap_trace_main_free (mheap_trace_main_t * tm)
/* Initialize CLIB heap based on memory/size given by user.
Set memory to 0 and CLIB will try to allocate its own heap. */
static void *
clib_mem_init_internal (void *memory, uword memory_size, int set_heap)
clib_mem_init_internal (void *memory, uword memory_size,
clib_mem_page_sz_t log2_page_sz, int set_heap)
{
u8 *heap;
@@ -209,7 +210,18 @@ clib_mem_init_internal (void *memory, uword memory_size, int set_heap)
mspace_disable_expand (heap);
}
else
heap = create_mspace (memory_size, 1 /* locked */ );
{
memory_size = round_pow2 (memory_size,
clib_mem_page_bytes (log2_page_sz));
memory = clib_mem_vm_map_internal (0, log2_page_sz, memory_size, -1, 0,
"main heap");
if (memory == CLIB_MEM_VM_MAP_FAILED)
return 0;
heap = create_mspace_with_base (memory, memory_size, 1 /* locked */ );
mspace_disable_expand (heap);
}
CLIB_MEM_POISON (mspace_least_addr (heap), mspace_footprint (heap));
@@ -226,6 +238,15 @@ void *
/* Initialize the CLIB heap with the default page size and install it as
 * the current heap.  If 'memory' is NULL the backing store is allocated
 * by clib_mem_init_internal(). */
clib_mem_init (void *memory, uword memory_size)
{
  return clib_mem_init_internal (memory, memory_size,
				 CLIB_MEM_PAGE_SZ_DEFAULT,
				 1 /* do clib_mem_set_heap */ );
}
/* Initialize the CLIB heap backed by pages of the given (log2) size and
 * install it as the current heap.  The backing memory is always
 * allocated internally (no caller-supplied buffer). */
void *
clib_mem_init_with_page_size (uword memory_size,
			      clib_mem_page_sz_t log2_page_sz)
{
  void *heap;

  heap = clib_mem_init_internal (0 /* memory */, memory_size, log2_page_sz,
				 1 /* do clib_mem_set_heap */ );
  return heap;
}
@@ -233,6 +254,7 @@ void *
clib_mem_init_thread_safe (void *memory, uword memory_size)
{
return clib_mem_init_internal (memory, memory_size,
CLIB_MEM_PAGE_SZ_DEFAULT,
1 /* do clib_mem_set_heap */ );
}
@@ -250,7 +272,10 @@ clib_mem_destroy_mspace (void *mspace)
/* Tear down the current CLIB heap: destroy the mspace and unmap its
 * backing memory.  The base address must be captured before the mspace
 * is destroyed, since mspace_least_addr() reads mspace metadata. */
void
clib_mem_destroy (void)
{
  void *heap = clib_mem_get_heap ();
  void *base = mspace_least_addr (heap);

  /* reuse the captured heap pointer instead of calling
     clib_mem_get_heap () a second time */
  clib_mem_destroy_mspace (heap);
  clib_mem_vm_unmap (base);
}
void *
@@ -270,6 +295,7 @@ clib_mem_init_thread_safe_numa (void *memory, uword memory_size, u8 numa)
}
heap = clib_mem_init_internal (memory, memory_size,
CLIB_MEM_PAGE_SZ_DEFAULT,
0 /* do NOT clib_mem_set_heap */ );
ASSERT (heap);
+9 -9
View File
@@ -70,11 +70,10 @@ clib_pmalloc_init (clib_pmalloc_main_t * pm, uword base_addr, uword size)
pagesize = clib_mem_get_default_hugepage_size ();
pm->def_log2_page_sz = min_log2 (pagesize);
pm->sys_log2_page_sz = min_log2 (sysconf (_SC_PAGESIZE));
pm->lookup_log2_page_sz = pm->def_log2_page_sz;
/* check if pagemap is accessible */
pt = clib_mem_vm_get_paddr (&pt, pm->sys_log2_page_sz, 1);
pt = clib_mem_vm_get_paddr (&pt, CLIB_MEM_PAGE_SZ_DEFAULT, 1);
if (pt == 0 || pt[0] == 0)
pm->flags |= CLIB_PMALLOC_F_NO_PAGEMAP;
@@ -223,12 +222,12 @@ pmalloc_update_lookup_table (clib_pmalloc_main_t * pm, u32 first, u32 count)
{
va = pointer_to_uword (pm->base) + (p << pm->lookup_log2_page_sz);
pa = 0;
seek = (va >> pm->sys_log2_page_sz) * sizeof (pa);
seek = (va >> clib_mem_get_log2_page_size ()) * sizeof (pa);
if (fd != -1 && lseek (fd, seek, SEEK_SET) == seek &&
read (fd, &pa, sizeof (pa)) == (sizeof (pa)) &&
pa & (1ULL << 63) /* page present bit */ )
{
pa = (pa & pow2_mask (55)) << pm->sys_log2_page_sz;
pa = (pa & pow2_mask (55)) << clib_mem_get_log2_page_size ();
}
pm->lookup_table[p] = va - pa;
p++;
@@ -258,7 +257,7 @@ pmalloc_map_pages (clib_pmalloc_main_t * pm, clib_pmalloc_arena_t * a,
return 0;
}
if (a->log2_subpage_sz != pm->sys_log2_page_sz)
if (a->log2_subpage_sz != clib_mem_get_log2_page_size ())
{
pm->error = clib_sysfs_prealloc_hugepages (numa_node,
a->log2_subpage_sz, n_pages);
@@ -289,7 +288,7 @@ pmalloc_map_pages (clib_pmalloc_main_t * pm, clib_pmalloc_arena_t * a,
if (a->flags & CLIB_PMALLOC_ARENA_F_SHARED_MEM)
{
mmap_flags |= MAP_SHARED;
if (a->log2_subpage_sz != pm->sys_log2_page_sz)
if (a->log2_subpage_sz != clib_mem_get_log2_page_size ())
pm->error = clib_mem_create_hugetlb_fd ((char *) a->name, &a->fd);
else
pm->error = clib_mem_create_fd ((char *) a->name, &a->fd);
@@ -300,7 +299,7 @@ pmalloc_map_pages (clib_pmalloc_main_t * pm, clib_pmalloc_arena_t * a,
}
else
{
if (a->log2_subpage_sz != pm->sys_log2_page_sz)
if (a->log2_subpage_sz != clib_mem_get_log2_page_size ())
mmap_flags |= MAP_HUGETLB;
mmap_flags |= MAP_PRIVATE | MAP_ANONYMOUS;
@@ -318,7 +317,8 @@ pmalloc_map_pages (clib_pmalloc_main_t * pm, clib_pmalloc_arena_t * a,
goto error;
}
if (a->log2_subpage_sz != pm->sys_log2_page_sz && mlock (va, size) != 0)
if (a->log2_subpage_sz != clib_mem_get_log2_page_size () &&
mlock (va, size) != 0)
{
pm->error = clib_error_return_unix (0, "Unable to lock pages");
goto error;
@@ -398,7 +398,7 @@ clib_pmalloc_create_shared_arena (clib_pmalloc_main_t * pm, char *name,
if (log2_page_sz == 0)
log2_page_sz = pm->def_log2_page_sz;
else if (log2_page_sz != pm->def_log2_page_sz &&
log2_page_sz != pm->sys_log2_page_sz)
log2_page_sz != clib_mem_get_log2_page_size ())
{
pm->error = clib_error_create ("unsupported page size (%uKB)",
1 << (log2_page_sz - 10));
+1 -4
View File
@@ -67,10 +67,7 @@ typedef struct
u8 *base;
/* default page size - typically 2M */
u32 def_log2_page_sz;
/* system page size - typically 4K */
u32 sys_log2_page_sz;
clib_mem_page_sz_t def_log2_page_sz;
/* maximum number of pages, limited by VA preallocation size */
u32 max_pages;