vhost: Add multi-versioning support

This change also refactors the vhost code, which previously lived in the single large file vhost-user.c:
receive-side code now resides in vhost_user_input.c, and
transmit-side code in vhost_user_output.c.

Change-Id: I1b539b5008685889723e228265786a2a3e9f3a78
Signed-off-by: Mohsin Kazmi <sykazmi@cisco.com>
This commit is contained in:
Mohsin Kazmi
2018-06-26 17:20:11 +02:00
committed by Damjan Marion
parent 17ff3c1fa5
commit e7cde313e1
7 changed files with 1675 additions and 1536 deletions

View File

@ -917,16 +917,22 @@ API_FILES += vnet/pg/pg.api
libvnet_la_SOURCES += \
vnet/devices/virtio/device.c \
vnet/devices/virtio/node.c \
vnet/devices/virtio/vhost-user.c \
vnet/devices/virtio/vhost_user.c \
vnet/devices/virtio/vhost_user_input.c \
vnet/devices/virtio/vhost_user_output.c \
vnet/devices/virtio/vhost_user_api.c \
vnet/devices/virtio/virtio.c
nobase_include_HEADERS += \
vnet/devices/virtio/virtio.h \
vnet/devices/virtio/vhost-user.h \
vnet/devices/virtio/vhost_user.h \
vnet/devices/virtio/vhost_user.api.h
libvnet_multiversioning_sources += \
vnet/devices/virtio/vhost_user_input.c \
vnet/devices/virtio/vhost_user_output.c
API_FILES += vnet/devices/virtio/vhost_user.api
########################################

View File

@ -45,6 +45,40 @@
#define VRING_USED_F_NO_NOTIFY 1
#define VRING_AVAIL_F_NO_INTERRUPT 1
/* Control-plane (socket) debug logging: emits via clib_warning only when
 * the runtime flag vhost_user_main.debug is set.
 * NOTE(review): expands to a braced statement followed by ';' — safe only in
 * statement context, not as an expression. */
#define DBG_SOCK(args...) \
{ \
vhost_user_main_t *_vum = &vhost_user_main; \
if (_vum->debug) \
clib_warning(args); \
};
/* Datapath (per-vring) debug logging: compiled out entirely unless
 * VHOST_DEBUG_VQ is changed to 1 at build time. */
#define VHOST_DEBUG_VQ 0
#if VHOST_DEBUG_VQ == 1
#define DBG_VQ(args...) clib_warning(args);
#else
#define DBG_VQ(args...)
#endif
/* Resolve a clib file-pool index to its OS file descriptor;
 * evaluates to -1 when the index is unset (~0). */
#define UNIX_GET_FD(unixfd_idx) ({ \
typeof(unixfd_idx) __unixfd_idx = (unixfd_idx); \
(__unixfd_idx != ~0) ? \
pool_elt_at_index (file_main.file_pool, \
__unixfd_idx)->file_descriptor : -1; })
/* Per-packet trace flags: _(name, bit-position, description).
 * The bit positions feed virtio_ring_flags in the trace record. */
#define foreach_virtio_trace_flags \
_ (SIMPLE_CHAINED, 0, "Simple descriptor chaining") \
_ (SINGLE_DESC, 1, "Single descriptor packet") \
_ (INDIRECT, 2, "Indirect descriptor") \
_ (MAP_ERROR, 4, "Memory mapping error")
/* Enum of VIRTIO_TRACE_F_* flag names generated from the list above. */
typedef enum
{
#define _(n,i,s) VIRTIO_TRACE_F_##n,
foreach_virtio_trace_flags
#undef _
} virtio_trace_flag_t;
#define foreach_virtio_net_feature \
_ (VIRTIO_NET_F_MRG_RXBUF, 15) \
_ (VIRTIO_NET_F_CTRL_VQ, 17) \
@ -56,7 +90,6 @@
_ (VHOST_USER_F_PROTOCOL_FEATURES, 30) \
_ (VIRTIO_F_VERSION_1, 32)
typedef enum
{
#define _(f,n) FEAT_##f = (n),
@ -331,6 +364,11 @@ typedef struct
int vhost_user_dump_ifs (vnet_main_t * vnm, vlib_main_t * vm,
vhost_user_intf_details_t ** out_vuids);
extern vlib_node_registration_t vhost_user_send_interrupt_node;
extern vnet_device_class_t vhost_user_device_class;
extern vlib_node_registration_t vhost_user_input_node;
extern vhost_user_main_t vhost_user_main;
#endif
/*

View File

@ -22,7 +22,7 @@
#include <vnet/interface.h>
#include <vnet/api_errno.h>
#include <vnet/devices/virtio/vhost-user.h>
#include <vnet/devices/virtio/vhost_user.h>
#include <vnet/vnet_msg_enum.h>

View File

@ -0,0 +1,262 @@
/*
* Copyright (c) 2018 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __VIRTIO_VHOST_USER_INLINE_H__
#define __VIRTIO_VHOST_USER_INLINE_H__
/* vhost-user inline functions */
/*
 * Translate a guest physical address into a pointer inside the matching
 * mmap'd guest memory region.
 *
 * @param vui   interface whose memory regions are searched
 * @param addr  guest physical address to translate
 * @param hint  in/out: region index to try first; updated on a successful
 *              lookup so consecutive calls for nearby addresses stay fast
 * @return pointer into region_mmap_addr[i], or 0 when no region covers addr
 *
 * Fast path: check the hinted region. Slow path: scan all regions, using
 * SSE4.2 or NEON when available, otherwise a scalar loop.
 * NOTE(review): both vector paths test exactly 8 region slots, implicitly
 * assuming VHOST_MEMORY_MAX_NREGIONS == 8 — confirm against the header.
 */
static_always_inline void *
map_guest_mem (vhost_user_intf_t * vui, uword addr, u32 * hint)
{
int i = *hint;
/* Fast path: the hinted region covers addr ([guest_phys_addr,
 * guest_phys_addr + memory_size)). */
if (PREDICT_TRUE ((vui->regions[i].guest_phys_addr <= addr) &&
((vui->regions[i].guest_phys_addr +
vui->regions[i].memory_size) > addr)))
{
return (void *) (vui->region_mmap_addr[i] + addr -
vui->regions[i].guest_phys_addr);
}
#if __SSE4_2__
/* Vectorized scan: region_guest_addr_lo[] / _hi[] hold per-region bounds;
 * each 128-bit op compares 2 regions, blended into one mask register.
 * The test used is (addr + 1 > lo) && (hi > addr), i.e. lo <= addr < hi.
 * NOTE(review): _mm_cmpgt_epi64 is a SIGNED compare — addresses with the
 * top bit set would misbehave; presumably guest ranges never reach 2^63. */
__m128i rl, rh, al, ah, r;
al = _mm_set1_epi64x (addr + 1);
ah = _mm_set1_epi64x (addr);
rl = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_lo[0]);
rl = _mm_cmpgt_epi64 (al, rl);
rh = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_hi[0]);
rh = _mm_cmpgt_epi64 (rh, ah);
r = _mm_and_si128 (rl, rh);
rl = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_lo[2]);
rl = _mm_cmpgt_epi64 (al, rl);
rh = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_hi[2]);
rh = _mm_cmpgt_epi64 (rh, ah);
r = _mm_blend_epi16 (r, _mm_and_si128 (rl, rh), 0x22);
rl = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_lo[4]);
rl = _mm_cmpgt_epi64 (al, rl);
rh = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_hi[4]);
rh = _mm_cmpgt_epi64 (rh, ah);
r = _mm_blend_epi16 (r, _mm_and_si128 (rl, rh), 0x44);
rl = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_lo[6]);
rl = _mm_cmpgt_epi64 (al, rl);
rh = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_hi[6]);
rh = _mm_cmpgt_epi64 (rh, ah);
r = _mm_blend_epi16 (r, _mm_and_si128 (rl, rh), 0x88);
/* Compact one match byte per region into the low lane, then take the
 * index of the first set bit; the OR'd sentinel bit guarantees
 * count_trailing_zeros has an argument != 0 (i.e. "no match" -> i == 8). */
r = _mm_shuffle_epi8 (r, _mm_set_epi64x (0, 0x0e060c040a020800));
i = count_trailing_zeros (_mm_movemask_epi8 (r) |
(1 << VHOST_MEMORY_MAX_NREGIONS));
if (i < vui->nregions)
{
*hint = i;
return (void *) (vui->region_mmap_addr[i] + addr -
vui->regions[i].guest_phys_addr);
}
#elif __aarch64__ && __ARM_NEON
/* NEON scan: same lo <= addr < hi test, 2 regions per iteration, match
 * bits accumulated into u32 at the region's bit position. Early-out after
 * each pair when a match is already found. */
uint64x2_t al, ah, rl, rh, r;
uint32_t u32 = 0;
al = vdupq_n_u64 (addr + 1);
ah = vdupq_n_u64 (addr);
/*First Iteration */
rl = vld1q_u64 (&vui->region_guest_addr_lo[0]);
rl = vcgtq_u64 (al, rl);
rh = vld1q_u64 (&vui->region_guest_addr_hi[0]);
rh = vcgtq_u64 (rh, ah);
r = vandq_u64 (rl, rh);
u32 |= (vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 0) & 0x1);
u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 8) & 0x1) << 1);
if (u32)
{
i = count_trailing_zeros (u32);
goto vhost_map_guest_mem_done;
}
/*Second Iteration */
rl = vld1q_u64 (&vui->region_guest_addr_lo[2]);
rl = vcgtq_u64 (al, rl);
rh = vld1q_u64 (&vui->region_guest_addr_hi[2]);
rh = vcgtq_u64 (rh, ah);
r = vandq_u64 (rl, rh);
u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 0) & 0x1) << 2);
u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 8) & 0x1) << 3);
if (u32)
{
i = count_trailing_zeros (u32);
goto vhost_map_guest_mem_done;
}
/*Third Iteration */
/* NOTE(review): these lanes set bits 6/7, not 4/5 — regions 4 and 5
 * appear to be skipped and regions 6/7 tested instead; confirm whether
 * a fourth pair (lo/hi[6]) was intended here. */
rl = vld1q_u64 (&vui->region_guest_addr_lo[4]);
rl = vcgtq_u64 (al, rl);
rh = vld1q_u64 (&vui->region_guest_addr_hi[4]);
rh = vcgtq_u64 (rh, ah);
r = vandq_u64 (rl, rh);
u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 0) & 0x1) << 6);
u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 8) & 0x1) << 7);
/* Sentinel bit makes "no match" resolve to i == VHOST_MEMORY_MAX_NREGIONS. */
i = count_trailing_zeros (u32 | (1 << VHOST_MEMORY_MAX_NREGIONS));
vhost_map_guest_mem_done:
if (i < vui->nregions)
{
*hint = i;
return (void *) (vui->region_mmap_addr[i] + addr -
vui->regions[i].guest_phys_addr);
}
#else
/* Scalar fallback: linear scan of all configured regions. */
for (i = 0; i < vui->nregions; i++)
{
if ((vui->regions[i].guest_phys_addr <= addr) &&
((vui->regions[i].guest_phys_addr + vui->regions[i].memory_size) >
addr))
{
*hint = i;
return (void *) (vui->region_mmap_addr[i] + addr -
vui->regions[i].guest_phys_addr);
}
}
#endif
/* No region covers addr: reset the hint and report failure. */
DBG_VQ ("failed to map guest mem addr %llx", addr);
*hint = 0;
return 0;
}
/*
 * Translate a QEMU userspace virtual address into a pointer inside the
 * matching mmap'd region.
 *
 * @param vui   interface whose memory regions are searched
 * @param addr  front-end userspace address to translate
 * @return pointer into region_mmap_addr[r], or 0 when no region covers addr
 *
 * Linear scan of [userspace_addr, userspace_addr + memory_size) per region.
 */
static_always_inline void *
map_user_mem (vhost_user_intf_t * vui, uword addr)
{
  int r = 0;

  while (r < vui->nregions)
    {
      /* Skip regions that do not contain addr. */
      if ((vui->regions[r].userspace_addr > addr) ||
	  ((vui->regions[r].userspace_addr + vui->regions[r].memory_size) <=
	   addr))
	{
	  r++;
	  continue;
	}
      return (void *) (vui->region_mmap_addr[r] + addr -
		       vui->regions[r].userspace_addr);
    }

  /* Address not backed by any shared-memory region. */
  return 0;
}
/* Granularity of the vhost dirty-page log: one bit per 4 KiB page. */
#define VHOST_LOG_PAGE 0x1000
/*
 * Mark the pages covering [addr, addr + len) as dirty in the shared
 * vhost log bitmap (presumably consumed by the front-end for live
 * migration — semantics come from the vhost-user protocol).
 *
 * @param vui              interface owning the log region
 * @param addr             guest physical address, or a host (userspace)
 *                         address when is_host_address is set
 * @param len              byte length of the written range
 * @param is_host_address  if nonzero, addr is translated via map_user_mem
 *                         before logging
 *
 * No-op unless a log base is mapped and FEAT_VHOST_F_LOG_ALL is negotiated.
 */
static_always_inline void
vhost_user_log_dirty_pages_2 (vhost_user_intf_t * vui,
u64 addr, u64 len, u8 is_host_address)
{
/* NOTE(review): '1 <<' is an int shift — fine only while
 * FEAT_VHOST_F_LOG_ALL < 31; a 1ULL shift would be width-safe. */
if (PREDICT_TRUE (vui->log_base_addr == 0
|| !(vui->features & (1 << FEAT_VHOST_F_LOG_ALL))))
{
return;
}
if (is_host_address)
{
/* NOTE(review): map_user_mem may return 0 on a miss, making addr 0
 * here and logging pages near offset 0 — verify callers never pass
 * unmapped host addresses. */
addr = pointer_to_uword (map_user_mem (vui, (uword) addr));
}
/* Bounds check: the last page's bit must fit inside the log bitmap. */
if (PREDICT_FALSE ((addr + len - 1) / VHOST_LOG_PAGE / 8 >= vui->log_size))
{
DBG_SOCK ("vhost_user_log_dirty_pages(): out of range\n");
return;
}
/* Ensure the guest-visible data writes are globally visible before the
 * corresponding dirty bits are published. */
CLIB_MEMORY_BARRIER ();
u64 page = addr / VHOST_LOG_PAGE;
while (page * VHOST_LOG_PAGE < addr + len)
{
((u8 *) vui->log_base_addr)[page / 8] |= 1 << page % 8;
page++;
}
}
/* Log the used-ring field 'member' of vring 'vq' as dirty, but only when
 * used-ring logging was requested for that vring (vq->log_used). The guest
 * physical address is log_guest_addr plus the field's offset in vring_used_t. */
#define vhost_user_log_dirty_ring(vui, vq, member) \
if (PREDICT_FALSE(vq->log_used)) { \
vhost_user_log_dirty_pages_2(vui, vq->log_guest_addr + STRUCT_OFFSET_OF(vring_used_t, member), \
sizeof(vq->used->member), 0); \
}
/*
 * format() callback rendering a vhost_trace_t packet-trace record.
 *
 * va_list arguments (standard VPP trace formatter signature):
 *   vlib_main_t *, vlib_node_t *, then the vhost_trace_t * to print.
 *
 * Output: interface name and queue id, the VIRTIO_TRACE_F_* flags set in
 * t->virtio_ring_flags, the first descriptor length, virtio_net_hdr
 * flags/gso_type, and num_buffers when virtio_net_hdr_sz == 12
 * (presumably the mergeable-rx-buffer header variant — per virtio spec).
 */
static_always_inline u8 *
format_vhost_trace (u8 * s, va_list * va)
{
CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
CLIB_UNUSED (vnet_main_t * vnm) = vnet_get_main ();
vhost_user_main_t *vum = &vhost_user_main;
vhost_trace_t *t = va_arg (*va, vhost_trace_t *);
/* Resolve the traced interface back to its software interface for naming. */
vhost_user_intf_t *vui = pool_elt_at_index (vum->vhost_user_interfaces,
t->device_index);
vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, vui->sw_if_index);
u32 indent = format_get_indent (s);
s = format (s, "%U %U queue %d\n", format_white_space, indent,
format_vnet_sw_interface_name, vnm, sw, t->qid);
s = format (s, "%U virtio flags:\n", format_white_space, indent);
/* Print one line per trace flag bit set in virtio_ring_flags. */
#define _(n,i,st) \
if (t->virtio_ring_flags & (1 << VIRTIO_TRACE_F_##n)) \
s = format (s, "%U %s %s\n", format_white_space, indent, #n, st);
foreach_virtio_trace_flags
#undef _
s = format (s, "%U virtio_net_hdr first_desc_len %u\n",
format_white_space, indent, t->first_desc_len);
s = format (s, "%U flags 0x%02x gso_type %u\n",
format_white_space, indent,
t->hdr.hdr.flags, t->hdr.hdr.gso_type);
if (vui->virtio_net_hdr_sz == 12)
s = format (s, "%U num_buff %u",
format_white_space, indent, t->hdr.num_buffers);
return s;
}
/*
 * Signal the front-end that new used-ring entries are available by writing
 * to the vring's call file descriptor (eventfd-style: an 8-byte counter
 * increment).
 *
 * @param vm  vlib main, used to timestamp the next interrupt deadline
 * @param vq  vring whose callfd is signalled
 *
 * On success, resets the interrupt-coalescing state (n_since_last_int and
 * int_deadline). On write failure, warns and leaves the state untouched.
 */
static_always_inline void
vhost_user_send_call (vlib_main_t * vm, vhost_user_vring_t * vq)
{
  vhost_user_main_t *vum = &vhost_user_main;
  u64 x = 1;
  int fd = UNIX_GET_FD (vq->callfd_idx);
  int rv;

  /* callfd_idx == ~0 maps to fd -1: no call fd was supplied for this
   * vring, so there is nothing to notify.  Bail out instead of issuing a
   * guaranteed-EBADF write() and a spurious warning on every call. */
  if (PREDICT_FALSE (fd < 0))
    return;

  rv = write (fd, &x, sizeof (x));
  if (rv <= 0)
    {
      clib_unix_warning
	("Error: Could not write to unix socket for callfd %d", fd);
      return;
    }

  vq->n_since_last_int = 0;
  vq->int_deadline = vlib_time_now (vm) + vum->coalesce_time;
}
#endif
/*
* fd.io coding-style-patch-verification: ON
*
* Local Variables:
* eval: (c-set-style "gnu")
* End:
*/

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff