
netinet/tcp.h and linux/tcp.h have different lengths but overlap. LDP uses the former while iperf uses the latter. Accept both lengths for now as we do not support exposing tcp metrics via ldp. Type: improvement Change-Id: I13a149d68715ed9451773630a3595c09c421aa29 Signed-off-by: Florin Coras <fcoras@cisco.com>
2913 lines
62 KiB
C
2913 lines
62 KiB
C
/*
|
|
* Copyright (c) 2016-2019 Cisco and/or its affiliates.
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at:
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
#ifdef HAVE_GNU_SOURCE
|
|
#define _GNU_SOURCE
|
|
#endif
|
|
|
|
#include <unistd.h>
|
|
#include <stdio.h>
|
|
#include <signal.h>
|
|
#include <dlfcn.h>
|
|
#include <pthread.h>
|
|
#include <time.h>
|
|
#include <stdarg.h>
|
|
#include <sys/resource.h>
|
|
#include <netinet/tcp.h>
|
|
#include <netinet/udp.h>
|
|
|
|
#include <vcl/ldp_socket_wrapper.h>
|
|
#include <vcl/ldp.h>
|
|
#include <sys/time.h>
|
|
|
|
#include <vcl/vcl_locked.h>
|
|
#include <vppinfra/time.h>
|
|
#include <vppinfra/bitmap.h>
|
|
#include <vppinfra/lock.h>
|
|
#include <vppinfra/pool.h>
|
|
#include <vppinfra/hash.h>
|
|
|
|
/*
 * Function attributes for routines that must run when the library is
 * loaded / unloaded.  Both HAVE_* switches are defined unconditionally
 * here, so the attribute variants are always selected.
 */
#define HAVE_CONSTRUCTOR_ATTRIBUTE
#ifdef HAVE_CONSTRUCTOR_ATTRIBUTE
#define CONSTRUCTOR_ATTRIBUTE __attribute__ ((constructor))
#else
#define CONSTRUCTOR_ATTRIBUTE
#endif /* HAVE_CONSTRUCTOR_ATTRIBUTE */

#define HAVE_DESTRUCTOR_ATTRIBUTE
#ifdef HAVE_DESTRUCTOR_ATTRIBUTE
#define DESTRUCTOR_ATTRIBUTE __attribute__ ((destructor))
#else
#define DESTRUCTOR_ATTRIBUTE
#endif /* HAVE_DESTRUCTOR_ATTRIBUTE */
|
|
|
|
#define LDP_MAX_NWORKERS 32

/*
 * Extract the sockaddr pointer from the address argument of a socket call.
 * With HAVE_GNU_SOURCE the argument is accessed through its __sockaddr__
 * member; otherwise the argument is used as is.
 *
 * The macro expands to a plain expression: it uses its own parameter (the
 * non-GNU variant used to hard-code the identifier `_addr`) and carries no
 * trailing semicolon, so it can be used inside larger expressions.
 */
#ifdef HAVE_GNU_SOURCE
#define SOCKADDR_GET_SA(__addr) ((__addr).__sockaddr__)
#else
#define SOCKADDR_GET_SA(__addr) (__addr)
#endif

/* Fallback for headers that do not provide UDP_SEGMENT */
#ifndef UDP_SEGMENT
#define UDP_SEGMENT 103
#endif

/* Fallback definition, value taken from <linux/netfilter_ipv4.h> */
#ifndef SO_ORIGINAL_DST
#define SO_ORIGINAL_DST 80
#endif
|
|
|
|
typedef struct ldp_worker_ctx_
|
|
{
|
|
u8 *io_buffer;
|
|
clib_time_t clib_time;
|
|
|
|
/*
|
|
* Select state
|
|
*/
|
|
clib_bitmap_t *rd_bitmap;
|
|
clib_bitmap_t *wr_bitmap;
|
|
clib_bitmap_t *ex_bitmap;
|
|
clib_bitmap_t *si_rd_bitmap;
|
|
clib_bitmap_t *si_wr_bitmap;
|
|
clib_bitmap_t *si_ex_bitmap;
|
|
clib_bitmap_t *libc_rd_bitmap;
|
|
clib_bitmap_t *libc_wr_bitmap;
|
|
clib_bitmap_t *libc_ex_bitmap;
|
|
|
|
/*
|
|
* Poll state
|
|
*/
|
|
vcl_poll_t *vcl_poll;
|
|
struct pollfd *libc_poll;
|
|
u16 *libc_poll_idxs;
|
|
|
|
/*
|
|
* Epoll state
|
|
*/
|
|
u8 epoll_wait_vcl;
|
|
u8 mq_epfd_added;
|
|
int vcl_mq_epfd;
|
|
} ldp_worker_ctx_t;
|
|
|
|
__thread ldp_worker_ctx_t _ldp_worker = {};
|
|
|
|
/* clib_bitmap_t, fd_mask and vcl_si_set are used interchangeably. Make sure
|
|
* they are the same size */
|
|
STATIC_ASSERT (sizeof (clib_bitmap_t) == sizeof (fd_mask),
|
|
"ldp bitmap size mismatch");
|
|
STATIC_ASSERT (sizeof (vcl_si_set) == sizeof (fd_mask),
|
|
"ldp bitmap size mismatch");
|
|
|
|
typedef struct
|
|
{
|
|
int init;
|
|
char app_name[LDP_APP_NAME_MAX];
|
|
u32 vlsh_bit_val;
|
|
u32 vlsh_bit_mask;
|
|
u32 debug;
|
|
|
|
/** vcl needs next epoll_create to go to libc_epoll */
|
|
u8 vcl_needs_real_epoll;
|
|
|
|
/**
|
|
* crypto state used only for testing
|
|
*/
|
|
u8 transparent_tls;
|
|
u32 ckpair_index;
|
|
} ldp_main_t;
|
|
|
|
/* Current LDP debug verbosity */
#define LDP_DEBUG ldp->debug

/*
 * Print a debug message to stderr, prefixed with "ldp<pid>: ", when the
 * configured debug level is greater than _lvl.  errno is saved and restored
 * around the fprintf so that logging never disturbs the error reported to
 * the application.
 *
 * The body is wrapped in do { } while (0) so the macro behaves as a single
 * statement and can be used safely in unbraced if/else constructs.
 */
#define LDBG(_lvl, _fmt, _args...)                                           \
  do                                                                          \
    {                                                                         \
      if (ldp->debug > _lvl)                                                  \
        {                                                                     \
          int errno_saved = errno;                                            \
          fprintf (stderr, "ldp<%d>: " _fmt "\n", getpid (), ##_args);       \
          errno = errno_saved;                                                \
        }                                                                     \
    }                                                                         \
  while (0)
|
|
|
|
static ldp_main_t ldp_main = {
|
|
.vlsh_bit_val = (1 << LDP_SID_BIT_MIN),
|
|
.vlsh_bit_mask = (1 << LDP_SID_BIT_MIN) - 1,
|
|
.debug = LDP_DEBUG_INIT,
|
|
.transparent_tls = 0,
|
|
.ckpair_index = ~0,
|
|
};
|
|
|
|
static ldp_main_t *ldp = &ldp_main;
|
|
|
|
static inline ldp_worker_ctx_t *
|
|
ldp_worker_get_current (void)
|
|
{
|
|
return &_ldp_worker;
|
|
}
|
|
|
|
/*
|
|
* RETURN: 0 on success or -1 on error.
|
|
* */
|
|
static inline void
|
|
ldp_set_app_name (char *app_name)
|
|
{
|
|
snprintf (ldp->app_name, LDP_APP_NAME_MAX, "%s-ldp-%d", app_name, getpid ());
|
|
}
|
|
|
|
static inline char *
|
|
ldp_get_app_name ()
|
|
{
|
|
if (ldp->app_name[0] == '\0')
|
|
ldp_set_app_name (program_invocation_short_name);
|
|
|
|
return ldp->app_name;
|
|
}
|
|
|
|
static inline int
|
|
ldp_vlsh_to_fd (vls_handle_t vlsh)
|
|
{
|
|
return (vlsh + ldp->vlsh_bit_val);
|
|
}
|
|
|
|
static inline vls_handle_t
|
|
ldp_fd_to_vlsh (int fd)
|
|
{
|
|
if (fd < ldp->vlsh_bit_val)
|
|
return VLS_INVALID_HANDLE;
|
|
|
|
return (fd - ldp->vlsh_bit_val);
|
|
}
|
|
|
|
static void
|
|
ldp_init_cfg (void)
|
|
{
|
|
char *env_var_str = getenv (LDP_ENV_DEBUG);
|
|
if (env_var_str)
|
|
{
|
|
u32 tmp;
|
|
if (sscanf (env_var_str, "%u", &tmp) != 1)
|
|
clib_warning ("LDP<%d>: WARNING: Invalid LDP debug level specified in"
|
|
" the env var " LDP_ENV_DEBUG " (%s)!", getpid (),
|
|
env_var_str);
|
|
else
|
|
{
|
|
ldp->debug = tmp;
|
|
LDBG (0, "configured LDP debug level (%u) from env var "
|
|
LDP_ENV_DEBUG "!", ldp->debug);
|
|
}
|
|
}
|
|
|
|
env_var_str = getenv (LDP_ENV_APP_NAME);
|
|
if (env_var_str)
|
|
{
|
|
ldp_set_app_name (env_var_str);
|
|
LDBG (0, "configured LDP app name (%s) from the env var "
|
|
LDP_ENV_APP_NAME "!", ldp->app_name);
|
|
}
|
|
|
|
env_var_str = getenv (LDP_ENV_SID_BIT);
|
|
if (env_var_str)
|
|
{
|
|
u32 sb;
|
|
if (sscanf (env_var_str, "%u", &sb) != 1)
|
|
{
|
|
LDBG (0, "WARNING: Invalid LDP sid bit specified in the env var "
|
|
LDP_ENV_SID_BIT " (%s)! sid bit value %d (0x%x)", env_var_str,
|
|
ldp->vlsh_bit_val, ldp->vlsh_bit_val);
|
|
}
|
|
else if (sb < LDP_SID_BIT_MIN)
|
|
{
|
|
ldp->vlsh_bit_val = (1 << LDP_SID_BIT_MIN);
|
|
ldp->vlsh_bit_mask = ldp->vlsh_bit_val - 1;
|
|
|
|
LDBG (0, "WARNING: LDP sid bit (%u) specified in the env var "
|
|
LDP_ENV_SID_BIT " (%s) is too small. Using LDP_SID_BIT_MIN"
|
|
" (%d)! sid bit value %d (0x%x)", sb, env_var_str,
|
|
LDP_SID_BIT_MIN, ldp->vlsh_bit_val, ldp->vlsh_bit_val);
|
|
}
|
|
else if (sb > LDP_SID_BIT_MAX)
|
|
{
|
|
ldp->vlsh_bit_val = (1 << LDP_SID_BIT_MAX);
|
|
ldp->vlsh_bit_mask = ldp->vlsh_bit_val - 1;
|
|
|
|
LDBG (0, "WARNING: LDP sid bit (%u) specified in the env var "
|
|
LDP_ENV_SID_BIT " (%s) is too big. Using LDP_SID_BIT_MAX"
|
|
" (%d)! sid bit value %d (0x%x)", sb, env_var_str,
|
|
LDP_SID_BIT_MAX, ldp->vlsh_bit_val, ldp->vlsh_bit_val);
|
|
}
|
|
else
|
|
{
|
|
ldp->vlsh_bit_val = (1 << sb);
|
|
ldp->vlsh_bit_mask = ldp->vlsh_bit_val - 1;
|
|
|
|
LDBG (0, "configured LDP sid bit (%u) from "
|
|
LDP_ENV_SID_BIT "! sid bit value %d (0x%x)", sb,
|
|
ldp->vlsh_bit_val, ldp->vlsh_bit_val);
|
|
}
|
|
|
|
/* Make sure there are enough bits in the fd set for vcl sessions */
|
|
if (ldp->vlsh_bit_val > FD_SETSIZE / 2)
|
|
{
|
|
/* Only valid for select/pselect, so just WARNING and not exit */
|
|
LDBG (0,
|
|
"WARNING: LDP vlsh bit value %d > FD_SETSIZE/2 %d, "
|
|
"select/pselect not supported now!",
|
|
ldp->vlsh_bit_val, FD_SETSIZE / 2);
|
|
}
|
|
}
|
|
env_var_str = getenv (LDP_ENV_TLS_TRANS);
|
|
if (env_var_str)
|
|
{
|
|
ldp->transparent_tls = 1;
|
|
}
|
|
}
|
|
|
|
static int
|
|
ldp_init (void)
|
|
{
|
|
int rv;
|
|
|
|
if (ldp->init)
|
|
{
|
|
LDBG (0, "LDP is initialized already");
|
|
return 0;
|
|
}
|
|
|
|
ldp_init_cfg ();
|
|
ldp->init = 1;
|
|
ldp->vcl_needs_real_epoll = 1;
|
|
rv = vls_app_create (ldp_get_app_name ());
|
|
if (rv != VPPCOM_OK)
|
|
{
|
|
ldp->vcl_needs_real_epoll = 0;
|
|
if (rv == VPPCOM_EEXIST)
|
|
return 0;
|
|
LDBG (2,
|
|
"\nERROR: ldp_init: vppcom_app_create()"
|
|
" failed! rv = %d (%s)\n",
|
|
rv, vppcom_retval_str (rv));
|
|
ldp->init = 0;
|
|
return rv;
|
|
}
|
|
ldp->vcl_needs_real_epoll = 0;
|
|
|
|
LDBG (0, "LDP initialization: done!");
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Lazily initialize LDP on first use.  If initialization fails, errno is
 * set from the (negated) ldp_init () result and the *calling function*
 * returns -1.  Note that errno is assigned (0 on success) whenever
 * initialization is attempted.
 *
 * Wrapped in do { } while (0) so that it behaves as a single statement.
 */
#define ldp_init_check()                                                      \
  do                                                                          \
    {                                                                         \
      if (PREDICT_FALSE (!ldp->init))                                         \
        {                                                                     \
          if ((errno = -ldp_init ()))                                         \
            return -1;                                                        \
        }                                                                     \
    }                                                                         \
  while (0)
|
|
|
|
int
|
|
close (int fd)
|
|
{
|
|
vls_handle_t vlsh;
|
|
int rv, epfd;
|
|
|
|
ldp_init_check ();
|
|
|
|
vlsh = ldp_fd_to_vlsh (fd);
|
|
if (vlsh != VLS_INVALID_HANDLE)
|
|
{
|
|
epfd = vls_get_libc_epfd (vlsh);
|
|
if (epfd > 0)
|
|
{
|
|
ldp_worker_ctx_t *ldpw = ldp_worker_get_current ();
|
|
|
|
LDBG (0, "fd %d: calling libc_close: epfd %u", fd, epfd);
|
|
|
|
libc_close (epfd);
|
|
ldpw->mq_epfd_added = 0;
|
|
|
|
vls_set_libc_epfd (vlsh, 0);
|
|
}
|
|
else if (PREDICT_FALSE (epfd < 0))
|
|
{
|
|
errno = -epfd;
|
|
rv = -1;
|
|
goto done;
|
|
}
|
|
|
|
LDBG (0, "fd %d: calling vls_close: vlsh %u", fd, vlsh);
|
|
|
|
rv = vls_close (vlsh);
|
|
if (rv != VPPCOM_OK)
|
|
{
|
|
errno = -rv;
|
|
rv = -1;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
LDBG (0, "fd %d: calling libc_close", fd);
|
|
rv = libc_close (fd);
|
|
}
|
|
|
|
done:
|
|
return rv;
|
|
}
|
|
|
|
ssize_t
|
|
read (int fd, void *buf, size_t nbytes)
|
|
{
|
|
vls_handle_t vlsh;
|
|
ssize_t size;
|
|
|
|
ldp_init_check ();
|
|
|
|
vlsh = ldp_fd_to_vlsh (fd);
|
|
if (vlsh != VLS_INVALID_HANDLE)
|
|
{
|
|
size = vls_read (vlsh, buf, nbytes);
|
|
if (size < 0)
|
|
{
|
|
errno = -size;
|
|
size = -1;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
size = libc_read (fd, buf, nbytes);
|
|
}
|
|
|
|
return size;
|
|
}
|
|
|
|
ssize_t
|
|
readv (int fd, const struct iovec * iov, int iovcnt)
|
|
{
|
|
int rv = 0, i, total = 0;
|
|
vls_handle_t vlsh;
|
|
ssize_t size = 0;
|
|
|
|
ldp_init_check ();
|
|
|
|
vlsh = ldp_fd_to_vlsh (fd);
|
|
if (vlsh != VLS_INVALID_HANDLE)
|
|
{
|
|
for (i = 0; i < iovcnt; ++i)
|
|
{
|
|
rv = vls_read (vlsh, iov[i].iov_base, iov[i].iov_len);
|
|
if (rv <= 0)
|
|
break;
|
|
else
|
|
{
|
|
total += rv;
|
|
if (rv < iov[i].iov_len)
|
|
break;
|
|
}
|
|
}
|
|
if (rv < 0 && total == 0)
|
|
{
|
|
errno = -rv;
|
|
size = -1;
|
|
}
|
|
else
|
|
size = total;
|
|
}
|
|
else
|
|
{
|
|
size = libc_readv (fd, iov, iovcnt);
|
|
}
|
|
|
|
return size;
|
|
}
|
|
|
|
ssize_t
|
|
write (int fd, const void *buf, size_t nbytes)
|
|
{
|
|
vls_handle_t vlsh;
|
|
ssize_t size = 0;
|
|
|
|
ldp_init_check ();
|
|
|
|
vlsh = ldp_fd_to_vlsh (fd);
|
|
if (vlsh != VLS_INVALID_HANDLE)
|
|
{
|
|
size = vls_write_msg (vlsh, (void *) buf, nbytes);
|
|
if (size < 0)
|
|
{
|
|
errno = -size;
|
|
size = -1;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
size = libc_write (fd, buf, nbytes);
|
|
}
|
|
|
|
return size;
|
|
}
|
|
|
|
ssize_t
|
|
writev (int fd, const struct iovec * iov, int iovcnt)
|
|
{
|
|
ssize_t size = 0, total = 0;
|
|
vls_handle_t vlsh;
|
|
int i, rv = 0;
|
|
|
|
ldp_init_check ();
|
|
|
|
vlsh = ldp_fd_to_vlsh (fd);
|
|
if (vlsh != VLS_INVALID_HANDLE)
|
|
{
|
|
for (i = 0; i < iovcnt; ++i)
|
|
{
|
|
rv = vls_write_msg (vlsh, iov[i].iov_base, iov[i].iov_len);
|
|
if (rv < 0)
|
|
break;
|
|
else
|
|
{
|
|
total += rv;
|
|
if (rv < iov[i].iov_len)
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (rv < 0 && total == 0)
|
|
{
|
|
errno = -rv;
|
|
size = -1;
|
|
}
|
|
else
|
|
size = total;
|
|
}
|
|
else
|
|
{
|
|
size = libc_writev (fd, iov, iovcnt);
|
|
}
|
|
|
|
return size;
|
|
}
|
|
|
|
static int
|
|
fcntl_internal (int fd, int cmd, va_list ap)
|
|
{
|
|
vls_handle_t vlsh;
|
|
int rv = 0;
|
|
|
|
vlsh = ldp_fd_to_vlsh (fd);
|
|
LDBG (0, "fd %u vlsh %d, cmd %u", fd, vlsh, cmd);
|
|
if (vlsh != VLS_INVALID_HANDLE)
|
|
{
|
|
int flags = va_arg (ap, int);
|
|
u32 size;
|
|
|
|
size = sizeof (flags);
|
|
rv = -EOPNOTSUPP;
|
|
switch (cmd)
|
|
{
|
|
case F_SETFL:
|
|
rv = vls_attr (vlsh, VPPCOM_ATTR_SET_FLAGS, &flags, &size);
|
|
break;
|
|
|
|
case F_GETFL:
|
|
rv = vls_attr (vlsh, VPPCOM_ATTR_GET_FLAGS, &flags, &size);
|
|
if (rv == VPPCOM_OK)
|
|
rv = flags;
|
|
break;
|
|
case F_SETFD:
|
|
/* TODO handle this */
|
|
LDBG (0, "F_SETFD ignored flags %u", flags);
|
|
rv = 0;
|
|
break;
|
|
default:
|
|
rv = -EOPNOTSUPP;
|
|
break;
|
|
}
|
|
if (rv < 0)
|
|
{
|
|
errno = -rv;
|
|
rv = -1;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
#ifdef HAVE_FCNTL64
|
|
rv = libc_vfcntl64 (fd, cmd, ap);
|
|
#else
|
|
rv = libc_vfcntl (fd, cmd, ap);
|
|
#endif
|
|
}
|
|
|
|
return rv;
|
|
}
|
|
|
|
/* fcntl () interposer; all the work is done by fcntl_internal (). */
int
fcntl (int fd, int cmd, ...)
{
  va_list args;
  int result;

  ldp_init_check ();

  va_start (args, cmd);
  result = fcntl_internal (fd, cmd, args);
  va_end (args);

  return result;
}
|
|
|
|
/* fcntl64 () interposer; shares fcntl_internal () with fcntl (). */
int
fcntl64 (int fd, int cmd, ...)
{
  va_list args;
  int result;

  ldp_init_check ();

  va_start (args, cmd);
  result = fcntl_internal (fd, cmd, args);
  va_end (args);

  return result;
}
|
|
|
|
int
|
|
ioctl (int fd, unsigned long int cmd, ...)
|
|
{
|
|
vls_handle_t vlsh;
|
|
va_list ap;
|
|
int rv;
|
|
|
|
ldp_init_check ();
|
|
|
|
va_start (ap, cmd);
|
|
|
|
vlsh = ldp_fd_to_vlsh (fd);
|
|
if (vlsh != VLS_INVALID_HANDLE)
|
|
{
|
|
switch (cmd)
|
|
{
|
|
case FIONREAD:
|
|
rv = vls_attr (vlsh, VPPCOM_ATTR_GET_NREAD, 0, 0);
|
|
break;
|
|
case TIOCOUTQ:
|
|
{
|
|
u32 *buf = va_arg (ap, void *);
|
|
u32 *buflen = va_arg (ap, u32 *);
|
|
rv = vls_attr (vlsh, VPPCOM_ATTR_GET_NWRITEQ, buf, buflen);
|
|
}
|
|
break;
|
|
case FIONBIO:
|
|
{
|
|
u32 flags = *(va_arg (ap, int *)) ? O_NONBLOCK : 0;
|
|
u32 size = sizeof (flags);
|
|
|
|
/* TBD: When VPPCOM_ATTR_[GS]ET_FLAGS supports flags other than
|
|
* non-blocking, the flags should be read here and merged
|
|
* with O_NONBLOCK.
|
|
*/
|
|
rv = vls_attr (vlsh, VPPCOM_ATTR_SET_FLAGS, &flags, &size);
|
|
}
|
|
break;
|
|
|
|
default:
|
|
rv = -EOPNOTSUPP;
|
|
break;
|
|
}
|
|
if (rv < 0)
|
|
{
|
|
errno = -rv;
|
|
rv = -1;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
rv = libc_vioctl (fd, cmd, ap);
|
|
}
|
|
|
|
va_end (ap);
|
|
return rv;
|
|
}
|
|
|
|
always_inline void
|
|
ldp_select_init_maps (fd_set * __restrict original,
|
|
clib_bitmap_t ** resultb, clib_bitmap_t ** libcb,
|
|
clib_bitmap_t ** vclb, int nfds, u32 minbits,
|
|
u32 n_bytes, uword * si_bits, uword * libc_bits)
|
|
{
|
|
uword si_bits_set, libc_bits_set;
|
|
vls_handle_t vlsh;
|
|
int fd;
|
|
|
|
clib_bitmap_validate (*vclb, minbits);
|
|
clib_bitmap_validate (*libcb, minbits);
|
|
clib_bitmap_validate (*resultb, minbits);
|
|
clib_memcpy_fast (*resultb, original, n_bytes);
|
|
memset (original, 0, n_bytes);
|
|
|
|
clib_bitmap_foreach (fd, *resultb) {
|
|
if (fd > nfds)
|
|
break;
|
|
vlsh = ldp_fd_to_vlsh (fd);
|
|
if (vlsh == VLS_INVALID_HANDLE)
|
|
clib_bitmap_set_no_check (*libcb, fd, 1);
|
|
else if (vlsh_to_worker_index (vlsh) != vppcom_worker_index ())
|
|
clib_warning ("migration currently not supported");
|
|
else
|
|
*vclb = clib_bitmap_set (*vclb, vlsh_to_session_index (vlsh), 1);
|
|
}
|
|
|
|
si_bits_set = clib_bitmap_last_set (*vclb) + 1;
|
|
*si_bits = (si_bits_set > *si_bits) ? si_bits_set : *si_bits;
|
|
clib_bitmap_validate (*resultb, *si_bits);
|
|
|
|
libc_bits_set = clib_bitmap_last_set (*libcb) + 1;
|
|
*libc_bits = (libc_bits_set > *libc_bits) ? libc_bits_set : *libc_bits;
|
|
}
|
|
|
|
always_inline int
|
|
ldp_select_vcl_map_to_libc (clib_bitmap_t * vclb, fd_set * __restrict libcb)
|
|
{
|
|
vls_handle_t vlsh;
|
|
uword si;
|
|
int fd;
|
|
|
|
if (!libcb)
|
|
return 0;
|
|
|
|
clib_bitmap_foreach (si, vclb) {
|
|
vlsh = vls_session_index_to_vlsh (si);
|
|
ASSERT (vlsh != VLS_INVALID_HANDLE);
|
|
fd = ldp_vlsh_to_fd (vlsh);
|
|
if (PREDICT_FALSE (fd < 0))
|
|
{
|
|
errno = EBADFD;
|
|
return -1;
|
|
}
|
|
FD_SET (fd, libcb);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
always_inline void
|
|
ldp_select_libc_map_merge (clib_bitmap_t * result, fd_set * __restrict libcb)
|
|
{
|
|
uword fd;
|
|
|
|
if (!libcb)
|
|
return;
|
|
|
|
clib_bitmap_foreach (fd, result)
|
|
FD_SET ((int)fd, libcb);
|
|
}
|
|
|
|
int
|
|
ldp_pselect (int nfds, fd_set * __restrict readfds,
|
|
fd_set * __restrict writefds,
|
|
fd_set * __restrict exceptfds,
|
|
const struct timespec *__restrict timeout,
|
|
const __sigset_t * __restrict sigmask)
|
|
{
|
|
u32 minbits = clib_max (nfds, BITS (uword)), n_bytes;
|
|
struct timespec libc_tspec = { 0 };
|
|
f64 time_out, vcl_timeout = 0;
|
|
uword si_bits, libc_bits;
|
|
ldp_worker_ctx_t *ldpw;
|
|
int rv, bits_set = 0;
|
|
|
|
if (nfds < 0)
|
|
{
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
|
|
if (PREDICT_FALSE (vppcom_worker_index () == ~0))
|
|
vls_register_vcl_worker ();
|
|
|
|
ldpw = ldp_worker_get_current ();
|
|
|
|
if (PREDICT_FALSE (ldpw->clib_time.init_cpu_time == 0))
|
|
clib_time_init (&ldpw->clib_time);
|
|
|
|
if (timeout)
|
|
{
|
|
time_out = (timeout->tv_sec == 0 && timeout->tv_nsec == 0) ?
|
|
(f64) 0 : (f64) timeout->tv_sec + (f64) timeout->tv_nsec / (f64) 1e9;
|
|
|
|
time_out += clib_time_now (&ldpw->clib_time);
|
|
|
|
/* select as fine grained sleep */
|
|
if (!nfds)
|
|
{
|
|
while (clib_time_now (&ldpw->clib_time) < time_out)
|
|
;
|
|
return 0;
|
|
}
|
|
}
|
|
else if (!nfds)
|
|
{
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
else
|
|
time_out = -1;
|
|
|
|
if (nfds <= ldp->vlsh_bit_val)
|
|
{
|
|
rv = libc_pselect (nfds, readfds, writefds, exceptfds,
|
|
timeout, sigmask);
|
|
goto done;
|
|
}
|
|
|
|
si_bits = libc_bits = 0;
|
|
n_bytes = nfds / 8 + ((nfds % 8) ? 1 : 0);
|
|
|
|
if (readfds)
|
|
ldp_select_init_maps (readfds, &ldpw->rd_bitmap, &ldpw->libc_rd_bitmap,
|
|
&ldpw->si_rd_bitmap, nfds, minbits, n_bytes,
|
|
&si_bits, &libc_bits);
|
|
if (writefds)
|
|
ldp_select_init_maps (writefds, &ldpw->wr_bitmap,
|
|
&ldpw->libc_wr_bitmap, &ldpw->si_wr_bitmap, nfds,
|
|
minbits, n_bytes, &si_bits, &libc_bits);
|
|
if (exceptfds)
|
|
ldp_select_init_maps (exceptfds, &ldpw->ex_bitmap,
|
|
&ldpw->libc_ex_bitmap, &ldpw->si_ex_bitmap, nfds,
|
|
minbits, n_bytes, &si_bits, &libc_bits);
|
|
|
|
if (PREDICT_FALSE (!si_bits && !libc_bits))
|
|
{
|
|
errno = EINVAL;
|
|
rv = -1;
|
|
goto done;
|
|
}
|
|
|
|
if (!si_bits)
|
|
libc_tspec = timeout ? *timeout : libc_tspec;
|
|
|
|
do
|
|
{
|
|
if (si_bits)
|
|
{
|
|
if (readfds)
|
|
clib_memcpy_fast (ldpw->rd_bitmap, ldpw->si_rd_bitmap,
|
|
vec_len (ldpw->si_rd_bitmap) *
|
|
sizeof (clib_bitmap_t));
|
|
if (writefds)
|
|
clib_memcpy_fast (ldpw->wr_bitmap, ldpw->si_wr_bitmap,
|
|
vec_len (ldpw->si_wr_bitmap) *
|
|
sizeof (clib_bitmap_t));
|
|
if (exceptfds)
|
|
clib_memcpy_fast (ldpw->ex_bitmap, ldpw->si_ex_bitmap,
|
|
vec_len (ldpw->si_ex_bitmap) *
|
|
sizeof (clib_bitmap_t));
|
|
|
|
rv = vls_select (si_bits, readfds ? ldpw->rd_bitmap : NULL,
|
|
writefds ? ldpw->wr_bitmap : NULL,
|
|
exceptfds ? ldpw->ex_bitmap : NULL, vcl_timeout);
|
|
if (rv < 0)
|
|
{
|
|
errno = -rv;
|
|
rv = -1;
|
|
goto done;
|
|
}
|
|
else if (rv > 0)
|
|
{
|
|
if (ldp_select_vcl_map_to_libc (ldpw->rd_bitmap, readfds))
|
|
{
|
|
rv = -1;
|
|
goto done;
|
|
}
|
|
|
|
if (ldp_select_vcl_map_to_libc (ldpw->wr_bitmap, writefds))
|
|
{
|
|
rv = -1;
|
|
goto done;
|
|
}
|
|
|
|
if (ldp_select_vcl_map_to_libc (ldpw->ex_bitmap, exceptfds))
|
|
{
|
|
rv = -1;
|
|
goto done;
|
|
}
|
|
bits_set = rv;
|
|
}
|
|
}
|
|
if (libc_bits)
|
|
{
|
|
if (readfds)
|
|
clib_memcpy_fast (ldpw->rd_bitmap, ldpw->libc_rd_bitmap,
|
|
vec_len (ldpw->libc_rd_bitmap) *
|
|
sizeof (clib_bitmap_t));
|
|
if (writefds)
|
|
clib_memcpy_fast (ldpw->wr_bitmap, ldpw->libc_wr_bitmap,
|
|
vec_len (ldpw->libc_wr_bitmap) *
|
|
sizeof (clib_bitmap_t));
|
|
if (exceptfds)
|
|
clib_memcpy_fast (ldpw->ex_bitmap, ldpw->libc_ex_bitmap,
|
|
vec_len (ldpw->libc_ex_bitmap) *
|
|
sizeof (clib_bitmap_t));
|
|
|
|
rv = libc_pselect (libc_bits,
|
|
readfds ? (fd_set *) ldpw->rd_bitmap : NULL,
|
|
writefds ? (fd_set *) ldpw->wr_bitmap : NULL,
|
|
exceptfds ? (fd_set *) ldpw->ex_bitmap : NULL,
|
|
&libc_tspec, sigmask);
|
|
if (rv > 0)
|
|
{
|
|
ldp_select_libc_map_merge (ldpw->rd_bitmap, readfds);
|
|
ldp_select_libc_map_merge (ldpw->wr_bitmap, writefds);
|
|
ldp_select_libc_map_merge (ldpw->ex_bitmap, exceptfds);
|
|
bits_set += rv;
|
|
}
|
|
}
|
|
|
|
if (bits_set)
|
|
{
|
|
rv = bits_set;
|
|
goto done;
|
|
}
|
|
}
|
|
while ((time_out == -1) || (clib_time_now (&ldpw->clib_time) < time_out));
|
|
rv = 0;
|
|
|
|
done:
|
|
/* TBD: set timeout to amount of time left */
|
|
clib_bitmap_zero (ldpw->rd_bitmap);
|
|
clib_bitmap_zero (ldpw->si_rd_bitmap);
|
|
clib_bitmap_zero (ldpw->libc_rd_bitmap);
|
|
clib_bitmap_zero (ldpw->wr_bitmap);
|
|
clib_bitmap_zero (ldpw->si_wr_bitmap);
|
|
clib_bitmap_zero (ldpw->libc_wr_bitmap);
|
|
clib_bitmap_zero (ldpw->ex_bitmap);
|
|
clib_bitmap_zero (ldpw->si_ex_bitmap);
|
|
clib_bitmap_zero (ldpw->libc_ex_bitmap);
|
|
|
|
return rv;
|
|
}
|
|
|
|
/*
 * select () interposer: converts the timeval timeout (if any) to a
 * timespec and defers to ldp_pselect () without a signal mask.
 */
int
select (int nfds, fd_set * __restrict readfds,
        fd_set * __restrict writefds,
        fd_set * __restrict exceptfds, struct timeval *__restrict timeout)
{
  struct timespec ts;
  struct timespec *ts_arg = NULL;

  if (timeout)
    {
      ts.tv_sec = timeout->tv_sec;
      ts.tv_nsec = timeout->tv_usec * 1000;
      ts_arg = &ts;
    }

  return ldp_pselect (nfds, readfds, writefds, exceptfds, ts_arg, NULL);
}
|
|
|
|
#ifdef __USE_XOPEN2K
/*
 * pselect () interposer.  All arguments, including the caller's signal
 * mask, are handed to ldp_pselect (), which forwards the mask to
 * libc_pselect ().  (The mask used to be replaced by 0, silently dropping
 * the signal-masking semantics requested by the application.)
 */
int
pselect (int nfds, fd_set * __restrict readfds,
         fd_set * __restrict writefds,
         fd_set * __restrict exceptfds,
         const struct timespec *__restrict timeout,
         const __sigset_t * __restrict sigmask)
{
  return ldp_pselect (nfds, readfds, writefds, exceptfds, timeout, sigmask);
}
#endif
|
|
|
|
/* If transparent TLS mode is turned on, then ldp will load key and cert.
|
|
*/
|
|
static int
|
|
load_cert_key_pair (void)
|
|
{
|
|
char *cert_str = getenv (LDP_ENV_TLS_CERT);
|
|
char *key_str = getenv (LDP_ENV_TLS_KEY);
|
|
char cert_buf[4096], key_buf[4096];
|
|
int cert_size, key_size;
|
|
vppcom_cert_key_pair_t crypto;
|
|
int ckp_index;
|
|
FILE *fp;
|
|
|
|
if (!cert_str || !key_str)
|
|
{
|
|
LDBG (0, "ERROR: failed to read LDP environment %s\n",
|
|
LDP_ENV_TLS_CERT);
|
|
return -1;
|
|
}
|
|
|
|
fp = fopen (cert_str, "r");
|
|
if (fp == NULL)
|
|
{
|
|
LDBG (0, "ERROR: failed to open cert file %s \n", cert_str);
|
|
return -1;
|
|
}
|
|
cert_size = fread (cert_buf, sizeof (char), sizeof (cert_buf), fp);
|
|
fclose (fp);
|
|
|
|
fp = fopen (key_str, "r");
|
|
if (fp == NULL)
|
|
{
|
|
LDBG (0, "ERROR: failed to open key file %s \n", key_str);
|
|
return -1;
|
|
}
|
|
key_size = fread (key_buf, sizeof (char), sizeof (key_buf), fp);
|
|
fclose (fp);
|
|
|
|
crypto.cert = cert_buf;
|
|
crypto.key = key_buf;
|
|
crypto.cert_len = cert_size;
|
|
crypto.key_len = key_size;
|
|
ckp_index = vppcom_add_cert_key_pair (&crypto);
|
|
if (ckp_index < 0)
|
|
{
|
|
LDBG (0, "ERROR: failed to add cert key pair\n");
|
|
return -1;
|
|
}
|
|
|
|
ldp->ckpair_index = ckp_index;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
assign_cert_key_pair (vls_handle_t vlsh)
|
|
{
|
|
uint32_t ckp_len;
|
|
|
|
if (ldp->ckpair_index == ~0 && load_cert_key_pair () < 0)
|
|
return -1;
|
|
|
|
ckp_len = sizeof (ldp->ckpair_index);
|
|
return vls_attr (vlsh, VPPCOM_ATTR_SET_CKPAIR, &ldp->ckpair_index, &ckp_len);
|
|
}
|
|
|
|
int
|
|
socket (int domain, int type, int protocol)
|
|
{
|
|
int rv, sock_type = type & ~(SOCK_CLOEXEC | SOCK_NONBLOCK);
|
|
u8 is_nonblocking = type & SOCK_NONBLOCK ? 1 : 0;
|
|
vls_handle_t vlsh;
|
|
|
|
ldp_init_check ();
|
|
|
|
if (((domain == AF_INET) || (domain == AF_INET6)) &&
|
|
((sock_type == SOCK_STREAM) || (sock_type == SOCK_DGRAM)))
|
|
{
|
|
u8 proto;
|
|
if (ldp->transparent_tls)
|
|
{
|
|
proto = VPPCOM_PROTO_TLS;
|
|
}
|
|
else
|
|
proto = ((sock_type == SOCK_DGRAM) ?
|
|
VPPCOM_PROTO_UDP : VPPCOM_PROTO_TCP);
|
|
|
|
LDBG (0, "calling vls_create: proto %u (%s), is_nonblocking %u",
|
|
proto, vppcom_proto_str (proto), is_nonblocking);
|
|
|
|
vlsh = vls_create (proto, is_nonblocking);
|
|
if (vlsh < 0)
|
|
{
|
|
errno = -vlsh;
|
|
rv = -1;
|
|
}
|
|
else
|
|
{
|
|
if (ldp->transparent_tls)
|
|
{
|
|
if (assign_cert_key_pair (vlsh) < 0)
|
|
return -1;
|
|
}
|
|
rv = ldp_vlsh_to_fd (vlsh);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
LDBG (0, "calling libc_socket");
|
|
rv = libc_socket (domain, type, protocol);
|
|
}
|
|
|
|
return rv;
|
|
}
|
|
|
|
/*
 * Create two new sockets, of type TYPE in domain DOMAIN and using
 * protocol PROTOCOL, which are connected to each other, and put file
 * descriptors for them in FDS[0] and FDS[1].  If PROTOCOL is zero,
 * one will be chosen automatically.
 * Returns 0 on success, -1 for errors.
 *
 * Not implemented for AF_INET/AF_INET6 stream and datagram sockets
 * (fails with ENOSYS); all other combinations are forwarded to libc.
 */
int
socketpair (int domain, int type, int protocol, int fds[2])
{
  int sock_type = type & ~(SOCK_CLOEXEC | SOCK_NONBLOCK);
  int is_inet, is_vcl_type;

  ldp_init_check ();

  is_inet = (domain == AF_INET) || (domain == AF_INET6);
  is_vcl_type = (sock_type == SOCK_STREAM) || (sock_type == SOCK_DGRAM);

  if (!(is_inet && is_vcl_type))
    {
      LDBG (1, "calling libc_socketpair");
      return libc_socketpair (domain, type, protocol, fds);
    }

  LDBG (0, "LDP-TBD");
  errno = ENOSYS;
  return -1;
}
|
|
|
|
int
|
|
bind (int fd, __CONST_SOCKADDR_ARG _addr, socklen_t len)
|
|
{
|
|
const struct sockaddr *addr = SOCKADDR_GET_SA (_addr);
|
|
vls_handle_t vlsh;
|
|
int rv;
|
|
|
|
ldp_init_check ();
|
|
|
|
vlsh = ldp_fd_to_vlsh (fd);
|
|
if (vlsh != VLS_INVALID_HANDLE)
|
|
{
|
|
vppcom_endpt_t ep;
|
|
|
|
switch (addr->sa_family)
|
|
{
|
|
case AF_INET:
|
|
if (len != sizeof (struct sockaddr_in))
|
|
{
|
|
LDBG (0, "ERROR: fd %d: vlsh %u: Invalid AF_INET addr len %u!",
|
|
fd, vlsh, len);
|
|
errno = EINVAL;
|
|
rv = -1;
|
|
goto done;
|
|
}
|
|
ep.is_ip4 = VPPCOM_IS_IP4;
|
|
ep.ip = (u8 *) & ((const struct sockaddr_in *) addr)->sin_addr;
|
|
ep.port = (u16) ((const struct sockaddr_in *) addr)->sin_port;
|
|
break;
|
|
|
|
case AF_INET6:
|
|
if (len != sizeof (struct sockaddr_in6))
|
|
{
|
|
LDBG (0, "ERROR: fd %d: vlsh %u: Invalid AF_INET6 addr len %u!",
|
|
fd, vlsh, len);
|
|
errno = EINVAL;
|
|
rv = -1;
|
|
goto done;
|
|
}
|
|
ep.is_ip4 = VPPCOM_IS_IP6;
|
|
ep.ip = (u8 *) & ((const struct sockaddr_in6 *) addr)->sin6_addr;
|
|
ep.port = (u16) ((const struct sockaddr_in6 *) addr)->sin6_port;
|
|
break;
|
|
|
|
default:
|
|
LDBG (0, "ERROR: fd %d: vlsh %u: Unsupported address family %u!",
|
|
fd, vlsh, addr->sa_family);
|
|
errno = EAFNOSUPPORT;
|
|
rv = -1;
|
|
goto done;
|
|
}
|
|
LDBG (0, "fd %d: calling vls_bind: vlsh %u, addr %p, len %u", fd, vlsh,
|
|
addr, len);
|
|
|
|
rv = vls_bind (vlsh, &ep);
|
|
if (rv != VPPCOM_OK)
|
|
{
|
|
errno = -rv;
|
|
rv = -1;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
LDBG (0, "fd %d: calling libc_bind: addr %p, len %u", fd, addr, len);
|
|
rv = libc_bind (fd, addr, len);
|
|
}
|
|
|
|
done:
|
|
LDBG (1, "fd %d: returning %d", fd, rv);
|
|
|
|
return rv;
|
|
}
|
|
|
|
static inline int
|
|
ldp_copy_ep_to_sockaddr (struct sockaddr *addr, socklen_t *__restrict len,
|
|
vppcom_endpt_t *ep)
|
|
{
|
|
int rv = 0, sa_len, copy_len;
|
|
|
|
ldp_init_check ();
|
|
|
|
if (addr && len && ep)
|
|
{
|
|
addr->sa_family = (ep->is_ip4 == VPPCOM_IS_IP4) ? AF_INET : AF_INET6;
|
|
switch (addr->sa_family)
|
|
{
|
|
case AF_INET:
|
|
((struct sockaddr_in *) addr)->sin_port = ep->port;
|
|
if (*len > sizeof (struct sockaddr_in))
|
|
*len = sizeof (struct sockaddr_in);
|
|
sa_len = sizeof (struct sockaddr_in) - sizeof (struct in_addr);
|
|
copy_len = *len - sa_len;
|
|
if (copy_len > 0)
|
|
memcpy (&((struct sockaddr_in *) addr)->sin_addr, ep->ip,
|
|
copy_len);
|
|
break;
|
|
|
|
case AF_INET6:
|
|
((struct sockaddr_in6 *) addr)->sin6_port = ep->port;
|
|
if (*len > sizeof (struct sockaddr_in6))
|
|
*len = sizeof (struct sockaddr_in6);
|
|
sa_len = sizeof (struct sockaddr_in6) - sizeof (struct in6_addr);
|
|
copy_len = *len - sa_len;
|
|
if (copy_len > 0)
|
|
memcpy (((struct sockaddr_in6 *) addr)->sin6_addr.
|
|
__in6_u.__u6_addr8, ep->ip, copy_len);
|
|
break;
|
|
|
|
default:
|
|
/* Not possible */
|
|
rv = -EAFNOSUPPORT;
|
|
break;
|
|
}
|
|
}
|
|
return rv;
|
|
}
|
|
|
|
int
|
|
getsockname (int fd, __SOCKADDR_ARG _addr, socklen_t *__restrict len)
|
|
{
|
|
struct sockaddr *addr = SOCKADDR_GET_SA (_addr);
|
|
vls_handle_t vlsh;
|
|
int rv;
|
|
|
|
ldp_init_check ();
|
|
|
|
vlsh = ldp_fd_to_vlsh (fd);
|
|
if (vlsh != VLS_INVALID_HANDLE)
|
|
{
|
|
vppcom_endpt_t ep;
|
|
u8 addr_buf[sizeof (struct in6_addr)];
|
|
u32 size = sizeof (ep);
|
|
|
|
ep.ip = addr_buf;
|
|
|
|
rv = vls_attr (vlsh, VPPCOM_ATTR_GET_LCL_ADDR, &ep, &size);
|
|
if (rv != VPPCOM_OK)
|
|
{
|
|
errno = -rv;
|
|
rv = -1;
|
|
}
|
|
else
|
|
{
|
|
rv = ldp_copy_ep_to_sockaddr (addr, len, &ep);
|
|
if (rv != VPPCOM_OK)
|
|
{
|
|
errno = -rv;
|
|
rv = -1;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
rv = libc_getsockname (fd, _addr, len);
|
|
}
|
|
|
|
return rv;
|
|
}
|
|
|
|
int
|
|
connect (int fd, __CONST_SOCKADDR_ARG _addr, socklen_t len)
|
|
{
|
|
const struct sockaddr *addr = SOCKADDR_GET_SA (_addr);
|
|
vls_handle_t vlsh;
|
|
int rv;
|
|
|
|
ldp_init_check ();
|
|
|
|
if (!addr)
|
|
{
|
|
LDBG (0, "ERROR: fd %d: NULL addr, len %u", fd, len);
|
|
errno = EINVAL;
|
|
rv = -1;
|
|
goto done;
|
|
}
|
|
|
|
vlsh = ldp_fd_to_vlsh (fd);
|
|
if (vlsh != VLS_INVALID_HANDLE)
|
|
{
|
|
vppcom_endpt_t ep;
|
|
|
|
switch (addr->sa_family)
|
|
{
|
|
case AF_INET:
|
|
if (len != sizeof (struct sockaddr_in))
|
|
{
|
|
LDBG (0, "fd %d: ERROR vlsh %u: Invalid AF_INET addr len %u!",
|
|
fd, vlsh, len);
|
|
errno = EINVAL;
|
|
rv = -1;
|
|
goto done;
|
|
}
|
|
ep.is_ip4 = VPPCOM_IS_IP4;
|
|
ep.ip = (u8 *) & ((const struct sockaddr_in *) addr)->sin_addr;
|
|
ep.port = (u16) ((const struct sockaddr_in *) addr)->sin_port;
|
|
break;
|
|
|
|
case AF_INET6:
|
|
if (len != sizeof (struct sockaddr_in6))
|
|
{
|
|
LDBG (0, "fd %d: ERROR vlsh %u: Invalid AF_INET6 addr len %u!",
|
|
fd, vlsh, len);
|
|
errno = EINVAL;
|
|
rv = -1;
|
|
goto done;
|
|
}
|
|
ep.is_ip4 = VPPCOM_IS_IP6;
|
|
ep.ip = (u8 *) & ((const struct sockaddr_in6 *) addr)->sin6_addr;
|
|
ep.port = (u16) ((const struct sockaddr_in6 *) addr)->sin6_port;
|
|
break;
|
|
|
|
default:
|
|
LDBG (0, "fd %d: ERROR vlsh %u: Unsupported address family %u!",
|
|
fd, vlsh, addr->sa_family);
|
|
errno = EAFNOSUPPORT;
|
|
rv = -1;
|
|
goto done;
|
|
}
|
|
LDBG (0, "fd %d: calling vls_connect(): vlsh %u addr %p len %u", fd,
|
|
vlsh, addr, len);
|
|
|
|
rv = vls_connect (vlsh, &ep);
|
|
if (rv != VPPCOM_OK)
|
|
{
|
|
errno = -rv;
|
|
rv = -1;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
LDBG (0, "fd %d: calling libc_connect(): addr %p, len %u",
|
|
fd, addr, len);
|
|
|
|
rv = libc_connect (fd, addr, len);
|
|
}
|
|
|
|
done:
|
|
LDBG (1, "fd %d: returning %d (0x%x)", fd, rv, rv);
|
|
return rv;
|
|
}
|
|
|
|
int
|
|
getpeername (int fd, __SOCKADDR_ARG _addr, socklen_t *__restrict len)
|
|
{
|
|
struct sockaddr *addr = SOCKADDR_GET_SA (_addr);
|
|
vls_handle_t vlsh;
|
|
int rv;
|
|
|
|
ldp_init_check ();
|
|
|
|
vlsh = ldp_fd_to_vlsh (fd);
|
|
if (vlsh != VLS_INVALID_HANDLE)
|
|
{
|
|
vppcom_endpt_t ep;
|
|
u8 addr_buf[sizeof (struct in6_addr)];
|
|
u32 size = sizeof (ep);
|
|
|
|
ep.ip = addr_buf;
|
|
rv = vls_attr (vlsh, VPPCOM_ATTR_GET_PEER_ADDR, &ep, &size);
|
|
if (rv != VPPCOM_OK)
|
|
{
|
|
errno = -rv;
|
|
rv = -1;
|
|
}
|
|
else
|
|
{
|
|
rv = ldp_copy_ep_to_sockaddr (addr, len, &ep);
|
|
if (rv != VPPCOM_OK)
|
|
{
|
|
errno = -rv;
|
|
rv = -1;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
rv = libc_getpeername (fd, addr, len);
|
|
}
|
|
|
|
return rv;
|
|
}
|
|
|
|
ssize_t
|
|
send (int fd, const void *buf, size_t n, int flags)
|
|
{
|
|
vls_handle_t vlsh = ldp_fd_to_vlsh (fd);
|
|
ssize_t size;
|
|
|
|
ldp_init_check ();
|
|
|
|
if (vlsh != VLS_INVALID_HANDLE)
|
|
{
|
|
size = vls_sendto (vlsh, (void *) buf, n, flags, NULL);
|
|
if (size < VPPCOM_OK)
|
|
{
|
|
errno = -size;
|
|
size = -1;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
size = libc_send (fd, buf, n, flags);
|
|
}
|
|
|
|
return size;
|
|
}
|
|
|
|
ssize_t
|
|
sendfile (int out_fd, int in_fd, off_t * offset, size_t len)
|
|
{
|
|
ldp_worker_ctx_t *ldpw = ldp_worker_get_current ();
|
|
vls_handle_t vlsh;
|
|
ssize_t size = 0;
|
|
|
|
ldp_init_check ();
|
|
|
|
vlsh = ldp_fd_to_vlsh (out_fd);
|
|
if (vlsh != VLS_INVALID_HANDLE)
|
|
{
|
|
int rv;
|
|
ssize_t results = 0;
|
|
size_t n_bytes_left = len;
|
|
size_t bytes_to_read;
|
|
int nbytes;
|
|
u8 eagain = 0;
|
|
u32 flags, flags_len = sizeof (flags);
|
|
|
|
rv = vls_attr (vlsh, VPPCOM_ATTR_GET_FLAGS, &flags, &flags_len);
|
|
if (PREDICT_FALSE (rv != VPPCOM_OK))
|
|
{
|
|
LDBG (0, "ERROR: out fd %d: vls_attr: vlsh %u, returned %d (%s)!",
|
|
out_fd, vlsh, rv, vppcom_retval_str (rv));
|
|
|
|
vec_reset_length (ldpw->io_buffer);
|
|
errno = -rv;
|
|
size = -1;
|
|
goto done;
|
|
}
|
|
|
|
if (offset)
|
|
{
|
|
off_t off = lseek (in_fd, *offset, SEEK_SET);
|
|
if (PREDICT_FALSE (off == -1))
|
|
{
|
|
size = -1;
|
|
goto done;
|
|
}
|
|
|
|
ASSERT (off == *offset);
|
|
}
|
|
|
|
do
|
|
{
|
|
size = vls_attr (vlsh, VPPCOM_ATTR_GET_NWRITE, 0, 0);
|
|
if (size < 0)
|
|
{
|
|
LDBG (0, "ERROR: fd %d: vls_attr: vlsh %u returned %ld (%s)!",
|
|
out_fd, vlsh, size, vppcom_retval_str (size));
|
|
vec_reset_length (ldpw->io_buffer);
|
|
errno = -size;
|
|
size = -1;
|
|
goto done;
|
|
}
|
|
|
|
bytes_to_read = size;
|
|
if (bytes_to_read == 0)
|
|
{
|
|
if (flags & O_NONBLOCK)
|
|
{
|
|
if (!results)
|
|
eagain = 1;
|
|
goto update_offset;
|
|
}
|
|
else
|
|
continue;
|
|
}
|
|
bytes_to_read = clib_min (n_bytes_left, bytes_to_read);
|
|
vec_validate (ldpw->io_buffer, bytes_to_read);
|
|
nbytes = libc_read (in_fd, ldpw->io_buffer, bytes_to_read);
|
|
if (nbytes < 0)
|
|
{
|
|
if (results == 0)
|
|
{
|
|
vec_reset_length (ldpw->io_buffer);
|
|
size = -1;
|
|
goto done;
|
|
}
|
|
goto update_offset;
|
|
}
|
|
|
|
size = vls_write (vlsh, ldpw->io_buffer, nbytes);
|
|
if (size < 0)
|
|
{
|
|
if (size == VPPCOM_EAGAIN)
|
|
{
|
|
if (flags & O_NONBLOCK)
|
|
{
|
|
if (!results)
|
|
eagain = 1;
|
|
goto update_offset;
|
|
}
|
|
else
|
|
continue;
|
|
}
|
|
if (results == 0)
|
|
{
|
|
vec_reset_length (ldpw->io_buffer);
|
|
errno = -size;
|
|
size = -1;
|
|
goto done;
|
|
}
|
|
goto update_offset;
|
|
}
|
|
|
|
results += nbytes;
|
|
ASSERT (n_bytes_left >= nbytes);
|
|
n_bytes_left = n_bytes_left - nbytes;
|
|
}
|
|
while (n_bytes_left > 0);
|
|
|
|
update_offset:
|
|
vec_reset_length (ldpw->io_buffer);
|
|
if (offset)
|
|
{
|
|
off_t off = lseek (in_fd, *offset, SEEK_SET);
|
|
if (PREDICT_FALSE (off == -1))
|
|
{
|
|
size = -1;
|
|
goto done;
|
|
}
|
|
|
|
ASSERT (off == *offset);
|
|
*offset += results + 1;
|
|
}
|
|
if (eagain)
|
|
{
|
|
errno = EAGAIN;
|
|
size = -1;
|
|
}
|
|
else
|
|
size = results;
|
|
}
|
|
else
|
|
{
|
|
size = libc_sendfile (out_fd, in_fd, offset, len);
|
|
}
|
|
|
|
done:
|
|
return size;
|
|
}
|
|
|
|
/* Large-file alias: forwards all arguments to sendfile() unchanged. */
ssize_t
sendfile64 (int out_fd, int in_fd, off_t * offset, size_t len)
{
  ssize_t n_sent = sendfile (out_fd, in_fd, offset, len);

  return n_sent;
}
|
|
|
|
ssize_t
|
|
recv (int fd, void *buf, size_t n, int flags)
|
|
{
|
|
vls_handle_t vlsh;
|
|
ssize_t size;
|
|
|
|
ldp_init_check ();
|
|
|
|
vlsh = ldp_fd_to_vlsh (fd);
|
|
if (vlsh != VLS_INVALID_HANDLE)
|
|
{
|
|
size = vls_recvfrom (vlsh, buf, n, flags, NULL);
|
|
if (size < 0)
|
|
{
|
|
errno = -size;
|
|
size = -1;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
size = libc_recv (fd, buf, n, flags);
|
|
}
|
|
|
|
return size;
|
|
}
|
|
|
|
/*
 * Checked variant of recv(): refuses requests larger than the destination
 * buffer (returns -1) and otherwise forwards to recv().
 */
ssize_t
__recv_chk (int fd, void *buf, size_t n, size_t buflen, int flags)
{
  return (n <= buflen) ? recv (fd, buf, n, flags) : -1;
}
|
|
|
|
static inline int
|
|
ldp_vls_sendo (vls_handle_t vlsh, const void *buf, size_t n,
|
|
vppcom_endpt_tlv_t *app_tlvs, int flags,
|
|
__CONST_SOCKADDR_ARG _addr, socklen_t addr_len)
|
|
{
|
|
const struct sockaddr *addr = SOCKADDR_GET_SA (_addr);
|
|
vppcom_endpt_t *ep = 0;
|
|
vppcom_endpt_t _ep;
|
|
|
|
_ep.app_tlvs = app_tlvs;
|
|
|
|
if (addr)
|
|
{
|
|
ep = &_ep;
|
|
switch (addr->sa_family)
|
|
{
|
|
case AF_INET:
|
|
ep->is_ip4 = VPPCOM_IS_IP4;
|
|
ep->ip =
|
|
(uint8_t *) & ((const struct sockaddr_in *) addr)->sin_addr;
|
|
ep->port = (uint16_t) ((const struct sockaddr_in *) addr)->sin_port;
|
|
break;
|
|
|
|
case AF_INET6:
|
|
ep->is_ip4 = VPPCOM_IS_IP6;
|
|
ep->ip =
|
|
(uint8_t *) & ((const struct sockaddr_in6 *) addr)->sin6_addr;
|
|
ep->port =
|
|
(uint16_t) ((const struct sockaddr_in6 *) addr)->sin6_port;
|
|
break;
|
|
|
|
default:
|
|
return EAFNOSUPPORT;
|
|
}
|
|
}
|
|
|
|
return vls_sendto (vlsh, (void *) buf, n, flags, ep);
|
|
}
|
|
|
|
static int
|
|
ldp_vls_recvfrom (vls_handle_t vlsh, void *__restrict buf, size_t n, int flags,
|
|
__SOCKADDR_ARG _addr, socklen_t *__restrict addr_len)
|
|
{
|
|
u8 src_addr[sizeof (struct sockaddr_in6)];
|
|
struct sockaddr *addr = SOCKADDR_GET_SA (_addr);
|
|
vppcom_endpt_t ep;
|
|
ssize_t size;
|
|
int rv;
|
|
|
|
if (addr)
|
|
{
|
|
ep.ip = src_addr;
|
|
size = vls_recvfrom (vlsh, buf, n, flags, &ep);
|
|
|
|
if (size > 0)
|
|
{
|
|
rv = ldp_copy_ep_to_sockaddr (addr, addr_len, &ep);
|
|
if (rv < 0)
|
|
size = rv;
|
|
}
|
|
}
|
|
else
|
|
size = vls_recvfrom (vlsh, buf, n, flags, NULL);
|
|
|
|
return size;
|
|
}
|
|
|
|
ssize_t
|
|
sendto (int fd, const void *buf, size_t n, int flags,
|
|
__CONST_SOCKADDR_ARG _addr, socklen_t addr_len)
|
|
{
|
|
const struct sockaddr *addr = SOCKADDR_GET_SA (_addr);
|
|
vls_handle_t vlsh;
|
|
ssize_t size;
|
|
|
|
ldp_init_check ();
|
|
|
|
vlsh = ldp_fd_to_vlsh (fd);
|
|
if (vlsh != VLS_INVALID_HANDLE)
|
|
{
|
|
size = ldp_vls_sendo (vlsh, buf, n, NULL, flags, addr, addr_len);
|
|
if (size < 0)
|
|
{
|
|
errno = -size;
|
|
size = -1;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
size = libc_sendto (fd, buf, n, flags, addr, addr_len);
|
|
}
|
|
|
|
return size;
|
|
}
|
|
|
|
ssize_t
|
|
recvfrom (int fd, void *__restrict buf, size_t n, int flags,
|
|
__SOCKADDR_ARG addr, socklen_t * __restrict addr_len)
|
|
{
|
|
vls_handle_t vlsh;
|
|
ssize_t size;
|
|
|
|
ldp_init_check ();
|
|
|
|
vlsh = ldp_fd_to_vlsh (fd);
|
|
if (vlsh != VLS_INVALID_HANDLE)
|
|
{
|
|
size = ldp_vls_recvfrom (vlsh, buf, n, flags, addr, addr_len);
|
|
if (size < 0)
|
|
{
|
|
errno = -size;
|
|
size = -1;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
size = libc_recvfrom (fd, buf, n, flags, addr, addr_len);
|
|
}
|
|
|
|
return size;
|
|
}
|
|
|
|
static int
|
|
ldp_parse_cmsg (vls_handle_t vlsh, const struct msghdr *msg,
|
|
vppcom_endpt_tlv_t **app_tlvs)
|
|
{
|
|
uint8_t *ad, *at = (uint8_t *) *app_tlvs;
|
|
vppcom_endpt_tlv_t *adh;
|
|
struct in_pktinfo *pi;
|
|
struct cmsghdr *cmsg;
|
|
|
|
cmsg = CMSG_FIRSTHDR (msg);
|
|
|
|
while (cmsg != NULL)
|
|
{
|
|
switch (cmsg->cmsg_level)
|
|
{
|
|
case SOL_UDP:
|
|
switch (cmsg->cmsg_type)
|
|
{
|
|
case UDP_SEGMENT:
|
|
vec_add2 (at, adh, sizeof (*adh));
|
|
adh->data_type = VCL_UDP_SEGMENT;
|
|
adh->data_len = sizeof (uint16_t);
|
|
vec_add2 (at, ad, sizeof (uint16_t));
|
|
*(uint16_t *) ad = *(uint16_t *) CMSG_DATA (cmsg);
|
|
break;
|
|
default:
|
|
LDBG (1, "SOL_UDP cmsg_type %u not supported", cmsg->cmsg_type);
|
|
break;
|
|
}
|
|
break;
|
|
case SOL_IP:
|
|
switch (cmsg->cmsg_type)
|
|
{
|
|
case IP_PKTINFO:
|
|
vec_add2 (at, adh, sizeof (*adh));
|
|
adh->data_type = VCL_IP_PKTINFO;
|
|
adh->data_len = sizeof (struct in_addr);
|
|
vec_add2 (at, ad, sizeof (struct in_addr));
|
|
pi = (void *) CMSG_DATA (cmsg);
|
|
clib_memcpy_fast (ad, &pi->ipi_spec_dst,
|
|
sizeof (struct in_addr));
|
|
break;
|
|
default:
|
|
LDBG (1, "SOL_IP cmsg_type %u not supported", cmsg->cmsg_type);
|
|
break;
|
|
}
|
|
break;
|
|
default:
|
|
LDBG (1, "cmsg_level %u not supported", cmsg->cmsg_level);
|
|
break;
|
|
}
|
|
cmsg = CMSG_NXTHDR ((struct msghdr *) msg, cmsg);
|
|
}
|
|
*app_tlvs = (vppcom_endpt_tlv_t *) at;
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
ldp_make_cmsg (vls_handle_t vlsh, struct msghdr *msg)
|
|
{
|
|
u32 optval, optlen = sizeof (optval);
|
|
struct cmsghdr *cmsg;
|
|
|
|
cmsg = CMSG_FIRSTHDR (msg);
|
|
memset (cmsg, 0, sizeof (*cmsg));
|
|
|
|
if (!vls_attr (vlsh, VPPCOM_ATTR_GET_IP_PKTINFO, (void *) &optval, &optlen))
|
|
return 0;
|
|
|
|
if (optval)
|
|
{
|
|
vppcom_endpt_t ep;
|
|
u8 addr_buf[sizeof (struct in_addr)];
|
|
u32 size = sizeof (ep);
|
|
|
|
ep.ip = addr_buf;
|
|
|
|
if (!vls_attr (vlsh, VPPCOM_ATTR_GET_LCL_ADDR, &ep, &size))
|
|
{
|
|
struct in_pktinfo pi = {};
|
|
|
|
clib_memcpy (&pi.ipi_addr, ep.ip, sizeof (struct in_addr));
|
|
cmsg->cmsg_level = SOL_IP;
|
|
cmsg->cmsg_type = IP_PKTINFO;
|
|
cmsg->cmsg_len = CMSG_LEN (sizeof (pi));
|
|
clib_memcpy (CMSG_DATA (cmsg), &pi, sizeof (pi));
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
ssize_t
|
|
sendmsg (int fd, const struct msghdr * msg, int flags)
|
|
{
|
|
vls_handle_t vlsh;
|
|
ssize_t size;
|
|
|
|
ldp_init_check ();
|
|
|
|
vlsh = ldp_fd_to_vlsh (fd);
|
|
if (vlsh != VLS_INVALID_HANDLE)
|
|
{
|
|
vppcom_endpt_tlv_t *app_tlvs = 0;
|
|
struct iovec *iov = msg->msg_iov;
|
|
ssize_t total = 0;
|
|
int i, rv = 0;
|
|
|
|
ldp_parse_cmsg (vlsh, msg, &app_tlvs);
|
|
|
|
for (i = 0; i < msg->msg_iovlen; ++i)
|
|
{
|
|
rv = ldp_vls_sendo (vlsh, iov[i].iov_base, iov[i].iov_len, app_tlvs,
|
|
flags, msg->msg_name, msg->msg_namelen);
|
|
if (rv < 0)
|
|
break;
|
|
else
|
|
{
|
|
total += rv;
|
|
if (rv < iov[i].iov_len)
|
|
break;
|
|
}
|
|
}
|
|
|
|
vec_free (app_tlvs);
|
|
|
|
if (rv < 0 && total == 0)
|
|
{
|
|
errno = -rv;
|
|
size = -1;
|
|
}
|
|
else
|
|
size = total;
|
|
}
|
|
else
|
|
{
|
|
size = libc_sendmsg (fd, msg, flags);
|
|
}
|
|
|
|
return size;
|
|
}
|
|
|
|
#ifdef _GNU_SOURCE
|
|
int
|
|
sendmmsg (int fd, struct mmsghdr *vmessages, unsigned int vlen, int flags)
|
|
{
|
|
ssize_t size;
|
|
const char *func_str;
|
|
u32 sh = ldp_fd_to_vlsh (fd);
|
|
|
|
ldp_init_check ();
|
|
|
|
if (sh != VLS_INVALID_HANDLE)
|
|
{
|
|
clib_warning ("LDP<%d>: LDP-TBD", getpid ());
|
|
errno = ENOSYS;
|
|
size = -1;
|
|
}
|
|
else
|
|
{
|
|
func_str = "libc_sendmmsg";
|
|
|
|
if (LDP_DEBUG > 2)
|
|
clib_warning ("LDP<%d>: fd %d (0x%x): calling %s(): "
|
|
"vmessages %p, vlen %u, flags 0x%x",
|
|
getpid (), fd, fd, func_str, vmessages, vlen, flags);
|
|
|
|
size = libc_sendmmsg (fd, vmessages, vlen, flags);
|
|
}
|
|
|
|
if (LDP_DEBUG > 2)
|
|
{
|
|
if (size < 0)
|
|
{
|
|
int errno_val = errno;
|
|
clib_warning ("LDP<%d>: ERROR: fd %d (0x%x): %s() failed! "
|
|
"rv %d, errno = %d", getpid (), fd, fd,
|
|
func_str, size, errno_val);
|
|
errno = errno_val;
|
|
}
|
|
else
|
|
clib_warning ("LDP<%d>: fd %d (0x%x): returning %d (0x%x)",
|
|
getpid (), fd, fd, size, size);
|
|
}
|
|
return size;
|
|
}
|
|
#endif
|
|
|
|
ssize_t
|
|
recvmsg (int fd, struct msghdr * msg, int flags)
|
|
{
|
|
vls_handle_t vlsh;
|
|
ssize_t size;
|
|
|
|
ldp_init_check ();
|
|
|
|
vlsh = ldp_fd_to_vlsh (fd);
|
|
if (vlsh != VLS_INVALID_HANDLE)
|
|
{
|
|
struct iovec *iov = msg->msg_iov;
|
|
ssize_t max_deq, total = 0;
|
|
int i, rv = 0;
|
|
|
|
max_deq = vls_attr (vlsh, VPPCOM_ATTR_GET_NREAD, 0, 0);
|
|
if (!max_deq)
|
|
return 0;
|
|
|
|
for (i = 0; i < msg->msg_iovlen; i++)
|
|
{
|
|
rv = ldp_vls_recvfrom (vlsh, iov[i].iov_base, iov[i].iov_len, flags,
|
|
(i == 0 ? msg->msg_name : NULL),
|
|
(i == 0 ? &msg->msg_namelen : NULL));
|
|
if (rv <= 0)
|
|
break;
|
|
else
|
|
{
|
|
total += rv;
|
|
if (rv < iov[i].iov_len)
|
|
break;
|
|
}
|
|
if (total >= max_deq)
|
|
break;
|
|
}
|
|
|
|
if (rv < 0 && total == 0)
|
|
{
|
|
errno = -rv;
|
|
size = -1;
|
|
}
|
|
else
|
|
{
|
|
if (msg->msg_controllen)
|
|
ldp_make_cmsg (vlsh, msg);
|
|
size = total;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
size = libc_recvmsg (fd, msg, flags);
|
|
}
|
|
|
|
return size;
|
|
}
|
|
|
|
#ifdef _GNU_SOURCE
|
|
int
|
|
recvmmsg (int fd, struct mmsghdr *vmessages,
|
|
unsigned int vlen, int flags, struct timespec *tmo)
|
|
{
|
|
ldp_worker_ctx_t *ldpw = ldp_worker_get_current ();
|
|
u32 sh;
|
|
|
|
ldp_init_check ();
|
|
|
|
sh = ldp_fd_to_vlsh (fd);
|
|
|
|
if (sh != VLS_INVALID_HANDLE)
|
|
{
|
|
struct mmsghdr *mh;
|
|
ssize_t rv = 0;
|
|
u32 nvecs = 0;
|
|
f64 time_out;
|
|
|
|
if (PREDICT_FALSE (ldpw->clib_time.init_cpu_time == 0))
|
|
clib_time_init (&ldpw->clib_time);
|
|
if (tmo)
|
|
{
|
|
time_out = (f64) tmo->tv_sec + (f64) tmo->tv_nsec / (f64) 1e9;
|
|
time_out += clib_time_now (&ldpw->clib_time);
|
|
}
|
|
else
|
|
{
|
|
time_out = (f64) ~0;
|
|
}
|
|
|
|
while (nvecs < vlen)
|
|
{
|
|
mh = &vmessages[nvecs];
|
|
rv = recvmsg (fd, &mh->msg_hdr, flags);
|
|
if (rv > 0)
|
|
{
|
|
mh->msg_len = rv;
|
|
nvecs += 1;
|
|
continue;
|
|
}
|
|
|
|
if (!time_out || clib_time_now (&ldpw->clib_time) >= time_out)
|
|
break;
|
|
|
|
usleep (1);
|
|
}
|
|
|
|
return nvecs > 0 ? nvecs : rv;
|
|
}
|
|
else
|
|
{
|
|
return libc_recvmmsg (fd, vmessages, vlen, flags, tmo);
|
|
}
|
|
}
|
|
#endif
|
|
|
|
int
|
|
getsockopt (int fd, int level, int optname,
|
|
void *__restrict optval, socklen_t * __restrict optlen)
|
|
{
|
|
vls_handle_t vlsh;
|
|
int rv;
|
|
|
|
ldp_init_check ();
|
|
|
|
vlsh = ldp_fd_to_vlsh (fd);
|
|
if (vlsh != VLS_INVALID_HANDLE)
|
|
{
|
|
rv = -EOPNOTSUPP;
|
|
|
|
switch (level)
|
|
{
|
|
case SOL_TCP:
|
|
switch (optname)
|
|
{
|
|
case TCP_NODELAY:
|
|
rv = vls_attr (vlsh, VPPCOM_ATTR_GET_TCP_NODELAY,
|
|
optval, optlen);
|
|
break;
|
|
case TCP_MAXSEG:
|
|
rv = vls_attr (vlsh, VPPCOM_ATTR_GET_TCP_USER_MSS,
|
|
optval, optlen);
|
|
break;
|
|
case TCP_KEEPIDLE:
|
|
rv = vls_attr (vlsh, VPPCOM_ATTR_GET_TCP_KEEPIDLE,
|
|
optval, optlen);
|
|
break;
|
|
case TCP_KEEPINTVL:
|
|
rv = vls_attr (vlsh, VPPCOM_ATTR_GET_TCP_KEEPINTVL,
|
|
optval, optlen);
|
|
break;
|
|
case TCP_INFO:
|
|
/* Note: tcp_info in netinet/tcp.h and linux/tcp.h have
|
|
* different lenghts but overlap. Accept both for now */
|
|
if (optval && optlen)
|
|
{
|
|
LDBG (1,
|
|
"fd %d: vlsh %u SOL_TCP, TCP_INFO, optval %p, "
|
|
"optlen %d: #LDP-NOP#",
|
|
fd, vlsh, optval, *optlen);
|
|
memset (optval, 0, *optlen);
|
|
rv = VPPCOM_OK;
|
|
}
|
|
else
|
|
rv = -EFAULT;
|
|
break;
|
|
case TCP_CONGESTION:
|
|
*optlen = strlen ("cubic");
|
|
strncpy (optval, "cubic", *optlen + 1);
|
|
rv = 0;
|
|
break;
|
|
default:
|
|
LDBG (0, "ERROR: fd %d: getsockopt SOL_TCP: sid %u, "
|
|
"optname %d unsupported!", fd, vlsh, optname);
|
|
break;
|
|
}
|
|
break;
|
|
case SOL_IP:
|
|
switch (optname)
|
|
{
|
|
case SO_ORIGINAL_DST:
|
|
rv =
|
|
vls_attr (vlsh, VPPCOM_ATTR_GET_ORIGINAL_DST, optval, optlen);
|
|
break;
|
|
default:
|
|
LDBG (0,
|
|
"ERROR: fd %d: getsockopt SOL_IP: vlsh %u "
|
|
"optname %d unsupported!",
|
|
fd, vlsh, optname);
|
|
break;
|
|
}
|
|
break;
|
|
case SOL_IPV6:
|
|
switch (optname)
|
|
{
|
|
case IPV6_V6ONLY:
|
|
rv = vls_attr (vlsh, VPPCOM_ATTR_GET_V6ONLY, optval, optlen);
|
|
break;
|
|
default:
|
|
LDBG (0, "ERROR: fd %d: getsockopt SOL_IPV6: vlsh %u "
|
|
"optname %d unsupported!", fd, vlsh, optname);
|
|
break;
|
|
}
|
|
break;
|
|
case SOL_SOCKET:
|
|
switch (optname)
|
|
{
|
|
case SO_ACCEPTCONN:
|
|
rv = vls_attr (vlsh, VPPCOM_ATTR_GET_LISTEN, optval, optlen);
|
|
break;
|
|
case SO_KEEPALIVE:
|
|
rv = vls_attr (vlsh, VPPCOM_ATTR_GET_KEEPALIVE, optval, optlen);
|
|
break;
|
|
case SO_PROTOCOL:
|
|
rv = vls_attr (vlsh, VPPCOM_ATTR_GET_PROTOCOL, optval, optlen);
|
|
*(int *) optval = *(int *) optval ? SOCK_DGRAM : SOCK_STREAM;
|
|
break;
|
|
case SO_SNDBUF:
|
|
rv = vls_attr (vlsh, VPPCOM_ATTR_GET_TX_FIFO_LEN,
|
|
optval, optlen);
|
|
break;
|
|
case SO_RCVBUF:
|
|
rv = vls_attr (vlsh, VPPCOM_ATTR_GET_RX_FIFO_LEN,
|
|
optval, optlen);
|
|
break;
|
|
case SO_REUSEADDR:
|
|
rv = vls_attr (vlsh, VPPCOM_ATTR_GET_REUSEADDR, optval, optlen);
|
|
break;
|
|
case SO_REUSEPORT:
|
|
rv = vls_attr (vlsh, VPPCOM_ATTR_GET_REUSEPORT, optval, optlen);
|
|
break;
|
|
case SO_BROADCAST:
|
|
rv = vls_attr (vlsh, VPPCOM_ATTR_GET_BROADCAST, optval, optlen);
|
|
break;
|
|
case SO_DOMAIN:
|
|
rv = vls_attr (vlsh, VPPCOM_ATTR_GET_DOMAIN, optval, optlen);
|
|
break;
|
|
case SO_ERROR:
|
|
rv = vls_attr (vlsh, VPPCOM_ATTR_GET_ERROR, optval, optlen);
|
|
break;
|
|
case SO_BINDTODEVICE:
|
|
rv = 0;
|
|
break;
|
|
default:
|
|
LDBG (0, "ERROR: fd %d: getsockopt SOL_SOCKET: vlsh %u "
|
|
"optname %d unsupported!", fd, vlsh, optname);
|
|
break;
|
|
}
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
|
|
if (rv != VPPCOM_OK)
|
|
{
|
|
errno = -rv;
|
|
rv = -1;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
rv = libc_getsockopt (fd, level, optname, optval, optlen);
|
|
}
|
|
|
|
return rv;
|
|
}
|
|
|
|
int
|
|
setsockopt (int fd, int level, int optname,
|
|
const void *optval, socklen_t optlen)
|
|
{
|
|
vls_handle_t vlsh;
|
|
int rv;
|
|
|
|
ldp_init_check ();
|
|
|
|
vlsh = ldp_fd_to_vlsh (fd);
|
|
if (vlsh != VLS_INVALID_HANDLE)
|
|
{
|
|
rv = -EOPNOTSUPP;
|
|
|
|
switch (level)
|
|
{
|
|
case SOL_TCP:
|
|
switch (optname)
|
|
{
|
|
case TCP_NODELAY:
|
|
rv = vls_attr (vlsh, VPPCOM_ATTR_SET_TCP_NODELAY,
|
|
(void *) optval, &optlen);
|
|
break;
|
|
case TCP_MAXSEG:
|
|
rv = vls_attr (vlsh, VPPCOM_ATTR_SET_TCP_USER_MSS,
|
|
(void *) optval, &optlen);
|
|
break;
|
|
case TCP_KEEPIDLE:
|
|
rv = vls_attr (vlsh, VPPCOM_ATTR_SET_TCP_KEEPIDLE,
|
|
(void *) optval, &optlen);
|
|
break;
|
|
case TCP_KEEPINTVL:
|
|
rv = vls_attr (vlsh, VPPCOM_ATTR_SET_TCP_KEEPINTVL,
|
|
(void *) optval, &optlen);
|
|
break;
|
|
case TCP_CONGESTION:
|
|
case TCP_CORK:
|
|
/* Ignore */
|
|
rv = 0;
|
|
break;
|
|
default:
|
|
LDBG (0, "ERROR: fd %d: setsockopt() SOL_TCP: vlsh %u"
|
|
"optname %d unsupported!", fd, vlsh, optname);
|
|
break;
|
|
}
|
|
break;
|
|
case SOL_IPV6:
|
|
switch (optname)
|
|
{
|
|
case IPV6_V6ONLY:
|
|
rv = vls_attr (vlsh, VPPCOM_ATTR_SET_V6ONLY,
|
|
(void *) optval, &optlen);
|
|
break;
|
|
default:
|
|
LDBG (0, "ERROR: fd %d: setsockopt SOL_IPV6: vlsh %u"
|
|
"optname %d unsupported!", fd, vlsh, optname);
|
|
break;
|
|
}
|
|
break;
|
|
case SOL_SOCKET:
|
|
switch (optname)
|
|
{
|
|
case SO_KEEPALIVE:
|
|
rv = vls_attr (vlsh, VPPCOM_ATTR_SET_KEEPALIVE,
|
|
(void *) optval, &optlen);
|
|
break;
|
|
case SO_REUSEADDR:
|
|
rv = vls_attr (vlsh, VPPCOM_ATTR_SET_REUSEADDR,
|
|
(void *) optval, &optlen);
|
|
break;
|
|
case SO_REUSEPORT:
|
|
rv = vls_attr (vlsh, VPPCOM_ATTR_SET_REUSEPORT, (void *) optval,
|
|
&optlen);
|
|
break;
|
|
case SO_BROADCAST:
|
|
rv = vls_attr (vlsh, VPPCOM_ATTR_SET_BROADCAST,
|
|
(void *) optval, &optlen);
|
|
break;
|
|
case SO_LINGER:
|
|
rv = 0;
|
|
break;
|
|
default:
|
|
LDBG (0, "ERROR: fd %d: setsockopt SOL_SOCKET: vlsh %u "
|
|
"optname %d unsupported!", fd, vlsh, optname);
|
|
break;
|
|
}
|
|
break;
|
|
case SOL_IP:
|
|
switch (optname)
|
|
{
|
|
case IP_PKTINFO:
|
|
rv = vls_attr (vlsh, VPPCOM_ATTR_SET_IP_PKTINFO, (void *) optval,
|
|
&optlen);
|
|
break;
|
|
default:
|
|
LDBG (0,
|
|
"ERROR: fd %d: setsockopt SOL_IP: vlsh %u optname %d"
|
|
"unsupported!",
|
|
fd, vlsh, optname);
|
|
break;
|
|
}
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
|
|
if (rv != VPPCOM_OK)
|
|
{
|
|
errno = -rv;
|
|
rv = -1;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
rv = libc_setsockopt (fd, level, optname, optval, optlen);
|
|
}
|
|
|
|
return rv;
|
|
}
|
|
|
|
int
|
|
listen (int fd, int n)
|
|
{
|
|
vls_handle_t vlsh;
|
|
int rv;
|
|
|
|
ldp_init_check ();
|
|
|
|
vlsh = ldp_fd_to_vlsh (fd);
|
|
if (vlsh != VLS_INVALID_HANDLE)
|
|
{
|
|
LDBG (0, "fd %d: calling vls_listen: vlsh %u, n %d", fd, vlsh, n);
|
|
|
|
rv = vls_listen (vlsh, n);
|
|
if (rv != VPPCOM_OK)
|
|
{
|
|
errno = -rv;
|
|
rv = -1;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
LDBG (0, "fd %d: calling libc_listen(): n %d", fd, n);
|
|
rv = libc_listen (fd, n);
|
|
}
|
|
|
|
LDBG (1, "fd %d: returning %d", fd, rv);
|
|
return rv;
|
|
}
|
|
|
|
static inline int
|
|
ldp_accept4 (int listen_fd, __SOCKADDR_ARG _addr,
|
|
socklen_t *__restrict addr_len, int flags)
|
|
{
|
|
struct sockaddr *addr = SOCKADDR_GET_SA (_addr);
|
|
vls_handle_t listen_vlsh, accept_vlsh;
|
|
int rv;
|
|
|
|
ldp_init_check ();
|
|
|
|
listen_vlsh = ldp_fd_to_vlsh (listen_fd);
|
|
if (listen_vlsh != VLS_INVALID_HANDLE)
|
|
{
|
|
vppcom_endpt_t ep;
|
|
u8 src_addr[sizeof (struct sockaddr_in6)];
|
|
memset (&ep, 0, sizeof (ep));
|
|
ep.ip = src_addr;
|
|
|
|
LDBG (0, "listen fd %d: calling vppcom_session_accept: listen sid %u,"
|
|
" ep %p, flags 0x%x", listen_fd, listen_vlsh, &ep, flags);
|
|
|
|
accept_vlsh = vls_accept (listen_vlsh, &ep, flags);
|
|
if (accept_vlsh < 0)
|
|
{
|
|
errno = -accept_vlsh;
|
|
rv = -1;
|
|
}
|
|
else
|
|
{
|
|
rv = ldp_copy_ep_to_sockaddr (addr, addr_len, &ep);
|
|
if (rv != VPPCOM_OK)
|
|
{
|
|
(void) vls_close (accept_vlsh);
|
|
errno = -rv;
|
|
rv = -1;
|
|
}
|
|
else
|
|
{
|
|
rv = ldp_vlsh_to_fd (accept_vlsh);
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
LDBG (0, "listen fd %d: calling libc_accept4(): addr %p, addr_len %p,"
|
|
" flags 0x%x", listen_fd, addr, addr_len, flags);
|
|
|
|
rv = libc_accept4 (listen_fd, addr, addr_len, flags);
|
|
}
|
|
|
|
LDBG (1, "listen fd %d: accept returning %d", listen_fd, rv);
|
|
|
|
return rv;
|
|
}
|
|
|
|
/* accept4() interposer: forwards all arguments to ldp_accept4(). */
int
accept4 (int fd, __SOCKADDR_ARG addr, socklen_t * __restrict addr_len,
	 int flags)
{
  int new_fd = ldp_accept4 (fd, addr, addr_len, flags);

  return new_fd;
}
|
|
|
|
/* accept() interposer: ldp_accept4() with no flags set. */
int
accept (int fd, __SOCKADDR_ARG addr, socklen_t * __restrict addr_len)
{
  int new_fd = ldp_accept4 (fd, addr, addr_len, 0);

  return new_fd;
}
|
|
|
|
int
|
|
shutdown (int fd, int how)
|
|
{
|
|
vls_handle_t vlsh;
|
|
int rv = 0;
|
|
|
|
ldp_init_check ();
|
|
|
|
vlsh = ldp_fd_to_vlsh (fd);
|
|
if (vlsh != VLS_INVALID_HANDLE)
|
|
{
|
|
LDBG (0, "called shutdown: fd %u vlsh %u how %d", fd, vlsh, how);
|
|
rv = vls_shutdown (vlsh, how);
|
|
}
|
|
else
|
|
{
|
|
LDBG (0, "fd %d: calling libc_shutdown: how %d", fd, how);
|
|
rv = libc_shutdown (fd, how);
|
|
}
|
|
|
|
return rv;
|
|
}
|
|
|
|
int
|
|
epoll_create1 (int flags)
|
|
{
|
|
ldp_worker_ctx_t *ldpw = ldp_worker_get_current ();
|
|
vls_handle_t vlsh;
|
|
int rv;
|
|
|
|
ldp_init_check ();
|
|
|
|
if (ldp->vcl_needs_real_epoll || vls_use_real_epoll ())
|
|
{
|
|
rv = libc_epoll_create1 (flags);
|
|
ldp->vcl_needs_real_epoll = 0;
|
|
/* Assume this is a request to create the mq epfd */
|
|
ldpw->vcl_mq_epfd = rv;
|
|
LDBG (0, "created vcl epfd %u", rv);
|
|
return rv;
|
|
}
|
|
|
|
vlsh = vls_epoll_create ();
|
|
if (PREDICT_FALSE (vlsh == VLS_INVALID_HANDLE))
|
|
{
|
|
errno = -vlsh;
|
|
rv = -1;
|
|
}
|
|
else
|
|
{
|
|
rv = ldp_vlsh_to_fd (vlsh);
|
|
}
|
|
LDBG (0, "epoll_create epfd %u vlsh %u", rv, vlsh);
|
|
return rv;
|
|
}
|
|
|
|
/* epoll_create() interposer: size is not used, epoll_create1 (0) does the
 * work. */
int
epoll_create (int size)
{
  int epfd = epoll_create1 (0);

  return epfd;
}
|
|
|
|
int
|
|
epoll_ctl (int epfd, int op, int fd, struct epoll_event *event)
|
|
{
|
|
vls_handle_t vep_vlsh, vlsh;
|
|
int rv;
|
|
|
|
ldp_init_check ();
|
|
|
|
vep_vlsh = ldp_fd_to_vlsh (epfd);
|
|
if (PREDICT_FALSE (vep_vlsh == VLS_INVALID_HANDLE))
|
|
{
|
|
/* The LDP epoll_create1 always creates VCL epfd's.
|
|
* The app should never have a kernel base epoll fd unless it
|
|
* was acquired outside of the LD_PRELOAD process context.
|
|
* In any case, if we get one, punt it to libc_epoll_ctl.
|
|
*/
|
|
LDBG (1,
|
|
"epfd %d: calling libc_epoll_ctl: op %d, fd %d"
|
|
" events 0x%x",
|
|
epfd, op, fd, event ? event->events : 0);
|
|
|
|
rv = libc_epoll_ctl (epfd, op, fd, event);
|
|
goto done;
|
|
}
|
|
|
|
vlsh = ldp_fd_to_vlsh (fd);
|
|
|
|
LDBG (0, "epfd %d ep_vlsh %d, fd %u vlsh %d, op %u", epfd, vep_vlsh, fd,
|
|
vlsh, op);
|
|
|
|
if (vlsh != VLS_INVALID_HANDLE)
|
|
{
|
|
LDBG (1,
|
|
"epfd %d: calling vls_epoll_ctl: ep_vlsh %d op %d, vlsh %u,"
|
|
" events 0x%x",
|
|
epfd, vep_vlsh, op, vlsh, event ? event->events : 0);
|
|
|
|
rv = vls_epoll_ctl (vep_vlsh, op, vlsh, event);
|
|
if (rv != VPPCOM_OK)
|
|
{
|
|
errno = -rv;
|
|
rv = -1;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
int libc_epfd;
|
|
|
|
libc_epfd = vls_get_libc_epfd (vep_vlsh);
|
|
if (!libc_epfd)
|
|
{
|
|
LDBG (1, "epfd %d, vep_vlsh %d calling libc_epoll_create1: "
|
|
"EPOLL_CLOEXEC", epfd, vep_vlsh);
|
|
|
|
libc_epfd = libc_epoll_create1 (EPOLL_CLOEXEC);
|
|
if (libc_epfd < 0)
|
|
{
|
|
rv = libc_epfd;
|
|
goto done;
|
|
}
|
|
|
|
rv = vls_set_libc_epfd (vep_vlsh, libc_epfd);
|
|
if (rv < 0)
|
|
{
|
|
errno = -rv;
|
|
rv = -1;
|
|
goto done;
|
|
}
|
|
}
|
|
else if (PREDICT_FALSE (libc_epfd < 0))
|
|
{
|
|
errno = -epfd;
|
|
rv = -1;
|
|
goto done;
|
|
}
|
|
|
|
LDBG (1, "epfd %d: calling libc_epoll_ctl: libc_epfd %d, op %d, fd %d,"
|
|
" event %p", epfd, libc_epfd, op, fd, event);
|
|
|
|
rv = libc_epoll_ctl (libc_epfd, op, fd, event);
|
|
}
|
|
|
|
done:
|
|
return rv;
|
|
}
|
|
|
|
static inline int
|
|
ldp_epoll_pwait (int epfd, struct epoll_event *events, int maxevents,
|
|
int timeout, const sigset_t * sigmask)
|
|
{
|
|
ldp_worker_ctx_t *ldpw;
|
|
double time_to_wait = (double) 0, max_time;
|
|
int libc_epfd, rv = 0;
|
|
vls_handle_t ep_vlsh;
|
|
|
|
ldp_init_check ();
|
|
|
|
if (PREDICT_FALSE (!events || (timeout < -1)))
|
|
{
|
|
errno = EFAULT;
|
|
return -1;
|
|
}
|
|
|
|
if (PREDICT_FALSE (vppcom_worker_index () == ~0))
|
|
vls_register_vcl_worker ();
|
|
|
|
ldpw = ldp_worker_get_current ();
|
|
if (epfd == ldpw->vcl_mq_epfd)
|
|
return libc_epoll_pwait (epfd, events, maxevents, timeout, sigmask);
|
|
|
|
ep_vlsh = ldp_fd_to_vlsh (epfd);
|
|
if (PREDICT_FALSE (ep_vlsh == VLS_INVALID_HANDLE))
|
|
{
|
|
LDBG (0, "epfd %d: bad ep_vlsh %d!", epfd, ep_vlsh);
|
|
errno = EBADFD;
|
|
return -1;
|
|
}
|
|
|
|
if (PREDICT_FALSE (ldpw->clib_time.init_cpu_time == 0))
|
|
clib_time_init (&ldpw->clib_time);
|
|
time_to_wait = ((timeout >= 0) ? (double) timeout / 1000 : 0);
|
|
max_time = clib_time_now (&ldpw->clib_time) + time_to_wait;
|
|
|
|
libc_epfd = vls_get_libc_epfd (ep_vlsh);
|
|
if (PREDICT_FALSE (libc_epfd < 0))
|
|
{
|
|
errno = -libc_epfd;
|
|
rv = -1;
|
|
goto done;
|
|
}
|
|
|
|
LDBG (2, "epfd %d: vep_idx %d, libc_epfd %d, events %p, maxevents %d, "
|
|
"timeout %d, sigmask %p: time_to_wait %.02f", epfd, ep_vlsh,
|
|
libc_epfd, events, maxevents, timeout, sigmask, time_to_wait);
|
|
do
|
|
{
|
|
if (!ldpw->epoll_wait_vcl)
|
|
{
|
|
rv = vls_epoll_wait (ep_vlsh, events, maxevents, 0);
|
|
if (rv > 0)
|
|
{
|
|
ldpw->epoll_wait_vcl = 1;
|
|
goto done;
|
|
}
|
|
else if (rv < 0)
|
|
{
|
|
errno = -rv;
|
|
rv = -1;
|
|
goto done;
|
|
}
|
|
}
|
|
else
|
|
ldpw->epoll_wait_vcl = 0;
|
|
|
|
if (libc_epfd > 0)
|
|
{
|
|
rv = libc_epoll_pwait (libc_epfd, events, maxevents, 0, sigmask);
|
|
if (rv != 0)
|
|
goto done;
|
|
}
|
|
}
|
|
while ((timeout == -1) || (clib_time_now (&ldpw->clib_time) < max_time));
|
|
|
|
done:
|
|
return rv;
|
|
}
|
|
|
|
static inline int
|
|
ldp_epoll_pwait_eventfd (int epfd, struct epoll_event *events,
|
|
int maxevents, int timeout, const sigset_t * sigmask)
|
|
{
|
|
int libc_epfd, rv = 0, num_ev, libc_num_ev, vcl_wups = 0;
|
|
struct epoll_event *libc_evts;
|
|
ldp_worker_ctx_t *ldpw;
|
|
vls_handle_t ep_vlsh;
|
|
|
|
ldp_init_check ();
|
|
|
|
if (PREDICT_FALSE (!events || (timeout < -1)))
|
|
{
|
|
errno = EFAULT;
|
|
return -1;
|
|
}
|
|
|
|
/* Make sure the vcl worker is valid. Could be that epoll fd was created on
|
|
* one thread but it is now used on another */
|
|
if (PREDICT_FALSE (vppcom_worker_index () == ~0))
|
|
vls_register_vcl_worker ();
|
|
|
|
ldpw = ldp_worker_get_current ();
|
|
if (epfd == ldpw->vcl_mq_epfd)
|
|
return libc_epoll_pwait (epfd, events, maxevents, timeout, sigmask);
|
|
|
|
ep_vlsh = ldp_fd_to_vlsh (epfd);
|
|
if (PREDICT_FALSE (ep_vlsh == VLS_INVALID_HANDLE))
|
|
{
|
|
LDBG (0, "epfd %d: bad ep_vlsh %d!", epfd, ep_vlsh);
|
|
errno = EBADFD;
|
|
return -1;
|
|
}
|
|
|
|
libc_epfd = vls_get_libc_epfd (ep_vlsh);
|
|
if (PREDICT_FALSE (!libc_epfd))
|
|
{
|
|
LDBG (1, "epfd %d, vep_vlsh %d calling libc_epoll_create1: "
|
|
"EPOLL_CLOEXEC", epfd, ep_vlsh);
|
|
libc_epfd = libc_epoll_create1 (EPOLL_CLOEXEC);
|
|
if (libc_epfd < 0)
|
|
{
|
|
rv = libc_epfd;
|
|
goto done;
|
|
}
|
|
|
|
rv = vls_set_libc_epfd (ep_vlsh, libc_epfd);
|
|
if (rv < 0)
|
|
{
|
|
errno = -rv;
|
|
rv = -1;
|
|
goto done;
|
|
}
|
|
}
|
|
if (PREDICT_FALSE (libc_epfd <= 0))
|
|
{
|
|
errno = -libc_epfd;
|
|
rv = -1;
|
|
goto done;
|
|
}
|
|
|
|
if (PREDICT_FALSE (!ldpw->mq_epfd_added))
|
|
{
|
|
struct epoll_event e = { 0 };
|
|
ldpw->vcl_mq_epfd = vppcom_mq_epoll_fd ();
|
|
e.events = EPOLLIN;
|
|
e.data.fd = ldpw->vcl_mq_epfd;
|
|
if (libc_epoll_ctl (libc_epfd, EPOLL_CTL_ADD, ldpw->vcl_mq_epfd, &e) <
|
|
0)
|
|
{
|
|
LDBG (0, "epfd %d, add libc mq epoll fd %d to libc epoll fd %d",
|
|
epfd, ldpw->vcl_mq_epfd, libc_epfd);
|
|
rv = -1;
|
|
goto done;
|
|
}
|
|
ldpw->mq_epfd_added = 1;
|
|
}
|
|
|
|
/* Request to only drain unhandled to prevent libc_epoll_wait starved */
|
|
rv = vls_epoll_wait (ep_vlsh, events, maxevents, -2);
|
|
if (rv > 0)
|
|
{
|
|
timeout = 0;
|
|
if (rv >= maxevents)
|
|
goto done;
|
|
maxevents -= rv;
|
|
}
|
|
else if (PREDICT_FALSE (rv < 0))
|
|
{
|
|
errno = -rv;
|
|
rv = -1;
|
|
goto done;
|
|
}
|
|
|
|
epoll_again:
|
|
|
|
libc_evts = &events[rv];
|
|
libc_num_ev =
|
|
libc_epoll_pwait (libc_epfd, libc_evts, maxevents, timeout, sigmask);
|
|
if (libc_num_ev <= 0)
|
|
{
|
|
rv = rv >= 0 ? rv : -1;
|
|
goto done;
|
|
}
|
|
|
|
for (int i = 0; i < libc_num_ev; i++)
|
|
{
|
|
if (libc_evts[i].data.fd == ldpw->vcl_mq_epfd)
|
|
{
|
|
/* We should remove mq epoll fd from events. */
|
|
libc_num_ev--;
|
|
if (i != libc_num_ev)
|
|
{
|
|
libc_evts[i].events = libc_evts[libc_num_ev].events;
|
|
libc_evts[i].data.u64 = libc_evts[libc_num_ev].data.u64;
|
|
}
|
|
num_ev = vls_epoll_wait (ep_vlsh, &libc_evts[libc_num_ev],
|
|
maxevents - libc_num_ev, 0);
|
|
if (PREDICT_TRUE (num_ev > 0))
|
|
rv += num_ev;
|
|
/* Woken up by vcl but no events generated. Accept it once */
|
|
if (rv == 0 && libc_num_ev == 0 && timeout && vcl_wups++ < 1)
|
|
goto epoll_again;
|
|
break;
|
|
}
|
|
}
|
|
|
|
rv += libc_num_ev;
|
|
|
|
done:
|
|
return rv;
|
|
}
|
|
|
|
/* epoll_pwait() interposer: picks the eventfd based or the polling
 * implementation depending on vls_use_eventfd(). */
int
epoll_pwait (int epfd, struct epoll_event *events,
	     int maxevents, int timeout, const sigset_t * sigmask)
{
  int n_evts;

  if (!vls_use_eventfd ())
    n_evts = ldp_epoll_pwait (epfd, events, maxevents, timeout, sigmask);
  else
    n_evts = ldp_epoll_pwait_eventfd (epfd, events, maxevents, timeout,
				      sigmask);

  return n_evts;
}
|
|
|
|
/* epoll_wait() interposer: same dispatch as epoll_pwait(), without a
 * signal mask. */
int
epoll_wait (int epfd, struct epoll_event *events, int maxevents, int timeout)
{
  int n_evts;

  if (!vls_use_eventfd ())
    n_evts = ldp_epoll_pwait (epfd, events, maxevents, timeout, NULL);
  else
    n_evts = ldp_epoll_pwait_eventfd (epfd, events, maxevents, timeout, NULL);

  return n_evts;
}
|
|
|
|
int
|
|
poll (struct pollfd *fds, nfds_t nfds, int timeout)
|
|
{
|
|
int rv, i, n_revents = 0;
|
|
ldp_worker_ctx_t *ldpw;
|
|
vls_handle_t vlsh;
|
|
vcl_poll_t *vp;
|
|
double max_time;
|
|
|
|
ldp_init_check ();
|
|
|
|
if (PREDICT_FALSE (vppcom_worker_index () == ~0))
|
|
vls_register_vcl_worker ();
|
|
|
|
ldpw = ldp_worker_get_current ();
|
|
|
|
LDBG (3, "fds %p, nfds %ld, timeout %d", fds, nfds, timeout);
|
|
|
|
if (PREDICT_FALSE (ldpw->clib_time.init_cpu_time == 0))
|
|
clib_time_init (&ldpw->clib_time);
|
|
|
|
max_time = (timeout >= 0) ? (f64) timeout / 1000 : 0;
|
|
max_time += clib_time_now (&ldpw->clib_time);
|
|
|
|
for (i = 0; i < nfds; i++)
|
|
{
|
|
if (fds[i].fd < 0)
|
|
continue;
|
|
|
|
vlsh = ldp_fd_to_vlsh (fds[i].fd);
|
|
if (vlsh != VLS_INVALID_HANDLE)
|
|
{
|
|
fds[i].fd = -fds[i].fd;
|
|
vec_add2 (ldpw->vcl_poll, vp, 1);
|
|
vp->fds_ndx = i;
|
|
vp->sh = vlsh_to_sh (vlsh);
|
|
vp->events = fds[i].events;
|
|
#ifdef __USE_XOPEN2K
|
|
if (fds[i].events & POLLRDNORM)
|
|
vp->events |= POLLIN;
|
|
if (fds[i].events & POLLWRNORM)
|
|
vp->events |= POLLOUT;
|
|
#endif
|
|
vp->revents = fds[i].revents;
|
|
}
|
|
else
|
|
{
|
|
vec_add1 (ldpw->libc_poll, fds[i]);
|
|
vec_add1 (ldpw->libc_poll_idxs, i);
|
|
}
|
|
}
|
|
|
|
do
|
|
{
|
|
if (vec_len (ldpw->vcl_poll))
|
|
{
|
|
rv = vls_poll (ldpw->vcl_poll, vec_len (ldpw->vcl_poll), 0);
|
|
if (rv < 0)
|
|
{
|
|
errno = -rv;
|
|
rv = -1;
|
|
goto done;
|
|
}
|
|
else
|
|
n_revents += rv;
|
|
}
|
|
|
|
if (vec_len (ldpw->libc_poll))
|
|
{
|
|
rv = libc_poll (ldpw->libc_poll, vec_len (ldpw->libc_poll), 0);
|
|
if (rv < 0)
|
|
goto done;
|
|
else
|
|
n_revents += rv;
|
|
}
|
|
|
|
if (n_revents)
|
|
{
|
|
rv = n_revents;
|
|
goto done;
|
|
}
|
|
}
|
|
while ((timeout < 0) || (clib_time_now (&ldpw->clib_time) < max_time));
|
|
rv = 0;
|
|
|
|
done:
|
|
vec_foreach (vp, ldpw->vcl_poll)
|
|
{
|
|
fds[vp->fds_ndx].fd = -fds[vp->fds_ndx].fd;
|
|
fds[vp->fds_ndx].revents = vp->revents;
|
|
#ifdef __USE_XOPEN2K
|
|
if ((fds[vp->fds_ndx].revents & POLLIN) &&
|
|
(fds[vp->fds_ndx].events & POLLRDNORM))
|
|
fds[vp->fds_ndx].revents |= POLLRDNORM;
|
|
if ((fds[vp->fds_ndx].revents & POLLOUT) &&
|
|
(fds[vp->fds_ndx].events & POLLWRNORM))
|
|
fds[vp->fds_ndx].revents |= POLLWRNORM;
|
|
#endif
|
|
}
|
|
vec_reset_length (ldpw->vcl_poll);
|
|
|
|
for (i = 0; i < vec_len (ldpw->libc_poll); i++)
|
|
{
|
|
fds[ldpw->libc_poll_idxs[i]].revents = ldpw->libc_poll[i].revents;
|
|
}
|
|
vec_reset_length (ldpw->libc_poll_idxs);
|
|
vec_reset_length (ldpw->libc_poll);
|
|
|
|
return rv;
|
|
}
|
|
|
|
#ifdef _GNU_SOURCE
|
|
int
|
|
ppoll (struct pollfd *fds, nfds_t nfds,
|
|
const struct timespec *timeout, const sigset_t * sigmask)
|
|
{
|
|
ldp_init_check ();
|
|
|
|
clib_warning ("LDP<%d>: LDP-TBD", getpid ());
|
|
errno = ENOSYS;
|
|
|
|
|
|
return -1;
|
|
}
|
|
#endif
|
|
|
|
void CONSTRUCTOR_ATTRIBUTE ldp_constructor (void);
|
|
|
|
void DESTRUCTOR_ATTRIBUTE ldp_destructor (void);
|
|
|
|
/*
|
|
* This function is called when the library is loaded
|
|
*/
|
|
void
|
|
ldp_constructor (void)
|
|
{
|
|
swrap_constructor ();
|
|
if (ldp_init () != 0)
|
|
{
|
|
fprintf (stderr, "\nLDP<%d>: ERROR: ldp_constructor: failed!\n",
|
|
getpid ());
|
|
_exit (1);
|
|
}
|
|
else if (LDP_DEBUG > 0)
|
|
clib_warning ("LDP<%d>: LDP constructor: done!\n", getpid ());
|
|
}
|
|
|
|
/*
|
|
* This function is called when the library is unloaded
|
|
*/
|
|
void
|
|
ldp_destructor (void)
|
|
{
|
|
/*
|
|
swrap_destructor ();
|
|
if (ldp->init)
|
|
ldp->init = 0;
|
|
*/
|
|
|
|
/* Don't use clib_warning() here because that calls writev()
|
|
* which will call ldp_init().
|
|
*/
|
|
if (LDP_DEBUG > 0)
|
|
fprintf (stderr, "%s:%d: LDP<%d>: LDP destructor: done!\n",
|
|
__func__, __LINE__, getpid ());
|
|
}
|
|
|
|
|
|
/*
|
|
* fd.io coding-style-patch-verification: ON
|
|
*
|
|
* Local Variables:
|
|
* eval: (c-set-style "gnu")
|
|
* End:
|
|
*/
|