diff --git a/src/svm/svm_fifo.h b/src/svm/svm_fifo.h index 9bba85fddf4..613e0cad5eb 100644 --- a/src/svm/svm_fifo.h +++ b/src/svm/svm_fifo.h @@ -41,6 +41,13 @@ format_function_t format_ooo_list; #define SVM_FIFO_INVALID_SESSION_INDEX ((u32)~0) #define SVM_FIFO_INVALID_INDEX ((u32)~0) +enum +{ + SVM_FIFO_NO_TX_NOTIF = 0, /**< no tx notification requested */ + SVM_FIFO_WANT_TX_NOTIF = 1, /**< notify whenever checked; removed by svm_fifo_clear_tx_ntf */ + SVM_FIFO_WANT_TX_NOTIF_IF_FULL = 2, /**< notify only when a dequeue takes the fifo out of the full state */ +}; + typedef struct { u32 offset; @@ -66,7 +73,8 @@ typedef struct _svm_fifo u32 ct_session_index; /**< Local session index for vpp */ CLIB_CACHE_LINE_ALIGN_MARK (end_shared); u32 head; - volatile u32 want_tx_evt; /**< producer wants nudge */ + volatile u32 want_tx_ntf; /**< producer wants nudge */ + volatile u32 has_tx_ntf; /**< if-full ntf delivered and not yet reset */ CLIB_CACHE_LINE_ALIGN_MARK (end_consumer); /* producer */ @@ -174,18 +182,6 @@ svm_fifo_unset_event (svm_fifo_t * f) clib_atomic_release (&f->has_event); } -static inline void -svm_fifo_set_want_tx_evt (svm_fifo_t * f, u8 want_evt) -{ - f->want_tx_evt = want_evt; -} - -static inline u8 -svm_fifo_want_tx_evt (svm_fifo_t * f) -{ - return f->want_tx_evt; -} - svm_fifo_t *svm_fifo_create (u32 data_size_in_bytes); void svm_fifo_free (svm_fifo_t * f); @@ -200,27 +196,10 @@ int svm_fifo_dequeue_drop (svm_fifo_t * f, u32 max_bytes); void svm_fifo_dequeue_drop_all (svm_fifo_t * f); int svm_fifo_segments (svm_fifo_t * f, svm_fifo_segment_t * fs); void svm_fifo_segments_free (svm_fifo_t * f, svm_fifo_segment_t * fs); -u32 svm_fifo_number_ooo_segments (svm_fifo_t * f); -ooo_segment_t *svm_fifo_first_ooo_segment (svm_fifo_t * f); void svm_fifo_init_pointers (svm_fifo_t * f, u32 pointer); void svm_fifo_overwrite_head (svm_fifo_t * f, u8 * data, u32 len); - format_function_t format_svm_fifo; -always_inline ooo_segment_t * -svm_fifo_newest_ooo_segment (svm_fifo_t * f) -{ - if (f->ooos_newest == OOO_SEGMENT_INVALID_INDEX) - return 0; - return pool_elt_at_index (f->ooo_segments, f->ooos_newest); -} - -always_inline void -svm_fifo_newest_ooo_segment_reset (svm_fifo_t * f) -{ - 
f->ooos_newest = OOO_SEGMENT_INVALID_INDEX; -} - /** * Max contiguous chunk of data that can be read */ @@ -264,6 +243,77 @@ svm_fifo_tail (svm_fifo_t * f) return (f->data + f->tail); } +always_inline u32 +svm_fifo_nitems (svm_fifo_t * f) /* accessor for the fifo's nitems field */ +{ + return f->nitems; +} + +static inline void +svm_fifo_add_want_tx_ntf (svm_fifo_t * f, u8 ntf_type) /* set the SVM_FIFO_*_TX_NOTIF* flag(s) in want_tx_ntf */ +{ + f->want_tx_ntf |= ntf_type; +} + +static inline void +svm_fifo_del_want_tx_ntf (svm_fifo_t * f, u8 ntf_type) /* clear the given flag(s); passing 0 (SVM_FIFO_NO_TX_NOTIF) is a no-op */ +{ + f->want_tx_ntf &= ~ntf_type; +} + +static inline void +svm_fifo_clear_tx_ntf (svm_fifo_t * f) /* bookkeeping for a delivered notification */ +{ + /* Set the flag if want_tx_notif_if_full was the only ntf requested */ + f->has_tx_ntf = f->want_tx_ntf == SVM_FIFO_WANT_TX_NOTIF_IF_FULL; + svm_fifo_del_want_tx_ntf (f, SVM_FIFO_WANT_TX_NOTIF); /* the unconditional request is dropped once served */ +} + +static inline void +svm_fifo_reset_tx_ntf (svm_fifo_t * f) /* re-arm if-full notifications: svm_fifo_needs_tx_ntf requires has_tx_ntf == 0 */ +{ + f->has_tx_ntf = 0; +} + +static inline u8 +svm_fifo_needs_tx_ntf (svm_fifo_t * f, u32 n_last_deq) /* returns 1 if a tx notification should be sent; n_last_deq is the amount removed by the latest dequeue */ +{ + u8 want_ntf = f->want_tx_ntf; + + if (PREDICT_TRUE (want_ntf == SVM_FIFO_NO_TX_NOTIF)) + return 0; + else if (want_ntf & SVM_FIFO_WANT_TX_NOTIF) + return 1; + else if (want_ntf & SVM_FIFO_WANT_TX_NOTIF_IF_FULL) + { + u32 max_deq = svm_fifo_max_dequeue (f); + u32 nitems = svm_fifo_nitems (f); + if (!f->has_tx_ntf && max_deq < nitems /* below nitems now ... */ + && max_deq + n_last_deq >= nitems) /* ... but at or above nitems before the last dequeue */ + return 1; + + return 0; + } + return 0; +} + +u32 svm_fifo_number_ooo_segments (svm_fifo_t * f); +ooo_segment_t *svm_fifo_first_ooo_segment (svm_fifo_t * f); + +always_inline ooo_segment_t * +svm_fifo_newest_ooo_segment (svm_fifo_t * f) +{ + if (f->ooos_newest == OOO_SEGMENT_INVALID_INDEX) + return 0; + return pool_elt_at_index (f->ooo_segments, f->ooos_newest); +} + +always_inline void +svm_fifo_newest_ooo_segment_reset (svm_fifo_t * f) +{ + f->ooos_newest = OOO_SEGMENT_INVALID_INDEX; +} + always_inline u32 ooo_segment_distance_from_tail (svm_fifo_t * f, u32 pos) { diff --git a/src/vcl/vppcom.c b/src/vcl/vppcom.c index 54c5f10284d..1797d93c683 100644 --- a/src/vcl/vppcom.c +++ 
b/src/vcl/vppcom.c @@ -1744,9 +1744,9 @@ vppcom_session_read_internal (uint32_t session_handle, void *buf, int n, if (svm_fifo_is_empty (rx_fifo)) svm_fifo_unset_event (rx_fifo); - if (is_ct && svm_fifo_want_tx_evt (rx_fifo)) + if (is_ct && svm_fifo_needs_tx_ntf (rx_fifo, n_read)) { - svm_fifo_set_want_tx_evt (s->rx_fifo, 0); + svm_fifo_clear_tx_ntf (s->rx_fifo); app_send_io_evt_to_vpp (s->vpp_evt_q, s->rx_fifo, SESSION_IO_EVT_CT_RX, SVM_Q_WAIT); } @@ -1959,7 +1959,7 @@ vppcom_session_write_inline (uint32_t session_handle, void *buf, size_t n, } while (svm_fifo_is_full (tx_fifo)) { - svm_fifo_set_want_tx_evt (tx_fifo, 1); + svm_fifo_add_want_tx_ntf (tx_fifo, SVM_FIFO_WANT_TX_NOTIF); svm_msg_q_lock (mq); if (svm_msg_q_is_empty (mq)) svm_msg_q_wait (mq); @@ -2351,7 +2351,7 @@ vppcom_select (int n_bits, vcl_si_set * read_map, vcl_si_set * write_map, bits_set++; } else - svm_fifo_set_want_tx_evt (session->tx_fifo, 1); + svm_fifo_add_want_tx_ntf (session->tx_fifo, SVM_FIFO_WANT_TX_NOTIF); })); check_rd: @@ -2570,6 +2570,10 @@ vppcom_epoll_ctl (uint32_t vep_handle, int op, uint32_t session_handle, session->is_vep_session = 1; vep_session->vep.next_sh = session_handle; + if (session->tx_fifo) + svm_fifo_add_want_tx_ntf (session->tx_fifo, + SVM_FIFO_WANT_TX_NOTIF_IF_FULL); + VDBG (1, "EPOLL_CTL_ADD: vep_sh %u, sh %u, events 0x%x, data 0x%llx!", vep_handle, session_handle, event->events, event->data.u64); vcl_evt (VCL_EVT_EPOLL_CTLADD, session, event->events, event->data.u64); @@ -2655,6 +2659,10 @@ vppcom_epoll_ctl (uint32_t vep_handle, int op, uint32_t session_handle, session->vep.prev_sh = ~0; session->vep.vep_sh = ~0; session->is_vep_session = 0; + + if (session->tx_fifo) + svm_fifo_del_want_tx_ntf (session->tx_fifo, SVM_FIFO_WANT_TX_NOTIF_IF_FULL); /* undo the EPOLL_CTL_ADD request; deleting SVM_FIFO_NO_TX_NOTIF (0) would clear nothing */ + VDBG (1, "EPOLL_CTL_DEL: vep_idx %u, sid %u!", vep_handle, session_handle); vcl_evt (VCL_EVT_EPOLL_CTLDEL, session, vep_sh); @@ -2708,6 +2716,7 @@ vcl_epoll_wait_handle_mq_event (vcl_worker_t * wrk, session_event_t * e, add_event = 
1; events[*num_ev].events |= EPOLLOUT; session_evt_data = session->vep.ev.data.u64; + svm_fifo_reset_tx_ntf (session->tx_fifo); /* EPOLLOUT reported; allow the next if-full notification */ break; case SESSION_IO_EVT_CT_TX: vcl_fifo_rx_evt_valid_or_break (e->fifo); @@ -2734,6 +2743,7 @@ vcl_epoll_wait_handle_mq_event (vcl_worker_t * wrk, session_event_t * e, add_event = 1; events[*num_ev].events |= EPOLLOUT; session_evt_data = session->vep.ev.data.u64; + svm_fifo_reset_tx_ntf (session->tx_fifo); /* EPOLLOUT reported; allow the next if-full notification */ break; case SESSION_CTRL_EVT_ACCEPTED: session = vcl_session_accepted (wrk, diff --git a/src/vnet/session/session.c b/src/vnet/session/session.c index 4cf0f9e7e49..c56712bbf87 100644 --- a/src/vnet/session/session.c +++ b/src/vnet/session/session.c @@ -557,7 +557,11 @@ session_dequeue_notify (stream_session_t * s) if (PREDICT_FALSE (!app)) return -1; - return app_worker_lock_and_send_event (app, s, FIFO_EVENT_APP_TX); + if (app_worker_lock_and_send_event (app, s, FIFO_EVENT_APP_TX)) + return -1; + + svm_fifo_clear_tx_ntf (s->server_tx_fifo); /* only once the event was queued successfully */ + return 0; } /** diff --git a/src/vnet/session/session.h b/src/vnet/session/session.h index cf1b3e99f4d..2bbc380282c 100644 --- a/src/vnet/session/session.h +++ b/src/vnet/session/session.h @@ -176,20 +176,6 @@ typedef struct session_tx_context_ session_dgram_hdr_t hdr; } session_tx_context_t; -/* Forward definition */ -typedef struct _session_manager_main session_manager_main_t; - -typedef int - (session_fifo_rx_fn) (vlib_main_t * vm, vlib_node_runtime_t * node, - session_event_t * e0, stream_session_t * s0, - int *n_tx_pkts); - -extern session_fifo_rx_fn session_tx_fifo_peek_and_snd; -extern session_fifo_rx_fn session_tx_fifo_dequeue_and_snd; -extern session_fifo_rx_fn session_tx_fifo_dequeue_internal; - -u8 session_node_lookup_fifo_event (svm_fifo_t * f, session_event_t * e); - typedef struct session_manager_worker_ { CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); @@ -237,7 +223,18 @@ typedef struct session_manager_worker_ } session_manager_worker_t; -struct _session_manager_main +typedef int 
(session_fifo_rx_fn) (vlib_main_t * vm, + vlib_node_runtime_t * node, + session_manager_worker_t * wrk, + session_event_t * e, int *n_tx_pkts); + +extern session_fifo_rx_fn session_tx_fifo_peek_and_snd; +extern session_fifo_rx_fn session_tx_fifo_dequeue_and_snd; +extern session_fifo_rx_fn session_tx_fifo_dequeue_internal; + +u8 session_node_lookup_fifo_event (svm_fifo_t * f, session_event_t * e); + +typedef struct session_manager_main_ { /** Worker contexts */ session_manager_worker_t *wrk; @@ -297,7 +294,7 @@ struct _session_manager_main f64 *last_event_poll_by_thread; #endif -}; +} session_manager_main_t; extern session_manager_main_t session_manager_main; extern vlib_node_registration_t session_queue_node; diff --git a/src/vnet/session/session_node.c b/src/vnet/session/session_node.c index 880f16388b8..f5e5efeaf0f 100644 --- a/src/vnet/session/session_node.c +++ b/src/vnet/session/session_node.c @@ -632,32 +632,30 @@ session_tx_set_dequeue_params (vlib_main_t * vm, session_tx_context_t * ctx, always_inline int session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node, - session_event_t * e, - stream_session_t * s, int *n_tx_packets, + session_manager_worker_t * wrk, + session_event_t * e, int *n_tx_packets, u8 peek_data) { u32 next_index, next0, next1, *to_next, n_left_to_next; u32 n_trace = vlib_get_trace_count (vm, node), n_bufs_needed = 0; - u32 thread_index = s->thread_index, n_left, pbi; + u32 thread_index = vm->thread_index, n_left, pbi; session_manager_main_t *smm = &session_manager_main; - session_manager_worker_t *wrk = &smm->wrk[thread_index]; session_tx_context_t *ctx = &wrk->ctx; transport_proto_t tp; vlib_buffer_t *pb; u16 n_bufs, rv; - if (PREDICT_FALSE ((rv = session_tx_not_ready (s, peek_data)))) + if (PREDICT_FALSE ((rv = session_tx_not_ready (ctx->s, peek_data)))) { if (rv < 2) vec_add1 (wrk->pending_event_vector, *e); return SESSION_TX_NO_DATA; } - next_index = smm->session_type_to_next[s->session_type]; + next_index = 
smm->session_type_to_next[ctx->s->session_type]; next0 = next1 = next_index; - tp = session_get_transport_proto (s); - ctx->s = s; + tp = session_get_transport_proto (ctx->s); ctx->transport_vft = transport_protocol_get_vft (tp); ctx->tc = session_tx_get_transport (ctx, peek_data); ctx->snd_mss = ctx->transport_vft->send_mss (ctx->tc); @@ -679,7 +677,7 @@ session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node, } /* Allow enqueuing of a new event */ - svm_fifo_unset_event (s->server_tx_fifo); + svm_fifo_unset_event (ctx->s->server_tx_fifo); /* Check how much we can pull. */ session_tx_set_dequeue_params (vm, ctx, VLIB_FRAME_SIZE - *n_tx_packets, @@ -784,7 +782,7 @@ session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node, if (PREDICT_FALSE (n_trace > 0)) session_tx_trace_frame (vm, node, next_index, to_next, - ctx->n_segs_per_evt, s, n_trace); + ctx->n_segs_per_evt, ctx->s, n_trace); _vec_len (wrk->tx_buffers) = n_bufs; *n_tx_packets += ctx->n_segs_per_evt; @@ -794,18 +792,18 @@ session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node, /* If we couldn't dequeue all bytes mark as partially read */ ASSERT (ctx->left_to_snd == 0); if (ctx->max_len_to_snd < ctx->max_dequeue) - if (svm_fifo_set_event (s->server_tx_fifo)) + if (svm_fifo_set_event (ctx->s->server_tx_fifo)) vec_add1 (wrk->pending_event_vector, *e); if (!peek_data && ctx->transport_vft->tx_type == TRANSPORT_TX_DGRAM) { /* Fix dgram pre header */ if (ctx->max_len_to_snd < ctx->max_dequeue) - svm_fifo_overwrite_head (s->server_tx_fifo, (u8 *) & ctx->hdr, + svm_fifo_overwrite_head (ctx->s->server_tx_fifo, (u8 *) & ctx->hdr, sizeof (session_dgram_pre_hdr_t)); /* More data needs to be read */ - else if (svm_fifo_max_dequeue (s->server_tx_fifo) > 0) - if (svm_fifo_set_event (s->server_tx_fifo)) + else if (svm_fifo_max_dequeue (ctx->s->server_tx_fifo) > 0) + if (svm_fifo_set_event (ctx->s->server_tx_fifo)) vec_add1 (wrk->pending_event_vector, *e); } 
return SESSION_TX_OK; @@ -813,27 +811,29 @@ session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node, int session_tx_fifo_peek_and_snd (vlib_main_t * vm, vlib_node_runtime_t * node, - session_event_t * e, - stream_session_t * s, int *n_tx_pkts) + session_manager_worker_t * wrk, + session_event_t * e, int *n_tx_pkts) { - return session_tx_fifo_read_and_snd_i (vm, node, e, s, n_tx_pkts, 1); + return session_tx_fifo_read_and_snd_i (vm, node, wrk, e, n_tx_pkts, 1); } int session_tx_fifo_dequeue_and_snd (vlib_main_t * vm, vlib_node_runtime_t * node, - session_event_t * e, - stream_session_t * s, int *n_tx_pkts) + session_manager_worker_t * wrk, + session_event_t * e, int *n_tx_pkts) { - return session_tx_fifo_read_and_snd_i (vm, node, e, s, n_tx_pkts, 0); + return session_tx_fifo_read_and_snd_i (vm, node, wrk, e, n_tx_pkts, 0); } int session_tx_fifo_dequeue_internal (vlib_main_t * vm, vlib_node_runtime_t * node, - session_event_t * e, - stream_session_t * s, int *n_tx_pkts) + session_manager_worker_t * wrk, + session_event_t * e, int *n_tx_pkts) { + stream_session_t *s = wrk->ctx.s; application_t *app; + if (PREDICT_FALSE (s->session_state == SESSION_STATE_CLOSED)) return 0; app = application_get (s->t_app_index); @@ -923,7 +923,7 @@ session_queue_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, { stream_session_t *s; /* $$$ prefetch 1 ahead maybe */ session_event_t *e; - u8 want_tx_evt; + u8 need_tx_ntf; e = &fifo_events[i]; switch (e->event_type) @@ -943,19 +943,17 @@ session_queue_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, clib_warning ("session was freed!"); continue; } - - want_tx_evt = svm_fifo_want_tx_evt (s->server_tx_fifo); + wrk->ctx.s = s; /* Spray packets in per session type frames, since they go to * different nodes */ - rv = (smm->session_tx_fns[s->session_type]) (vm, node, e, s, + rv = (smm->session_tx_fns[s->session_type]) (vm, node, wrk, e, &n_tx_packets); if (PREDICT_TRUE (rv == SESSION_TX_OK)) { - if 
(PREDICT_FALSE (want_tx_evt)) - { - svm_fifo_set_want_tx_evt (s->server_tx_fifo, 0); - session_dequeue_notify (s); - } + need_tx_ntf = svm_fifo_needs_tx_ntf (s->server_tx_fifo, + wrk->ctx.max_len_to_snd); + if (PREDICT_FALSE (need_tx_ntf)) + session_dequeue_notify (s); } else if (PREDICT_FALSE (rv == SESSION_TX_NO_BUFFERS)) { @@ -995,8 +993,10 @@ session_queue_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, break; case FIFO_EVENT_BUILTIN_TX: s = session_get_from_handle_if_valid (e->session_handle); + wrk->ctx.s = s; if (PREDICT_TRUE (s != 0)) - session_tx_fifo_dequeue_internal (vm, node, e, s, &n_tx_packets); + session_tx_fifo_dequeue_internal (vm, node, wrk, e, + &n_tx_packets); break; case FIFO_EVENT_RPC: fp = e->rpc_args.fp;