From 2ab470a77fe73548c1d7f6a0d18ed51fbdba36b4 Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Wed, 10 Aug 2016 18:38:36 -0400 Subject: [PATCH] VPP-223 Document vlib cooperative multi-tasking threads Change-Id: I283963b004ed6d91133e7e36811f75834280bbe7 Signed-off-by: Dave Barach --- vlib/vlib/node_funcs.h | 28 ++++++- vlib/vlib/vlib_process_doc.h | 147 +++++++++++++++++++++++++++++++++++ 2 files changed, 173 insertions(+), 2 deletions(-) create mode 100644 vlib/vlib/vlib_process_doc.h diff --git a/vlib/vlib/node_funcs.h b/vlib/vlib/node_funcs.h index b5b7dd5f6ba..265b897eb9b 100644 --- a/vlib/vlib/node_funcs.h +++ b/vlib/vlib/node_funcs.h @@ -410,13 +410,22 @@ vlib_current_process (vlib_main_t * vm) return vlib_get_current_process (vm)->node_runtime.node_index; } -/* Anything less than 1e-6 is considered zero. */ +/** Returns TRUE if a process suspend time is less than 1us + @param dt - remaining poll time in seconds + @returns 1 if dt < 1e-6, 0 otherwise +*/ always_inline uword vlib_process_suspend_time_is_zero (f64 dt) { return dt < 1e-6; } +/** Suspend a vlib cooperative multi-tasking thread for a period of time + @param vm - vlib_main_t * + @param dt - suspend interval in seconds + @returns VLIB_PROCESS_RESUME_LONGJMP_RESUME, routinely ignored +*/ + always_inline uword vlib_process_suspend (vlib_main_t * vm, f64 dt) { @@ -503,7 +512,15 @@ vlib_process_put_event_data (vlib_main_t * vm, void *event_data) vec_add1 (nm->recycled_event_data_vectors, event_data); } -/* Return type & add any events to data vector. */ +/** Return the first event type which has occurred and a vector of per-event + data of that type, or a timeout indication + + @param vm - vlib_main_t pointer + @param data_vector - pointer to a (uword *) vector to receive event data + @returns either an event type and a vector of per-event instance data, + or ~0 to indicate a timeout. 
+*/ + always_inline uword vlib_process_get_events (vlib_main_t * vm, uword ** data_vector) { @@ -654,6 +671,13 @@ vlib_process_wait_for_event_with_type (vlib_main_t * vm, return vlib_process_get_events_helper (p, h[0], data_vector); } +/** Suspend a cooperative multi-tasking thread + Waits for an event, or for the indicated number of seconds to elapse + @param vm - vlib_main_t pointer + @param dt - timeout, in seconds. + @returns the remaining time interval +*/ + always_inline f64 vlib_process_wait_for_event_or_clock (vlib_main_t * vm, f64 dt) { diff --git a/vlib/vlib/vlib_process_doc.h b/vlib/vlib/vlib_process_doc.h new file mode 100644 index 00000000000..953eb0c459d --- /dev/null +++ b/vlib/vlib/vlib_process_doc.h @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +#error do not #include this file! + +/** \file + + Cooperative multi-tasking thread support. + + Vlib provides a lightweight cooperative multi-tasking thread + model. Context switching costs a setjmp/longjmp pair. It's not + unreasonable to put vlib threads to sleep for 10us. + + The graph node scheduler invokes these processes in much the same + way as traditional vector-processing run-to-completion graph + nodes; plus-or-minus a setjmp/longjmp pair required to switch + stacks. Simply set the vlib_node_registration_t type field to + VLIB_NODE_TYPE_PROCESS. Process is a misnomer; these are threads. 
+ + As of this writing, the default stack size is 1<<15; + 32kb. Initialize the node registration's + process_log2_n_stack_bytes member as needed. The graph node + dispatcher makes some effort to detect stack overrun. We map a + no-access page below each thread stack. + + Process node dispatch functions are expected to be while(1) { } + loops which suspend when not otherwise occupied, and which must + not run for unreasonably long periods of time. Unreasonably long + is an application-dependent concept. Over the years, we have + constructed frame-size sensitive control-plane nodes which will + use a much higher fraction of the available CPU bandwidth when the + frame size is low. Classic example: modifying forwarding + tables. So long as the table-builder leaves the forwarding tables + in a valid state, one can suspend the table builder to avoid + dropping packets as a result of control-plane activity. + + Process nodes can suspend for fixed amounts of time, or until another + entity signals an event, or both. See the example below. + + When running in VLIB process context, one must pay strict attention to + loop invariant issues. If one walks a data structure and calls a + function which may suspend, one had best know by construction that it + cannot change. Often, it's best to simply make a snapshot copy of a + data structure, walk the copy at leisure, then free the copy. + + Here's an example: + +
+    #define EXAMPLE_POLL_PERIOD 10.0    // seconds between periodic calls
+
+    static uword
+    example_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
+                     vlib_frame_t * f)
+    {
+      f64 poll_time_remaining;
+      uword event_type, *event_data = 0;
+
+      poll_time_remaining = EXAMPLE_POLL_PERIOD;
+      while (1)
+        {
+          int i;
+
+          // Sleep until the next periodic call is due,
+          // or until we receive event(s).
+          // The return value is the remaining time interval.
+          poll_time_remaining =
+            vlib_process_wait_for_event_or_clock (vm, poll_time_remaining);
+
+          event_type = vlib_process_get_events (vm, &event_data);
+          switch (event_type)
+            {
+            case ~0:            // no events => timeout
+              break;
+
+            case EVENT1:
+              for (i = 0; i < vec_len (event_data); i++)
+                handle_event1 (vm, event_data[i]);
+              break;
+
+            case EVENT2:
+              for (i = 0; i < vec_len (event_data); i++)
+                handle_event2 (vm, event_data[i]);
+              break;
+
+              // ... and so forth for each event type
+
+            default:
+              // This should never happen...
+              clib_warning ("BUG: unhandled event type %d",
+                            event_type);
+              break;
+            }
+          vec_reset_length (event_data);        // keep the vector for reuse
+
+          // Timer expired, call periodic function and re-arm the poll period
+          if (vlib_process_suspend_time_is_zero (poll_time_remaining))
+            {
+              example_periodic (vm);
+              poll_time_remaining = EXAMPLE_POLL_PERIOD;
+            }
+        }
+      // NOTREACHED
+      return 0;
+    }
+
+    static VLIB_REGISTER_NODE (example_node) = {
+      .name = "example-process",           // node name
+      .type = VLIB_NODE_TYPE_PROCESS,      // cooperative multi-tasking thread
+      .function = example_process,         // dispatch function defined above
+    };
+    
+ + In this example, the VLIB process node waits for an event to + occur, or for 10 seconds to elapse. The code demuxes on the event + type, calling the appropriate handler function. + + Each call to vlib_process_get_events returns a vector of + per-event-type data passed to successive vlib_process_signal_event + calls; vec_len (event_data) >= 1. It is an error to process only + event_data[0]. + + Resetting the event_data vector-length to 0 by calling + vec_reset_length (event_data) - instead of calling vec_free (...) + - means that the event scheme doesn't burn cycles continuously + allocating and freeing the event data vector. This is a common + coding pattern, well worth using when appropriate. +*/ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */