Cleanup: use lambdas instead of functors for task pools, remove threadid

2020-06-05 14:18:02 +02:00 · 2020-06-05 14:18:02 +02:00 · b10b7cdb43
commit b10b7cdb43
parent d8c2092b15
11 changed files with 113 additions and 216 deletions
--- a/intern/cycles/bvh/bvh_build.cpp
+++ b/intern/cycles/bvh/bvh_build.cpp
@ -39,48 +39,6 @@

 CCL_NAMESPACE_BEGIN

-/* BVH Build Task */
-
-class BVHBuildTask : public Task {
- public:
-  BVHBuildTask(
-      BVHBuild *build, InnerNode *node, int child, const BVHObjectBinning &range, int level)
-      : range_(range)
-  {
-    run = function_bind(&BVHBuild::thread_build_node, build, node, child, &range_, level);
-  }
-
- private:
-  BVHObjectBinning range_;
-};
-
-class BVHSpatialSplitBuildTask : public Task {
- public:
-  BVHSpatialSplitBuildTask(BVHBuild *build,
-                           InnerNode *node,
-                           int child,
-                           const BVHRange &range,
-                           const vector<BVHReference> &references,
-                           int level)
-      : range_(range),
-        references_(references.begin() + range.start(), references.begin() + range.end())
-  {
-    range_.set_start(0);
-    run = function_bind(&BVHBuild::thread_build_spatial_split_node,
-                        build,
-                        node,
-                        child,
-                        &range_,
-                        &references_,
-                        level,
-                        _1);
-  }
-
- private:
-  BVHRange range_;
-  vector<BVHReference> references_;
-};
-
 /* Constructor / Destructor */

 BVHBuild::BVHBuild(const vector<Object *> &objects_,
@ -449,7 +407,8 @@ BVHNode *BVHBuild::run()

  if (params.use_spatial_split) {
    /* Perform multithreaded spatial split build. */
-    rootnode = build_node(root, &references, 0, 0);
+    BVHSpatialStorage *local_storage = &spatial_storage.local();
+    rootnode = build_node(root, references, 0, local_storage);
    task_pool.wait_work();
  }
  else {
@ -516,30 +475,36 @@ void BVHBuild::progress_update()
  progress_start_time = time_dt();
 }

-void BVHBuild::thread_build_node(InnerNode *inner, int child, BVHObjectBinning *range, int level)
+void BVHBuild::thread_build_node(InnerNode *inner,
+                                 int child,
+                                 const BVHObjectBinning &range,
+                                 int level)
 {
  if (progress.get_cancel())
    return;

  /* build nodes */
-  BVHNode *node = build_node(*range, level);
+  BVHNode *node = build_node(range, level);

  /* set child in inner node */
  inner->children[child] = node;

  /* update progress */
-  if (range->size() < THREAD_TASK_SIZE) {
+  if (range.size() < THREAD_TASK_SIZE) {
    /*rotate(node, INT_MAX, 5);*/

    thread_scoped_lock lock(build_mutex);

-    progress_count += range->size();
+    progress_count += range.size();
    progress_update();
  }
 }

-void BVHBuild::thread_build_spatial_split_node(
-    InnerNode *inner, int child, BVHRange *range, vector<BVHReference> *references, int level)
+void BVHBuild::thread_build_spatial_split_node(InnerNode *inner,
+                                               int child,
+                                               const BVHRange &range,
+                                               vector<BVHReference> &references,
+                                               int level)
 {
  if (progress.get_cancel()) {
    return;
@ -549,7 +514,7 @@ void BVHBuild::thread_build_spatial_split_node(
  BVHSpatialStorage *local_storage = &spatial_storage.local();

  /* build nodes */
-  BVHNode *node = build_node(*range, references, level, local_storage);
+  BVHNode *node = build_node(range, references, level, local_storage);

  /* set child in inner node */
  inner->children[child] = node;
@ -661,8 +626,8 @@ BVHNode *BVHBuild::build_node(const BVHObjectBinning &range, int level)
    /* Threaded build */
    inner = new InnerNode(bounds);

-    task_pool.push(new BVHBuildTask(this, inner, 0, left, level + 1), true);
-    task_pool.push(new BVHBuildTask(this, inner, 1, right, level + 1), true);
+    task_pool.push([=] { thread_build_node(inner, 0, left, level + 1); }, true);
+    task_pool.push([=] { thread_build_node(inner, 1, right, level + 1); }, true);
  }

  if (do_unalinged_split) {
@ -674,7 +639,7 @@ BVHNode *BVHBuild::build_node(const BVHObjectBinning &range, int level)

 /* multithreaded spatial split builder */
 BVHNode *BVHBuild::build_node(const BVHRange &range,
-                              vector<BVHReference> *references,
+                              vector<BVHReference> &references,
                              int level,
                              BVHSpatialStorage *storage)
 {
@ -693,7 +658,7 @@ BVHNode *BVHBuild::build_node(const BVHRange &range,
  if (!(range.size() > 0 && params.top_level && level == 0)) {
    if (params.small_enough_for_leaf(range.size(), level)) {
      progress_count += range.size();
-      return create_leaf_node(range, *references);
+      return create_leaf_node(range, references);
    }
  }

@ -703,7 +668,7 @@ BVHNode *BVHBuild::build_node(const BVHRange &range,
  if (!(range.size() > 0 && params.top_level && level == 0)) {
    if (split.no_split) {
      progress_count += range.size();
-      return create_leaf_node(range, *references);
+      return create_leaf_node(range, references);
    }
  }
  float leafSAH = params.sah_primitive_cost * split.leafSAH;
@ -716,7 +681,7 @@ BVHNode *BVHBuild::build_node(const BVHRange &range,
  Transform aligned_space;
  bool do_unalinged_split = false;
  if (params.use_unaligned_nodes && splitSAH > params.unaligned_split_threshold * leafSAH) {
-    aligned_space = unaligned_heuristic.compute_aligned_space(range, &references->at(0));
+    aligned_space = unaligned_heuristic.compute_aligned_space(range, &references.at(0));
    unaligned_split = BVHMixedSplit(
        this, storage, range, references, level, &unaligned_heuristic, &aligned_space);
    /* unalignedLeafSAH = params.sah_primitive_cost * split.leafSAH; */
@ -742,8 +707,7 @@ BVHNode *BVHBuild::build_node(const BVHRange &range,

  BoundBox bounds;
  if (do_unalinged_split) {
-    bounds = unaligned_heuristic.compute_aligned_boundbox(
-        range, &references->at(0), aligned_space);
+    bounds = unaligned_heuristic.compute_aligned_boundbox(range, &references.at(0), aligned_space);
  }
  else {
    bounds = range.bounds();
@ -755,23 +719,38 @@ BVHNode *BVHBuild::build_node(const BVHRange &range,
    /* Local build. */

    /* Build left node. */
-    vector<BVHReference> copy(references->begin() + right.start(),
-                              references->begin() + right.end());
+    vector<BVHReference> right_references(references.begin() + right.start(),
+                                          references.begin() + right.end());
    right.set_start(0);

-    BVHNode *leftnode = build_node(left, references, level + 1, thread_id);
+    BVHNode *leftnode = build_node(left, references, level + 1, storage);

    /* Build right node. */
-    BVHNode *rightnode = build_node(right, &copy, level + 1, thread_id);
+    BVHNode *rightnode = build_node(right, right_references, level + 1, storage);

    inner = new InnerNode(bounds, leftnode, rightnode);
  }
  else {
    /* Threaded build. */
    inner = new InnerNode(bounds);
-    task_pool.push(new BVHSpatialSplitBuildTask(this, inner, 0, left, *references, level + 1),
+
+    vector<BVHReference> left_references(references.begin() + left.start(),
+                                         references.begin() + left.end());
+    vector<BVHReference> right_references(references.begin() + right.start(),
+                                          references.begin() + right.end());
+    right.set_start(0);
+
+    /* Create tasks for left and right nodes, using copy for most arguments and
+     * move for reference to avoid memory copies. */
+    task_pool.push(
+        [=, refs = std::move(left_references)]() mutable {
+          thread_build_spatial_split_node(inner, 0, left, refs, level + 1);
+        },
        true);
-    task_pool.push(new BVHSpatialSplitBuildTask(this, inner, 1, right, *references, level + 1),
+    task_pool.push(
+        [=, refs = std::move(right_references)]() mutable {
+          thread_build_spatial_split_node(inner, 1, right, refs, level + 1);
+        },
        true);
  }

--- a/intern/cycles/bvh/bvh_build.h
+++ b/intern/cycles/bvh/bvh_build.h
@ -74,7 +74,7 @@ class BVHBuild {

  /* Building. */
  BVHNode *build_node(const BVHRange &range,
-                      vector<BVHReference> *references,
+                      vector<BVHReference> &references,
                      int level,
                      BVHSpatialStorage *storage);
  BVHNode *build_node(const BVHObjectBinning &range, int level);
@ -86,9 +86,12 @@ class BVHBuild {

  /* Threads. */
  enum { THREAD_TASK_SIZE = 4096 };
-  void thread_build_node(InnerNode *node, int child, BVHObjectBinning *range, int level);
-  void thread_build_spatial_split_node(
-      InnerNode *node, int child, BVHRange *range, vector<BVHReference> *references, int level);
+  void thread_build_node(InnerNode *node, int child, const BVHObjectBinning &range, int level);
+  void thread_build_spatial_split_node(InnerNode *node,
+                                       int child,
+                                       const BVHRange &range,
+                                       vector<BVHReference> &references,
+                                       int level);
  thread_mutex build_mutex;

  /* Progress. */
--- a/intern/cycles/bvh/bvh_sort.cpp
+++ b/intern/cycles/bvh/bvh_sort.cpp
@ -88,18 +88,6 @@ static void bvh_reference_sort_threaded(TaskPool *task_pool,
                                        const int job_end,
                                        const BVHReferenceCompare &compare);

-class BVHSortTask : public Task {
- public:
-  BVHSortTask(TaskPool *task_pool,
-              BVHReference *data,
-              const int job_start,
-              const int job_end,
-              const BVHReferenceCompare &compare)
-  {
-    run = function_bind(bvh_reference_sort_threaded, task_pool, data, job_start, job_end, compare);
-  }
-};
-
 /* Multi-threaded reference sort. */
 static void bvh_reference_sort_threaded(TaskPool *task_pool,
                                        BVHReference *data,
@ -158,7 +146,8 @@ static void bvh_reference_sort_threaded(TaskPool *task_pool,
    have_work = false;
    if (left < end) {
      if (start < right) {
-        task_pool->push(new BVHSortTask(task_pool, data, left, end, compare), true);
+        task_pool->push(
+            function_bind(bvh_reference_sort_threaded, task_pool, data, left, end, compare), true);
      }
      else {
        start = left;
--- a/intern/cycles/bvh/bvh_split.cpp
+++ b/intern/cycles/bvh/bvh_split.cpp
@ -33,7 +33,7 @@ CCL_NAMESPACE_BEGIN
 BVHObjectSplit::BVHObjectSplit(BVHBuild *builder,
                               BVHSpatialStorage *storage,
                               const BVHRange &range,
-                               vector<BVHReference> *references,
+                               vector<BVHReference> &references,
                               float nodeSAH,
                               const BVHUnaligned *unaligned_heuristic,
                               const Transform *aligned_space)
@ -43,7 +43,7 @@ BVHObjectSplit::BVHObjectSplit(BVHBuild *builder,
      left_bounds(BoundBox::empty),
      right_bounds(BoundBox::empty),
      storage_(storage),
-      references_(references),
+      references_(&references),
      unaligned_heuristic_(unaligned_heuristic),
      aligned_space_(aligned_space)
 {
@ -133,7 +133,7 @@ void BVHObjectSplit::split(BVHRange &left, BVHRange &right, const BVHRange &rang
 BVHSpatialSplit::BVHSpatialSplit(const BVHBuild &builder,
                                 BVHSpatialStorage *storage,
                                 const BVHRange &range,
-                                 vector<BVHReference> *references,
+                                 vector<BVHReference> &references,
                                 float nodeSAH,
                                 const BVHUnaligned *unaligned_heuristic,
                                 const Transform *aligned_space)
@ -141,7 +141,7 @@ BVHSpatialSplit::BVHSpatialSplit(const BVHBuild &builder,
      dim(0),
      pos(0.0f),
      storage_(storage),
-      references_(references),
+      references_(&references),
      unaligned_heuristic_(unaligned_heuristic),
      aligned_space_(aligned_space)
 {
@ -152,7 +152,7 @@ BVHSpatialSplit::BVHSpatialSplit(const BVHBuild &builder,
  }
  else {
    range_bounds = unaligned_heuristic->compute_aligned_boundbox(
-        range, &references->at(0), *aligned_space);
+        range, &references_->at(0), *aligned_space);
  }

  float3 origin = range_bounds.min;
--- a/intern/cycles/bvh/bvh_split.h
+++ b/intern/cycles/bvh/bvh_split.h
@ -44,7 +44,7 @@ class BVHObjectSplit {
  BVHObjectSplit(BVHBuild *builder,
                 BVHSpatialStorage *storage,
                 const BVHRange &range,
-                 vector<BVHReference> *references,
+                 vector<BVHReference> &references,
                 float nodeSAH,
                 const BVHUnaligned *unaligned_heuristic = NULL,
                 const Transform *aligned_space = NULL);
@ -82,7 +82,7 @@ class BVHSpatialSplit {
  BVHSpatialSplit(const BVHBuild &builder,
                  BVHSpatialStorage *storage,
                  const BVHRange &range,
-                  vector<BVHReference> *references,
+                  vector<BVHReference> &references,
                  float nodeSAH,
                  const BVHUnaligned *unaligned_heuristic = NULL,
                  const Transform *aligned_space = NULL);
@ -187,7 +187,7 @@ class BVHMixedSplit {
  __forceinline BVHMixedSplit(BVHBuild *builder,
                              BVHSpatialStorage *storage,
                              const BVHRange &range,
-                              vector<BVHReference> *references,
+                              vector<BVHReference> &references,
                              int level,
                              const BVHUnaligned *unaligned_heuristic = NULL,
                              const Transform *aligned_space = NULL)
@ -197,7 +197,7 @@ class BVHMixedSplit {
    }
    else {
      bounds = unaligned_heuristic->compute_aligned_boundbox(
-          range, &references->at(0), *aligned_space);
+          range, &references.at(0), *aligned_space);
    }
    /* find split candidates. */
    float area = bounds.safe_area();
@ -220,7 +220,7 @@ class BVHMixedSplit {

    /* leaf SAH is the lowest => create leaf. */
    minSAH = min(min(leafSAH, object.sah), spatial.sah);
-    no_split = (minSAH == leafSAH && builder->range_within_max_leaf_size(range, *references));
+    no_split = (minSAH == leafSAH && builder->range_within_max_leaf_size(range, references));
  }

  __forceinline void split(BVHBuild *builder,
--- a/intern/cycles/device/cuda/device_cuda_impl.cpp
+++ b/intern/cycles/device/cuda/device_cuda_impl.cpp
@ -2401,16 +2401,6 @@ void CUDADevice::thread_run(DeviceTask &task)
  }
 }

-class CUDADeviceTask : public Task {
- public:
-  CUDADeviceTask(CUDADevice *device, DeviceTask &task) : task(task)
-  {
-    run = function_bind(&CUDADevice::thread_run, device, task);
-  }
-
-  DeviceTask task;
-};
-
 void CUDADevice::task_add(DeviceTask &task)
 {
  CUDAContextScope scope(this);
@ -2426,7 +2416,10 @@ void CUDADevice::task_add(DeviceTask &task)
    film_convert(task, task.buffer, task.rgba_byte, task.rgba_half);
  }
  else {
-    task_pool.push(new CUDADeviceTask(this, task));
+    task_pool.push([=] {
+      DeviceTask task_copy = task;
+      thread_run(task_copy);
+    });
  }
 }

--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@ -540,16 +540,6 @@ class CPUDevice : public Device {
      thread_denoise(task);
  }

-  class CPUDeviceTask : public Task {
-   public:
-    CPUDeviceTask(CPUDevice *device, DeviceTask &task) : task(task)
-    {
-      run = function_bind(&CPUDevice::thread_run, device, task);
-    }
-
-    DeviceTask task;
-  };
-
  bool denoising_non_local_means(device_ptr image_ptr,
                                 device_ptr guide_ptr,
                                 device_ptr variance_ptr,
@ -1163,8 +1153,12 @@ class CPUDevice : public Device {
    else
      task.split(tasks, info.cpu_threads);

-    foreach (DeviceTask &task, tasks)
-      task_pool.push(new CPUDeviceTask(this, task));
+    foreach (DeviceTask &task, tasks) {
+      task_pool.push([=] {
+        DeviceTask task_copy = task;
+        thread_run(task_copy);
+      });
+    }
  }

  void task_wait()
--- a/intern/cycles/device/device_optix.cpp
+++ b/intern/cycles/device/device_optix.cpp
@ -1463,17 +1463,6 @@ class OptiXDevice : public CUDADevice {

  void task_add(DeviceTask &task) override
  {
-    struct OptiXDeviceTask : public Task {
-      OptiXDeviceTask(OptiXDevice *device, DeviceTask &task, int task_index) : task(task)
-      {
-        // Using task index parameter instead of thread index, since number of CUDA streams may
-        // differ from number of threads
-        run = function_bind(&OptiXDevice::thread_run, device, task, task_index);
-      }
-
-      DeviceTask task;
-    };
-
    // Upload texture information to device if it has changed since last launch
    load_texture_info();

@ -1485,7 +1474,10 @@ class OptiXDevice : public CUDADevice {

    if (task.type == DeviceTask::DENOISE_BUFFER) {
      // Execute denoising in a single thread (e.g. to avoid race conditions during creation)
-      task_pool.push(new OptiXDeviceTask(this, task, 0));
+      task_pool.push([=] {
+        DeviceTask task_copy = task;
+        thread_run(task_copy, 0);
+      });
      return;
    }

@ -1495,8 +1487,15 @@ class OptiXDevice : public CUDADevice {

    // Queue tasks in internal task pool
    int task_index = 0;
-    for (DeviceTask &task : tasks)
-      task_pool.push(new OptiXDeviceTask(this, task, task_index++));
+    for (DeviceTask &task : tasks) {
+      task_pool.push([=] {
+        // Using task index parameter instead of thread index, since number of CUDA streams may
+        // differ from number of threads
+        DeviceTask task_copy = task;
+        thread_run(task_copy, task_index);
+      });
+      task_index++;
+    }
  }

  void task_wait() override
--- a/intern/cycles/device/opencl/device_opencl.h
+++ b/intern/cycles/device/opencl/device_opencl.h
@ -456,16 +456,6 @@ class OpenCLDevice : public Device {

  void denoise(RenderTile &tile, DenoisingTask &denoising);

-  class OpenCLDeviceTask : public Task {
-   public:
-    OpenCLDeviceTask(OpenCLDevice *device, DeviceTask &task) : task(task)
-    {
-      run = function_bind(&OpenCLDevice::thread_run, device, task);
-    }
-
-    DeviceTask task;
-  };
-
  int get_split_task_count(DeviceTask & /*task*/)
  {
    return 1;
@ -473,7 +463,10 @@ class OpenCLDevice : public Device {

  void task_add(DeviceTask &task)
  {
-    task_pool.push(new OpenCLDeviceTask(this, task));
+    task_pool.push([=] {
+      DeviceTask task_copy = task;
+      thread_run(task_copy);
+    });
  }

  void task_wait()
--- a/intern/cycles/util/util_task.cpp
+++ b/intern/cycles/util/util_task.cpp
@ -49,21 +49,16 @@ TaskPool::~TaskPool()
  stop();
 }

-void TaskPool::push(Task *task, bool front)
+void TaskPool::push(TaskRunFunction &&task, bool front)
 {
  TaskScheduler::Entry entry;

-  entry.task = task;
+  entry.task = new TaskRunFunction(std::move(task));
  entry.pool = this;

  TaskScheduler::push(entry, front);
 }

-void TaskPool::push(TaskRunFunction &&run, bool front)
-{
-  push(new Task(std::move(run)), front);
-}
-
 void TaskPool::wait_work(Summary *stats)
 {
  thread_scoped_lock num_lock(num_mutex);
@ -95,7 +90,7 @@ void TaskPool::wait_work(Summary *stats)
    /* if found task, do it, otherwise wait until other tasks are done */
    if (found_entry) {
      /* run task */
-      work_entry.task->run(0);
+      (*work_entry.task)();

      /* delete task */
      delete work_entry.task;
@ -334,7 +329,7 @@ void TaskScheduler::init(int num_threads)
  /* Launch threads that will be waiting for work. */
  threads.resize(num_threads);
  for (int thread_index = 0; thread_index < num_threads; ++thread_index) {
-    threads[thread_index] = new thread(function_bind(&TaskScheduler::thread_run, thread_index + 1),
+    threads[thread_index] = new thread(function_bind(&TaskScheduler::thread_run),
                                       thread_nodes[thread_index]);
  }
 }
@ -384,7 +379,7 @@ bool TaskScheduler::thread_wait_pop(Entry &entry)
  return true;
 }

-void TaskScheduler::thread_run(int thread_id)
+void TaskScheduler::thread_run()
 {
  Entry entry;

@ -393,7 +388,7 @@ void TaskScheduler::thread_run(int thread_id)
  /* keep popping off tasks */
  while (thread_wait_pop(entry)) {
    /* run task */
-    entry.task->run(thread_id);
+    (*entry.task)();

    /* delete task */
    delete entry.task;
@ -463,26 +458,21 @@ DedicatedTaskPool::~DedicatedTaskPool()
  delete worker_thread;
 }

-void DedicatedTaskPool::push(Task *task, bool front)
+void DedicatedTaskPool::push(TaskRunFunction &&task, bool front)
 {
  num_increase();

  /* add task to queue */
  queue_mutex.lock();
  if (front)
-    queue.push_front(task);
+    queue.emplace_front(std::move(task));
  else
-    queue.push_back(task);
+    queue.emplace_back(std::move(task));

  queue_cond.notify_one();
  queue_mutex.unlock();
 }

-void DedicatedTaskPool::push(TaskRunFunction &&run, bool front)
-{
-  push(new Task(std::move(run)), front);
-}
-
 void DedicatedTaskPool::wait()
 {
  thread_scoped_lock num_lock(num_mutex);
@ -535,7 +525,7 @@ void DedicatedTaskPool::num_increase()
  num_cond.notify_all();
 }

-bool DedicatedTaskPool::thread_wait_pop(Task *&task)
+bool DedicatedTaskPool::thread_wait_pop(TaskRunFunction &task)
 {
  thread_scoped_lock queue_lock(queue_mutex);

@ -555,15 +545,15 @@ bool DedicatedTaskPool::thread_wait_pop(Task *&task)

 void DedicatedTaskPool::thread_run()
 {
-  Task *task;
+  TaskRunFunction task;

  /* keep popping off tasks */
  while (thread_wait_pop(task)) {
    /* run task */
-    task->run(0);
+    task();

    /* delete task */
-    delete task;
+    task = nullptr;

    /* notify task was done */
    num_decrease(1);
@ -575,15 +565,8 @@ void DedicatedTaskPool::clear()
  thread_scoped_lock queue_lock(queue_mutex);

  /* erase all tasks from the queue */
-  list<Task *>::iterator it = queue.begin();
-  int done = 0;
-
-  while (it != queue.end()) {
-    done++;
-    delete *it;
-
-    it = queue.erase(it);
-  }
+  int done = queue.size();
+  queue.clear();

  queue_lock.unlock();

--- a/intern/cycles/util/util_task.h
+++ b/intern/cycles/util/util_task.h
@ -31,43 +31,10 @@ using tbb::blocked_range;
 using tbb::enumerable_thread_specific;
 using tbb::parallel_for;

-class Task;
 class TaskPool;
 class TaskScheduler;

-/* Notes on Thread ID
- *
- * Thread ID argument reports the 0-based ID of a working thread from which
- * the run() callback is being invoked. Thread ID of 0 denotes the thread from
- * which wait_work() was called.
- *
- * DO NOT use this ID to control execution flaw, use it only for things like
- * emulating TLS which does not affect on scheduling. Don't use this ID to make
- * any decisions.
- *
- * It is to be noted here that dedicated task pool will always report thread ID
- * of 0.
- */
-
-typedef function<void(int thread_id)> TaskRunFunction;
-
-/* Task
- *
- * Base class for tasks to be executed in threads. */
-
-class Task {
- public:
-  Task(){};
-  explicit Task(TaskRunFunction &&run_) : run(run_)
-  {
-  }
-
-  virtual ~Task()
-  {
-  }
-
-  TaskRunFunction run;
-};
+typedef function<void(void)> TaskRunFunction;

 /* Task Pool
 *
@ -75,8 +42,7 @@ class Task {
 * pool, we can wait for all tasks to be done, or cancel them before they are
 * done.
 *
- * The run callback that actually executes the task may be created like this:
- * function_bind(&MyClass::task_execute, this, _1, _2) */
+ * TaskRunFunction may be created with std::bind or lambda expressions. */

 class TaskPool {
 public:
@ -96,8 +62,7 @@ class TaskPool {
  TaskPool();
  ~TaskPool();

-  void push(Task *task, bool front = false);
-  void push(TaskRunFunction &&run, bool front = false);
+  void push(TaskRunFunction &&task, bool front = false);

  void wait_work(Summary *stats = NULL); /* work and wait until all tasks are done */
  void cancel();                         /* cancel all tasks, keep worker threads running */
@ -154,7 +119,7 @@ class TaskScheduler {
  friend class TaskPool;

  struct Entry {
-    Task *task;
+    TaskRunFunction *task;
    TaskPool *pool;
  };

@ -167,7 +132,7 @@ class TaskScheduler {
  static thread_mutex queue_mutex;
  static thread_condition_variable queue_cond;

-  static void thread_run(int thread_id);
+  static void thread_run();
  static bool thread_wait_pop(Entry &entry);

  static void push(Entry &entry, bool front);
@ -186,7 +151,6 @@ class DedicatedTaskPool {
  DedicatedTaskPool();
  ~DedicatedTaskPool();

-  void push(Task *task, bool front = false);
  void push(TaskRunFunction &&run, bool front = false);

  void wait();   /* wait until all tasks are done */
@ -200,14 +164,14 @@ class DedicatedTaskPool {
  void num_increase();

  void thread_run();
-  bool thread_wait_pop(Task *&entry);
+  bool thread_wait_pop(TaskRunFunction &task);

  void clear();

  thread_mutex num_mutex;
  thread_condition_variable num_cond;

-  list<Task *> queue;
+  list<TaskRunFunction> queue;
  thread_mutex queue_mutex;
  thread_condition_variable queue_cond;