vtk-m/vtkm/worklet/DispatcherMapTopology.h
Robert Maynard c1560e2d3f Perform less unnecessary copies when deducing a worklets parameters.
One of the causes of the large library size and slow compile times has been
that vtkm has been creating unnecessary copies when not needed. When the
objects being copied use shared_ptr this causes a bloom in library size. I
presume this bloom is caused by the atomic increment/decrement that is
required by shared_ptr.

For testing I used the following example:
```
struct ExampleFieldWorklet : public vtkm::worklet::WorkletMapField
{
  typedef void ControlSignature( FieldIn<>, FieldIn<>, FieldIn<>,
                                 FieldOut<>, FieldOut<>, FieldOut<> );
  typedef void ExecutionSignature( _1, _2, _3, _4, _5, _6 );

  template<typename T, typename U, typename V>
  VTKM_EXEC_EXPORT
  void operator()( const vtkm::Vec< T, 3 > & vec,
                   const U & scalar1,
                   const V& scalar2,
                   vtkm::Vec<T, 3>& out_vec,
                   U& out_scalar1,
                   V& out_scalar2 ) const
  {
    out_vec = vec * scalar1;
    out_scalar1 = scalar1 + scalar2;
    out_scalar2 = scalar2;
  }

  template<typename T, typename U, typename V, typename W, typename X, typename Y>
  VTKM_EXEC_EXPORT
  void operator()( const T & vec,
                   const U & scalar1,
                   const V& scalar2,
                   W& out_vec,
                   X& out_scalar,
                   Y& ) const
  {
  //no-op
  }
};

int main(int argc, char** argv)
{
  std::vector< vtkm::Vec<vtkm::Float32, 3> > inputVec;
  std::vector< vtkm::Int32 > inputScalar1;
  std::vector< vtkm::Float64 > inputScalar2;

  vtkm::cont::ArrayHandle< vtkm::Vec<vtkm::Float32, 3> > handleV =
    vtkm::cont::make_ArrayHandle(inputVec);

  vtkm::cont::ArrayHandle< vtkm::Vec<vtkm::Float32, 3> > handleS1 =
    vtkm::cont::make_ArrayHandle(inputVec);

  vtkm::cont::ArrayHandle< vtkm::Vec<vtkm::Float32, 3> > handleS2 =
    vtkm::cont::make_ArrayHandle(inputVec);

  vtkm::cont::ArrayHandle< vtkm::Vec<vtkm::Float32, 3> > handleOV;
  vtkm::cont::ArrayHandle< vtkm::Vec<vtkm::Float32, 3> > handleOS1;
  vtkm::cont::ArrayHandle< vtkm::Vec<vtkm::Float32, 3> > handleOS2;

  std::cout << "Making 3 output DynamicArrayHandles " << std::endl;
  vtkm::cont::DynamicArrayHandle out1(handleOV), out2(handleOS1), out3(handleOS2);

  typedef vtkm::worklet::DispatcherMapField<ExampleFieldWorklet> DispatcherType;

  std::cout << "Invoking ExampleFieldWorklet" << std::endl;
  DispatcherType dispatcher;

  dispatcher.Invoke(handleV, handleS1, handleS2, out1, out2, out3);

}
```

Original vtkm would generate a binary of size 4684kb and would perform 91
ArrayHandle copies or assignments. With this branch the binary size is
reduced to 2392kb and will perform 36 copies or assignments.
2016-01-19 09:20:49 -05:00

82 lines
2.9 KiB
C++

//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2014 Sandia Corporation.
// Copyright 2014 UT-Battelle, LLC.
// Copyright 2014 Los Alamos National Security.
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#ifndef vtk_m_worklet_Dispatcher_MapTopology_h
#define vtk_m_worklet_Dispatcher_MapTopology_h
#include <vtkm/TopologyElementTag.h>
#include <vtkm/cont/DeviceAdapter.h>
#include <vtkm/worklet/WorkletMapTopology.h>
#include <vtkm/worklet/internal/DispatcherBase.h>
namespace vtkm {
namespace worklet {
/// \brief Dispatcher for worklets that inherit from \c WorkletMapTopology.
///
template<typename WorkletType,
typename Device = VTKM_DEFAULT_DEVICE_ADAPTER_TAG>
class DispatcherMapTopology :
public vtkm::worklet::internal::DispatcherBase<
DispatcherMapTopology<WorkletType,Device>,
WorkletType,
vtkm::worklet::WorkletMapTopologyBase
>
{
typedef vtkm::worklet::internal::DispatcherBase<
DispatcherMapTopology<WorkletType,Device>,
WorkletType,
vtkm::worklet::WorkletMapTopologyBase
> Superclass;
public:
VTKM_CONT_EXPORT
DispatcherMapTopology(const WorkletType &worklet = WorkletType())
: Superclass(worklet) { }
template<typename Invocation>
VTKM_CONT_EXPORT
void DoInvoke(const Invocation &invocation) const
{
// This is the type for the input domain
typedef typename Invocation::InputDomainType InputDomainType;
// If you get a compile error on this line, then you have tried to use
// something that is not a vtkm::cont::CellSet as the input domain to a
// topology operation (that operates on a cell set connection domain).
VTKM_IS_CELL_SET(InputDomainType);
// We can pull the input domain parameter (the data specifying the input
// domain) from the invocation object.
const InputDomainType &inputDomain = invocation.GetInputDomain();
// Now that we have the input domain, we can extract the range of the
// scheduling and call BadicInvoke.
this->BasicInvoke(invocation,
inputDomain.GetSchedulingRange(
typename WorkletType::ToTopologyType()),
Device());
}
};
}
} // namespace vtkm::worklet
#endif //vtk_m_worklet_Dispatcher_MapTopology_h