Add support for BitFields.

BitFields are:
- Stored in memory using a contiguous buffer of bits.
- Accessible via portals, a la ArrayHandle.
- Portals operate on individual bits or words.
- Operations may be atomic for safe use from concurrent kernels.

The new BitFieldToUnorderedSet device algorithm produces an ArrayHandle
containing the indices of all set bits, in no particular order.

The new AtomicInterface classes provide an abstraction over bitwise
atomic operations across the control and execution environments and are
used to implement the BitPortals.
This commit is contained in:
Allison Vacanti 2019-03-05 16:47:09 -05:00 committed by Robert Maynard
parent d01b973821
commit 56cc5c3d3a
35 changed files with 2725 additions and 17 deletions

@ -26,6 +26,7 @@
#include <vtkm/cont/ArrayHandlePermutation.h>
#include <vtkm/cont/ArrayHandleZip.h>
#include <vtkm/cont/ArrayPortalToIterators.h>
#include <vtkm/cont/BitField.h>
#include <vtkm/cont/DeviceAdapterAlgorithm.h>
#include <vtkm/cont/ErrorExecution.h>
#include <vtkm/cont/StorageBasic.h>
@ -63,20 +64,24 @@ namespace benchmarking
enum BenchmarkName
{
COPY = 1,
COPY_IF = 1 << 1,
LOWER_BOUNDS = 1 << 2,
REDUCE = 1 << 3,
REDUCE_BY_KEY = 1 << 4,
SCAN_INCLUSIVE = 1 << 5,
SCAN_EXCLUSIVE = 1 << 6,
SORT = 1 << 7,
SORT_BY_KEY = 1 << 8,
STABLE_SORT_INDICES = 1 << 9,
STABLE_SORT_INDICES_UNIQUE = 1 << 10,
UNIQUE = 1 << 11,
UPPER_BOUNDS = 1 << 12,
ALL = COPY | COPY_IF | LOWER_BOUNDS | REDUCE | REDUCE_BY_KEY | SCAN_INCLUSIVE | SCAN_EXCLUSIVE |
BITFIELD_TO_UNORDERED_SET = 1 << 0,
COPY = 1 << 1,
COPY_IF = 1 << 2,
LOWER_BOUNDS = 1 << 3,
REDUCE = 1 << 4,
REDUCE_BY_KEY = 1 << 5,
SCAN_INCLUSIVE = 1 << 6,
SCAN_EXCLUSIVE = 1 << 7,
SORT = 1 << 8,
SORT_BY_KEY = 1 << 9,
STABLE_SORT_INDICES = 1 << 10,
STABLE_SORT_INDICES_UNIQUE = 1 << 11,
UNIQUE = 1 << 12,
UPPER_BOUNDS = 1 << 13,
ALL = BITFIELD_TO_UNORDERED_SET | COPY | COPY_IF | LOWER_BOUNDS | REDUCE | REDUCE_BY_KEY |
SCAN_INCLUSIVE |
SCAN_EXCLUSIVE |
SORT |
SORT_BY_KEY |
STABLE_SORT_INDICES |
@ -132,6 +137,20 @@ struct BenchDevAlgoConfig
? static_cast<vtkm::Id>(this->ArraySizeBytes / static_cast<vtkm::UInt64>(sizeof(T)))
: static_cast<vtkm::Id>(this->ArraySizeValues);
}
// Compute the number of words in a bit field with the given type.
// If DoByteSizes is true, the specified buffer is rounded down to the nearest
// number of words that fit into the byte limit. Otherwise, ArraySizeValues
// is used to indicate the number of bits.
template <typename WordType>
VTKM_CONT vtkm::Id ComputeNumberOfWords()
{
  static constexpr vtkm::UInt64 bytesPerWord = static_cast<vtkm::UInt64>(sizeof(WordType));
  static constexpr vtkm::UInt64 bitsPerWord = bytesPerWord * 8;
  if (this->DoByteSizes)
  {
    // Round the byte budget down to a whole number of words.
    return static_cast<vtkm::Id>(this->ArraySizeBytes / bytesPerWord);
  }
  // ArraySizeValues counts bits; convert to whole words (rounding down).
  return static_cast<vtkm::Id>(this->ArraySizeValues / bitsPerWord);
}
};
// Share a global instance of the config (only way to get it into the benchmark
@ -255,7 +274,170 @@ public:
}
};
// Worker functor used by GenerateBitField: for each word index, writes either
// the exemplar word or zero into the bit field, depending on the stride and
// the maximum masked word index.
template <typename WordType, typename BitFieldPortal>
struct GenerateBitFieldFunctor : public vtkm::exec::FunctorBase
{
  WordType Exemplar;       // Word pattern written to selected words.
  vtkm::Id Stride;         // Every Stride-th word receives the exemplar.
  vtkm::Id MaxMaskedWord;  // Words past this index are always zeroed.
  BitFieldPortal Portal;   // Destination bit field portal.

  VTKM_EXEC_CONT
  GenerateBitFieldFunctor(WordType exemplar,
                          vtkm::Id stride,
                          vtkm::Id maxMaskedWord,
                          const BitFieldPortal& portal)
    : Exemplar(exemplar)
    , Stride(stride)
    , MaxMaskedWord(maxMaskedWord)
    , Portal(portal)
  {
  }

  VTKM_EXEC
  void operator()(vtkm::Id wordIdx) const
  {
    // A word gets the exemplar only when it is within the masked region and
    // falls on a stride boundary; otherwise it is cleared.
    const bool useExemplar = (wordIdx <= this->MaxMaskedWord) && ((wordIdx % this->Stride) == 0);
    this->Portal.SetWord(wordIdx, useExemplar ? this->Exemplar : static_cast<WordType>(0));
  }
};
// Create a bit field for testing. The bit array will contain numWords words.
// The exemplar word is used to set bits in the array. Stride indicates how
// many words will be set to 0 between words initialized to the exemplar.
// Words with indices higher than maxMaskedWord will be set to 0.
// Stride and maxMaskedWord may be used to test different types of imbalanced
// loads.
template <typename WordType, typename DeviceAdapterTag>
static VTKM_CONT vtkm::cont::BitField GenerateBitField(WordType exemplar,
                                                       vtkm::Id stride,
                                                       vtkm::Id maxMaskedWord,
                                                       vtkm::Id numWords)
{
  using Algo = vtkm::cont::DeviceAdapterAlgorithm<DeviceAdapterTag>;

  // A stride of 0 would divide by zero in the functor; treat it as 1.
  if (stride == 0)
  {
    stride = 1;
  }

  vtkm::cont::BitField result;
  auto portal = result.PrepareForOutput(numWords, DeviceAdapterTag{});

  using Functor = GenerateBitFieldFunctor<WordType, decltype(portal)>;
  Functor functor{ exemplar, stride, maxMaskedWord, portal };
  Algo::Schedule(functor, numWords);
  Algo::Synchronize();

  return result;
}
private:
// Benchmark for the BitFieldToUnorderedSet device algorithm. Builds a
// BitField via GenerateBitField and times how long the algorithm takes to
// extract the indices of the set bits into an ArrayHandle.
template <typename WordType, typename DeviceAdapter>
struct BenchBitFieldToUnorderedSet
{
using IndicesArray = vtkm::cont::ArrayHandle<vtkm::Id>;
// NOTE: members are initialized in declaration order. The mem-initializers
// below read earlier members (NumBits and MaxMaskedIndex read NumWords;
// MaxMaskedIndex reads FillRatio; Bits reads several), so this declaration
// order must not be changed.
vtkm::Id NumWords;
vtkm::Id NumBits;
WordType Exemplar;
vtkm::Id Stride;
vtkm::Float32 FillRatio;
vtkm::Id MaxMaskedIndex;
std::string Name;
vtkm::cont::BitField Bits;
IndicesArray Indices;
// See GenerateBitField for details. fillRatio is used to compute
// maxMaskedWord.
// NOTE(review): a fillRatio of 0 makes (1. / FillRatio) infinite, and
// casting an infinite value to vtkm::Id is undefined behavior -- confirm
// the "Null" benchmark (fillRatio = 0.f) is safe here.
VTKM_CONT
BenchBitFieldToUnorderedSet(WordType exemplar,
vtkm::Id stride,
vtkm::Float32 fillRatio,
const std::string& name)
: NumWords(Config.ComputeNumberOfWords<WordType>())
, NumBits(this->NumWords * static_cast<vtkm::Id>(sizeof(WordType) * CHAR_BIT))
, Exemplar(exemplar)
, Stride(stride)
, FillRatio(fillRatio)
, MaxMaskedIndex(this->NumWords / static_cast<vtkm::Id>(1. / this->FillRatio))
, Name(name)
, Bits(GenerateBitField<WordType, DeviceAdapter>(this->Exemplar,
this->Stride,
this->MaxMaskedIndex,
this->NumWords))
{
}
// Time a single run of the algorithm; returns the elapsed time reported by
// the device Timer.
VTKM_CONT
vtkm::Float64 operator()()
{
Timer timer(DeviceAdapter{});
timer.Start();
Algorithm::BitFieldToUnorderedSet(DeviceAdapter{}, this->Bits, this->Indices);
return timer.GetElapsedTime();
}
// Human-readable description of this configuration, including the expected
// number of set bits (filled words times set bits per exemplar).
VTKM_CONT
std::string Description() const
{
const vtkm::Id numFilledWords = this->MaxMaskedIndex / this->Stride;
const vtkm::Id numSetBits = numFilledWords * vtkm::CountSetBits(this->Exemplar);
std::stringstream description;
description << "BitFieldToUnorderedSet" << this->Name << " ( "
<< "NumWords: " << this->NumWords << " "
<< "Exemplar: " << std::hex << this->Exemplar << std::dec << " "
<< "FillRatio: " << this->FillRatio << " "
<< "Stride: " << this->Stride << " "
<< "NumSetBits: " << numSetBits << " )";
return description.str();
}
};
// Exemplar 0x00000000: no bits are ever set; measures pure scan overhead.
VTKM_MAKE_BENCHMARK(BitFieldToUnorderedSetNull,
BenchBitFieldToUnorderedSet,
0x00000000,
1,
0.f,
"Null");
// Exemplar 0xffffffff, full fill: every bit in every word is set.
VTKM_MAKE_BENCHMARK(BitFieldToUnorderedSetFull,
BenchBitFieldToUnorderedSet,
0xffffffff,
1,
1.f,
"Full");
// Exemplar 0xffff0000: only the upper 16 bits of each 32-bit word are set.
VTKM_MAKE_BENCHMARK(BitFieldToUnorderedSetHalfWord,
BenchBitFieldToUnorderedSet,
0xffff0000,
1,
1.f,
"HalfWord");
// Fill ratio 0.5: only the first half of the words contain set bits.
VTKM_MAKE_BENCHMARK(BitFieldToUnorderedSetHalfField,
BenchBitFieldToUnorderedSet,
0xffffffff,
1,
0.5f,
"HalfField");
// Stride 2: every other word is fully set, the rest are zero.
VTKM_MAKE_BENCHMARK(BitFieldToUnorderedSetAlternateWords,
BenchBitFieldToUnorderedSet,
0xffffffff,
2,
1.f,
"AlternateWords");
// Exemplar 0x55555555: alternating bits set within every word.
VTKM_MAKE_BENCHMARK(BitFieldToUnorderedSetAlternateBits,
BenchBitFieldToUnorderedSet,
0x55555555,
1,
1.f,
"AlternateBits");
template <typename Value, typename DeviceAdapter>
struct BenchCopy
{
@ -982,6 +1164,19 @@ public:
template <typename ValueTypes>
static VTKM_CONT void RunInternal(vtkm::cont::DeviceAdapterId id)
{
using BitFieldWordTypes = vtkm::ListTagBase<vtkm::UInt32>;
if (Config.BenchmarkFlags & BITFIELD_TO_UNORDERED_SET)
{
std::cout << DIVIDER << "\nBenchmarking BitFieldToUnorderedSet\n";
VTKM_RUN_BENCHMARK(BitFieldToUnorderedSetNull, BitFieldWordTypes{}, id);
VTKM_RUN_BENCHMARK(BitFieldToUnorderedSetFull, BitFieldWordTypes{}, id);
VTKM_RUN_BENCHMARK(BitFieldToUnorderedSetHalfWord, BitFieldWordTypes{}, id);
VTKM_RUN_BENCHMARK(BitFieldToUnorderedSetHalfField, BitFieldWordTypes{}, id);
VTKM_RUN_BENCHMARK(BitFieldToUnorderedSetAlternateWords, BitFieldWordTypes{}, id);
VTKM_RUN_BENCHMARK(BitFieldToUnorderedSetAlternateBits, BitFieldWordTypes{}, id);
}
if (Config.BenchmarkFlags & COPY)
{
std::cout << DIVIDER << "\nBenchmarking Copy\n";
@ -1434,7 +1629,11 @@ int main(int argc, char* argv[])
std::transform(arg.begin(), arg.end(), arg.begin(), [](char c) {
return static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
});
if (arg == "copy")
if (arg == "bitfieldtounorderedset")
{
config.BenchmarkFlags |= vtkm::benchmarking::BITFIELD_TO_UNORDERED_SET;
}
else if (arg == "copy")
{
config.BenchmarkFlags |= vtkm::benchmarking::COPY;
}

@ -0,0 +1,15 @@
# Add support for BitFields.
BitFields are:
- Stored in memory using a contiguous buffer of bits.
- Accessible via portals, a la ArrayHandle.
- Portals operate on individual bits or words.
- Operations may be atomic for safe use from concurrent kernels.
The new BitFieldToUnorderedSet device algorithm produces an
ArrayHandle containing the indices of all set bits, in no particular
order.
The new AtomicInterface classes provide an abstraction over bitwise
atomic operations across the control and execution environments and
are used to implement the BitPortals.

@ -159,6 +159,10 @@ using UInt32 = unsigned int;
/// than smaller widths.
using IdComponent = vtkm::Int32;
/// The default word size used for atomic bitwise operations. Universally
/// supported on all devices.
using WordTypeDefault = vtkm::UInt32;
//In this order so that we exactly match the logic that exists in VTK
#if VTKM_SIZE_LONG_LONG == 8
using Int64 = long long;

@ -60,6 +60,20 @@ auto PrepareArgForExec(T&& object)
vtkm::cont::internal::IsExecutionObjectBase<T>{});
}
// TryExecute functor that dispatches BitFieldToUnorderedSet to the
// device-specific DeviceAdapterAlgorithm and records the number of set bits
// found.
struct BitFieldToUnorderedSetFunctor
{
// Number of set-bit indices produced; remains 0 if no device ran.
vtkm::Id Result{ 0 };
template <typename Device, typename... Args>
VTKM_CONT bool operator()(Device, Args&&... args)
{
VTKM_IS_DEVICE_ADAPTER_TAG(Device);
// Prepare each forwarded argument for the target device before calling.
this->Result = vtkm::cont::DeviceAdapterAlgorithm<Device>::BitFieldToUnorderedSet(
PrepareArgForExec<Device>(std::forward<Args>(args))...);
return true;
}
};
struct CopyFunctor
{
template <typename Device, typename... Args>
@ -374,6 +388,27 @@ struct UpperBoundsFunctor
struct Algorithm
{
/// Compute the indices of all set bits in @a bits, writing them (in no
/// particular order) to @a indices, on the device identified by @a devId.
/// Returns the number of set bits found; if no device executes, the
/// functor's initial Result of 0 is returned.
template <typename IndicesStorage>
VTKM_CONT static vtkm::Id BitFieldToUnorderedSet(
vtkm::cont::DeviceAdapterId devId,
const vtkm::cont::BitField& bits,
vtkm::cont::ArrayHandle<Id, IndicesStorage>& indices)
{
detail::BitFieldToUnorderedSetFunctor functor;
vtkm::cont::TryExecuteOnDevice(devId, functor, bits, indices);
return functor.Result;
}
/// Compute the indices of all set bits in @a bits, writing them (in no
/// particular order) to @a indices, on any available device.
/// Returns the number of set bits found; if no device executes, the
/// functor's initial Result of 0 is returned.
template <typename IndicesStorage>
VTKM_CONT static vtkm::Id BitFieldToUnorderedSet(
const vtkm::cont::BitField& bits,
vtkm::cont::ArrayHandle<Id, IndicesStorage>& indices)
{
detail::BitFieldToUnorderedSetFunctor functor;
vtkm::cont::TryExecute(functor, bits, indices);
return functor.Result;
}
template <typename T, typename U, class CIn, class COut>
VTKM_CONT static bool Copy(vtkm::cont::DeviceAdapterId devId,
const vtkm::cont::ArrayHandle<T, CIn>& input,

719
vtkm/cont/BitField.h Normal file

@ -0,0 +1,719 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2019 UT-Battelle, LLC.
// Copyright 2019 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#ifndef vtk_m_cont_BitField_h
#define vtk_m_cont_BitField_h
#include <vtkm/cont/internal/AtomicInterfaceControl.h>
#include <vtkm/cont/internal/AtomicInterfaceExecution.h>
#include <vtkm/cont/ArrayHandle.h>
#include <vtkm/cont/Logging.h>
#include <vtkm/ListTag.h>
#include <vtkm/Types.h>
#include <cassert>
#include <climits>
#include <memory>
#include <type_traits>
namespace vtkm
{
namespace cont
{
class BitField;
namespace detail
{
// Compile-time traits describing valid word types and allocation granularity
// for BitField storage.
struct BitFieldTraits
{
// Allocations will occur in blocks of BlockSize bytes. This ensures that
// power-of-two word sizes up to BlockSize will not access invalid data
// during word-based access, and that atomic values will be properly aligned.
// We use the default StorageBasic alignment for this.
constexpr static vtkm::Id BlockSize = VTKM_ALLOCATION_ALIGNMENT;
// Make sure the blocksize is at least 64 bytes. Eventually we may implement
// SIMD bit operations, and the current largest vector width is 512 bits.
VTKM_STATIC_ASSERT(BlockSize >= 64);
/// Require an unsigned integral type that is <= BlockSize bytes.
template <typename WordType>
using IsValidWordType =
std::integral_constant<bool,
/* is unsigned */
std::is_unsigned<WordType>::value &&
/* doesn't exceed blocksize */
sizeof(WordType) <= static_cast<size_t>(BlockSize) &&
/* BlockSize is a multiple of WordType */
static_cast<size_t>(BlockSize) % sizeof(WordType) == 0>;
/// Require an unsigned integral type that is <= BlockSize bytes, and is
/// supported by the specified AtomicInterface.
template <typename WordType, typename AtomicInterface>
using IsValidWordTypeAtomic = std::integral_constant<
bool,
/* is unsigned */
std::is_unsigned<WordType>::value &&
/* doesn't exceed blocksize */
sizeof(WordType) <= static_cast<size_t>(BlockSize) &&
/* BlockSize is a multiple of WordType */
static_cast<size_t>(BlockSize) % sizeof(WordType) == 0 &&
/* Supported by atomic interface */
vtkm::ListContains<typename AtomicInterface::WordTypes, WordType>::value>;
};
/// Identifies a bit in a BitField by Word and BitOffset. Note that these
/// values are dependent on the type of word used to generate the coordinate:
/// the same bit index maps to different coordinates for different WordTypes.
struct BitCoordinate
{
/// The word containing the specified bit.
vtkm::Id WordIndex;
/// The zero-indexed bit in the word.
vtkm::Int32 BitOffset; // [0, bitsInWord)
};
/// Portal for performing bit or word operations on a BitField.
///
/// This is the implementation used by BitPortal and BitPortalConst; the
/// IsConst template parameter selects whether the underlying buffer pointer
/// is mutable.
template <typename AtomicInterface_, bool IsConst>
class BitPortalBase
{
// Checks if PortalType has a GetIteratorBegin() method that returns a
// pointer.
template <typename PortalType,
typename PointerType = decltype(std::declval<PortalType>().GetIteratorBegin())>
struct HasPointerAccess : public std::is_pointer<PointerType>
{
};
// Determine whether we should store a const vs. mutable pointer:
template <typename T>
using MaybeConstPointer = typename std::conditional<IsConst, T const*, T*>::type;
using BufferType = MaybeConstPointer<void>; // void* or void const*, as appropriate
public:
/// The atomic interface used to carry out atomic operations. See
/// AtomicInterfaceExecution<Device> and AtomicInterfaceControl
using AtomicInterface = AtomicInterface_;
/// The fastest word type for performing bitwise operations through AtomicInterface.
using WordTypePreferred = typename AtomicInterface::WordTypePreferred;
/// MPL check for whether a WordType may be used for non-atomic operations.
template <typename WordType>
using IsValidWordType = BitFieldTraits::IsValidWordType<WordType>;
/// MPL check for whether a WordType may be used for atomic operations.
template <typename WordType>
using IsValidWordTypeAtomic = BitFieldTraits::IsValidWordTypeAtomic<WordType, AtomicInterface>;
VTKM_STATIC_ASSERT_MSG(IsValidWordType<WordTypeDefault>::value,
"Internal error: Default word type is invalid.");
VTKM_STATIC_ASSERT_MSG(IsValidWordType<WordTypePreferred>::value,
"Device-specific fast word type is invalid.");
VTKM_STATIC_ASSERT_MSG(IsValidWordTypeAtomic<WordTypeDefault>::value,
"Internal error: Default word type is invalid.");
VTKM_STATIC_ASSERT_MSG(IsValidWordTypeAtomic<WordTypePreferred>::value,
"Device-specific fast word type is invalid for atomic operations.");
protected:
friend class vtkm::cont::BitField;
/// Construct a BitPortal from an ArrayHandle with basic storage's portal.
template <typename PortalType>
VTKM_EXEC_CONT BitPortalBase(const PortalType& portal, vtkm::Id numberOfBits)
: Data{ portal.GetIteratorBegin() }
, NumberOfBits{ numberOfBits }
{
VTKM_STATIC_ASSERT_MSG(HasPointerAccess<PortalType>::value,
"Source portal must return a pointer from "
"GetIteratorBegin().");
}
public:
BitPortalBase() noexcept = default;
BitPortalBase(const BitPortalBase& src) noexcept = default;
BitPortalBase(BitPortalBase&& src) noexcept = default;
BitPortalBase& operator=(const BitPortalBase& src) noexcept = default;
BitPortalBase& operator=(BitPortalBase&& src) noexcept = default;
/// Returns the number of bits in the BitField.
VTKM_EXEC_CONT
vtkm::Id GetNumberOfBits() const noexcept { return this->NumberOfBits; }
/// Returns how many words of type @a WordTypePreferred exist in the dataset.
/// Note that this is rounded up and may contain partial words. See
/// also GetFinalWordMask to handle the trailing partial word.
template <typename WordType = WordTypePreferred>
VTKM_EXEC_CONT vtkm::Id GetNumberOfWords() const noexcept
{
VTKM_STATIC_ASSERT(IsValidWordType<WordType>::value);
static constexpr vtkm::Id WordSize = static_cast<vtkm::Id>(sizeof(WordType));
static constexpr vtkm::Id WordBits = WordSize * CHAR_BIT;
return (this->NumberOfBits + WordBits - 1) / WordBits;
}
/// Return a mask in which the valid bits in the final word (of type @a
/// WordType) are set to 1.
template <typename WordType = WordTypePreferred>
VTKM_EXEC_CONT WordType GetFinalWordMask() const noexcept
{
if (this->NumberOfBits == 0)
{
return WordType{ 0 };
}
static constexpr vtkm::Int32 BitsPerWord =
static_cast<vtkm::Int32>(sizeof(WordType) * CHAR_BIT);
const auto maxBit = this->NumberOfBits - 1;
const auto coord = this->GetBitCoordinateFromIndex<WordType>(maxBit);
// Shift all-ones right so that exactly (BitOffset + 1) low bits remain set.
const vtkm::Int32 shift = BitsPerWord - coord.BitOffset - 1;
return (~WordType{ 0 }) >> shift;
}
/// Given a bit index, compute a @a BitCoordinate that identifies the
/// corresponding word index and bit offset.
template <typename WordType = WordTypePreferred>
VTKM_EXEC_CONT static BitCoordinate GetBitCoordinateFromIndex(vtkm::Id bitIdx) noexcept
{
VTKM_STATIC_ASSERT(IsValidWordType<WordType>::value);
static constexpr vtkm::Id BitsPerWord = static_cast<vtkm::Id>(sizeof(WordType) * CHAR_BIT);
return { static_cast<vtkm::Id>(bitIdx / BitsPerWord),
static_cast<vtkm::Int32>(bitIdx % BitsPerWord) };
}
/// Set the bit at @a bitIdx to @a val. This method is not thread-safe --
/// threads modifying bits nearby may interfere with this operation.
/// Additionally, this should not be used for synchronization, as there are
/// no memory ordering requirements. See SetBitAtomic for those usecases.
VTKM_EXEC_CONT
void SetBit(vtkm::Id bitIdx, bool val) const noexcept
{
using WordType = WordTypePreferred;
const auto coord = this->GetBitCoordinateFromIndex<WordType>(bitIdx);
const auto mask = WordType(1) << coord.BitOffset;
auto* const wordAddr = this->GetWordAddress<WordType>(coord.WordIndex);
if (val)
{
*wordAddr |= mask;
}
else
{
*wordAddr &= ~mask;
}
}
/// Set the bit at @a bitIdx to @a val using atomic operations. This method
/// is thread-safe and guarantees, at minimum, "release" memory ordering.
VTKM_EXEC_CONT
void SetBitAtomic(vtkm::Id bitIdx, bool val) const
{
using WordType = WordTypePreferred;
const auto coord = this->GetBitCoordinateFromIndex<WordType>(bitIdx);
const auto mask = WordType(1) << coord.BitOffset;
if (val)
{
this->OrWordAtomic(coord.WordIndex, mask);
}
else
{
this->AndWordAtomic(coord.WordIndex, ~mask);
}
}
/// Return whether or not the bit at @a bitIdx is set. Note that this uses
/// non-atomic loads and thus should not be used for synchronization.
VTKM_EXEC_CONT
bool GetBit(vtkm::Id bitIdx) const noexcept
{
using WordType = WordTypePreferred;
const auto coord = this->GetBitCoordinateFromIndex<WordType>(bitIdx);
const auto word = this->GetWord<WordType>(coord.WordIndex);
const auto mask = WordType(1) << coord.BitOffset;
return (word & mask) != WordType(0);
}
/// Return whether or not the bit at @a bitIdx is set using atomic loads.
/// This method is thread safe and guarantees, at minimum, "acquire" memory
/// ordering.
VTKM_EXEC_CONT
bool GetBitAtomic(vtkm::Id bitIdx) const
{
using WordType = WordTypePreferred;
const auto coord = this->GetBitCoordinateFromIndex<WordType>(bitIdx);
const auto word = this->GetWordAtomic<WordType>(coord.WordIndex);
const auto mask = WordType(1) << coord.BitOffset;
return (word & mask) != WordType(0);
}
/// Set the word (of type @a WordType) at @a wordIdx to @a word using
/// non-atomic operations.
template <typename WordType = WordTypePreferred>
VTKM_EXEC_CONT void SetWord(vtkm::Id wordIdx, WordType word) const noexcept
{
*this->GetWordAddress<WordType>(wordIdx) = word;
}
/// Set the word (of type @a WordType) at @a wordIdx to @a word using atomic
/// operations. The store guarantees, at minimum, "release" memory ordering.
template <typename WordType = WordTypePreferred>
VTKM_EXEC_CONT void SetWordAtomic(vtkm::Id wordIdx, WordType word) const
{
VTKM_STATIC_ASSERT_MSG(IsValidWordTypeAtomic<WordType>::value,
"Requested WordType does not support atomic"
" operations on target execution platform.");
AtomicInterface::Store(this->GetWordAddress<WordType>(wordIdx), word);
}
/// Get the word (of type @a WordType) at @a wordIdx using non-atomic
/// operations.
template <typename WordType = WordTypePreferred>
VTKM_EXEC_CONT WordType GetWord(vtkm::Id wordIdx) const noexcept
{
return *this->GetWordAddress<WordType>(wordIdx);
}
/// Get the word (of type @a WordType) at @a wordIdx using an atomic read with,
/// at minimum, "acquire" memory ordering.
template <typename WordType = WordTypePreferred>
VTKM_EXEC_CONT WordType GetWordAtomic(vtkm::Id wordIdx) const
{
VTKM_STATIC_ASSERT_MSG(IsValidWordTypeAtomic<WordType>::value,
"Requested WordType does not support atomic"
" operations on target execution platform.");
return AtomicInterface::Load(this->GetWordAddress<WordType>(wordIdx));
}
/// Toggle the bit at @a bitIdx, returning the original value. This method
/// uses atomic operations and a full memory barrier.
VTKM_EXEC_CONT
bool NotBitAtomic(vtkm::Id bitIdx) const
{
using WordType = WordTypePreferred;
const auto coord = this->GetBitCoordinateFromIndex<WordType>(bitIdx);
const auto mask = WordType(1) << coord.BitOffset;
// XOR with a single-bit mask flips exactly that bit in the word.
const auto oldWord = this->XorWordAtomic(coord.WordIndex, mask);
return (oldWord & mask) != WordType(0);
}
/// Perform a bitwise "not" operation on the word at @a wordIdx, returning the
/// original word. This uses atomic operations and a full memory barrier.
template <typename WordType = WordTypePreferred>
VTKM_EXEC_CONT WordType NotWordAtomic(vtkm::Id wordIdx) const
{
VTKM_STATIC_ASSERT_MSG(IsValidWordTypeAtomic<WordType>::value,
"Requested WordType does not support atomic"
" operations on target execution platform.");
WordType* addr = this->GetWordAddress<WordType>(wordIdx);
return AtomicInterface::Not(addr);
}
/// Perform an "and" operation between the bit at @a bitIdx and @a val,
/// returning the original value at @a bitIdx. This method uses atomic
/// operations and a full memory barrier.
VTKM_EXEC_CONT
bool AndBitAtomic(vtkm::Id bitIdx, bool val) const
{
using WordType = WordTypePreferred;
const auto coord = this->GetBitCoordinateFromIndex<WordType>(bitIdx);
const auto bitmask = WordType(1) << coord.BitOffset;
// wordmask is all 1's, except for BitOffset which is (val ? 1 : 0)
const auto wordmask = val ? ~WordType(0) : ~bitmask;
const auto oldWord = this->AndWordAtomic(coord.WordIndex, wordmask);
return (oldWord & bitmask) != WordType(0);
}
/// Perform an "and" operation between the word at @a wordIdx and @a wordMask,
/// returning the original word at @a wordIdx. This method uses atomic
/// operations and a full memory barrier.
template <typename WordType = WordTypePreferred>
VTKM_EXEC_CONT WordType AndWordAtomic(vtkm::Id wordIdx, WordType wordmask) const
{
VTKM_STATIC_ASSERT_MSG(IsValidWordTypeAtomic<WordType>::value,
"Requested WordType does not support atomic"
" operations on target execution platform.");
WordType* addr = this->GetWordAddress<WordType>(wordIdx);
return AtomicInterface::And(addr, wordmask);
}
/// Perform an "or" operation between the bit at @a bitIdx and @a val,
/// returning the original value at @a bitIdx. This method uses atomic
/// operations and a full memory barrier.
VTKM_EXEC_CONT
bool OrBitAtomic(vtkm::Id bitIdx, bool val) const
{
using WordType = WordTypePreferred;
const auto coord = this->GetBitCoordinateFromIndex<WordType>(bitIdx);
const auto bitmask = WordType(1) << coord.BitOffset;
// wordmask is all 0's, except for BitOffset which is (val ? 1 : 0)
const auto wordmask = val ? bitmask : WordType(0);
const auto oldWord = this->OrWordAtomic(coord.WordIndex, wordmask);
return (oldWord & bitmask) != WordType(0);
}
/// Perform an "or" operation between the word at @a wordIdx and @a wordMask,
/// returning the original word at @a wordIdx. This method uses atomic
/// operations and a full memory barrier.
template <typename WordType = WordTypePreferred>
VTKM_EXEC_CONT WordType OrWordAtomic(vtkm::Id wordIdx, WordType wordmask) const
{
VTKM_STATIC_ASSERT_MSG(IsValidWordTypeAtomic<WordType>::value,
"Requested WordType does not support atomic"
" operations on target execution platform.");
WordType* addr = this->GetWordAddress<WordType>(wordIdx);
return AtomicInterface::Or(addr, wordmask);
}
/// Perform an "xor" operation between the bit at @a bitIdx and @a val,
/// returning the original value at @a bitIdx. This method uses atomic
/// operations and a full memory barrier.
VTKM_EXEC_CONT
bool XorBitAtomic(vtkm::Id bitIdx, bool val) const
{
using WordType = WordTypePreferred;
const auto coord = this->GetBitCoordinateFromIndex<WordType>(bitIdx);
const auto bitmask = WordType(1) << coord.BitOffset;
// wordmask is all 0's, except for BitOffset which is (val ? 1 : 0)
const auto wordmask = val ? bitmask : WordType(0);
const auto oldWord = this->XorWordAtomic(coord.WordIndex, wordmask);
return (oldWord & bitmask) != WordType(0);
}
/// Perform an "xor" operation between the word at @a wordIdx and @a wordMask,
/// returning the original word at @a wordIdx. This method uses atomic
/// operations and a full memory barrier.
template <typename WordType = WordTypePreferred>
VTKM_EXEC_CONT WordType XorWordAtomic(vtkm::Id wordIdx, WordType wordmask) const
{
VTKM_STATIC_ASSERT_MSG(IsValidWordTypeAtomic<WordType>::value,
"Requested WordType does not support atomic"
" operations on target execution platform.");
WordType* addr = this->GetWordAddress<WordType>(wordIdx);
return AtomicInterface::Xor(addr, wordmask);
}
/// Perform an atomic compare-and-swap operation on the bit at @a bitIdx.
/// If the value in memory is equal to @a expectedBit, it is replaced with
/// the value of @a newBit and the original value of the bit is returned as a
/// boolean. This method implements a full memory barrier around the atomic
/// operation.
VTKM_EXEC_CONT
bool CompareAndSwapBitAtomic(vtkm::Id bitIdx, bool newBit, bool expectedBit) const
{
using WordType = WordTypePreferred;
const auto coord = this->GetBitCoordinateFromIndex<WordType>(bitIdx);
const auto bitmask = WordType(1) << coord.BitOffset;
WordType oldWord;
WordType newWord;
do
{
oldWord = this->GetWord<WordType>(coord.WordIndex);
bool oldBitSet = (oldWord & bitmask) != WordType(0);
if (oldBitSet != expectedBit)
{ // The bit-of-interest does not match what we expected.
return oldBitSet;
}
else if (oldBitSet == newBit)
{ // The bit hasn't changed, but also already matches newVal. We're done.
return expectedBit;
}
// Compute the new word
newWord = oldWord ^ bitmask;
} // CAS loop to resolve any conflicting changes to other bits in the word.
while (this->CompareAndSwapWordAtomic(coord.WordIndex, newWord, oldWord) != oldWord);
return expectedBit;
}
/// Perform an atomic compare-and-swap operation on the word at @a wordIdx.
/// If the word in memory is equal to @a expectedWord, it is replaced with
/// the value of @a newWord and the original word is returned. This method
/// implements a full memory barrier around the atomic operation.
template <typename WordType = WordTypePreferred>
VTKM_EXEC_CONT WordType CompareAndSwapWordAtomic(vtkm::Id wordIdx,
WordType newWord,
WordType expected) const
{
VTKM_STATIC_ASSERT_MSG(IsValidWordTypeAtomic<WordType>::value,
"Requested WordType does not support atomic"
" operations on target execution platform.");
WordType* addr = this->GetWordAddress<WordType>(wordIdx);
return AtomicInterface::CompareAndSwap(addr, newWord, expected);
}
private:
// Reinterpret the buffer as an array of WordType and return the address of
// the word at @a wordId. No bounds checking is performed.
template <typename WordType>
VTKM_EXEC_CONT MaybeConstPointer<WordType> GetWordAddress(vtkm::Id wordId) const noexcept
{
VTKM_STATIC_ASSERT(IsValidWordType<WordType>::value);
return reinterpret_cast<MaybeConstPointer<WordType>>(this->Data) + wordId;
}
// Raw (possibly const) pointer to the bit buffer.
BufferType Data{ nullptr };
// Logical size of the field in bits (allocated storage may be larger).
vtkm::Id NumberOfBits{ 0 };
};
/// Mutable bit portal: allows bit/word reads and writes.
template <typename AtomicOps>
using BitPortal = BitPortalBase<AtomicOps, false>;
/// Read-only bit portal: the underlying buffer pointer is const.
template <typename AtomicOps>
using BitPortalConst = BitPortalBase<AtomicOps, true>;
} // end namespace detail
class BitField
{
static constexpr vtkm::Id BlockSize = detail::BitFieldTraits::BlockSize;
public:
/// The type array handle used to store the bit data internally:
using ArrayHandleType = ArrayHandle<WordTypeDefault, StorageTagBasic>;
/// The BitPortal used in the control environment.
using PortalControl = detail::BitPortal<vtkm::cont::internal::AtomicInterfaceControl>;
/// A read-only BitPortal used in the control environment.
using PortalConstControl = detail::BitPortalConst<vtkm::cont::internal::AtomicInterfaceControl>;
/// Device-specific portal and atomic-interface types for a BitField on the
/// given device adapter.
template <typename Device>
struct ExecutionTypes
{
/// The AtomicInterfaceExecution implementation used by the specified device.
using AtomicInterface = vtkm::cont::internal::AtomicInterfaceExecution<Device>;
/// The preferred word type used by the specified device.
using WordTypePreferred = typename AtomicInterface::WordTypePreferred;
/// A BitPortal that is usable on the specified device.
using Portal = detail::BitPortal<AtomicInterface>;
/// A read-only BitPortal that is usable on the specified device.
using PortalConst = detail::BitPortalConst<AtomicInterface>;
};
/// Check whether a word type is valid for non-atomic operations.
template <typename WordType>
using IsValidWordType = detail::BitFieldTraits::IsValidWordType<WordType>;
/// Check whether a word type is valid for atomic operations on a specific
/// device.
template <typename WordType, typename Device>
using IsValidWordTypeAtomic = detail::BitFieldTraits::
IsValidWordTypeAtomic<WordType, vtkm::cont::internal::AtomicInterfaceExecution<Device>>;
/// Check whether a word type is valid for atomic operations from the control
/// environment.
template <typename WordType>
using IsValidWordTypeAtomicControl =
detail::BitFieldTraits::IsValidWordTypeAtomic<WordType,
vtkm::cont::internal::AtomicInterfaceControl>;
/// Construct an empty BitField. The internal state is held in a
/// shared_ptr, so copies of a BitField share the same storage.
VTKM_CONT BitField()
: Internals{ std::make_shared<InternalStruct>() }
{
}
VTKM_CONT BitField(const BitField& src) = default;
VTKM_CONT BitField(BitField&& src) noexcept = default;
VTKM_CONT ~BitField() = default;
VTKM_CONT BitField& operator=(const BitField& src) = default;
VTKM_CONT BitField& operator=(BitField&& src) noexcept = default;
/// Equality compares identity, not contents: two BitFields are equal only
/// when they share the same internal state object.
VTKM_CONT
bool operator==(const BitField& rhs) const { return this->Internals == rhs.Internals; }
VTKM_CONT
bool operator!=(const BitField& rhs) const { return this->Internals != rhs.Internals; }
/// Return the internal ArrayHandle used to store the BitField.
VTKM_CONT
ArrayHandleType& GetData() { return this->Internals->Data; }
/// Return the internal ArrayHandle used to store the BitField.
VTKM_CONT
const ArrayHandleType& GetData() const { return this->Internals->Data; }
/// Return the number of bits stored by this BitField.
VTKM_CONT
vtkm::Id GetNumberOfBits() const { return this->Internals->NumberOfBits; }
/// Return the number of words (of @a WordType) stored in this bit field.
/// The count is rounded up, so the final word may be only partially used.
template <typename WordType>
VTKM_CONT vtkm::Id GetNumberOfWords() const
{
VTKM_STATIC_ASSERT(IsValidWordType<WordType>::value);
static constexpr vtkm::Id WordBits = static_cast<vtkm::Id>(sizeof(WordType) * CHAR_BIT);
return (this->Internals->NumberOfBits + WordBits - 1) / WordBits;
}
/// Allocate the requested number of bits. Storage is blocked up to a
/// multiple of BlockSize bytes (see BitsToAllocatedStorageWords), so
/// slightly more memory than requested may be reserved.
VTKM_CONT
void Allocate(vtkm::Id numberOfBits)
{
const vtkm::Id numWords = this->BitsToAllocatedStorageWords(numberOfBits);
VTKM_LOG_F(vtkm::cont::LogLevel::MemCont,
"BitField Allocation: %llu bits, blocked up to %s.",
static_cast<unsigned long long>(numberOfBits),
vtkm::cont::GetSizeString(
static_cast<vtkm::UInt64>(static_cast<size_t>(numWords) * sizeof(WordTypeDefault)))
.c_str());
this->Internals->Data.Allocate(numWords);
this->Internals->NumberOfBits = numberOfBits;
}
/// Reduce this bit field to @a numberOfBits, shrinking the backing word
/// storage to the (block-padded) size needed for the new bit count.
VTKM_CONT
void Shrink(vtkm::Id numberOfBits)
{
this->Internals->Data.Shrink(this->BitsToAllocatedStorageWords(numberOfBits));
this->Internals->NumberOfBits = numberOfBits;
}
/// Release all execution-side resources held by this BitField. The
/// control-side copy of the data is left intact.
VTKM_CONT
void ReleaseResourcesExecution() { this->Internals->Data.ReleaseResourcesExecution(); }
/// Release all resources held by this BitField and reset it to an empty
/// (zero-bit) state.
VTKM_CONT
void ReleaseResources()
{
this->Internals->Data.ReleaseResources();
this->Internals->NumberOfBits = 0;
}
/// Force the control array to sync with the last-used device.
VTKM_CONT
void SyncControlArray() const { this->Internals->Data.SyncControlArray(); }
/// The id of the device where the most up-to-date copy of the data is
/// currently resident. If the data is on the host, DeviceAdapterTagUndefined
/// is returned. Forwards to the backing ArrayHandle.
VTKM_CONT
DeviceAdapterId GetDeviceAdapterId() const { return this->Internals->Data.GetDeviceAdapterId(); }
/// Get a portal to the data that is usable from the control environment.
/// The bit portal wraps the backing array's control portal together with
/// the current bit count.
VTKM_CONT
PortalControl GetPortalControl()
{
return PortalControl{ this->Internals->Data.GetPortalControl(), this->Internals->NumberOfBits };
}
/// Get a read-only portal to the data that is usable from the control
/// environment.
VTKM_CONT
PortalConstControl GetPortalConstControl() const
{
return PortalConstControl{ this->Internals->Data.GetPortalConstControl(),
this->Internals->NumberOfBits };
}
/// Prepares this BitField to be used as an input to an operation in the
/// execution environment. If necessary, copies data to the execution
/// environment. Can throw an exception if this BitField does not yet contain
/// any data. Returns a portal that can be used in code running in the
/// execution environment.
template <typename DeviceAdapterTag>
VTKM_CONT typename ExecutionTypes<DeviceAdapterTag>::PortalConst PrepareForInput(
DeviceAdapterTag device) const
{
using PortalType = typename ExecutionTypes<DeviceAdapterTag>::PortalConst;
return PortalType{ this->Internals->Data.PrepareForInput(device),
this->Internals->NumberOfBits };
}
/// Prepares (allocates) this BitField to be used as an output from an
/// operation in the execution environment. The internal state of this class
/// is set to have valid data in the execution BitField with the assumption
/// that the array will be filled soon (i.e. before any other methods of this
/// object are called). Returns a portal that can be used in code running in
/// the execution environment.
///
/// Note: although this method is const, it updates NumberOfBits; the state
/// lives behind the shared Internals pointer, so all copies observe the new
/// size.
template <typename DeviceAdapterTag>
VTKM_CONT typename ExecutionTypes<DeviceAdapterTag>::Portal PrepareForOutput(
vtkm::Id numBits,
DeviceAdapterTag device) const
{
using PortalType = typename ExecutionTypes<DeviceAdapterTag>::Portal;
// Allocation is padded up to whole BlockSize-byte blocks:
const vtkm::Id numWords = this->BitsToAllocatedStorageWords(numBits);
VTKM_LOG_F(vtkm::cont::LogLevel::MemExec,
"BitField Allocation: %llu bits, blocked up to %s.",
static_cast<unsigned long long>(numBits),
vtkm::cont::GetSizeString(
static_cast<vtkm::UInt64>(static_cast<size_t>(numWords) * sizeof(WordTypeDefault)))
.c_str());
auto portal = this->Internals->Data.PrepareForOutput(numWords, device);
this->Internals->NumberOfBits = numBits;
return PortalType{ portal, numBits };
}
/// Prepares this BitField to be used in an in-place operation (both as input
/// and output) in the execution environment. If necessary, copies data to
/// the execution environment. Can throw an exception if this BitField does
/// not yet contain any data. Returns a portal that can be used in code
/// running in the execution environment.
template <typename DeviceAdapterTag>
VTKM_CONT typename ExecutionTypes<DeviceAdapterTag>::Portal PrepareForInPlace(
DeviceAdapterTag device) const
{
using PortalType = typename ExecutionTypes<DeviceAdapterTag>::Portal;
return PortalType{ this->Internals->Data.PrepareForInPlace(device),
this->Internals->NumberOfBits };
}
private:
/// Convert a bit count into a storage word count. The byte count is rounded
/// up to a whole number of BlockSize-byte blocks before being converted into
/// words of WordTypeDefault.
VTKM_CONT
static vtkm::Id BitsToAllocatedStorageWords(vtkm::Id numBits)
{
static constexpr vtkm::Id WordSizeInBytes = static_cast<vtkm::Id>(sizeof(WordTypeDefault));
// Bits -> bytes, rounding up for a partial trailing byte:
const vtkm::Id rawBytes = (numBits + CHAR_BIT - 1) / CHAR_BIT;
// Bytes -> whole BlockSize-byte blocks, rounding up:
const vtkm::Id paddedBytes = ((rawBytes + BlockSize - 1) / BlockSize) * BlockSize;
// Padded bytes -> words of the internal storage type:
return paddedBytes / WordSizeInBytes;
}
/// Shared state for a BitField. All copies of a BitField point at one
/// InternalStruct through the Internals shared_ptr.
struct VTKM_ALWAYS_EXPORT InternalStruct
{
ArrayHandleType Data;
// In-class initializer guarantees a zero bit count no matter how an
// InternalStruct is constructed. (make_shared<InternalStruct>() happens to
// value-initialize, but a plain default-initialization would otherwise
// leave this member indeterminate.)
vtkm::Id NumberOfBits = 0;
};
std::shared_ptr<InternalStruct> Internals;
};
}
} // end namespace vtkm::cont
#endif // vtk_m_cont_BitField_h

@ -49,6 +49,7 @@ set(headers
ArrayRangeCompute.h
AssignerMultiBlock.h
AtomicArray.h
BitField.h
BoundsCompute.h
BoundsGlobalCompute.h
CastAndCall.h

@ -52,6 +52,15 @@ template <class DeviceAdapterTag>
struct DeviceAdapterAlgorithm
#ifdef VTKM_DOXYGEN_ONLY
{
/// \brief Create a unique, unsorted list of indices denoting which bits are
/// set in a bitfield.
///
/// Returns the total number of set bits.
template <typename IndicesStorage>
VTKM_CONT static vtkm::Id BitFieldToUnorderedSet(
const vtkm::cont::BitField& bits,
vtkm::cont::ArrayHandle<Id, IndicesStorage>& indices);
/// \brief Copy the contents of one ArrayHandle to another
///
/// Copies the contents of \c input to \c output. The array \c output will be
@ -660,9 +669,18 @@ public:
/// The class provide the actual implementation used by
/// vtkm::cont::DeviceAdapterAtomicArrayImplementation.
///
/// TODO combine this with AtomicInterfaceExecution.
template <typename T, typename DeviceTag>
class DeviceAdapterAtomicArrayImplementation;
/// \brief Class providing device-specific support for atomic operations.
///
/// AtomicInterfaceControl provides atomic operations for the control
/// environment, and may be subclassed to implement the device interface when
/// appropriate for a CPU-based device.
template <typename DeviceTag>
class AtomicInterfaceExecution;
/// \brief Class providing device-specific support for selecting the optimal
/// Task type for a given worklet.
///

@ -0,0 +1,105 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2019 UT-Battelle, LLC.
// Copyright 2019 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#ifndef vtk_m_cont_cuda_internal_AtomicInterfaceExecutionCuda_h
#define vtk_m_cont_cuda_internal_AtomicInterfaceExecutionCuda_h
#include <vtkm/cont/cuda/internal/DeviceAdapterTagCuda.h>
#include <vtkm/cont/internal/AtomicInterfaceExecution.h>
#include <vtkm/ListTag.h>
#include <vtkm/Types.h>
namespace vtkm
{
namespace cont
{
namespace internal
{
/// CUDA specialization of AtomicInterfaceExecution. Provides bitwise atomic
/// operations (plus atomic-style Load/Store and CompareAndSwap) on words in
/// device memory for use inside CUDA kernels.
template <>
class AtomicInterfaceExecution<DeviceAdapterTagCuda>
{
public:
// Note: There are 64-bit atomics available, but not on all devices. Stick
// with 32-bit only until we require compute capability 3.5+
using WordTypes = vtkm::ListTagBase<vtkm::UInt32>;
using WordTypePreferred = vtkm::UInt32;
// Generates the full atomic interface for `type`. Loads/stores go through a
// volatile pointer to bypass caching, and __threadfence() calls order the
// surrounding memory accesses around each atomic operation. The fetch-style
// ops (And/Or/Xor/CompareAndSwap) return the word's previous value.
#define VTKM_ATOMIC_OPS_FOR_TYPE(type) \
VTKM_SUPPRESS_EXEC_WARNINGS __device__ static type Load(const type* addr) \
{ \
const volatile type* vaddr = addr; /* volatile to bypass cache*/ \
const type value = *vaddr; \
/* fence to ensure that dependent reads are correctly ordered */ \
__threadfence(); \
return value; \
} \
VTKM_SUPPRESS_EXEC_WARNINGS __device__ static void Store(type* addr, type value) \
{ \
volatile type* vaddr = addr; /* volatile to bypass cache */ \
/* fence to ensure that previous non-atomic stores are visible to other threads */ \
__threadfence(); \
*vaddr = value; \
} \
VTKM_SUPPRESS_EXEC_WARNINGS __device__ static type Not(type* addr) \
{ \
return AtomicInterfaceExecution::Xor(addr, static_cast<type>(~type{ 0u })); \
} \
VTKM_SUPPRESS_EXEC_WARNINGS __device__ static type And(type* addr, type mask) \
{ \
__threadfence(); \
auto result = atomicAnd(addr, mask); \
__threadfence(); \
return result; \
} \
VTKM_SUPPRESS_EXEC_WARNINGS __device__ static type Or(type* addr, type mask) \
{ \
__threadfence(); \
auto result = atomicOr(addr, mask); \
__threadfence(); \
return result; \
} \
VTKM_SUPPRESS_EXEC_WARNINGS __device__ static type Xor(type* addr, type mask) \
{ \
__threadfence(); \
auto result = atomicXor(addr, mask); \
__threadfence(); \
return result; \
} \
VTKM_SUPPRESS_EXEC_WARNINGS __device__ static type CompareAndSwap( \
type* addr, type newWord, type expected) \
{ \
__threadfence(); \
auto result = atomicCAS(addr, expected, newWord); \
__threadfence(); \
return result; \
}
// Instantiate the interface for the only supported word type:
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt32)
#undef VTKM_ATOMIC_OPS_FOR_TYPE
};
}
}
} // end namespace vtkm::cont::internal
#endif // vtk_m_cont_cuda_internal_AtomicInterfaceExecutionCuda_h

@ -20,6 +20,7 @@
set(headers
ArrayManagerExecutionCuda.h
AtomicInterfaceExecutionCuda.h
CudaAllocator.h
DeviceAdapterAlgorithmCuda.h
DeviceAdapterAtomicArrayImplementationCuda.h

@ -26,6 +26,7 @@
#include <vtkm/UnaryPredicates.h>
#include <vtkm/cont/ArrayHandle.h>
#include <vtkm/cont/BitField.h>
#include <vtkm/cont/DeviceAdapterAlgorithm.h>
#include <vtkm/cont/ErrorExecution.h>
#include <vtkm/cont/Logging.h>
@ -35,6 +36,7 @@
#include <vtkm/cont/cuda/ErrorCuda.h>
#include <vtkm/cont/cuda/internal/ArrayManagerExecutionCuda.h>
#include <vtkm/cont/cuda/internal/AtomicInterfaceExecutionCuda.h>
#include <vtkm/cont/cuda/internal/DeviceAdapterAtomicArrayImplementationCuda.h>
#include <vtkm/cont/cuda/internal/DeviceAdapterRuntimeDetectorCuda.h>
#include <vtkm/cont/cuda/internal/DeviceAdapterTagCuda.h>
@ -54,8 +56,7 @@
// Disable warnings we check vtkm for but Thrust does not.
VTKM_THIRDPARTY_PRE_INCLUDE
//This is required to be first so that we get patches for thrust included
//in the correct order
#include <cooperative_groups.h>
#include <cuda.h>
#include <thrust/advance.h>
#include <thrust/binary_search.h>
@ -71,6 +72,9 @@ VTKM_THIRDPARTY_PRE_INCLUDE
#include <vtkm/exec/cuda/internal/ThrustPatches.h>
VTKM_THIRDPARTY_POST_INCLUDE
#include <limits>
#include <memory>
namespace vtkm
{
namespace cont
@ -145,6 +149,22 @@ struct CastPortal
VTKM_EXEC
ValueType Get(vtkm::Id index) const { return static_cast<OutValueType>(this->Portal.Get(index)); }
};
/// Deleter for memory allocated with cudaMalloc; used by CudaUniquePtr so
/// the device allocation is released via RAII.
struct CudaFreeFunctor
{
void operator()(void* ptr) const { VTKM_CUDA_CALL(cudaFree(ptr)); }
};
// Owning pointer to device memory; frees it with cudaFree on destruction.
template <typename T>
using CudaUniquePtr = std::unique_ptr<T, CudaFreeFunctor>;
// Allocate `numElements` objects of type T in device memory and wrap the
// allocation in a CudaUniquePtr. Errors surface through VTKM_CUDA_CALL.
template <typename T>
CudaUniquePtr<T> make_CudaUniquePtr(std::size_t numElements)
{
  T* devicePtr = nullptr;
  VTKM_CUDA_CALL(cudaMalloc(&devicePtr, numElements * sizeof(T)));
  return CudaUniquePtr<T>{ devicePtr };
}
}
} // end namespace cuda::internal
@ -159,6 +179,132 @@ struct DeviceAdapterAlgorithm<vtkm::cont::DeviceAdapterTagCuda>
#ifndef VTKM_CUDA
private:
#endif
/// Kernel functor that scans a BitField one word per instance and writes the
/// index of every set bit to the output portal (in no particular order). The
/// total number of set bits is accumulated in a device-side counter that
/// Finalize() copies back to the host.
template <typename BitsPortal, typename IndicesPortal, typename GlobalPopCountType>
struct BitFieldToUnorderedSetFunctor : public vtkm::exec::FunctorBase
{
  VTKM_STATIC_ASSERT_MSG(VTKM_PASS_COMMAS(std::is_same<GlobalPopCountType, vtkm::Int32>::value ||
                                          std::is_same<GlobalPopCountType, vtkm::UInt32>::value ||
                                          std::is_same<GlobalPopCountType, vtkm::UInt64>::value),
                         "Unsupported GlobalPopCountType. Must support CUDA atomicAdd.");
  using Word = typename BitsPortal::WordTypePreferred;
  VTKM_STATIC_ASSERT(
    VTKM_PASS_COMMAS(std::is_same<typename IndicesPortal::ValueType, vtkm::Id>::value));
  /// @param input Portal to the bits to scan.
  /// @param output Portal receiving the indices of set bits.
  /// @param globalPopCount Device pointer to the shared set-bit counter.
  VTKM_CONT
  BitFieldToUnorderedSetFunctor(const BitsPortal& input,
                                const IndicesPortal& output,
                                GlobalPopCountType* globalPopCount)
    : Input{ input }
    , Output{ output }
    , GlobalPopCount{ globalPopCount }
    , FinalWordIndex{ input.GetNumberOfWords() - 1 }
    , FinalWordMask(input.GetFinalWordMask())
  {
  }
  ~BitFieldToUnorderedSetFunctor() {}
  /// Zero the device-side counter. Must run before the kernel is scheduled.
  VTKM_CONT void Initialize()
  {
    assert(this->GlobalPopCount != nullptr);
    VTKM_CUDA_CALL(cudaMemset(this->GlobalPopCount, 0, sizeof(GlobalPopCountType)));
  }
  VTKM_SUPPRESS_EXEC_WARNINGS
  __device__ void operator()(vtkm::Id wordIdx) const
  {
    Word word = this->Input.GetWord(wordIdx);
    // The last word may be partial -- mask out trailing bits if needed.
    const Word mask = wordIdx == this->FinalWordIndex ? this->FinalWordMask : ~Word{ 0 };
    word &= mask;
    if (word != 0)
    {
      this->LocalPopCount = vtkm::CountSetBits(word);
      this->ReduceAllocate();
      vtkm::Id firstBitIdx = wordIdx * sizeof(Word) * CHAR_BIT;
      do
      {
        // Find next bit. FindFirstSetBit's result is indexed starting at 1.
        vtkm::Int32 bit = vtkm::FindFirstSetBit(word) - 1;
        vtkm::Id outIdx = this->GetNextOutputIndex();
        // Write index of bit
        this->Output.Set(outIdx, firstBitIdx + bit);
        // Clear the bit. Shift a value of Word type rather than the int
        // literal 1: `1 << bit` shifts a signed int, which overflows (UB)
        // once bit reaches 31 and cannot address bits >= 32 of a wider word.
        word ^= (Word{ 1 } << bit);
      } while (word != 0); // have bits
    }
  }
  /// Copy the final set-bit count back to the host. Call only after the
  /// kernel has completed (e.g. after Synchronize()).
  VTKM_CONT vtkm::Id Finalize() const
  {
    assert(this->GlobalPopCount != nullptr);
    GlobalPopCountType result;
    VTKM_CUDA_CALL(cudaMemcpy(
      &result, this->GlobalPopCount, sizeof(GlobalPopCountType), cudaMemcpyDeviceToHost));
    return static_cast<vtkm::Id>(result);
  }
private:
  // Every thread with a non-zero local popcount calls this function, which
  // computes the total popcount for the coalesced threads and allocates
  // a contiguous block in the output by atomically increasing the global
  // popcount.
  VTKM_SUPPRESS_EXEC_WARNINGS
  __device__ void ReduceAllocate() const
  {
    const auto activeLanes = cooperative_groups::coalesced_threads();
    const int activeRank = activeLanes.thread_rank();
    const int activeSize = activeLanes.size();
    // Reduction value:
    vtkm::Int32 rVal = this->LocalPopCount;
    for (int delta = 1; delta < activeSize; delta *= 2)
    {
      rVal += activeLanes.shfl_down(rVal, delta);
    }
    if (activeRank == 0)
    {
      this->AllocationHead =
        atomicAdd(this->GlobalPopCount, static_cast<GlobalPopCountType>(rVal));
    }
    // Broadcast the block start from lane 0 to all coalesced lanes:
    this->AllocationHead = activeLanes.shfl(this->AllocationHead, 0);
  }
  // The global output allocation is written to by striding the writes across
  // the warp lanes, allowing the writes to global memory to be coalesced.
  VTKM_SUPPRESS_EXEC_WARNINGS
  __device__ vtkm::Id GetNextOutputIndex() const
  {
    // Only lanes with unwritten output indices left will call this method,
    // so just check the coalesced threads:
    const auto activeLanes = cooperative_groups::coalesced_threads();
    const int activeRank = activeLanes.thread_rank();
    const int activeSize = activeLanes.size();
    vtkm::Id nextIdx = static_cast<vtkm::Id>(this->AllocationHead + activeRank);
    this->AllocationHead += activeSize;
    return nextIdx;
  }
  const BitsPortal Input;
  const IndicesPortal Output;
  GlobalPopCountType* GlobalPopCount; // non-owning device pointer
  mutable vtkm::UInt64 AllocationHead{ 0 };
  mutable vtkm::Int32 LocalPopCount{ 0 };
  // Used to mask trailing bits in the last word.
  vtkm::Id FinalWordIndex{ 0 };
  Word FinalWordMask{ 0 };
};
template <class InputPortal, class OutputPortal>
VTKM_CONT static void CopyPortal(const InputPortal& input, const OutputPortal& output)
{
@ -742,9 +888,43 @@ private:
}
}
/// Run BitFieldToUnorderedSetFunctor over @a bits, writing set-bit indices
/// to @a indices. One functor instance is scheduled per word in the field.
/// Returns the total number of set bits found.
template <typename GlobalPopCountType, typename BitsPortal, typename IndicesPortal>
VTKM_CONT static vtkm::Id BitFieldToUnorderedSetPortal(const BitsPortal& bits,
const IndicesPortal& indices)
{
using Functor = BitFieldToUnorderedSetFunctor<BitsPortal, IndicesPortal, GlobalPopCountType>;
// RAII for the global atomic counter.
auto globalCount = cuda::internal::make_CudaUniquePtr<GlobalPopCountType>(1);
Functor functor{ bits, indices, globalCount.get() };
functor.Initialize();
Schedule(functor, bits.GetNumberOfWords());
Synchronize(); // Ensure kernel is done before checking final atomic count
return functor.Finalize();
}
//-----------------------------------------------------------------------------
public:
/// Create an unordered list of the indices of all set bits in @a bits. The
/// output array is sized to the worst case (one entry per bit) and then
/// shrunk to the actual set-bit count, which is also returned.
template <typename IndicesStorage>
VTKM_CONT static vtkm::Id BitFieldToUnorderedSet(
const vtkm::cont::BitField& bits,
vtkm::cont::ArrayHandle<Id, IndicesStorage>& indices)
{
VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf);
vtkm::Id numBits = bits.GetNumberOfBits();
auto bitsPortal = bits.PrepareForInput(DeviceAdapterTagCuda{});
auto indicesPortal = indices.PrepareForOutput(numBits, DeviceAdapterTagCuda{});
// Use a uint64 for accumulator, as atomicAdd does not support signed int64.
numBits = BitFieldToUnorderedSetPortal<vtkm::UInt64>(bitsPortal, indicesPortal);
indices.Shrink(numBits);
return numBits;
}
template <typename T, typename U, class SIn, class SOut>
VTKM_CONT static void Copy(const vtkm::cont::ArrayHandle<T, SIn>& input,
vtkm::cont::ArrayHandle<U, SOut>& output)

@ -22,6 +22,7 @@ set(unit_tests
UnitTestCudaArrayHandle.cu
UnitTestCudaArrayHandleFancy.cu
UnitTestCudaArrayHandleVirtualCoordinates.cu
UnitTestCudaBitField.cu
UnitTestCudaCellLocatorRectilinearGrid.cu
UnitTestCudaCellLocatorUniformBins.cu
UnitTestCudaCellLocatorUniformGrid.cu

@ -0,0 +1,34 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2019 UT-Battelle, LLC.
// Copyright 2019 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
// Make sure that the tested code is using the device adapter specified. This
// is important in the long run so we don't, for example, use the CUDA device
// for a part of an operation where the TBB device was specified.
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_ERROR
#include <vtkm/cont/cuda/DeviceAdapterCuda.h>
#include <vtkm/cont/testing/TestingBitField.h>
int UnitTestCudaBitField(int argc, char* argv[])
{
auto tracker = vtkm::cont::GetRuntimeDeviceTracker();
tracker.ForceDevice(vtkm::cont::DeviceAdapterTagCuda{});
return vtkm::cont::testing::TestingBitField<vtkm::cont::DeviceAdapterTagCuda>::Run(argc, argv);
}

@ -0,0 +1,227 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2019 UT-Battelle, LLC.
// Copyright 2019 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#ifndef vtk_m_cont_internal_AtomicInterfaceControl_h
#define vtk_m_cont_internal_AtomicInterfaceControl_h
#include <vtkm/internal/Configure.h>
#include <vtkm/internal/Windows.h>
#include <vtkm/ListTag.h>
#include <vtkm/Types.h>
#if defined(VTKM_MSVC) && !defined(VTKM_CUDA)
#include <intrin.h> // For MSVC atomics
#endif
#include <atomic>
#include <cstdint>
#include <cstring>
namespace vtkm
{
namespace cont
{
namespace internal
{
/**
* Control-environment implementation of the atomic word interface (see
* AtomicInterfaceExecution). Provides atomic Load/Store and bitwise
* Not/And/Or/Xor/CompareAndSwap on unsigned words; the fetch-style ops
* return the word's previous value. May also serve as the execution
* implementation for CPU-based devices.
*/
class AtomicInterfaceControl
{
public:
using WordTypes = vtkm::ListTagBase<vtkm::UInt8, vtkm::UInt16, vtkm::UInt32, vtkm::UInt64>;
// TODO These support UInt64, too. This should be benchmarked to see which
// is faster.
using WordTypePreferred = vtkm::UInt32;
#ifdef VTKM_MSVC
private:
// Reinterpret the bits of `src` as type To without invoking UB.
template <typename To, typename From>
VTKM_EXEC_CONT static To BitCast(const From& src)
{
// The memcpy should be removed by the compiler when possible, but this
// works around a host of issues with bitcasting using reinterpret_cast.
VTKM_STATIC_ASSERT(sizeof(From) == sizeof(To));
To dst;
std::memcpy(&dst, &src, sizeof(From));
return dst;
}
public:
// Note about Load and Store implementations:
//
// "Simple reads and writes to properly-aligned 32-bit variables are atomic
// operations"
//
// "Simple reads and writes to properly aligned 64-bit variables are atomic on
// 64-bit Windows. Reads and writes to 64-bit values are not guaranteed to be
// atomic on 32-bit Windows."
//
// "Reads and writes to variables of other sizes [than 32 or 64 bits] are not
// guaranteed to be atomic on any platform."
//
// https://docs.microsoft.com/en-us/windows/desktop/sync/interlocked-variable-access
VTKM_EXEC_CONT
static vtkm::UInt8 Load(const vtkm::UInt8* addr)
{
// This assumes that the memory interface is smart enough to load a 32-bit
// word atomically and a properly aligned 8-bit word from it.
// We could build address masks and do shifts to perform this manually if
// this assumption is incorrect.
auto result = *static_cast<volatile const vtkm::UInt8*>(addr);
std::atomic_thread_fence(std::memory_order_acquire);
return result;
}
VTKM_EXEC_CONT
static vtkm::UInt16 Load(const vtkm::UInt16* addr)
{
// This assumes that the memory interface is smart enough to load a 32-bit
// word atomically and a properly aligned 16-bit word from it.
// We could build address masks and do shifts to perform this manually if
// this assumption is incorrect.
auto result = *static_cast<volatile const vtkm::UInt16*>(addr);
std::atomic_thread_fence(std::memory_order_acquire);
return result;
}
VTKM_EXEC_CONT
static vtkm::UInt32 Load(const vtkm::UInt32* addr)
{
auto result = *static_cast<volatile const vtkm::UInt32*>(addr);
std::atomic_thread_fence(std::memory_order_acquire);
return result;
}
VTKM_EXEC_CONT
static vtkm::UInt64 Load(const vtkm::UInt64* addr)
{
auto result = *static_cast<volatile const vtkm::UInt64*>(addr);
std::atomic_thread_fence(std::memory_order_acquire);
return result;
}
VTKM_EXEC_CONT
static void Store(vtkm::UInt8* addr, vtkm::UInt8 val)
{
// There doesn't seem to be an atomic store instruction in the windows
// API, so just exchange and discard the result.
_InterlockedExchange8(reinterpret_cast<volatile CHAR*>(addr), BitCast<CHAR>(val));
}
VTKM_EXEC_CONT
static void Store(vtkm::UInt16* addr, vtkm::UInt16 val)
{
// There doesn't seem to be an atomic store instruction in the windows
// API, so just exchange and discard the result.
_InterlockedExchange16(reinterpret_cast<volatile SHORT*>(addr), BitCast<SHORT>(val));
}
VTKM_EXEC_CONT
static void Store(vtkm::UInt32* addr, vtkm::UInt32 val)
{
std::atomic_thread_fence(std::memory_order_release);
*addr = val;
}
VTKM_EXEC_CONT
static void Store(vtkm::UInt64* addr, vtkm::UInt64 val)
{
std::atomic_thread_fence(std::memory_order_release);
*addr = val;
}
// Generates Not/And/Or/Xor/CompareAndSwap for a word type using the Win32
// Interlocked intrinsics. Each returns the word's previous value.
#define VTKM_ATOMIC_OPS_FOR_TYPE(vtkmType, winType, suffix) \
VTKM_EXEC_CONT static vtkmType Not(vtkmType* addr) \
{ \
return Xor(addr, static_cast<vtkmType>(~vtkmType{ 0u })); \
} \
VTKM_EXEC_CONT static vtkmType And(vtkmType* addr, vtkmType mask) \
{ \
return BitCast<vtkmType>( \
_InterlockedAnd##suffix(reinterpret_cast<volatile winType*>(addr), BitCast<winType>(mask))); \
} \
VTKM_EXEC_CONT static vtkmType Or(vtkmType* addr, vtkmType mask) \
{ \
return BitCast<vtkmType>( \
_InterlockedOr##suffix(reinterpret_cast<volatile winType*>(addr), BitCast<winType>(mask))); \
} \
VTKM_EXEC_CONT static vtkmType Xor(vtkmType* addr, vtkmType mask) \
{ \
return BitCast<vtkmType>( \
_InterlockedXor##suffix(reinterpret_cast<volatile winType*>(addr), BitCast<winType>(mask))); \
} \
VTKM_EXEC_CONT static vtkmType CompareAndSwap( \
vtkmType* addr, vtkmType newWord, vtkmType expected) \
{ \
return BitCast<vtkmType>( \
_InterlockedCompareExchange##suffix(reinterpret_cast<volatile winType*>(addr), \
BitCast<winType>(newWord), \
BitCast<winType>(expected))); \
}
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt8, CHAR, 8)
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt16, SHORT, 16)
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt32, LONG, )
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt64, LONG64, 64)
#undef VTKM_ATOMIC_OPS_FOR_TYPE
#else // gcc/clang
// Generates the full atomic interface for a word type using the GCC/Clang
// __atomic builtins. Fetch-style ops return the word's previous value.
#define VTKM_ATOMIC_OPS_FOR_TYPE(type) \
VTKM_EXEC_CONT static type Load(const type* addr) \
{ \
return __atomic_load_n(addr, __ATOMIC_ACQUIRE); \
} \
VTKM_EXEC_CONT static void Store(type* addr, type value) \
{ \
return __atomic_store_n(addr, value, __ATOMIC_RELEASE); \
} \
VTKM_EXEC_CONT static type Not(type* addr) { return Xor(addr, static_cast<type>(~type{ 0u })); } \
VTKM_EXEC_CONT static type And(type* addr, type mask) \
{ \
return __atomic_fetch_and(addr, mask, __ATOMIC_SEQ_CST); \
} \
VTKM_EXEC_CONT static type Or(type* addr, type mask) \
{ \
return __atomic_fetch_or(addr, mask, __ATOMIC_SEQ_CST); \
} \
VTKM_EXEC_CONT static type Xor(type* addr, type mask) \
{ \
return __atomic_fetch_xor(addr, mask, __ATOMIC_SEQ_CST); \
} \
VTKM_EXEC_CONT static type CompareAndSwap(type* addr, type newWord, type expected) \
{ \
__atomic_compare_exchange_n( \
addr, &expected, newWord, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); \
return expected; \
}
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt8)
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt16)
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt32)
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt64)
#undef VTKM_ATOMIC_OPS_FOR_TYPE
#endif
};
}
}
} // end namespace vtkm::cont::internal
#endif // vtk_m_cont_internal_AtomicInterfaceControl_h

@ -0,0 +1,113 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2019 UT-Battelle, LLC.
// Copyright 2019 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#ifndef vtk_m_cont_internal_AtomicInterfaceExecution_h
#define vtk_m_cont_internal_AtomicInterfaceExecution_h
#include <vtkm/Types.h>
namespace vtkm
{
namespace cont
{
namespace internal
{
/// Class template that provides a collection of static methods that perform
/// atomic operations on raw addresses. It is the responsibility of the caller
/// to ensure that the addresses are properly aligned.
///
/// The class defines a WordTypePreferred member that is the fastest available
/// for bitwise operations of the given device. At minimum, the interface must
/// support operations on WordTypePreferred and vtkm::WordTypeDefault, which may
/// be the same. A full list of supported word types is advertised in the type
/// list @a WordTypes.
///
/// To implement this on devices that share the control environment, subclass
/// vtkm::cont::internal::AtomicInterfaceControl, which may also be used
/// directly from control-side code.
template <typename DeviceTag>
class AtomicInterfaceExecution
#ifdef VTKM_DOXYGEN_ONLY
{
/// The preferred word type for the target device for bitwise atomic
/// operations.
using WordTypePreferred = FastestWordTypeForDevice;
using WordTypes = vtkm::ListTagBase<vtkm::WordTypeDefault, WordTypePreferred>;
/// Atomically load a value from memory while enforcing, at minimum, "acquire"
/// memory ordering.
VTKM_EXEC static vtkm::WordTypeDefault Load(vtkm::WordTypeDefault* addr);
VTKM_EXEC static WordTypePreferred Load(WordTypePreferred* addr);
/// Atomically write a value to memory while enforcing, at minimum, "release"
/// memory ordering.
VTKM_EXEC static void Store(vtkm::WordTypeDefault* addr, vtkm::WordTypeDefault value);
VTKM_EXEC static void Store(WordTypePreferred* addr, WordTypePreferred value);
/// Perform a bitwise atomic not operation on the word at @a addr, returning
/// the word's original value.
/// This operation performs a full memory barrier around the atomic access.
/// @{
VTKM_EXEC static vtkm::WordTypeDefault Not(vtkm::WordTypeDefault* addr);
VTKM_EXEC static WordTypePreferred Not(WordTypePreferred* addr);
/// @}
/// Perform a bitwise atomic and operation on the word at @a addr, returning
/// the word's original value.
/// This operation performs a full memory barrier around the atomic access.
/// @{
VTKM_EXEC static vtkm::WordTypeDefault And(vtkm::WordTypeDefault* addr,
vtkm::WordTypeDefault mask);
VTKM_EXEC static WordTypePreferred And(WordTypePreferred* addr, WordTypePreferred mask);
/// @}
/// Perform a bitwise atomic or operation on the word at @a addr, returning
/// the word's original value.
/// This operation performs a full memory barrier around the atomic access.
/// @{
VTKM_EXEC static vtkm::WordTypeDefault Or(vtkm::WordTypeDefault* addr,
vtkm::WordTypeDefault mask);
VTKM_EXEC static WordTypePreferred Or(WordTypePreferred* addr, WordTypePreferred mask);
/// @}
/// Perform a bitwise atomic xor operation on the word at @a addr, returning
/// the word's original value.
/// This operation performs a full memory barrier around the atomic access.
/// @{
VTKM_EXEC static vtkm::WordTypeDefault Xor(vtkm::WordTypeDefault* addr,
vtkm::WordTypeDefault mask);
VTKM_EXEC static WordTypePreferred Xor(WordTypePreferred* addr, WordTypePreferred mask);
/// @}
/// Perform an atomic CAS operation on the word at @a addr. Returns the value
/// that was previously at @a addr; the swap occurred iff the returned value
/// equals @a expected.
/// This operation performs a full memory barrier around the atomic access.
/// @{
VTKM_EXEC static vtkm::WordTypeDefault CompareAndSwap(vtkm::WordTypeDefault* addr,
vtkm::WordTypeDefault newWord,
vtkm::WordTypeDefault expected);
VTKM_EXEC static WordTypePreferred CompareAndSwap(WordTypePreferred* addr,
WordTypePreferred newWord,
WordTypePreferred expected);
/// @}
}
#endif // VTKM_DOXYGEN_ONLY
;
}
}
} // end namespace vtkm::cont::internal
#endif // vtk_m_cont_internal_AtomicInterfaceExecution_h

@ -28,6 +28,8 @@ set(headers
ArrayPortalFromIterators.h
ArrayPortalShrink.h
ArrayTransfer.h
AtomicInterfaceControl.h
AtomicInterfaceExecution.h
ConnectivityExplicitInternals.h
DeviceAdapterAlgorithmGeneral.h
DeviceAdapterAtomicArrayImplementation.h

@ -27,6 +27,7 @@
#include <vtkm/cont/ArrayHandleIndex.h>
#include <vtkm/cont/ArrayHandleStreaming.h>
#include <vtkm/cont/ArrayHandleZip.h>
#include <vtkm/cont/BitField.h>
#include <vtkm/cont/Logging.h>
#include <vtkm/cont/internal/DeviceAdapterAtomicArrayImplementation.h>
#include <vtkm/cont/internal/FunctorsGeneral.h>
@ -123,6 +124,35 @@ private:
}
public:
//--------------------------------------------------------------------------
// BitFieldToUnorderedSet
/// Compute the indices of all set bits in @a bits and store them, in no
/// particular order, into @a indices. Returns the number of set bits found.
template <typename IndicesStorage>
VTKM_CONT static vtkm::Id BitFieldToUnorderedSet(
  const vtkm::cont::BitField& bits,
  vtkm::cont::ArrayHandle<Id, IndicesStorage>& indices)
{
  VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf);

  vtkm::Id bitCount = bits.GetNumberOfBits();
  auto inputPortal = bits.PrepareForInput(DeviceAdapterTag{});
  // Allocate for the worst case (every bit set); shrink to fit afterwards.
  auto outputPortal = indices.PrepareForOutput(bitCount, DeviceAdapterTag{});

  // Shared counter used by the functor instances to claim output ranges and
  // to report the total number of set bits.
  std::atomic<vtkm::UInt64> setBitCount;
  setBitCount.store(0, std::memory_order_seq_cst);

  using FunctorType = BitFieldToUnorderedSetFunctor<decltype(inputPortal), decltype(outputPortal)>;
  FunctorType kernel{ inputPortal, outputPortal, setBitCount };

  DerivedAlgorithm::Schedule(kernel, kernel.GetNumberOfInstances());
  DerivedAlgorithm::Synchronize();

  bitCount = static_cast<vtkm::Id>(setBitCount.load(std::memory_order_seq_cst));
  indices.Shrink(bitCount);
  return bitCount;
}
//--------------------------------------------------------------------------
// Copy
template <typename T, typename U, class CIn, class COut>

@ -24,10 +24,12 @@
#include <vtkm/TypeTraits.h>
#include <vtkm/UnaryPredicates.h>
#include <vtkm/cont/ArrayPortalToIterators.h>
#include <vtkm/cont/internal/AtomicInterfaceExecution.h>
#include <vtkm/exec/FunctorBase.h>
#include <algorithm>
#include <atomic>
namespace vtkm
{
@ -332,6 +334,142 @@ struct ShiftCopyAndInit : vtkm::exec::FunctorBase
}
};
/// Functor that finds the indices of all set bits in a BitField and writes
/// them, unordered, into an index portal. Each scheduled instance handles a
/// fixed-size chunk of words; a shared atomic popcount is used both to claim
/// contiguous output ranges and to report the total number of set bits.
template <class BitsPortal, class IndicesPortal>
struct BitFieldToUnorderedSetFunctor : public vtkm::exec::FunctorBase
{
  using WordType = typename BitsPortal::WordTypePreferred;

  // This functor executes a number of instances, where each instance handles
  // two cachelines worth of data. Figure out how many words that is:
  static constexpr vtkm::Id CacheLineSize = VTKM_ALLOCATION_ALIGNMENT;
  static constexpr vtkm::Id WordsPerCacheLine =
    CacheLineSize / static_cast<vtkm::Id>(sizeof(WordType));
  static constexpr vtkm::Id CacheLinesPerInstance = 2;
  static constexpr vtkm::Id WordsPerInstance = CacheLinesPerInstance * WordsPerCacheLine;

  VTKM_STATIC_ASSERT(
    VTKM_PASS_COMMAS(std::is_same<typename IndicesPortal::ValueType, vtkm::Id>::value));

  /// @param input Portal to the bits being scanned.
  /// @param output Portal receiving indices of the set bits.
  /// @param popCount Shared counter; final value is the total set-bit count.
  VTKM_CONT
  BitFieldToUnorderedSetFunctor(const BitsPortal& input,
                                IndicesPortal& output,
                                std::atomic<vtkm::UInt64>& popCount)
    : Input{ input }
    , Output{ output }
    , PopCount(popCount)
    , FinalWordIndex{ input.GetNumberOfWords() - 1 }
    , FinalWordMask(input.GetFinalWordMask())
  {
  }

  /// Number of instances to Schedule: one per WordsPerInstance-word chunk.
  VTKM_CONT vtkm::Id GetNumberOfInstances() const
  {
    const auto numWords = this->Input.GetNumberOfWords();
    return (numWords + WordsPerInstance - 1) / WordsPerInstance;
  }

  VTKM_EXEC void operator()(vtkm::Id instanceIdx) const
  {
    const vtkm::Id numWords = this->Input.GetNumberOfWords();
    const vtkm::Id wordStart = vtkm::Min(instanceIdx * WordsPerInstance, numWords);
    const vtkm::Id wordEnd = vtkm::Min(wordStart + WordsPerInstance, numWords);

    if (wordStart != wordEnd) // range is valid
    {
      this->ExecuteRange(wordStart, wordEnd);
    }
  }

  VTKM_EXEC void ExecuteRange(vtkm::Id wordStart, vtkm::Id wordEnd) const
  {
#ifndef VTKM_CUDA_DEVICE_PASS // for std::atomic call from VTKM_EXEC function:
    // Count bits and claim space for output with a single atomic add:
    vtkm::UInt64 chunkBits = this->CountChunkBits(wordStart, wordEnd);
    if (chunkBits > 0)
    {
      vtkm::UInt64 outIdx = this->PopCount.fetch_add(chunkBits, std::memory_order_relaxed);
      this->ProcessWords(wordStart, wordEnd, static_cast<vtkm::Id>(outIdx));
    }
#else
    (void)wordStart;
    (void)wordEnd;
#endif
  }

  VTKM_CONT vtkm::UInt64 GetPopCount() const { return PopCount.load(std::memory_order_relaxed); }

private:
  VTKM_EXEC vtkm::UInt64 CountChunkBits(vtkm::Id wordStart, vtkm::Id wordEnd) const
  {
    // Need to mask out trailing bits from the final word:
    const bool isFinalChunk = wordEnd == (this->FinalWordIndex + 1);

    if (isFinalChunk)
    {
      wordEnd = this->FinalWordIndex;
    }

    vtkm::Int32 tmp = 0;
    for (vtkm::Id i = wordStart; i < wordEnd; ++i)
    {
      tmp += vtkm::CountSetBits(this->Input.GetWord(i));
    }

    if (isFinalChunk)
    {
      tmp += vtkm::CountSetBits(this->Input.GetWord(this->FinalWordIndex) & this->FinalWordMask);
    }

    return static_cast<vtkm::UInt64>(tmp);
  }

  // Write the index of every set bit in word (at word index wordIdx) to the
  // output portal, advancing outIdx once per bit written.
  VTKM_EXEC void OutputSetBitIndices(vtkm::Id wordIdx, WordType word, vtkm::Id& outIdx) const
  {
    const vtkm::Id firstBitIdx = wordIdx * static_cast<vtkm::Id>(sizeof(WordType)) * CHAR_BIT;
    while (word != 0) // have bits
    {
      // Find next bit. FindFirstSetBit starts counting at 1.
      const vtkm::Int32 bit = vtkm::FindFirstSetBit(word) - 1;
      this->Output.Set(outIdx++, firstBitIdx + bit); // Write index of bit
      // Clear the bit. The shifted 1 must have WordType width: a plain `1`
      // is a 32-bit int, and shifting it by >= 32 bits (possible when
      // WordType is 64-bit) is undefined behavior and drops bits.
      word ^= (static_cast<WordType>(1) << bit);
    }
  }

  VTKM_EXEC void ProcessWords(vtkm::Id wordStart, vtkm::Id wordEnd, vtkm::Id outputStartIdx) const
  {
    // Need to mask out trailing bits from the final word:
    const bool isFinalChunk = wordEnd == (this->FinalWordIndex + 1);

    if (isFinalChunk)
    {
      wordEnd = this->FinalWordIndex;
    }

    for (vtkm::Id i = wordStart; i < wordEnd; ++i)
    {
      this->OutputSetBitIndices(i, this->Input.GetWord(i), outputStartIdx);
    }

    if (isFinalChunk)
    {
      this->OutputSetBitIndices(this->FinalWordIndex,
                                this->Input.GetWord(this->FinalWordIndex) & this->FinalWordMask,
                                outputStartIdx);
    }
  }

  BitsPortal Input;
  IndicesPortal Output;
  std::atomic<vtkm::UInt64>& PopCount;
  // Used to mask trailing bits in the last word.
  vtkm::Id FinalWordIndex{ 0 };
  WordType FinalWordMask{ 0 };
};
template <class InputPortalType, class OutputPortalType>
struct CopyKernel
{

@ -26,6 +26,7 @@
#ifdef VTKM_ENABLE_OPENMP
#include <vtkm/cont/openmp/internal/ArrayManagerExecutionOpenMP.h>
#include <vtkm/cont/openmp/internal/AtomicInterfaceExecutionOpenMP.h>
#include <vtkm/cont/openmp/internal/DeviceAdapterAlgorithmOpenMP.h>
#include <vtkm/cont/openmp/internal/VirtualObjectTransferOpenMP.h>
#endif

@ -0,0 +1,45 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2019 UT-Battelle, LLC.
// Copyright 2019 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#ifndef vtk_m_cont_openmp_internal_AtomicInterfaceExecutionOpenMP_h
#define vtk_m_cont_openmp_internal_AtomicInterfaceExecutionOpenMP_h
#include <vtkm/cont/openmp/internal/DeviceAdapterTagOpenMP.h>
#include <vtkm/cont/internal/AtomicInterfaceControl.h>
#include <vtkm/cont/internal/AtomicInterfaceExecution.h>
#include <vtkm/Types.h>
namespace vtkm
{
namespace cont
{
namespace internal
{
// The OpenMP backend executes in the host address space, so the control-side
// atomic word operations are reused directly for its execution environment.
template <>
class AtomicInterfaceExecution<DeviceAdapterTagOpenMP> : public AtomicInterfaceControl
{
};
}
}
} // end namespace vtkm::cont::internal
#endif // vtk_m_cont_openmp_internal_AtomicInterfaceExecutionOpenMP_h

@ -23,6 +23,7 @@ set(headers
DeviceAdapterAlgorithmOpenMP.h
DeviceAdapterRuntimeDetectorOpenMP.h
DeviceAdapterTagOpenMP.h
AtomicInterfaceExecutionOpenMP.h
ExecutionArrayInterfaceBasicOpenMP.h
FunctorsOpenMP.h
ParallelQuickSortOpenMP.h

@ -22,6 +22,7 @@ set(unit_tests
UnitTestOpenMPArrayHandle.cxx
UnitTestOpenMPArrayHandleFancy.cxx
UnitTestOpenMPArrayHandleVirtualCoordinates.cxx
UnitTestOpenMPBitField.cxx
UnitTestOpenMPCellLocatorRectilinearGrid.cxx
UnitTestOpenMPCellLocatorUniformBins.cxx
UnitTestOpenMPCellLocatorUniformGrid.cxx

@ -0,0 +1,31 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2018 UT-Battelle, LLC.
// Copyright 2018 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_ERROR
#include <vtkm/cont/openmp/DeviceAdapterOpenMP.h>
#include <vtkm/cont/testing/TestingBitField.h>
int UnitTestOpenMPBitField(int argc, char* argv[])
{
auto tracker = vtkm::cont::GetRuntimeDeviceTracker();
tracker.ForceDevice(vtkm::cont::DeviceAdapterTagOpenMP{});
return vtkm::cont::testing::TestingBitField<vtkm::cont::DeviceAdapterTagOpenMP>::Run(argc, argv);
}

@ -24,6 +24,7 @@
// clang-format off
#include <vtkm/cont/serial/internal/DeviceAdapterTagSerial.h>
#include <vtkm/cont/serial/internal/DeviceAdapterRuntimeDetectorSerial.h>
#include <vtkm/cont/serial/internal/AtomicInterfaceExecutionSerial.h>
#include <vtkm/cont/serial/internal/ArrayManagerExecutionSerial.h>
#include <vtkm/cont/serial/internal/DeviceAdapterAlgorithmSerial.h>
#include <vtkm/cont/serial/internal/VirtualObjectTransferSerial.h>

@ -0,0 +1,45 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2019 UT-Battelle, LLC.
// Copyright 2019 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#ifndef vtk_m_cont_serial_internal_AtomicInterfaceExecutionSerial_h
#define vtk_m_cont_serial_internal_AtomicInterfaceExecutionSerial_h
#include <vtkm/cont/serial/internal/DeviceAdapterTagSerial.h>
#include <vtkm/cont/internal/AtomicInterfaceControl.h>
#include <vtkm/cont/internal/AtomicInterfaceExecution.h>
#include <vtkm/Types.h>
namespace vtkm
{
namespace cont
{
namespace internal
{
// The serial backend executes in the host address space, so the control-side
// atomic word operations are reused directly for its execution environment.
template <>
class AtomicInterfaceExecution<DeviceAdapterTagSerial> : public AtomicInterfaceControl
{
};
}
}
} // end namespace vtkm::cont::internal
#endif // vtk_m_cont_serial_internal_AtomicInterfaceExecutionSerial_h

@ -20,6 +20,7 @@
set(headers
ArrayManagerExecutionSerial.h
AtomicInterfaceExecutionSerial.h
DeviceAdapterAlgorithmSerial.h
DeviceAdapterRuntimeDetectorSerial.h
DeviceAdapterTagSerial.h

@ -22,6 +22,7 @@ set(unit_tests
UnitTestSerialArrayHandle.cxx
UnitTestSerialArrayHandleFancy.cxx
UnitTestSerialArrayHandleVirtualCoordinates.cxx
UnitTestSerialBitField.cxx
UnitTestSerialCellLocatorRectilinearGrid.cxx
UnitTestSerialCellLocatorUniformBins.cxx
UnitTestSerialCellLocatorUniformGrid.cxx

@ -0,0 +1,34 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2019 UT-Battelle, LLC.
// Copyright 2019 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
// Make sure that the tested code is using the device adapter specified. This
// is important in the long run so we don't, for example, use the CUDA device
// for a part of an operation where the TBB device was specified.
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_ERROR
#include <vtkm/cont/serial/DeviceAdapterSerial.h>
#include <vtkm/cont/testing/TestingBitField.h>
int UnitTestSerialBitField(int argc, char* argv[])
{
auto tracker = vtkm::cont::GetRuntimeDeviceTracker();
tracker.ForceDevice(vtkm::cont::DeviceAdapterTagSerial{});
return vtkm::cont::testing::TestingBitField<vtkm::cont::DeviceAdapterTagSerial>::Run(argc, argv);
}

@ -25,6 +25,7 @@
#ifdef VTKM_ENABLE_TBB
#include <vtkm/cont/tbb/internal/ArrayManagerExecutionTBB.h>
#include <vtkm/cont/tbb/internal/AtomicInterfaceExecutionTBB.h>
#include <vtkm/cont/tbb/internal/DeviceAdapterAlgorithmTBB.h>
#include <vtkm/cont/tbb/internal/VirtualObjectTransferTBB.h>
#endif

@ -0,0 +1,45 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2019 UT-Battelle, LLC.
// Copyright 2019 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#ifndef vtk_m_cont_tbb_internal_AtomicInterfaceExecutionTBB_h
#define vtk_m_cont_tbb_internal_AtomicInterfaceExecutionTBB_h
#include <vtkm/cont/tbb/internal/DeviceAdapterTagTBB.h>
#include <vtkm/cont/internal/AtomicInterfaceControl.h>
#include <vtkm/cont/internal/AtomicInterfaceExecution.h>
#include <vtkm/Types.h>
namespace vtkm
{
namespace cont
{
namespace internal
{
template <>
class AtomicInterfaceExecution<DeviceAdapterTagTBB> : public AtomicInterfaceControl
{
};
}
}
} // end namespace vtkm::cont::internal
#endif // vtk_m_cont_tbb_internal_AtomicInterfaceExecutionTBB_h

@ -20,6 +20,7 @@
set(headers
ArrayManagerExecutionTBB.h
AtomicInterfaceExecutionTBB.h
DeviceAdapterAlgorithmTBB.h
DeviceAdapterRuntimeDetectorTBB.h
DeviceAdapterTagTBB.h

@ -22,6 +22,7 @@ set(unit_tests
UnitTestTBBArrayHandle.cxx
UnitTestTBBArrayHandleFancy.cxx
UnitTestTBBArrayHandleVirtualCoordinates.cxx
UnitTestTBBBitField.cxx
UnitTestTBBCellLocatorRectilinearGrid.cxx
UnitTestTBBCellLocatorUniformBins.cxx
UnitTestTBBCellLocatorUniformGrid.cxx

@ -0,0 +1,34 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2019 UT-Battelle, LLC.
// Copyright 2019 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
// Make sure that the tested code is using the device adapter specified. This
// is important in the long run so we don't, for example, use the CUDA device
// for a part of an operation where the TBB device was specified.
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_ERROR
#include <vtkm/cont/tbb/DeviceAdapterTBB.h>
#include <vtkm/cont/testing/TestingBitField.h>
int UnitTestTBBBitField(int argc, char* argv[])
{
auto tracker = vtkm::cont::GetRuntimeDeviceTracker();
tracker.ForceDevice(vtkm::cont::DeviceAdapterTagTBB{});
return vtkm::cont::testing::TestingBitField<vtkm::cont::DeviceAdapterTagTBB>::Run(argc, argv);
}

@ -0,0 +1,540 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2019 UT-Battelle, LLC.
// Copyright 2019 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#ifndef vtk_m_cont_testing_TestingBitFields_h
#define vtk_m_cont_testing_TestingBitFields_h
#include <vtkm/cont/BitField.h>
#include <vtkm/cont/DeviceAdapterAlgorithm.h>
#include <vtkm/cont/RuntimeDeviceTracker.h>
#include <vtkm/cont/testing/Testing.h>
#include <vtkm/exec/FunctorBase.h>
#include <cstdio>
#define DEVICE_ASSERT_MSG(cond, message) \
do \
{ \
if (!(cond)) \
{ \
printf("Testing assert failed at %s:%d\n\t- Condition: %s\n\t- Subtest: %s\n", \
__FILE__, \
__LINE__, \
#cond, \
message); \
return false; \
} \
} while (false)
#define DEVICE_ASSERT(cond) \
do \
{ \
if (!(cond)) \
{ \
printf("Testing assert failed at %s:%d\n\t- Condition: %s\n", __FILE__, __LINE__, #cond); \
return false; \
} \
} while (false)
// Test with some trailing bits in partial last word:
#define NUM_BITS \
vtkm::Id { 7681 }
using vtkm::cont::BitField;
namespace vtkm
{
namespace cont
{
namespace testing
{
/// This class has a single static member, Run, that runs all tests with the
/// given DeviceAdapter.
template <class DeviceAdapterTag>
struct TestingBitField
{
using Algo = vtkm::cont::DeviceAdapterAlgorithm<DeviceAdapterTag>;
using AtomicInterface = vtkm::cont::internal::AtomicInterfaceExecution<DeviceAdapterTag>;
using Traits = vtkm::cont::detail::BitFieldTraits;
using WordTypes = typename AtomicInterface::WordTypes;
using WordTypesControl = vtkm::cont::internal::AtomicInterfaceControl::WordTypes;
VTKM_EXEC_CONT
static bool RandomBitFromIndex(vtkm::Id idx) noexcept
{
// Some random operations that will give a pseudorandom stream of bits:
auto m = idx + (idx * 2) - (idx / 3) + (idx * 5 / 7) - (idx * 11 / 13);
return (m % 2) == 1;
}
template <typename WordType>
VTKM_EXEC_CONT static WordType RandomWordFromIndex(vtkm::Id idx) noexcept
{
vtkm::UInt64 m = static_cast<vtkm::UInt64>(idx * (NUM_BITS - 1) + (idx + 1) * NUM_BITS);
m ^= m << 3;
m ^= m << 7;
m ^= m << 15;
m ^= m << 31;
m = (m << 32) | (m >> 32);
const size_t mBits = 64;
const size_t wordBits = sizeof(WordType) * CHAR_BIT;
const WordType highWord = static_cast<WordType>(m >> (mBits - wordBits));
return highWord;
}
VTKM_CONT
static BitField RandomBitField(vtkm::Id numBits = NUM_BITS)
{
BitField field;
field.Allocate(numBits);
auto portal = field.GetPortalControl();
for (vtkm::Id i = 0; i < numBits; ++i)
{
portal.SetBit(i, RandomBitFromIndex(i));
}
return field;
}
VTKM_CONT
static void TestBlockAllocation()
{
BitField field;
field.Allocate(NUM_BITS);
// NumBits should be rounded up to the nearest block of bytes, as defined in
// the traits:
const vtkm::Id bytesInFieldData =
field.GetData().GetNumberOfValues() * static_cast<vtkm::Id>(sizeof(vtkm::WordTypeDefault));
const vtkm::Id blockSize = vtkm::cont::detail::BitFieldTraits::BlockSize;
const vtkm::Id numBytes = (NUM_BITS + CHAR_BIT - 1) / CHAR_BIT;
const vtkm::Id numBlocks = (numBytes + blockSize - 1) / blockSize;
const vtkm::Id expectedBytes = numBlocks * blockSize;
VTKM_TEST_ASSERT(bytesInFieldData == expectedBytes,
"The BitField allocation does not round up to the nearest "
"block. This can cause access-by-word to read/write invalid "
"memory.");
}
template <typename PortalType, typename PortalConstType>
VTKM_EXEC_CONT static bool HelpTestBit(vtkm::Id i, PortalType portal, PortalConstType portalConst)
{
const auto origBit = RandomBitFromIndex(i);
auto bit = origBit;
const auto mod = RandomBitFromIndex(i + NUM_BITS);
auto testValues = [&](const char* op) -> bool {
auto expected = bit;
auto result = portal.GetBitAtomic(i);
auto resultConst = portalConst.GetBitAtomic(i);
DEVICE_ASSERT_MSG(result == expected, op);
DEVICE_ASSERT_MSG(resultConst == expected, op);
// Reset:
bit = origBit;
portal.SetBitAtomic(i, bit);
return true;
};
portal.SetBit(i, bit);
DEVICE_ASSERT(testValues("SetBit"));
bit = mod;
portal.SetBitAtomic(i, mod);
DEVICE_ASSERT(testValues("SetBitAtomic"));
bit = !bit;
portal.NotBitAtomic(i);
DEVICE_ASSERT(testValues("NotBitAtomic"));
bit = bit && mod;
portal.AndBitAtomic(i, mod);
DEVICE_ASSERT(testValues("AndBitAtomic"));
bit = bit || mod;
portal.OrBitAtomic(i, mod);
DEVICE_ASSERT(testValues("OrBitAtomic"));
bit = bit != mod;
portal.XorBitAtomic(i, mod);
DEVICE_ASSERT(testValues("XorBitAtomic"));
const auto notBit = !bit;
bool casResult = portal.CompareAndSwapBitAtomic(i, bit, notBit);
DEVICE_ASSERT(casResult == bit);
DEVICE_ASSERT(portal.GetBit(i) == bit);
DEVICE_ASSERT(portalConst.GetBit(i) == bit);
casResult = portal.CompareAndSwapBitAtomic(i, notBit, bit);
DEVICE_ASSERT(casResult == bit);
DEVICE_ASSERT(portal.GetBit(i) == notBit);
DEVICE_ASSERT(portalConst.GetBit(i) == notBit);
return true;
}
template <typename WordType, typename PortalType, typename PortalConstType>
VTKM_EXEC_CONT static bool HelpTestWord(vtkm::Id i,
PortalType portal,
PortalConstType portalConst)
{
const auto origWord = RandomWordFromIndex<WordType>(i);
auto word = origWord;
const auto mod = RandomWordFromIndex<WordType>(i + NUM_BITS);
auto testValues = [&](const char* op) -> bool {
auto expected = word;
auto result = portal.template GetWordAtomic<WordType>(i);
auto resultConst = portalConst.template GetWordAtomic<WordType>(i);
DEVICE_ASSERT_MSG(result == expected, op);
DEVICE_ASSERT_MSG(resultConst == expected, op);
// Reset:
word = origWord;
portal.SetWordAtomic(i, word);
return true;
};
portal.SetWord(i, word);
DEVICE_ASSERT(testValues("SetWord"));
word = mod;
portal.SetWordAtomic(i, mod);
DEVICE_ASSERT(testValues("SetWordAtomic"));
// C++ promotes e.g. uint8 to int32 when performing bitwise not. Silence
// conversion warning and mask unimportant bits:
word = static_cast<WordType>(~word);
portal.template NotWordAtomic<WordType>(i);
DEVICE_ASSERT(testValues("NotWordAtomic"));
word = word & mod;
portal.AndWordAtomic(i, mod);
DEVICE_ASSERT(testValues("AndWordAtomic"));
word = word | mod;
portal.OrWordAtomic(i, mod);
DEVICE_ASSERT(testValues("OrWordAtomic"));
word = word ^ mod;
portal.XorWordAtomic(i, mod);
DEVICE_ASSERT(testValues("XorWordAtomic"));
const WordType notWord = static_cast<WordType>(~word);
auto casResult = portal.CompareAndSwapWordAtomic(i, word, notWord);
DEVICE_ASSERT(casResult == word);
DEVICE_ASSERT(portal.template GetWord<WordType>(i) == word);
DEVICE_ASSERT(portalConst.template GetWord<WordType>(i) == word);
casResult = portal.CompareAndSwapWordAtomic(i, notWord, word);
DEVICE_ASSERT(casResult == word);
DEVICE_ASSERT(portal.template GetWord<WordType>(i) == notWord);
DEVICE_ASSERT(portalConst.template GetWord<WordType>(i) == notWord);
return true;
}
template <typename PortalType, typename PortalConstType>
struct HelpTestWordOpsControl
{
PortalType Portal;
PortalConstType PortalConst;
VTKM_CONT
HelpTestWordOpsControl(PortalType portal, PortalConstType portalConst)
: Portal(portal)
, PortalConst(portalConst)
{
}
template <typename WordType>
VTKM_CONT void operator()(WordType)
{
const auto numWords = this->Portal.template GetNumberOfWords<WordType>();
VTKM_TEST_ASSERT(numWords == this->PortalConst.template GetNumberOfWords<WordType>());
for (vtkm::Id i = 0; i < numWords; ++i)
{
VTKM_TEST_ASSERT(HelpTestWord<WordType>(i, this->Portal, this->PortalConst));
}
}
};
template <typename Portal, typename PortalConst>
VTKM_CONT static void HelpTestPortalsControl(Portal portal, PortalConst portalConst)
{
const auto numWords8 = (NUM_BITS + 7) / 8;
const auto numWords16 = (NUM_BITS + 15) / 16;
const auto numWords32 = (NUM_BITS + 31) / 32;
const auto numWords64 = (NUM_BITS + 63) / 64;
VTKM_TEST_ASSERT(portal.GetNumberOfBits() == NUM_BITS);
VTKM_TEST_ASSERT(portal.template GetNumberOfWords<vtkm::UInt8>() == numWords8);
VTKM_TEST_ASSERT(portal.template GetNumberOfWords<vtkm::UInt16>() == numWords16);
VTKM_TEST_ASSERT(portal.template GetNumberOfWords<vtkm::UInt32>() == numWords32);
VTKM_TEST_ASSERT(portal.template GetNumberOfWords<vtkm::UInt64>() == numWords64);
VTKM_TEST_ASSERT(portalConst.GetNumberOfBits() == NUM_BITS);
VTKM_TEST_ASSERT(portalConst.template GetNumberOfWords<vtkm::UInt8>() == numWords8);
VTKM_TEST_ASSERT(portalConst.template GetNumberOfWords<vtkm::UInt16>() == numWords16);
VTKM_TEST_ASSERT(portalConst.template GetNumberOfWords<vtkm::UInt32>() == numWords32);
VTKM_TEST_ASSERT(portalConst.template GetNumberOfWords<vtkm::UInt64>() == numWords64);
for (vtkm::Id i = 0; i < NUM_BITS; ++i)
{
HelpTestBit(i, portal, portalConst);
}
HelpTestWordOpsControl<Portal, PortalConst> test(portal, portalConst);
vtkm::ListForEach(test, typename Portal::AtomicInterface::WordTypes{});
}
VTKM_CONT
static void TestControlPortals()
{
auto field = RandomBitField();
auto portal = field.GetPortalControl();
auto portalConst = field.GetPortalConstControl();
HelpTestPortalsControl(portal, portalConst);
}
template <typename Portal>
VTKM_EXEC_CONT static bool HelpTestPortalSanityExecution(Portal portal)
{
const auto numWords8 = (NUM_BITS + 7) / 8;
const auto numWords16 = (NUM_BITS + 15) / 16;
const auto numWords32 = (NUM_BITS + 31) / 32;
const auto numWords64 = (NUM_BITS + 63) / 64;
DEVICE_ASSERT(portal.GetNumberOfBits() == NUM_BITS);
DEVICE_ASSERT(portal.template GetNumberOfWords<vtkm::UInt8>() == numWords8);
DEVICE_ASSERT(portal.template GetNumberOfWords<vtkm::UInt16>() == numWords16);
DEVICE_ASSERT(portal.template GetNumberOfWords<vtkm::UInt32>() == numWords32);
DEVICE_ASSERT(portal.template GetNumberOfWords<vtkm::UInt64>() == numWords64);
return true;
}
template <typename WordType, typename PortalType, typename PortalConstType>
struct HelpTestPortalsExecutionWordsFunctor : vtkm::exec::FunctorBase
{
PortalType Portal;
PortalConstType PortalConst;
HelpTestPortalsExecutionWordsFunctor(PortalType portal, PortalConstType portalConst)
: Portal(portal)
, PortalConst(portalConst)
{
}
VTKM_EXEC_CONT
void operator()(vtkm::Id i) const
{
if (i == 0)
{
if (!HelpTestPortalSanityExecution(this->Portal))
{
this->RaiseError("Testing Portal sanity failed.");
return;
}
if (!HelpTestPortalSanityExecution(this->PortalConst))
{
this->RaiseError("Testing PortalConst sanity failed.");
return;
}
}
if (!HelpTestWord<WordType>(i, this->Portal, this->PortalConst))
{
this->RaiseError("Testing word operations failed.");
return;
}
}
};
template <typename PortalType, typename PortalConstType>
struct HelpTestPortalsExecutionBitsFunctor : vtkm::exec::FunctorBase
{
PortalType Portal;
PortalConstType PortalConst;
HelpTestPortalsExecutionBitsFunctor(PortalType portal, PortalConstType portalConst)
: Portal(portal)
, PortalConst(portalConst)
{
}
VTKM_EXEC_CONT
void operator()(vtkm::Id i) const
{
if (!HelpTestBit(i, this->Portal, this->PortalConst))
{
this->RaiseError("Testing bit operations failed.");
return;
}
}
};
template <typename PortalType, typename PortalConstType>
struct HelpTestWordOpsExecution
{
PortalType Portal;
PortalConstType PortalConst;
VTKM_CONT
HelpTestWordOpsExecution(PortalType portal, PortalConstType portalConst)
: Portal(portal)
, PortalConst(portalConst)
{
}
template <typename WordType>
VTKM_CONT void operator()(WordType)
{
const auto numWords = this->Portal.template GetNumberOfWords<WordType>();
VTKM_TEST_ASSERT(numWords == this->PortalConst.template GetNumberOfWords<WordType>());
using WordFunctor =
HelpTestPortalsExecutionWordsFunctor<WordType, PortalType, PortalConstType>;
WordFunctor test{ this->Portal, this->PortalConst };
Algo::Schedule(test, numWords);
}
};
template <typename Portal, typename PortalConst>
VTKM_CONT static void HelpTestPortalsExecution(Portal portal, PortalConst portalConst)
{
HelpTestPortalsExecutionBitsFunctor<Portal, PortalConst> bitTest{ portal, portalConst };
Algo::Schedule(bitTest, portal.GetNumberOfBits());
HelpTestWordOpsExecution<Portal, PortalConst> test(portal, portalConst);
vtkm::ListForEach(test, typename Portal::AtomicInterface::WordTypes{});
}
VTKM_CONT
static void TestExecutionPortals()
{
auto field = RandomBitField();
auto portal = field.PrepareForInPlace(DeviceAdapterTag{});
auto portalConst = field.PrepareForInput(DeviceAdapterTag{});
HelpTestPortalsExecution(portal, portalConst);
}
VTKM_CONT
static void TestFinalWordMask()
{
auto testMask32 = [](vtkm::Id numBits, vtkm::UInt32 expectedMask) {
vtkm::cont::BitField field;
field.Allocate(numBits);
auto mask = field.GetPortalConstControl().GetFinalWordMask<vtkm::UInt32>();
VTKM_TEST_ASSERT(expectedMask == mask,
"Unexpected mask for BitField size ",
numBits,
": Expected 0x",
std::hex,
expectedMask,
" got 0x",
mask);
};
auto testMask64 = [](vtkm::Id numBits, vtkm::UInt64 expectedMask) {
vtkm::cont::BitField field;
field.Allocate(numBits);
auto mask = field.GetPortalConstControl().GetFinalWordMask<vtkm::UInt64>();
VTKM_TEST_ASSERT(expectedMask == mask,
"Unexpected mask for BitField size ",
numBits,
": Expected 0x",
std::hex,
expectedMask,
" got 0x",
mask);
};
testMask32(0, 0x00000000);
testMask32(1, 0x00000001);
testMask32(2, 0x00000003);
testMask32(3, 0x00000007);
testMask32(4, 0x0000000f);
testMask32(5, 0x0000001f);
testMask32(8, 0x000000ff);
testMask32(16, 0x0000ffff);
testMask32(24, 0x00ffffff);
testMask32(25, 0x01ffffff);
testMask32(31, 0x7fffffff);
testMask32(32, 0xffffffff);
testMask32(64, 0xffffffff);
testMask32(128, 0xffffffff);
testMask32(129, 0x00000001);
testMask64(0, 0x0000000000000000);
testMask64(1, 0x0000000000000001);
testMask64(2, 0x0000000000000003);
testMask64(3, 0x0000000000000007);
testMask64(4, 0x000000000000000f);
testMask64(5, 0x000000000000001f);
testMask64(8, 0x00000000000000ff);
testMask64(16, 0x000000000000ffff);
testMask64(24, 0x0000000000ffffff);
testMask64(25, 0x0000000001ffffff);
testMask64(31, 0x000000007fffffff);
testMask64(32, 0x00000000ffffffff);
testMask64(40, 0x000000ffffffffff);
testMask64(48, 0x0000ffffffffffff);
testMask64(56, 0x00ffffffffffffff);
testMask64(64, 0xffffffffffffffff);
testMask64(128, 0xffffffffffffffff);
testMask64(129, 0x0000000000000001);
}
struct TestRunner
{
VTKM_CONT
void operator()() const
{
TestingBitField::TestBlockAllocation();
TestingBitField::TestControlPortals();
TestingBitField::TestExecutionPortals();
TestingBitField::TestFinalWordMask();
}
};
public:
// Entry point for the test suite: pins the runtime tracker to this suite's
// device adapter, then dispatches TestRunner through the common testing
// harness. Returns the harness's exit code (0 on success).
static VTKM_CONT int Run(int argc, char* argv[])
{
  // Force all algorithm dispatch onto DeviceAdapterTag so the tests cannot
  // silently fall back to another device.
  vtkm::cont::GetGlobalRuntimeDeviceTracker().ForceDevice(DeviceAdapterTag());
  return vtkm::cont::testing::Testing::Run(TestRunner{}, argc, argv);
}
};
}
}
} // namespace vtkm::cont::testing
#endif //vtk_m_cont_testing_TestingArrayHandles_h

@ -2402,6 +2402,100 @@ private:
}
}
// Exercises Algorithm::BitFieldToUnorderedSet: for several repeated-mask and
// pseudo-random bit patterns, checks that the produced index array contains
// exactly the positions of the set bits (order-insensitive; the result is
// sorted before comparison).
static VTKM_CONT void TestBitFieldToUnorderedSet()
{
  using IndexArray = vtkm::cont::ArrayHandle<vtkm::Id>;
  using WordType = WordTypeDefault;

  // Test that everything works correctly with a partial word at the end.
  static constexpr vtkm::Id BitsPerWord = static_cast<vtkm::Id>(sizeof(WordType) * CHAR_BIT);
  // +5 to get a partial word:
  static constexpr vtkm::Id NumBits = 1024 * BitsPerWord + 5;
  static constexpr vtkm::Id NumWords = (NumBits + BitsPerWord - 1) / BitsPerWord;

  // Validates that the indices produced for `bits` mark exactly its set bits.
  auto testIndexArray = [](const BitField& bits) {
    const vtkm::Id numBits = bits.GetNumberOfBits();
    IndexArray indices;
    Algorithm::BitFieldToUnorderedSet(bits, indices);
    // Output order is unspecified; sort so a single linear pass can compare
    // the index list against the bit field.
    Algorithm::Sort(indices);

    auto bitPortal = bits.GetPortalConstControl();
    auto indexPortal = indices.GetPortalConstControl();

    const vtkm::Id numIndices = indices.GetNumberOfValues();
    vtkm::Id curIndex = 0;
    for (vtkm::Id curBit = 0; curBit < numBits; ++curBit)
    {
      const bool markedSet = curIndex < numIndices ? indexPortal.Get(curIndex) == curBit : false;
      const bool isSet = bitPortal.GetBit(curBit);
      VTKM_TEST_ASSERT(
        markedSet == isSet, "Bit ", curBit, " is set? ", isSet, " Marked set? ", markedSet);
      if (markedSet)
      {
        curIndex++;
      }
    }
    // Reuse the cached count; every produced index must have been consumed.
    VTKM_TEST_ASSERT(curIndex == numIndices, "Index array has extra values.");
  };

  auto testRepeatedMask = [&](WordType mask) {
    // Restore decimal formatting after printing the mask so later numeric
    // output (from this test and others) is not silently rendered in hex.
    std::cout << "Testing BitFieldToUnorderedSet with repeated 32-bit word 0x" << std::hex << mask
              << std::dec << std::endl;

    BitField bits;
    {
      bits.Allocate(NumBits);
      auto fillPortal = bits.GetPortalControl();
      for (vtkm::Id i = 0; i < NumWords; ++i)
      {
        fillPortal.SetWord(i, mask);
      }
    }

    testIndexArray(bits);
  };

  auto testRandomMask = [&](WordType seed) {
    std::cout << "Testing BitFieldToUnorderedSet with random sequence seeded with 0x" << std::hex
              << seed << std::dec << std::endl;

    // Explicit cast: brace-initializing the engine directly from WordType
    // would be a narrowing error if WordType is wider than result_type.
    std::mt19937 mt{ static_cast<std::mt19937::result_type>(seed) };
    std::uniform_int_distribution<std::mt19937::result_type> rng;

    BitField bits;
    {
      bits.Allocate(NumBits);
      auto fillPortal = bits.GetPortalControl();
      for (vtkm::Id i = 0; i < NumWords; ++i)
      {
        fillPortal.SetWord(i, static_cast<WordType>(rng(mt)));
      }
    }

    testIndexArray(bits);
  };

  testRepeatedMask(0x00000000);
  testRepeatedMask(0xeeeeeeee);
  testRepeatedMask(0xffffffff);
  testRepeatedMask(0x1c0fd395);
  testRepeatedMask(0xdeadbeef);

  testRandomMask(0x00000000);
  testRandomMask(0xeeeeeeee);
  testRandomMask(0xffffffff);
  testRandomMask(0x1c0fd395);
  testRandomMask(0xdeadbeef);
}
struct TestAll
{
VTKM_CONT void operator()() const
@ -2455,6 +2549,8 @@ private:
TestCopyArraysInDiffTypes();
TestAtomicArray();
TestBitFieldToUnorderedSet();
}
};

@ -30,6 +30,8 @@
#include <vtkm/cont/ArrayHandle.h>
#include <vtkm/cont/RuntimeDeviceTracker.h>
#include <vtkm/cont/internal/AtomicInterfaceControl.h>
#include <vtkm/cont/internal/AtomicInterfaceExecution.h>
#include <vtkm/cont/internal/DeviceAdapterAlgorithmGeneral.h>
#include <vtkm/cont/internal/VirtualObjectTransferShareWithControl.h>
#include <vtkm/cont/serial/DeviceAdapterSerial.h>
@ -101,6 +103,11 @@ public:
}
};
// Specialization for the general-algorithm test device: it derives from (and
// therefore reuses) the control-side atomic implementation, since this test
// adapter presumably executes in the control environment — confirm against
// DeviceAdapterTagTestAlgorithmGeneral's definition.
template <>
class AtomicInterfaceExecution<DeviceAdapterTagTestAlgorithmGeneral> : public AtomicInterfaceControl
{
};
template <typename TargetClass>
struct VirtualObjectTransfer<TargetClass, vtkm::cont::DeviceAdapterTagTestAlgorithmGeneral> final
: public VirtualObjectTransferShareWithControl<TargetClass>