Optimize HashWithIndifferentAccess#to_hash

Creating a copy by starting from an empty hash and inserting
keys one by one is inneficient because unless the hash is
very small, Ruby will have to reallocate the hash multiple
times until it reaches the same saize as the original.

And every time it happens, keys will have to be re-hashes.

While we're at it, instead of using the generic `convert_value`
we define a dedicated method which saves on needless checks.

Co-Authored-By: Jean Boussier <jean.boussier@gmail.com>

```ruby

require "bundler/inline"

gemfile(true) do
  source "https://rubygems.org"

  gem "activesupport", path: '.'
  gem 'benchmark-ips'
  gem 'benchmark-memory'
end

require "active_support"
require "active_support/core_ext/hash/indifferent_access"
require 'benchmark/ips'

module ActiveSupport
  class KwstannardHWIA < HashWithIndifferentAccess
    def to_hash
      Hash[self]
        .transform_values! { |v| convert_value(v, conversion: :to_hash) }
        .tap { |h| set_defaults(h) }
    end
  end

  class ByrootHWIA < HashWithIndifferentAccess
    def to_hash
      copy = Hash[self]
      copy.transform_values! { |v| convert_value_to_hash(v) }
      set_defaults(copy)
      copy
    end

    private

    def convert_value_to_hash(value)
      if value.is_a? Hash
        value.to_hash
      elsif value.is_a?(Array)
        value.map { |e| convert_value_to_hash(e) }
      else
        value
      end
    end
  end
end

flat = Hash[(1..9).to_a.product([1])]
flat_baseline = ActiveSupport::HashWithIndifferentAccess.new(flat)
flat_k = ActiveSupport::KwstannardHWIA.new(flat)
flat_b = ActiveSupport::ByrootHWIA.new(flat)

Benchmark.ips do |x|
  x.report("original") { flat_baseline.to_hash }
  x.report("kwstannard") { flat_k.to_hash }
  x.report("byroot") { flat_b.to_hash }
  x.compare!(order: :baseline)
end

long_flat = Hash[(1..100).to_a.product([1])]
long_flat_baseline = ActiveSupport::HashWithIndifferentAccess.new(long_flat)
long_flat_k = ActiveSupport::KwstannardHWIA.new(long_flat)
long_flat_b = ActiveSupport::ByrootHWIA.new(long_flat)

Benchmark.ips do |x|
  x.report("original") { long_flat_baseline.to_hash }
  x.report("kwstannard") { long_flat_k.to_hash }
  x.report("byroot") { long_flat_b.to_hash }
  x.compare!(order: :baseline)
end

deep_baseline = ActiveSupport::HashWithIndifferentAccess.new(flat.dup)
3.times.reduce(deep_baseline) { |deep, _| deep[:deep] = ActiveSupport::HashWithIndifferentAccess.new(flat.dup) }

deep_k = ActiveSupport::KwstannardHWIA.new(flat.dup)
3.times.reduce(deep_k) { |deep, _| deep[:deep] = ActiveSupport::KwstannardHWIA.new(flat.dup) }

deep_b = ActiveSupport::ByrootHWIA.new(flat.dup)
3.times.reduce(deep_b) { |deep, _| deep[:deep] = ActiveSupport::ByrootHWIA.new(flat.dup) }

Benchmark.ips do |x|
  x.report("original") { deep_baseline.to_hash }
  x.report("kwstannard") { deep_k.to_hash }
  x.report("byroot") { deep_b.to_hash }
  x.compare!(order: :baseline)
end

```

```
$ ruby /tmp/to_hash.rb
Fetching gem metadata from https://rubygems.org/..........
Resolving dependencies...
ruby 3.3.1 (2024-04-23 revision c56cd86388) [arm64-darwin23]
Warming up --------------------------------------
            original    92.830k i/100ms
          kwstannard   125.443k i/100ms
              byroot   139.810k i/100ms
Calculating -------------------------------------
            original    968.200k (± 2.9%) i/s -      4.920M in   5.086600s
          kwstannard      1.268M (± 2.9%) i/s -      6.398M in   5.049545s
              byroot      1.397M (± 2.9%) i/s -      6.990M in   5.008404s

Comparison:
            original:   968200.3 i/s
              byroot:  1397069.4 i/s - 1.44x  faster
          kwstannard:  1268207.5 i/s - 1.31x  faster

ruby 3.3.1 (2024-04-23 revision c56cd86388) [arm64-darwin23]
Warming up --------------------------------------
            original    11.051k i/100ms
          kwstannard    15.865k i/100ms
              byroot    17.778k i/100ms
Calculating -------------------------------------
            original    109.059k (± 5.1%) i/s -    552.550k in   5.082820s
          kwstannard    157.499k (± 7.4%) i/s -    793.250k in   5.072103s
              byroot    177.066k (± 7.2%) i/s -    888.900k in   5.052601s

Comparison:
            original:   109059.1 i/s
              byroot:   177065.9 i/s - 1.62x  faster
          kwstannard:   157499.0 i/s - 1.44x  faster

ruby 3.3.1 (2024-04-23 revision c56cd86388) [arm64-darwin23]
Warming up --------------------------------------
            original    22.249k i/100ms
          kwstannard    30.132k i/100ms
              byroot    34.116k i/100ms
Calculating -------------------------------------
            original    224.127k (± 1.7%) i/s -      1.135M in   5.064253s
          kwstannard    295.912k (± 5.9%) i/s -      1.476M in   5.007324s
              byroot    343.225k (± 1.7%) i/s -      1.740M in   5.070715s

Comparison:
            original:   224127.0 i/s
              byroot:   343224.6 i/s - 1.53x  faster
          kwstannard:   295912.4 i/s - 1.32x  faster
```
This commit is contained in:
Kelly Wolf Stannard 2024-06-28 17:13:31 -04:00 committed by Jean Boussier
parent 6045f257fb
commit ed5d646127

@ -378,13 +378,10 @@ def compact
# Convert to a regular hash with string keys.
def to_hash
_new_hash = Hash.new
set_defaults(_new_hash)
each do |key, value|
_new_hash[key] = convert_value(value, conversion: :to_hash)
end
_new_hash
copy = Hash[self]
copy.transform_values! { |v| convert_value_to_hash(v) }
set_defaults(copy)
copy
end
def to_proc
@ -398,11 +395,7 @@ def convert_key(key)
def convert_value(value, conversion: nil)
if value.is_a? Hash
if conversion == :to_hash
value.to_hash
else
value.nested_under_indifferent_access
end
value.nested_under_indifferent_access
elsif value.is_a?(Array)
if conversion != :assignment || value.frozen?
value = value.dup
@ -413,6 +406,17 @@ def convert_value(value, conversion: nil)
end
end
def convert_value_to_hash(value)
if value.is_a? Hash
value.to_hash
elsif value.is_a?(Array)
value.map { |e| convert_value_to_hash(e) }
else
value
end
end
def set_defaults(target)
if default_proc
target.default_proc = default_proc.dup