Implement marshal_dump and marshal_load on ActiveRecord::Base

Fix: https://github.com/rails/rails/issues/47704
Supersedes: https://github.com/rails/rails/pull/47722

While the instance variable ordering bug will be fixed in Ruby 3.2.2,
it's not great that we're depending on such a brittle implementation detail.

Additionally, marshalling Active Record instances is currently very inefficient:
the payload includes lots of redundant data that shouldn't make it into the cache.

In this new format the serialized payload only contains basic Ruby core or stdlib objects,
reducing the risk of changes in the internal representation of Rails classes.
This commit is contained in:
Jean Boussier 2023-03-20 10:24:34 +00:00
parent 559c9ebcd2
commit dee93277e3
19 changed files with 167 additions and 2 deletions

@ -1,3 +1,9 @@
* Introduce a more stable and optimized Marshal serializer for Active Record models.
Can be enabled with `config.active_record.marshalling_format_version = 7.1`.
*Jean Boussier*
* Allow specifying where clauses with column-tuple syntax.
Querying through `#where` now accepts a new tuple-syntax which accepts, as

@ -52,6 +52,7 @@ module ActiveRecord
autoload :Integration
autoload :InternalMetadata
autoload :LogSubscriber
autoload :Marshalling
autoload :Migration
autoload :Migrator, "active_record/migration"
autoload :ModelSchema
@ -436,6 +437,14 @@ def self.suppress_multiple_database_warning=(value)
# Defines a module-level reader/writer pair for `yaml_column_permitted_classes`
# and sets its initial value to `[Symbol]`.
singleton_class.attr_accessor :yaml_column_permitted_classes
self.yaml_column_permitted_classes = [Symbol]
# Returns the Marshal serialization format version currently selected on
# ActiveRecord::Marshalling.
def self.marshalling_format_version
  ActiveRecord::Marshalling.format_version
end
# Selects the Marshal serialization format version by forwarding +value+ to
# ActiveRecord::Marshalling.format_version=.
def self.marshalling_format_version=(value)
  ActiveRecord::Marshalling.format_version = value
end
def self.eager_load!
super
ActiveRecord::Locking.eager_load!

@ -330,6 +330,7 @@ class Base
include SignedId
include Suppressor
include Normalization
include Marshalling::Methods
end
ActiveSupport.run_load_hooks(:active_record, Base)

@ -0,0 +1,56 @@
# frozen_string_literal: true
module ActiveRecord
  # Switchable Marshal serialization for Active Record models.
  #
  # With format 6.1 no +marshal_dump+ is defined by this module, so Ruby's
  # default object marshalling applies. With format 7.1, +marshal_dump+ is an
  # alias of Methods#_marshal_dump_7_1, which emits a compact array payload.
  module Marshalling
    @format_version = 6.1

    class << self
      # The currently selected format version (6.1 until changed).
      attr_reader :format_version

      # Selects the format used when dumping records.
      #
      # * +6.1+ removes Methods#marshal_dump if it is currently defined.
      # * +7.1+ aliases Methods#marshal_dump to Methods#_marshal_dump_7_1.
      #
      # Raises ArgumentError for any other value; in that case the previously
      # selected version is left untouched.
      def format_version=(version)
        case version
        when 6.1
          Methods.remove_method(:marshal_dump) if Methods.method_defined?(:marshal_dump)
        when 7.1
          Methods.alias_method(:marshal_dump, :_marshal_dump_7_1)
        else
          raise ArgumentError, "Unknown marshalling format: #{version.inspect}"
        end
        @format_version = version
      end
    end

    # Instance methods mixed into models.
    module Methods
      # Builds the 7.1 payload:
      #
      #   [attributes_for_database, new_record?]
      #   [attributes_for_database, new_record?, [[name, target], ...]]
      #
      # The third element is only present when at least one association is
      # worth serializing.
      def _marshal_dump_7_1
        payload = [attributes_for_database, new_record?]

        # Only dump associations that are loaded or that hold in-memory
        # records. An association that was merely instantiated has an empty
        # target; dumping it would make #marshal_load assign that empty target
        # back, and the restored record would then present the association as
        # loaded-and-empty instead of querying for it.
        cached_associations = self.class.reflect_on_all_associations.each_with_object([]) do |reflection, pairs|
          name = reflection.name
          next unless association_cached?(name)

          cached = association(name)
          pairs << [name, cached.target] if cached.loaded? || cached.target.present?
        end

        payload << cached_associations unless cached_associations.empty?
        payload
      end

      # Restores a record from a payload produced by #_marshal_dump_7_1.
      #
      # This method is defined unconditionally (it is not toggled by
      # Marshalling.format_version=), so 7.1 payloads remain readable whichever
      # dump format is selected.
      def marshal_load(state)
        attributes_from_database, new_record, associations = state

        attributes = self.class.attributes_builder.build_from_database(attributes_from_database)
        init_with_attributes(attributes, new_record)

        if associations
          associations.each do |name, target|
            association(name).target = target
          rescue ActiveRecord::AssociationNotFoundError
            # The association no longer exists on the model; skip it.
          end
        end
      end
    end
  end
end

@ -7,6 +7,14 @@
class MarshalSerializationTest < ActiveRecord::TestCase
fixtures :topics
# Remember the process-wide format so each test may change it freely.
setup { @previous_format_version = ActiveRecord::Marshalling.format_version }

# Put the format back to what it was before the test ran.
teardown { ActiveRecord::Marshalling.format_version = @previous_format_version }
def test_deserializing_rails_6_1_marshal_basic
topic = Marshal.load(marshal_fixture("rails_6_1_topic"))
@ -23,6 +31,66 @@ def test_deserializing_rails_6_1_marshal_with_loaded_association_cache
assert_equal 1, topic.id
assert_equal "The First Topic", topic.title
assert_equal "Have a nice day", topic.content
assert_predicate topic.association(:replies), :loaded?
assert_predicate topic.replies.first.association(:topic), :loaded?
assert_same topic, topic.replies.first.topic
end
# Loads the "rails_7_1_topic" fixture payload and checks the restored record's
# persistence flag and attribute values.
def test_deserializing_rails_7_1_marshal_basic
  payload = marshal_fixture("rails_7_1_topic")
  restored = Marshal.load(payload)

  assert_not_predicate restored, :new_record?
  assert_equal 1, restored.id
  assert_equal "The First Topic", restored.title
  assert_equal "Have a nice day", restored.content
end
# Loads the "rails_7_1_topic_associations" fixture payload and checks that the
# attributes, the loaded :replies association and the inverse :topic link on
# the first reply are all restored.
def test_deserializing_rails_7_1_marshal_with_loaded_association_cache
  payload = marshal_fixture("rails_7_1_topic_associations")
  restored = Marshal.load(payload)

  assert_not_predicate restored, :new_record?
  assert_equal 1, restored.id
  assert_equal "The First Topic", restored.title
  assert_equal "Have a nice day", restored.content

  assert_predicate restored.association(:replies), :loaded?
  assert_predicate restored.replies.first.association(:topic), :loaded?
  assert_same restored, restored.replies.first.topic
end
# Dumps and reloads a topic with its replies loaded, using the 6.1 format.
def test_rails_6_1_rountrip
  # Select the format explicitly rather than relying on the process-wide
  # default, so this keeps exercising the 6.1 path whatever that default is.
  # The teardown hook restores the previous value.
  ActiveRecord::Marshalling.format_version = 6.1
  topic = Topic.find(1)
  topic.replies.to_a
  topic = Marshal.load(Marshal.dump(topic))
  assert_not_predicate topic, :new_record?
  assert_equal 1, topic.id
  assert_equal "The First Topic", topic.title
  assert_equal "Have a nice day", topic.content
  assert_predicate topic.association(:replies), :loaded?
end
# Dumps and reloads a topic under the 7.1 format and checks that attributes,
# the loaded :replies association and each reply's link back to the very same
# topic object survive the round trip.
def test_rails_7_1_rountrip
  ActiveRecord::Marshalling.format_version = 7.1

  original = Topic.find(1)
  original.replies.each { |reply| reply.topic }

  # Sanity-check the object graph before serializing it.
  assert_not_equal 0, original.replies.size
  original.replies.each do |reply|
    assert_same original, reply.topic
  end

  dumped = Marshal.dump(original)
  restored = Marshal.load(dumped)

  assert_not_predicate restored, :new_record?
  assert_equal 1, restored.id
  assert_equal "The First Topic", restored.title
  assert_equal "Have a nice day", restored.content
  assert_predicate restored.association(:replies), :loaded?

  assert_not_equal 0, restored.replies.size
  restored.replies.each do |reply|
    assert_same restored, reply.topic
  end
end
private

@ -3,7 +3,7 @@
require "models/topic"
class Reply < Topic
belongs_to :topic, foreign_key: "parent_id", counter_cache: true
belongs_to :topic, foreign_key: "parent_id", counter_cache: true, inverse_of: :replies
belongs_to :topic_with_primary_key, class_name: "Topic", primary_key: "title", foreign_key: "parent_title", counter_cache: "replies_count", touch: true
has_many :replies, class_name: "SillyReply", dependent: :destroy, foreign_key: "parent_id"
has_many :silly_unique_replies, dependent: :destroy, foreign_key: "parent_id"

@ -46,7 +46,7 @@ def two
end
end
has_many :replies, dependent: :destroy, foreign_key: "parent_id", autosave: true
has_many :replies, dependent: :destroy, foreign_key: "parent_id", autosave: true, inverse_of: :topic
has_many :approved_replies, -> { approved }, class_name: "Reply", foreign_key: "parent_id", counter_cache: "replies_count"
has_many :open_replies, -> { open }, class_name: "Reply", foreign_key: "parent_id"

@ -68,6 +68,7 @@ Below are the default values associated with each target version. In cases of co
- [`config.active_record.belongs_to_required_validates_foreign_key`](#config-active-record-belongs-to-required-validates-foreign-key): `false`
- [`config.active_record.default_column_serializer`](#config-active-record-default-column-serializer): `nil`
- [`config.active_record.encryption.hash_digest_class`](#config-active-record-encryption-hash-digest-class): `OpenSSL::Digest::SHA256`
- [`config.active_record.marshalling_format_version`](#config-active-record-marshalling-format-version): `7.1`
- [`config.active_record.query_log_tags_format`](#config-active-record-query-log-tags-format): `:sqlcommenter`
- [`config.active_record.raise_on_assign_to_attr_readonly`](#config-active-record-raise-on-assign-to-attr-readonly): `true`
- [`config.active_record.run_commit_callbacks_on_first_saved_instances_in_transaction`](#config-active-record-run-commit-callbacks-on-first-saved-instances-in-transaction): `false`
@ -1130,6 +1131,20 @@ to get the parent every time the child record was updated, even when parent has
| (original) | `true` |
| 7.1 | `false` |
#### `config.active_record.marshalling_format_version`
When set to `7.1`, enables a more efficient serialization of Active Record instances with `Marshal.dump`.
This changes the serialization format, so models serialized this
way cannot be read by older (< 7.1) versions of Rails. However, models
serialized with the old format can still be read, regardless of whether this
optimization is enabled.
| Starting with version | The default value is |
| --------------------- | -------------------- |
| (original) | `6.1` |
| 7.1 | `7.1` |
#### `config.active_record.action_on_strict_loading_violation`
Enables raising or logging an exception if strict_loading is set on an

@ -283,6 +283,7 @@ def load_defaults(target_version)
active_record.before_committed_on_all_records = true
active_record.default_column_serializer = nil
active_record.encryption.hash_digest_class = OpenSSL::Digest::SHA256
active_record.marshalling_format_version = 7.1
end
if respond_to?(:action_dispatch)

@ -152,3 +152,12 @@
# recommended to explicitly define the serialization method for each column
# rather than to rely on a global default.
# Rails.application.config.active_record.default_column_serializer = nil
# Enable a performance optimization that serializes Active Record models
# in a faster and more compact way.
#
# To perform a rolling deploy of a Rails 7.1 upgrade, wherein servers that have
# not yet been upgraded must be able to read caches from upgraded servers,
# leave this optimization off on the first deploy, then enable it on a
# subsequent deploy.
# Rails.application.config.active_record.marshalling_format_version = 7.1