Deprecated passing of start value to find_in_batches and find_each in favour of begin_at value.

This commit is contained in:
Vipul A M 2015-02-17 03:46:49 +05:30
parent 05b6e5d8b7
commit 6598272770
4 changed files with 73 additions and 33 deletions

@ -1,3 +1,8 @@
* Deprecated passing of `start` value to `find_in_batches` and `find_each`
in favour of `begin_at` value.
*Vipul A M*
* Add `foreign_key_exists?` method.
*Tõnis Simo*

@ -27,15 +27,15 @@ module Batches
#
# ==== Options
# * <tt>:batch_size</tt> - Specifies the size of the batch. Default to 1000.
# * <tt>:start</tt> - Specifies the primary key value to start from, inclusive of the value.
# * <tt>:begin_at</tt> - Specifies the primary key value to start from, inclusive of the value.
# * <tt>:end_at</tt> - Specifies the primary key value to end at, inclusive of the value.
# This is especially useful if you want multiple workers dealing with
# the same processing queue. You can make worker 1 handle all the records
# between id 0 and 10,000 and worker 2 handle from 10,000 and beyond
# (by setting the +:start+ and +:end_at+ option on each worker).
# (by setting the +:begin_at+ and +:end_at+ option on each worker).
#
# # Let's process records in batches of 2000, starting from primary key 2000 (inclusive)
# Person.find_each(start: 2000, batch_size: 2000) do |person|
# Person.find_each(begin_at: 2000, batch_size: 2000) do |person|
# person.party_all_night!
# end
#
@ -46,15 +46,22 @@ module Batches
#
# NOTE: You can't set the limit either, that's used to control
# the batch sizes.
def find_each(start: nil, end_at: nil, batch_size: 1000)
def find_each(begin_at: nil, end_at: nil, batch_size: 1000, start: nil)
# Backward compatibility: +start+ is the deprecated name for +begin_at+.
# When it is given it takes over as the lower bound and a deprecation
# warning naming this method (find_each) is emitted.
if start
begin_at = start
ActiveSupport::Deprecation.warn(<<-MSG.squish)
Passing `start` value to find_each is deprecated, and will be removed in Rails 5.1.
Please pass `begin_at` instead.
MSG
end
if block_given?
find_in_batches(start: start, end_at: end_at, batch_size: batch_size) do |records|
find_in_batches(begin_at: begin_at, end_at: end_at, batch_size: batch_size) do |records|
records.each { |record| yield record }
end
else
enum_for(:find_each, start: start, end_at: end_at, batch_size: batch_size) do
enum_for(:find_each, begin_at: begin_at, end_at: end_at, batch_size: batch_size) do
relation = self
apply_limits(relation, start, end_at).size
apply_limits(relation, begin_at, end_at).size
end
end
end
@ -79,15 +86,15 @@ def find_each(start: nil, end_at: nil, batch_size: 1000)
#
# ==== Options
# * <tt>:batch_size</tt> - Specifies the size of the batch. Default to 1000.
# * <tt>:start</tt> - Specifies the primary key value to start from, inclusive of the value.
# * <tt>:begin_at</tt> - Specifies the primary key value to start from, inclusive of the value.
# * <tt>:end_at</tt> - Specifies the primary key value to end at, inclusive of the value.
# This is especially useful if you want multiple workers dealing with
# the same processing queue. You can make worker 1 handle all the records
# between id 0 and 10,000 and worker 2 handle from 10,000 and beyond
# (by setting the +:start+ and +:end_at+ option on each worker).
# (by setting the +:begin_at+ and +:end_at+ option on each worker).
#
# # Let's process the next 2000 records
# Person.find_in_batches(start: 2000, batch_size: 2000) do |group|
# Person.find_in_batches(begin_at: 2000, batch_size: 2000) do |group|
# group.each { |person| person.party_all_night! }
# end
#
@ -98,12 +105,19 @@ def find_each(start: nil, end_at: nil, batch_size: 1000)
#
# NOTE: You can't set the limit either, that's used to control
# the batch sizes.
def find_in_batches(start: nil, end_at: nil, batch_size: 1000)
relation = self
def find_in_batches(begin_at: nil, end_at: nil, batch_size: 1000, start: nil)
# Backward compatibility: +start+ is the deprecated name for +begin_at+.
# When it is given it overrides +begin_at+ and a deprecation warning is emitted.
if start
begin_at = start
ActiveSupport::Deprecation.warn(<<-MSG.squish)
Passing `start` value to find_in_batches is deprecated, and will be removed in Rails 5.1.
Please pass `begin_at` instead.
MSG
end
relation = self
unless block_given?
return to_enum(:find_in_batches, start: start, end_at: end_at, batch_size: batch_size) do
total = apply_limits(relation, start, end_at).size
return to_enum(:find_in_batches, begin_at: begin_at, end_at: end_at, batch_size: batch_size) do
total = apply_limits(relation, begin_at, end_at).size
(total - 1).div(batch_size) + 1
end
end
@ -113,7 +127,7 @@ def find_in_batches(start: nil, end_at: nil, batch_size: 1000)
end
relation = relation.reorder(batch_order).limit(batch_size)
relation = apply_limits(relation, start, end_at)
relation = apply_limits(relation, begin_at, end_at)
records = relation.to_a
while records.any?
@ -131,8 +145,8 @@ def find_in_batches(start: nil, end_at: nil, batch_size: 1000)
private
def apply_limits(relation, start, end_at)
relation = relation.where(table[primary_key].gteq(start)) if start
def apply_limits(relation, begin_at, end_at)
relation = relation.where(table[primary_key].gteq(begin_at)) if begin_at
relation = relation.where(table[primary_key].lteq(end_at)) if end_at
relation
end

@ -37,9 +37,9 @@ def test_each_should_return_an_enumerator_if_no_block_is_present
if Enumerator.method_defined? :size
def test_each_should_return_a_sized_enumerator
assert_equal 11, Post.find_each(:batch_size => 1).size
assert_equal 5, Post.find_each(:batch_size => 2, :start => 7).size
assert_equal 11, Post.find_each(:batch_size => 10_000).size
assert_equal 11, Post.find_each(batch_size: 1).size
assert_equal 5, Post.find_each(batch_size: 2, begin_at: 7).size
assert_equal 11, Post.find_each(batch_size: 10_000).size
end
end
@ -99,7 +99,7 @@ def test_find_in_batches_should_return_batches
def test_find_in_batches_should_start_from_the_start_option
assert_queries(@total) do
Post.find_in_batches(:batch_size => 1, :start => 2) do |batch|
Post.find_in_batches(batch_size: 1, begin_at: 2) do |batch|
assert_kind_of Array, batch
assert_kind_of Post, batch.first
end
@ -172,7 +172,7 @@ def test_find_in_batches_should_not_ignore_the_default_scope_if_it_is_other_then
def test_find_in_batches_should_not_modify_passed_options
assert_nothing_raised do
Post.find_in_batches({ batch_size: 42, start: 1 }.freeze){}
Post.find_in_batches({ batch_size: 42, begin_at: 1 }.freeze){}
end
end
@ -181,7 +181,7 @@ def test_find_in_batches_should_use_any_column_as_primary_key
start_nick = nick_order_subscribers.second.nick
subscribers = []
Subscriber.find_in_batches(:batch_size => 1, :start => start_nick) do |batch|
Subscriber.find_in_batches(batch_size: 1, begin_at: start_nick) do |batch|
subscribers.concat(batch)
end
@ -209,11 +209,32 @@ def test_find_in_batches_should_return_an_enumerator
end
end
def test_find_in_batches_start_deprecated
assert_deprecated do
assert_queries(@total) do
Post.find_in_batches(batch_size: 1, start: 2) do |batch|
assert_kind_of Array, batch
assert_kind_of Post, batch.first
end
end
end
end
def test_find_each_start_deprecated
assert_deprecated do
assert_queries(@total) do
Post.find_each(batch_size: 1, start: 2) do |post|
assert_kind_of Post, post
end
end
end
end
if Enumerator.method_defined? :size
def test_find_in_batches_should_return_a_sized_enumerator
assert_equal 11, Post.find_in_batches(:batch_size => 1).size
assert_equal 6, Post.find_in_batches(:batch_size => 2).size
assert_equal 4, Post.find_in_batches(:batch_size => 2, :start => 4).size
assert_equal 4, Post.find_in_batches(batch_size: 2, begin_at: 4).size
assert_equal 4, Post.find_in_batches(:batch_size => 3).size
assert_equal 1, Post.find_in_batches(:batch_size => 10_000).size
end

@ -317,7 +317,7 @@ end
The `find_each` method accepts most of the options allowed by the regular `find` method, except for `:order` and `:limit`, which are reserved for internal use by `find_each`.
Two additional options, `:batch_size` and `:start`, are available as well.
Two additional options, `:batch_size` and `:begin_at`, are available as well.
**`:batch_size`**
@ -329,29 +329,29 @@ User.find_each(batch_size: 5000) do |user|
end
```
**`:start`**
**`:begin_at`**
By default, records are fetched in ascending order of the primary key, which must be an integer. The `:start` option allows you to configure the first ID of the sequence whenever the lowest ID is not the one you need. This would be useful, for example, if you wanted to resume an interrupted batch process, provided you saved the last processed ID as a checkpoint.
By default, records are fetched in ascending order of the primary key, which must be an integer. The `:begin_at` option allows you to configure the first ID of the sequence whenever the lowest ID is not the one you need. This would be useful, for example, if you wanted to resume an interrupted batch process, provided you saved the last processed ID as a checkpoint.
For example, to send newsletters only to users with the primary key starting from 2000, and to retrieve them in batches of 5000:
```ruby
User.find_each(start: 2000, batch_size: 5000) do |user|
User.find_each(begin_at: 2000, batch_size: 5000) do |user|
NewsMailer.weekly(user).deliver_now
end
```
Another example would be if you wanted multiple workers handling the same processing queue. You could have each worker handle 10000 records by setting the appropriate `:start` option on each worker.
Another example would be if you wanted multiple workers handling the same processing queue. You could have each worker handle 10000 records by setting the appropriate `:begin_at` option on each worker.
**`:end_at`**
Similar to the `:start` option, `:end_at` allows you to configure the last ID of the sequence whenever the highest ID is not the one you need.
This would be useful, for example, if you wanted to run a batch process, using a subset of records based on `:start` and `:end_at`
Similar to the `:begin_at` option, `:end_at` allows you to configure the last ID of the sequence whenever the highest ID is not the one you need.
This would be useful, for example, if you wanted to run a batch process, using a subset of records based on `:begin_at` and `:end_at`.
For example, to send newsletters only to users with the primary key starting from 2000 up to 10000 and to retrieve them in batches of 5000:
```ruby
User.find_each(start: 2000, end_at: 10000, batch_size: 5000) do |user|
User.find_each(begin_at: 2000, end_at: 10000, batch_size: 5000) do |user|
NewsMailer.weekly(user).deliver_now
end
```
@ -369,7 +369,7 @@ end
##### Options for `find_in_batches`
The `find_in_batches` method accepts the same `:batch_size`, `:start` and `:end_at` options as `find_each`.
The `find_in_batches` method accepts the same `:batch_size`, `:begin_at` and `:end_at` options as `find_each`.
Conditions
----------