Add support for []= on ActiveSupport::Multibyte::Chars. Closes #9142. [ewan, manfred]

git-svn-id: http://svn-commit.rubyonrails.org/rails/trunk@7257 5ecf4fe2-1ee6-0310-87b1-e25e094e27de
2007-07-31 04:59:10 +00:00 · 2007-07-31 04:59:10 +00:00 · be196f3f7e
commit be196f3f7e
parent ea07212d97
3 changed files with 74 additions and 0 deletions
--- a/activesupport/CHANGELOG
+++ b/activesupport/CHANGELOG
@ -1,5 +1,7 @@
 *SVN*

+* Add support for []= on ActiveSupport::Multibyte::Chars. Closes #9142. [ewan, manfred]
+
 * Added Array#extract_options! to encapsulate the pattern of getting an options hash out of a variable number of parameters #8759 [norbert].

 * Let alias_attribute work with attributes with initial capital letters (legacy columns etc).  Closes #8596 [mpalmer]
--- a/activesupport/lib/active_support/multibyte/handlers/utf8_handler.rb
+++ b/activesupport/lib/active_support/multibyte/handlers/utf8_handler.rb
@ -140,6 +140,44 @@ def index(str, *args)
        bidx ? (u_unpack(str.slice(0...bidx)).size) : nil
      end
      
+      # Works just like the indexed replace method on string, except instead of byte offsets you specify
+      # character offsets.
+      #
+      # Example:
+      #
+      #   s = "Müller"
+      #   s.chars[2] = "e" # Replace character with offset 2
+      #   s
+      #   #=> "Müeler"
+      #
+      #   s = "Müller"
+      #   s.chars[1, 2] = "ö" # Replace 2 characters at character offset 1
+      #   s
+      #   #=> "Möler"
+      def []=(str, *args)
+        replace_by = args.pop
+        # Indexed replace with regular expressions already works
+        return str[*args] = replace_by if args.first.is_a?(Regexp)
+        result = u_unpack(str)
+        if args[0].is_a?(Fixnum)
+          raise IndexError, "index #{args[0]} out of string" if args[0] >= result.length
+          min = args[0]
+          max = args[1].nil? ? min : (min + args[1] - 1)
+          range = Range.new(min, max)
+          replace_by = [replace_by].pack('U') if replace_by.is_a?(Fixnum)
+        elsif args.first.is_a?(Range)
+          raise RangeError, "#{args[0]} out of range" if args[0].min >= result.length
+          range = args[0]
+        else
+          needle = args[0].to_s
+          min = index(str, needle)
+          max = min + length(needle) - 1
+          range = Range.new(min, max)
+        end
+        result[range] = u_unpack(replace_by)
+        str.replace(result.pack('U*'))
+      end
+      
      # Does Unicode-aware rstrip
      def rstrip(str)
        str.gsub(UNICODE_TRAILERS_PAT, '')
--- a/activesupport/test/multibyte_handler_test.rb
+++ b/activesupport/test/multibyte_handler_test.rb
@ -199,6 +199,40 @@ def test_index
     assert_raise(ActiveSupport::Multibyte::Handlers::EncodingError) { @handler.index(@bytestring, "\010") }
  end
  
+  def test_indexed_insert
+    s = "Καλη!"
+    @handler[s, 2] = "a"
+    assert_equal "Καaη!", s
+    @handler[s, 2] = "ηη"
+    assert_equal "Καηηη!", s
+    assert_raises(IndexError) { @handler[s, 10] = 'a' }
+    assert_equal "Καηηη!", s
+    @handler[s, 2] = 32
+    assert_equal "Κα ηη!", s
+    @handler[s, 3, 2] = "λλλ"
+    assert_equal "Κα λλλ!", s
+    @handler[s, 1, 0] = "λ"
+    assert_equal "Κλα λλλ!", s
+    assert_raises(IndexError) { @handler[s, 10, 4] = 'a' }
+    assert_equal "Κλα λλλ!", s
+    @handler[s, 4..6] = "ηη"
+    assert_equal "Κλα ηη!", s
+    assert_raises(RangeError) { @handler[s, 10..12] = 'a' }
+    assert_equal "Κλα ηη!", s
+    @handler[s, /ηη/] = "λλλ"
+    assert_equal "Κλα λλλ!", s
+    assert_raises(IndexError) { @handler[s, /ii/] = 'a' }
+    assert_equal "Κλα λλλ!", s
+    @handler[s, /(λλ)(.)/, 2] = "α"
+    assert_equal "Κλα λλα!", s
+    assert_raises(IndexError) { @handler[s, /()/, 10] = 'a' }
+    assert_equal "Κλα λλα!", s
+    @handler[s, "α"] = "η"
+    assert_equal "Κλη λλα!", s
+    @handler[s, "λλ"] = "ααα"
+    assert_equal "Κλη αααα!", s
+  end
+  
  def test_strip
    # A unicode aware version of strip should strip all 26 types of whitespace. This includes the NO BREAK SPACE
    # aka BOM (byte order mark). The byte order mark has no place in UTF-8 because it's used to detect LE and BE.