Small optimization of the Ruby 1.9 unescape. We should make sure that inbound US-ASCII always means UTF-8. It seems so, based on a quick survey of common browsers, but let's be sure.
This commit is contained in:
parent
b8af484476
commit
16ee4b4d1b
@ -6,11 +6,15 @@
|
|||||||
# Probe: does this Ruby's stock URI unescape round-trip a multibyte UTF-8
# string? On 1.9 the stock implementation returns ASCII-8BIT, so the
# comparison below fails and we install a replacement that preserves
# the source string's encoding.
str = "\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E" # Ni-ho-nn-go in UTF-8, means Japanese.

parser = URI::Parser.new

unless str == parser.unescape(parser.escape(str))
  URI::Parser.class_eval do
    remove_method :unescape

    # Percent-decode +str+, yielding a string tagged with the input's
    # encoding (US-ASCII inputs are promoted to UTF-8, since escaped
    # multibyte data arrives as pure ASCII).
    # TODO: Are we actually sure that ASCII == UTF-8?
    # YK: My initial experiments say yes, but let's be sure please
    def unescape(str, escaped = /%[a-fA-F\d]{2}/)
      target = str.encoding
      target = Encoding::UTF_8 if Encoding::US_ASCII == target
      decoded = str.gsub(escaped) { |match| [match[1, 2].hex].pack('C') }
      decoded.force_encoding(target)
    end
  end
end
|
||||||
|
Loading…
Reference in New Issue
Block a user