diff --git a/modules/charset/escape.go b/modules/charset/escape.go
index d2e8fb0d87..9883700e88 100644
--- a/modules/charset/escape.go
+++ b/modules/charset/escape.go
@@ -63,6 +63,7 @@ func EscapeControlBytes(text []byte) (EscapeStatus, []byte) {
func EscapeControlReader(text io.Reader, output io.Writer) (escaped EscapeStatus, err error) {
buf := make([]byte, 4096)
readStart := 0
+ runeCount := 0
var n int
var writePos int
@@ -79,6 +80,8 @@ readingloop:
for i < len(bs) {
r, size := utf8.DecodeRune(bs[i:])
+ runeCount++
+
// Now handle the codepoints
switch {
case r == utf8.RuneError:
@@ -113,6 +116,8 @@ readingloop:
lineHasRTLScript = false
lineHasLTRScript = false
+ case runeCount == 1 && r == 0xFEFF: // UTF BOM
+ // the first BOM is safe
case r == '\r' || r == '\t' || r == ' ':
// These are acceptable control characters and space characters
case unicode.IsSpace(r):
@@ -144,7 +149,8 @@ readingloop:
return
}
writePos = i + size
- case unicode.Is(unicode.C, r):
+ // 65279 == 0xFEFF (U+FEFF), the BOM rune.
+ case unicode.Is(unicode.C, r) && r != rune(65279):
escaped.Escaped = true
escaped.HasControls = true
if writePos < i {
diff --git a/modules/charset/escape_test.go b/modules/charset/escape_test.go
index 1804381413..01ccca7724 100644
--- a/modules/charset/escape_test.go
+++ b/modules/charset/escape_test.go
@@ -129,6 +129,14 @@ then resh (ר), and finally heh (ה) (which should appear leftmost).`,
"\n" + `if access_level != "user` + "\u202e" + ` ` + "\u2066" + `// Check if admin` + "\u2069" + ` ` + "\u2066" + `" {` + "\n",
status: EscapeStatus{Escaped: true, HasBIDI: true, BadBIDI: true, HasLTRScript: true, HasRTLScript: true},
},
+ {
+ // UTF-8/16/32 all use the same code point (U+FEFF) for the BOM.
+ // Gitea can read UTF-16/32 content and convert it into UTF-8 internally before rendering it, so only UTF-8 is processed internally
+ name: "UTF BOM",
+ text: "\xef\xbb\xbftest",
+ result: "\xef\xbb\xbftest",
+ status: EscapeStatus{HasLTRScript: true},
+ },
}
func TestEscapeControlString(t *testing.T) {
@@ -163,10 +171,18 @@ func TestEscapeControlReader(t *testing.T) {
// lets add some control characters to the tests
tests := make([]escapeControlTest, 0, len(escapeControlTests)*3)
copy(tests, escapeControlTests)
+
+ // if the text starts with a UTF-8 BOM, keep the BOM first and insert the prefix right after it
+ addPrefix := func(prefix, s string) string {
+ if strings.HasPrefix(s, "\xef\xbb\xbf") {
+ return s[:3] + prefix + s[3:]
+ }
+ return prefix + s
+ }
for _, test := range escapeControlTests {
test.name += " (+Control)"
- test.text = "\u001E" + test.text
- test.result = `` + "\u001e" + `` + test.result
+ test.text = addPrefix("\u001E", test.text)
+ test.result = addPrefix(``+"\u001e"+``, test.result)
test.status.Escaped = true
test.status.HasControls = true
tests = append(tests, test)
@@ -174,8 +190,8 @@ func TestEscapeControlReader(t *testing.T) {
for _, test := range escapeControlTests {
test.name += " (+Mark)"
- test.text = "\u0300" + test.text
- test.result = `` + "\u0300" + `` + test.result
+ test.text = addPrefix("\u0300", test.text)
+ test.result = addPrefix(``+"\u0300"+``, test.result)
test.status.Escaped = true
test.status.HasMarks = true
tests = append(tests, test)