Run processors on whole of text (#16155)

There is an inefficiency in the design of our processors which means that Emoji
and other processors run in order n^2 time.

This PR forces the processors to process the entirety of text node before passing
back up. The fundamental inefficiency remains but it should be significantly
ameliorated.

Signed-off-by: Andrew Thornton <art27@cantab.net>
This commit is contained in:
zeripath
2021-06-17 11:35:05 +01:00
committed by GitHub
parent 6ad5d0a306
commit 0db1048c3a
3 changed files with 414 additions and 316 deletions

View File

@ -6,6 +6,7 @@
package emoji package emoji
import ( import (
"io"
"sort" "sort"
"strings" "strings"
"sync" "sync"
@ -145,6 +146,8 @@ func (n *rememberSecondWriteWriter) Write(p []byte) (int, error) {
if n.writecount == 2 { if n.writecount == 2 {
n.idx = n.pos n.idx = n.pos
n.end = n.pos + len(p) n.end = n.pos + len(p)
n.pos += len(p)
return len(p), io.EOF
} }
n.pos += len(p) n.pos += len(p)
return len(p), nil return len(p), nil
@ -155,6 +158,8 @@ func (n *rememberSecondWriteWriter) WriteString(s string) (int, error) {
if n.writecount == 2 { if n.writecount == 2 {
n.idx = n.pos n.idx = n.pos
n.end = n.pos + len(s) n.end = n.pos + len(s)
n.pos += len(s)
return len(s), io.EOF
} }
n.pos += len(s) n.pos += len(s)
return len(s), nil return len(s), nil

File diff suppressed because it is too large Load Diff

View File

@ -464,3 +464,19 @@ func TestIssue16020(t *testing.T) {
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, data, res.String()) assert.Equal(t, data, res.String())
} }
func BenchmarkEmojiPostprocess(b *testing.B) {
data := "🥰 "
for len(data) < 1<<16 {
data += data
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
var res strings.Builder
err := PostProcess(&RenderContext{
URLPrefix: "https://example.com",
Metas: localMetas,
}, strings.NewReader(data), &res)
assert.NoError(b, err)
}
}