Run processors on whole of text (#16155) (#16185)

Backport #16155

There is an inefficiency in the design of our processors which means that Emoji
and other processors run in order n^2 time.

This PR forces the processors to process the entirety of text node before passing
back up. The fundamental inefficiency remains but it should be significantly
ameliorated.

Signed-off-by: Andrew Thornton <art27@cantab.net>
This commit is contained in:
zeripath 2021-06-17 19:01:33 +01:00 committed by GitHub
parent 849d316d8d
commit 5ff807acde
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 413 additions and 318 deletions

View File

@ -6,6 +6,7 @@
package emoji
import (
"io"
"sort"
"strings"
"sync"
@ -145,6 +146,8 @@ func (n *rememberSecondWriteWriter) Write(p []byte) (int, error) {
if n.writecount == 2 {
n.idx = n.pos
n.end = n.pos + len(p)
n.pos += len(p)
return len(p), io.EOF
}
n.pos += len(p)
return len(p), nil
@ -155,6 +158,8 @@ func (n *rememberSecondWriteWriter) WriteString(s string) (int, error) {
if n.writecount == 2 {
n.idx = n.pos
n.end = n.pos + len(s)
n.pos += len(s)
return len(s), io.EOF
}
n.pos += len(s)
return len(s), nil

File diff suppressed because it is too large Load Diff

View File

@ -425,3 +425,19 @@ func TestIssue16020(t *testing.T) {
assert.NoError(t, err)
assert.Equal(t, data, string(res))
}
func BenchmarkEmojiPostprocess(b *testing.B) {
data := "🥰 "
for len(data) < 1<<16 {
data += data
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
_, err := PostProcess(
[]byte(data),
"https://example.com",
localMetas,
false)
assert.NoError(b, err)
}
}