mirror of
https://codeberg.org/forgejo/forgejo.git
synced 2024-12-22 08:38:29 +00:00
Run processors on whole of text (#16155)
There is an inefficiency in the design of our processors which means that Emoji and other processors run in order n^2 time. This PR forces the processors to process the entirety of text node before passing back up. The fundamental inefficiency remains but it should be significantly ameliorated. Signed-off-by: Andrew Thornton <art27@cantab.net>
This commit is contained in:
parent
6ad5d0a306
commit
0db1048c3a
|
@ -6,6 +6,7 @@
|
|||
package emoji
|
||||
|
||||
import (
|
||||
"io"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
|
@ -145,6 +146,8 @@ func (n *rememberSecondWriteWriter) Write(p []byte) (int, error) {
|
|||
if n.writecount == 2 {
|
||||
n.idx = n.pos
|
||||
n.end = n.pos + len(p)
|
||||
n.pos += len(p)
|
||||
return len(p), io.EOF
|
||||
}
|
||||
n.pos += len(p)
|
||||
return len(p), nil
|
||||
|
@ -155,6 +158,8 @@ func (n *rememberSecondWriteWriter) WriteString(s string) (int, error) {
|
|||
if n.writecount == 2 {
|
||||
n.idx = n.pos
|
||||
n.end = n.pos + len(s)
|
||||
n.pos += len(s)
|
||||
return len(s), io.EOF
|
||||
}
|
||||
n.pos += len(s)
|
||||
return len(s), nil
|
||||
|
|
|
@ -89,6 +89,7 @@ func isLinkStr(link string) bool {
|
|||
return validLinksPattern.MatchString(link)
|
||||
}
|
||||
|
||||
// FIXME: This function is not concurrent safe
|
||||
func getIssueFullPattern() *regexp.Regexp {
|
||||
if issueFullPattern == nil {
|
||||
issueFullPattern = regexp.MustCompile(regexp.QuoteMeta(setting.AppURL) +
|
||||
|
@ -566,11 +567,16 @@ func replaceContentList(node *html.Node, i, j int, newNodes []*html.Node) {
|
|||
}
|
||||
|
||||
func mentionProcessor(ctx *RenderContext, node *html.Node) {
|
||||
start := 0
|
||||
next := node.NextSibling
|
||||
for node != nil && node != next && start < len(node.Data) {
|
||||
// We replace only the first mention; other mentions will be addressed later
|
||||
found, loc := references.FindFirstMentionBytes([]byte(node.Data))
|
||||
found, loc := references.FindFirstMentionBytes([]byte(node.Data[start:]))
|
||||
if !found {
|
||||
return
|
||||
}
|
||||
loc.Start += start
|
||||
loc.End += start
|
||||
mention := node.Data[loc.Start:loc.End]
|
||||
var teams string
|
||||
teams, ok := ctx.Metas["teams"]
|
||||
|
@ -582,10 +588,17 @@ func mentionProcessor(ctx *RenderContext, node *html.Node) {
|
|||
mentionOrgAndTeam := strings.Split(mention, "/")
|
||||
if mentionOrgAndTeam[0][1:] == ctx.Metas["org"] && strings.Contains(teams, ","+strings.ToLower(mentionOrgAndTeam[1])+",") {
|
||||
replaceContent(node, loc.Start, loc.End, createLink(util.URLJoin(setting.AppURL, "org", ctx.Metas["org"], "teams", mentionOrgAndTeam[1]), mention, "mention"))
|
||||
node = node.NextSibling.NextSibling
|
||||
start = 0
|
||||
continue
|
||||
}
|
||||
return
|
||||
start = loc.End
|
||||
continue
|
||||
}
|
||||
replaceContent(node, loc.Start, loc.End, createLink(util.URLJoin(setting.AppURL, mention[1:]), mention, "mention"))
|
||||
node = node.NextSibling.NextSibling
|
||||
start = 0
|
||||
}
|
||||
}
|
||||
|
||||
func shortLinkProcessor(ctx *RenderContext, node *html.Node) {
|
||||
|
@ -593,6 +606,8 @@ func shortLinkProcessor(ctx *RenderContext, node *html.Node) {
|
|||
}
|
||||
|
||||
func shortLinkProcessorFull(ctx *RenderContext, node *html.Node, noLink bool) {
|
||||
next := node.NextSibling
|
||||
for node != nil && node != next {
|
||||
m := shortLinkPattern.FindStringSubmatchIndex(node.Data)
|
||||
if m == nil {
|
||||
return
|
||||
|
@ -672,7 +687,7 @@ func shortLinkProcessorFull(ctx *RenderContext, node *html.Node, noLink bool) {
|
|||
switch ext := filepath.Ext(link); ext {
|
||||
// fast path: empty string, ignore
|
||||
case "":
|
||||
break
|
||||
// leave image as false
|
||||
case ".jpg", ".jpeg", ".png", ".tif", ".tiff", ".webp", ".gif", ".bmp", ".ico", ".svg":
|
||||
image = true
|
||||
}
|
||||
|
@ -748,12 +763,17 @@ func shortLinkProcessorFull(ctx *RenderContext, node *html.Node, noLink bool) {
|
|||
linkNode.Attr = []html.Attribute{{Key: "href", Val: link}}
|
||||
}
|
||||
replaceContent(node, m[0], m[1], linkNode)
|
||||
node = node.NextSibling.NextSibling
|
||||
}
|
||||
}
|
||||
|
||||
func fullIssuePatternProcessor(ctx *RenderContext, node *html.Node) {
|
||||
if ctx.Metas == nil {
|
||||
return
|
||||
}
|
||||
|
||||
next := node.NextSibling
|
||||
for node != nil && node != next {
|
||||
m := getIssueFullPattern().FindStringSubmatchIndex(node.Data)
|
||||
if m == nil {
|
||||
return
|
||||
|
@ -771,23 +791,25 @@ func fullIssuePatternProcessor(ctx *RenderContext, node *html.Node) {
|
|||
// TODO if m[4]:m[5] is not nil, then link is to a comment,
|
||||
// and we should indicate that in the text somehow
|
||||
replaceContent(node, m[0], m[1], createLink(link, id, "ref-issue"))
|
||||
|
||||
} else {
|
||||
orgRepoID := matchOrg + "/" + matchRepo + id
|
||||
replaceContent(node, m[0], m[1], createLink(link, orgRepoID, "ref-issue"))
|
||||
}
|
||||
node = node.NextSibling.NextSibling
|
||||
}
|
||||
}
|
||||
|
||||
func issueIndexPatternProcessor(ctx *RenderContext, node *html.Node) {
|
||||
if ctx.Metas == nil {
|
||||
return
|
||||
}
|
||||
|
||||
var (
|
||||
found bool
|
||||
ref *references.RenderizableReference
|
||||
)
|
||||
|
||||
next := node.NextSibling
|
||||
for node != nil && node != next {
|
||||
_, exttrack := ctx.Metas["format"]
|
||||
alphanum := ctx.Metas["style"] == IssueNameStyleAlphanumeric
|
||||
|
||||
|
@ -828,7 +850,8 @@ func issueIndexPatternProcessor(ctx *RenderContext, node *html.Node) {
|
|||
|
||||
if ref.Action == references.XRefActionNone {
|
||||
replaceContent(node, ref.RefLocation.Start, ref.RefLocation.End, link)
|
||||
return
|
||||
node = node.NextSibling.NextSibling
|
||||
continue
|
||||
}
|
||||
|
||||
// Decorate action keywords if actionable
|
||||
|
@ -846,6 +869,8 @@ func issueIndexPatternProcessor(ctx *RenderContext, node *html.Node) {
|
|||
Data: node.Data[ref.ActionLocation.End:ref.RefLocation.Start],
|
||||
}
|
||||
replaceContentList(node, ref.ActionLocation.Start, ref.RefLocation.End, []*html.Node{keyword, spaces, link})
|
||||
node = node.NextSibling.NextSibling.NextSibling.NextSibling
|
||||
}
|
||||
}
|
||||
|
||||
// fullSha1PatternProcessor renders SHA containing URLs
|
||||
|
@ -853,6 +878,9 @@ func fullSha1PatternProcessor(ctx *RenderContext, node *html.Node) {
|
|||
if ctx.Metas == nil {
|
||||
return
|
||||
}
|
||||
|
||||
next := node.NextSibling
|
||||
for node != nil && node != next {
|
||||
m := anySHA1Pattern.FindStringSubmatchIndex(node.Data)
|
||||
if m == nil {
|
||||
return
|
||||
|
@ -897,14 +925,23 @@ func fullSha1PatternProcessor(ctx *RenderContext, node *html.Node) {
|
|||
}
|
||||
|
||||
replaceContent(node, start, end, createCodeLink(urlFull, text, "commit"))
|
||||
node = node.NextSibling.NextSibling
|
||||
}
|
||||
}
|
||||
|
||||
// emojiShortCodeProcessor for rendering text like :smile: into emoji
|
||||
func emojiShortCodeProcessor(ctx *RenderContext, node *html.Node) {
|
||||
m := EmojiShortCodeRegex.FindStringSubmatchIndex(node.Data)
|
||||
start := 0
|
||||
next := node.NextSibling
|
||||
for node != nil && node != next && start < len(node.Data) {
|
||||
m := EmojiShortCodeRegex.FindStringSubmatchIndex(node.Data[start:])
|
||||
if m == nil {
|
||||
return
|
||||
}
|
||||
m[0] += start
|
||||
m[1] += start
|
||||
|
||||
start = m[1]
|
||||
|
||||
alias := node.Data[m[0]:m[1]]
|
||||
alias = strings.ReplaceAll(alias, ":", "")
|
||||
|
@ -914,25 +951,39 @@ func emojiShortCodeProcessor(ctx *RenderContext, node *html.Node) {
|
|||
s := strings.Join(setting.UI.Reactions, " ") + "gitea"
|
||||
if strings.Contains(s, alias) {
|
||||
replaceContent(node, m[0], m[1], createCustomEmoji(alias, "emoji"))
|
||||
return
|
||||
node = node.NextSibling.NextSibling
|
||||
start = 0
|
||||
continue
|
||||
}
|
||||
return
|
||||
continue
|
||||
}
|
||||
|
||||
replaceContent(node, m[0], m[1], createEmoji(converted.Emoji, "emoji", converted.Description))
|
||||
node = node.NextSibling.NextSibling
|
||||
start = 0
|
||||
}
|
||||
}
|
||||
|
||||
// emoji processor to match emoji and add emoji class
|
||||
func emojiProcessor(ctx *RenderContext, node *html.Node) {
|
||||
m := emoji.FindEmojiSubmatchIndex(node.Data)
|
||||
start := 0
|
||||
next := node.NextSibling
|
||||
for node != nil && node != next && start < len(node.Data) {
|
||||
m := emoji.FindEmojiSubmatchIndex(node.Data[start:])
|
||||
if m == nil {
|
||||
return
|
||||
}
|
||||
m[0] += start
|
||||
m[1] += start
|
||||
|
||||
codepoint := node.Data[m[0]:m[1]]
|
||||
start = m[1]
|
||||
val := emoji.FromCode(codepoint)
|
||||
if val != nil {
|
||||
replaceContent(node, m[0], m[1], createEmoji(codepoint, "emoji", val.Description))
|
||||
node = node.NextSibling.NextSibling
|
||||
start = 0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -942,10 +993,17 @@ func sha1CurrentPatternProcessor(ctx *RenderContext, node *html.Node) {
|
|||
if ctx.Metas == nil || ctx.Metas["user"] == "" || ctx.Metas["repo"] == "" || ctx.Metas["repoPath"] == "" {
|
||||
return
|
||||
}
|
||||
m := sha1CurrentPattern.FindStringSubmatchIndex(node.Data)
|
||||
|
||||
start := 0
|
||||
next := node.NextSibling
|
||||
for node != nil && node != next && start < len(node.Data) {
|
||||
m := sha1CurrentPattern.FindStringSubmatchIndex(node.Data[start:])
|
||||
if m == nil {
|
||||
return
|
||||
}
|
||||
m[2] += start
|
||||
m[3] += start
|
||||
|
||||
hash := node.Data[m[2]:m[3]]
|
||||
// The regex does not lie, it matches the hash pattern.
|
||||
// However, a regex cannot know if a hash actually exists or not.
|
||||
|
@ -959,32 +1017,46 @@ func sha1CurrentPatternProcessor(ctx *RenderContext, node *html.Node) {
|
|||
if !strings.Contains(err.Error(), "fatal: Needed a single revision") {
|
||||
log.Debug("sha1CurrentPatternProcessor git rev-parse: %v", err)
|
||||
}
|
||||
return
|
||||
start = m[3]
|
||||
continue
|
||||
}
|
||||
|
||||
replaceContent(node, m[2], m[3],
|
||||
createCodeLink(util.URLJoin(setting.AppURL, ctx.Metas["user"], ctx.Metas["repo"], "commit", hash), base.ShortSha(hash), "commit"))
|
||||
start = 0
|
||||
node = node.NextSibling.NextSibling
|
||||
}
|
||||
}
|
||||
|
||||
// emailAddressProcessor replaces raw email addresses with a mailto: link.
|
||||
func emailAddressProcessor(ctx *RenderContext, node *html.Node) {
|
||||
next := node.NextSibling
|
||||
for node != nil && node != next {
|
||||
m := emailRegex.FindStringSubmatchIndex(node.Data)
|
||||
if m == nil {
|
||||
return
|
||||
}
|
||||
|
||||
mail := node.Data[m[2]:m[3]]
|
||||
replaceContent(node, m[2], m[3], createLink("mailto:"+mail, mail, "mailto"))
|
||||
node = node.NextSibling.NextSibling
|
||||
}
|
||||
}
|
||||
|
||||
// linkProcessor creates links for any HTTP or HTTPS URL not captured by
|
||||
// markdown.
|
||||
func linkProcessor(ctx *RenderContext, node *html.Node) {
|
||||
next := node.NextSibling
|
||||
for node != nil && node != next {
|
||||
m := common.LinkRegex.FindStringIndex(node.Data)
|
||||
if m == nil {
|
||||
return
|
||||
}
|
||||
|
||||
uri := node.Data[m[0]:m[1]]
|
||||
replaceContent(node, m[0], m[1], createLink(uri, uri, "link"))
|
||||
node = node.NextSibling.NextSibling
|
||||
}
|
||||
}
|
||||
|
||||
func genDefaultLinkProcessor(defaultLink string) processor {
|
||||
|
@ -1008,12 +1080,17 @@ func genDefaultLinkProcessor(defaultLink string) processor {
|
|||
|
||||
// descriptionLinkProcessor creates links for DescriptionHTML
|
||||
func descriptionLinkProcessor(ctx *RenderContext, node *html.Node) {
|
||||
next := node.NextSibling
|
||||
for node != nil && node != next {
|
||||
m := common.LinkRegex.FindStringIndex(node.Data)
|
||||
if m == nil {
|
||||
return
|
||||
}
|
||||
|
||||
uri := node.Data[m[0]:m[1]]
|
||||
replaceContent(node, m[0], m[1], createDescriptionLink(uri, uri))
|
||||
node = node.NextSibling.NextSibling
|
||||
}
|
||||
}
|
||||
|
||||
func createDescriptionLink(href, content string) *html.Node {
|
||||
|
|
|
@ -464,3 +464,19 @@ func TestIssue16020(t *testing.T) {
|
|||
assert.NoError(t, err)
|
||||
assert.Equal(t, data, res.String())
|
||||
}
|
||||
|
||||
func BenchmarkEmojiPostprocess(b *testing.B) {
|
||||
data := "🥰 "
|
||||
for len(data) < 1<<16 {
|
||||
data += data
|
||||
}
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
var res strings.Builder
|
||||
err := PostProcess(&RenderContext{
|
||||
URLPrefix: "https://example.com",
|
||||
Metas: localMetas,
|
||||
}, strings.NewReader(data), &res)
|
||||
assert.NoError(b, err)
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue