From f5af5050b34891ff16a4ef1f8e3d805fe135238d Mon Sep 17 00:00:00 2001 From: Gusted Date: Wed, 13 Sep 2023 12:04:10 +0200 Subject: [PATCH] [GITEA] Use restricted sanitizer for repository description - Currently the repository description uses the same sanitizer as a normal markdown document. This means that element such as heading and images are allowed and can be abused. - Create a minimal restricted sanitizer for the repository description, which only allows what the postprocessor currently allows, which are links and emojis. - Added unit testing. - Resolves https://codeberg.org/forgejo/forgejo/issues/1202 - Resolves https://codeberg.org/Codeberg/Community/issues/1122 (cherry picked from commit a8afa4cd181d7c31f73d6a8fae4c6a4b9622a425) (cherry picked from commit 0238587c51e2c749413ca5a63e47590399fe5a2b) (cherry picked from commit a8c7bbf728326b992e000a3d19c8833610f960c9) (cherry picked from commit 80e05a8245092b4158c6c970ca0563181b40f2eb) --- models/repo/repo.go | 4 ++-- modules/markup/sanitizer.go | 35 +++++++++++++++++++++++++++++--- modules/markup/sanitizer_test.go | 22 ++++++++++++++++++++ 3 files changed, 56 insertions(+), 5 deletions(-) diff --git a/models/repo/repo.go b/models/repo/repo.go index 0b0c029993..8caf1e8cc0 100644 --- a/models/repo/repo.go +++ b/models/repo/repo.go @@ -584,9 +584,9 @@ func (repo *Repository) DescriptionHTML(ctx context.Context) template.HTML { }, repo.Description) if err != nil { log.Error("Failed to render description for %s (ID: %d): %v", repo.Name, repo.ID, err) - return template.HTML(markup.Sanitize(repo.Description)) + return template.HTML(markup.SanitizeDescription(repo.Description)) } - return template.HTML(markup.Sanitize(desc)) + return template.HTML(markup.SanitizeDescription(desc)) } // CloneLink represents different types of clone URLs of repository. diff --git a/modules/markup/sanitizer.go b/modules/markup/sanitizer.go index 48c08831f1..992e85b989 100644 --- a/modules/markup/sanitizer.go +++ b/modules/markup/sanitizer.go @@ -18,9 +18,10 @@ import ( // Sanitizer is a protection wrapper of *bluemonday.Policy which does not allow // any modification to the underlying policies once it's been created. type Sanitizer struct { - defaultPolicy *bluemonday.Policy - rendererPolicies map[string]*bluemonday.Policy - init sync.Once + defaultPolicy *bluemonday.Policy + descriptionPolicy *bluemonday.Policy + rendererPolicies map[string]*bluemonday.Policy + init sync.Once } var ( @@ -41,6 +42,7 @@ func NewSanitizer() { func InitializeSanitizer() { sanitizer.rendererPolicies = map[string]*bluemonday.Policy{} sanitizer.defaultPolicy = createDefaultPolicy() + sanitizer.descriptionPolicy = createRepoDescriptionPolicy() for name, renderer := range renderers { sanitizerRules := renderer.SanitizerRules() @@ -161,6 +163,27 @@ func createDefaultPolicy() *bluemonday.Policy { return policy } +// createRepoDescriptionPolicy returns a minimal more strict policy that is used for +// repository descriptions. +func createRepoDescriptionPolicy() *bluemonday.Policy { + policy := bluemonday.NewPolicy() + + // Allow italics and bold. + policy.AllowElements("i", "b", "em", "strong") + + // Allow code. + policy.AllowElements("code") + + // Allow links + policy.AllowAttrs("href", "target", "rel").OnElements("a") + + // Allow classes for emojis + policy.AllowAttrs("class").Matching(regexp.MustCompile(`^emoji$`)).OnElements("img", "span") + policy.AllowAttrs("aria-label").OnElements("span") + + return policy +} + func addSanitizerRules(policy *bluemonday.Policy, rules []setting.MarkupSanitizerRule) { for _, rule := range rules { if rule.AllowDataURIImages { @@ -176,6 +199,12 @@ func addSanitizerRules(policy *bluemonday.Policy, rules []setting.MarkupSanitize } } +// SanitizeDescription sanitizes the HTML generated for a repository description. +func SanitizeDescription(s string) string { + NewSanitizer() + return sanitizer.descriptionPolicy.Sanitize(s) +} + // Sanitize takes a string that contains a HTML fragment or document and applies policy whitelist. func Sanitize(s string) string { NewSanitizer() diff --git a/modules/markup/sanitizer_test.go b/modules/markup/sanitizer_test.go index 0bc63ff0a7..b7b8792bd7 100644 --- a/modules/markup/sanitizer_test.go +++ b/modules/markup/sanitizer_test.go @@ -73,6 +73,28 @@ func Test_Sanitizer(t *testing.T) { } } +func TestDescriptionSanitizer(t *testing.T) { + NewSanitizer() + + testCases := []string{ + `

Title

`, `Title`, + `image`, ``, + `THUMBS UP`, `THUMBS UP`, + `Hello World`, `Hello World`, + `
`, ``, + `https://example.com`, `https://example.com`, + `Important!`, `Important!`, + `
Click me! Nothing to see here.
`, `Click me! Nothing to see here.`, + ``, ``, + `I have a strong opinion about this.`, `I have a strong opinion about this.`, + `Provides alternative wg(8) tool`, `Provides alternative wg(8) tool`, + } + + for i := 0; i < len(testCases); i += 2 { + assert.Equal(t, testCases[i+1], SanitizeDescription(testCases[i])) + } +} + func TestSanitizeNonEscape(t *testing.T) { descStr := "<script>alert(document.domain)</script>"