From 255f46d7ab124d86a71e994deffca5f4f438b49b Mon Sep 17 00:00:00 2001 From: William Pitcock Date: Sun, 9 Sep 2018 23:29:00 +0000 Subject: [PATCH 1/5] html: new module providing a configurable markup scrubbing policy --- config/config.exs | 3 +++ lib/pleroma/html.ex | 14 ++++++++++++++ 2 files changed, 17 insertions(+) create mode 100644 lib/pleroma/html.ex diff --git a/config/config.exs b/config/config.exs index ed718c3d3..559a12a91 100644 --- a/config/config.exs +++ b/config/config.exs @@ -76,6 +76,9 @@ config :pleroma, :instance, quarantined_instances: [], managed_config: true +config :pleroma, :markup, + scrub_policy: HtmlSanitizeEx.Scrubber.BasicHTML + config :pleroma, :fe, theme: "pleroma-dark", logo: "/static/logo.png", diff --git a/lib/pleroma/html.ex b/lib/pleroma/html.ex new file mode 100644 index 000000000..0ec73a91d --- /dev/null +++ b/lib/pleroma/html.ex @@ -0,0 +1,14 @@ +defmodule Pleroma.HTML do + alias HtmlSanitizeEx.Scrubber + + @markup Application.get_env(:pleroma, :markup) + + def filter_tags(html) do + scrubber = Keyword.get(@markup, :scrub_policy) + html |> Scrubber.scrub(scrubber) + end + + def strip_tags(html) do + html |> Scrubber.scrub(Scrubber.StripTags) + end +end From ac486fc59b49d26db7c3b6b61d0affeb34f9b3e0 Mon Sep 17 00:00:00 2001 From: William Pitcock Date: Sun, 9 Sep 2018 23:40:24 +0000 Subject: [PATCH 2/5] everywhere: use Pleroma.HTML module instead of HtmlSanitizeEx directly --- lib/pleroma/formatter.ex | 7 ++++--- lib/pleroma/gopher/server.ex | 3 ++- lib/pleroma/web/mastodon_api/views/account_view.ex | 3 ++- lib/pleroma/web/mastodon_api/views/status_view.ex | 11 ++++++----- .../twitter_api/representers/activity_representer.ex | 5 +++-- lib/pleroma/web/twitter_api/views/activity_view.ex | 5 +++-- lib/pleroma/web/twitter_api/views/user_view.ex | 7 ++++--- 7 files changed, 24 insertions(+), 17 deletions(-) diff --git a/lib/pleroma/formatter.ex b/lib/pleroma/formatter.ex index 2b4c3c2aa..62f54a3f2 100644 --- a/lib/pleroma/formatter.ex 
+++ b/lib/pleroma/formatter.ex @@ -1,6 +1,7 @@ defmodule Pleroma.Formatter do alias Pleroma.User alias Pleroma.Web.MediaProxy + alias Pleroma.HTML @tag_regex ~r/\#\w+/u def parse_tags(text, data \\ %{}) do @@ -144,8 +145,8 @@ defmodule Pleroma.Formatter do def emojify(text, emoji) do Enum.reduce(emoji, text, fn {emoji, file}, text -> - emoji = HtmlSanitizeEx.strip_tags(emoji) - file = HtmlSanitizeEx.strip_tags(file) + emoji = HTML.strip_tags(emoji) + file = HTML.strip_tags(file) String.replace( text, @@ -154,7 +155,7 @@ defmodule Pleroma.Formatter do MediaProxy.url(file) }' />" ) - |> HtmlSanitizeEx.basic_html() + |> HTML.filter_tags() end) end diff --git a/lib/pleroma/gopher/server.ex b/lib/pleroma/gopher/server.ex index 97a1dea77..1ad27ef27 100644 --- a/lib/pleroma/gopher/server.ex +++ b/lib/pleroma/gopher/server.ex @@ -35,6 +35,7 @@ defmodule Pleroma.Gopher.Server.ProtocolHandler do alias Pleroma.User alias Pleroma.Activity alias Pleroma.Repo + alias Pleroma.HTML @instance Application.get_env(:pleroma, :instance) @gopher Application.get_env(:pleroma, :gopher) @@ -79,7 +80,7 @@ defmodule Pleroma.Gopher.Server.ProtocolHandler do info("#{like_count} likes, #{announcement_count} repeats") <> "i\tfake\t(NULL)\t0\r\n" <> info( - HtmlSanitizeEx.strip_tags( + HTML.strip_tags( String.replace(activity.data["object"]["content"], "<br>", "\r") ) ) diff --git a/lib/pleroma/web/mastodon_api/views/account_view.ex b/lib/pleroma/web/mastodon_api/views/account_view.ex index 7915933be..7c92c991f 100644 --- a/lib/pleroma/web/mastodon_api/views/account_view.ex +++ b/lib/pleroma/web/mastodon_api/views/account_view.ex @@ -4,6 +4,7 @@ defmodule Pleroma.Web.MastodonAPI.AccountView do alias Pleroma.Web.MastodonAPI.AccountView alias Pleroma.Web.CommonAPI.Utils alias Pleroma.Web.MediaProxy + alias Pleroma.HTML def render("accounts.json", %{users: users} = opts) do render_many(users, AccountView, "account.json", opts) @@ -42,7 +43,7 @@ defmodule Pleroma.Web.MastodonAPI.AccountView do followers_count: user_info.follower_count, following_count: user_info.following_count, statuses_count: user_info.note_count, - note: HtmlSanitizeEx.basic_html(user.bio) || "", + note: HTML.filter_tags(user.bio) || "", url: user.ap_id, avatar: image, avatar_static: image, diff --git a/lib/pleroma/web/mastodon_api/views/status_view.ex b/lib/pleroma/web/mastodon_api/views/status_view.ex index 6962aa54f..f1daa2624 100644 --- a/lib/pleroma/web/mastodon_api/views/status_view.ex +++ b/lib/pleroma/web/mastodon_api/views/status_view.ex @@ -5,6 +5,7 @@ defmodule Pleroma.Web.MastodonAPI.StatusView do alias Pleroma.Web.CommonAPI.Utils alias Pleroma.Web.MediaProxy alias Pleroma.Repo + alias Pleroma.HTML # TODO: Add cached version.
defp get_replied_to_activities(activities) do @@ -111,10 +112,10 @@ defmodule Pleroma.Web.MastodonAPI.StatusView do emojis = (activity.data["object"]["emoji"] || []) |> Enum.map(fn {name, url} -> - name = HtmlSanitizeEx.strip_tags(name) + name = HTML.strip_tags(name) url = - HtmlSanitizeEx.strip_tags(url) + HTML.strip_tags(url) |> MediaProxy.url() %{shortcode: name, url: url, static_url: url} @@ -221,7 +222,7 @@ defmodule Pleroma.Web.MastodonAPI.StatusView do object["content"] end - HtmlSanitizeEx.basic_html(content) + HTML.filter_tags(content) end def render_content(%{"type" => "Article"} = object) do @@ -234,10 +235,10 @@ defmodule Pleroma.Web.MastodonAPI.StatusView do object["content"] end - HtmlSanitizeEx.basic_html(content) + HTML.filter_tags(content) end def render_content(object) do - HtmlSanitizeEx.basic_html(object["content"]) + HTML.filter_tags(object["content"]) end end diff --git a/lib/pleroma/web/twitter_api/representers/activity_representer.ex b/lib/pleroma/web/twitter_api/representers/activity_representer.ex index 9abea59a7..5c4eed671 100644 --- a/lib/pleroma/web/twitter_api/representers/activity_representer.ex +++ b/lib/pleroma/web/twitter_api/representers/activity_representer.ex @@ -7,6 +7,7 @@ defmodule Pleroma.Web.TwitterAPI.Representers.ActivityRepresenter do alias Pleroma.Web.TwitterAPI.{TwitterAPI, UserView, ActivityView} alias Pleroma.Web.CommonAPI.Utils alias Pleroma.Formatter + alias Pleroma.HTML defp user_by_ap_id(user_list, ap_id) do Enum.find(user_list, fn %{ap_id: user_id} -> ap_id == user_id end) @@ -167,7 +168,7 @@ defmodule Pleroma.Web.TwitterAPI.Representers.ActivityRepresenter do {summary, content} = ActivityView.render_content(object) html = - HtmlSanitizeEx.basic_html(content) + HTML.filter_tags(content) |> Formatter.emojify(object["emoji"]) video = @@ -184,7 +185,7 @@ defmodule Pleroma.Web.TwitterAPI.Representers.ActivityRepresenter do "uri" => activity.data["object"]["id"], "user" => UserView.render("show.json", %{user: user, 
for: opts[:for]}), "statusnet_html" => html, - "text" => HtmlSanitizeEx.strip_tags(content), + "text" => HTML.strip_tags(content), "is_local" => activity.local, "is_post_verb" => true, "created_at" => created_at, diff --git a/lib/pleroma/web/twitter_api/views/activity_view.ex b/lib/pleroma/web/twitter_api/views/activity_view.ex index 909eefdd8..666a35a24 100644 --- a/lib/pleroma/web/twitter_api/views/activity_view.ex +++ b/lib/pleroma/web/twitter_api/views/activity_view.ex @@ -11,6 +11,7 @@ defmodule Pleroma.Web.TwitterAPI.ActivityView do alias Pleroma.User alias Pleroma.Repo alias Pleroma.Formatter + alias Pleroma.HTML import Ecto.Query @@ -232,7 +233,7 @@ defmodule Pleroma.Web.TwitterAPI.ActivityView do {summary, content} = render_content(object) html = - HtmlSanitizeEx.basic_html(content) + HTML.filter_tags(content) |> Formatter.emojify(object["emoji"]) %{ @@ -240,7 +241,7 @@ defmodule Pleroma.Web.TwitterAPI.ActivityView do "uri" => activity.data["object"]["id"], "user" => UserView.render("show.json", %{user: user, for: opts[:for]}), "statusnet_html" => html, - "text" => HtmlSanitizeEx.strip_tags(content), + "text" => HTML.strip_tags(content), "is_local" => activity.local, "is_post_verb" => true, "created_at" => created_at, diff --git a/lib/pleroma/web/twitter_api/views/user_view.ex b/lib/pleroma/web/twitter_api/views/user_view.ex index 32f93153d..d67ef5f74 100644 --- a/lib/pleroma/web/twitter_api/views/user_view.ex +++ b/lib/pleroma/web/twitter_api/views/user_view.ex @@ -4,6 +4,7 @@ defmodule Pleroma.Web.TwitterAPI.UserView do alias Pleroma.Formatter alias Pleroma.Web.CommonAPI.Utils alias Pleroma.Web.MediaProxy + alias Pleroma.HTML def render("show.json", %{user: user = %User{}} = assigns) do render_one(user, Pleroma.Web.TwitterAPI.UserView, "user.json", assigns) @@ -39,8 +40,8 @@ defmodule Pleroma.Web.TwitterAPI.UserView do data = %{ "created_at" => user.inserted_at |> Utils.format_naive_asctime(), "description" => - HtmlSanitizeEx.strip_tags((user.bio || "") |> String.replace("<br>", "\n")), - "description_html" => HtmlSanitizeEx.basic_html(user.bio), + HTML.strip_tags((user.bio || "") |> String.replace("<br>", "\n")), + "description_html" => HTML.filter_tags(user.bio), "favourites_count" => 0, "followers_count" => user_info[:follower_count], "following" => following, @@ -49,7 +50,7 @@ defmodule Pleroma.Web.TwitterAPI.UserView do "friends_count" => user_info[:following_count], "id" => user.id, "name" => user.name, - "name_html" => HtmlSanitizeEx.strip_tags(user.name) |> Formatter.emojify(emoji), + "name_html" => HTML.strip_tags(user.name) |> Formatter.emojify(emoji), "profile_image_url" => image, "profile_image_url_https" => image, "profile_image_url_profile_size" => image, From 40e2f6e50034e81c3bf509e9dc9f2c938d86445d Mon Sep 17 00:00:00 2001 From: William Pitcock Date: Mon, 10 Sep 2018 00:05:26 +0000 Subject: [PATCH 3/5] html: add default scrubbing profile and configuration knobs --- config/config.exs | 6 ++- lib/pleroma/html.ex | 100 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 105 insertions(+), 1 deletion(-) diff --git a/config/config.exs b/config/config.exs index 559a12a91..b3889ba12 100644 --- a/config/config.exs +++ b/config/config.exs @@ -77,7 +77,11 @@ config :pleroma, :instance, managed_config: true config :pleroma, :markup, - scrub_policy: HtmlSanitizeEx.Scrubber.BasicHTML + allow_inline_images: false, + allow_headings: false, + allow_tables: false, + allow_fonts: false, + scrub_policy: Pleroma.HTML.Scrubber.Default config :pleroma, :fe, theme: "pleroma-dark", diff --git a/lib/pleroma/html.ex b/lib/pleroma/html.ex index 0ec73a91d..1c62f2ccc 100644 --- a/lib/pleroma/html.ex +++ b/lib/pleroma/html.ex @@ -12,3 +12,103 @@ defmodule Pleroma.HTML do html |> Scrubber.scrub(Scrubber.StripTags) end end + +defmodule Pleroma.HTML.Scrubber.TwitterText do + @moduledoc """ + An HTML scrubbing policy which limits to twitter-style text. Only + paragraphs, breaks and links are allowed through the filter.
+ """ + + require HtmlSanitizeEx.Scrubber.Meta + alias HtmlSanitizeEx.Scrubber.Meta + + @valid_schemes ["http", "https"] + + Meta.remove_cdata_sections_before_scrub() + Meta.strip_comments() + + # links + Meta.allow_tag_with_uri_attributes("a", ["href"], @valid_schemes) + Meta.allow_tag_with_these_attributes("a", ["name", "title"]) + + # paragraphs and linebreaks + Meta.allow_tag_with_these_attributes("br", []) + Meta.allow_tag_with_these_attributes("p", []) + + # microformats + Meta.allow_tag_with_these_attributes("span", []) +end + +defmodule Pleroma.HTML.Scrubber.Default do + @doc "The default HTML scrubbing policy: no " + + require HtmlSanitizeEx.Scrubber.Meta + alias HtmlSanitizeEx.Scrubber.Meta + + @valid_schemes ["http", "https"] + + Meta.remove_cdata_sections_before_scrub() + Meta.strip_comments() + + Meta.allow_tag_with_uri_attributes("a", ["href"], @valid_schemes) + Meta.allow_tag_with_these_attributes("a", ["name", "title"]) + + Meta.allow_tag_with_these_attributes("b", []) + Meta.allow_tag_with_these_attributes("blockquote", []) + Meta.allow_tag_with_these_attributes("br", []) + Meta.allow_tag_with_these_attributes("code", []) + Meta.allow_tag_with_these_attributes("del", []) + Meta.allow_tag_with_these_attributes("em", []) + Meta.allow_tag_with_these_attributes("i", []) + Meta.allow_tag_with_these_attributes("li", []) + Meta.allow_tag_with_these_attributes("ol", []) + Meta.allow_tag_with_these_attributes("p", []) + Meta.allow_tag_with_these_attributes("pre", []) + Meta.allow_tag_with_these_attributes("span", []) + Meta.allow_tag_with_these_attributes("strong", []) + Meta.allow_tag_with_these_attributes("u", []) + Meta.allow_tag_with_these_attributes("ul", []) + + @markup Application.get_env(:pleroma, :markup) + @allow_inline_images Keyword.get(@markup, :allow_inline_images) + + if @allow_inline_images do + Meta.allow_tag_with_uri_attributes("img", ["src"], @valid_schemes) + + Meta.allow_tag_with_these_attributes("img", [ + "width", + "height", + 
"title", + "alt" + ]) + end + + @allow_tables Keyword.get(@markup, :allow_tables) + + if @allow_tables do + Meta.allow_tag_with_these_attributes("table", []) + Meta.allow_tag_with_these_attributes("tbody", []) + Meta.allow_tag_with_these_attributes("td", []) + Meta.allow_tag_with_these_attributes("th", []) + Meta.allow_tag_with_these_attributes("thead", []) + Meta.allow_tag_with_these_attributes("tr", []) + end + + @allow_headings Keyword.get(@markup, :allow_headings) + + if @allow_headings do + Meta.allow_tag_with_these_attributes("h1", []) + Meta.allow_tag_with_these_attributes("h2", []) + Meta.allow_tag_with_these_attributes("h3", []) + Meta.allow_tag_with_these_attributes("h4", []) + Meta.allow_tag_with_these_attributes("h5", []) + end + + @allow_fonts Keyword.get(@markup, :allow_fonts) + + if @allow_fonts do + Meta.allow_tag_with_these_attributes("font", ["face"]) + end + + Meta.strip_everything_not_covered() +end From 358f88e10a7d3de0481309287b4b756087490dfc Mon Sep 17 00:00:00 2001 From: William Pitcock Date: Mon, 10 Sep 2018 00:23:23 +0000 Subject: [PATCH 4/5] html: allow inline images by default (because of custom emoji) --- config/config.exs | 4 +++- lib/pleroma/html.ex | 15 +++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/config/config.exs b/config/config.exs index b3889ba12..d5c5b7902 100644 --- a/config/config.exs +++ b/config/config.exs @@ -77,7 +77,9 @@ config :pleroma, :instance, managed_config: true config :pleroma, :markup, - allow_inline_images: false, + # XXX - unfortunately, inline images must be enabled by default right now, because + # of custom emoji. Issue #275 discusses defanging that somehow. 
+ allow_inline_images: true, allow_headings: false, allow_tables: false, allow_fonts: false, diff --git a/lib/pleroma/html.ex b/lib/pleroma/html.ex index 1c62f2ccc..107784e70 100644 --- a/lib/pleroma/html.ex +++ b/lib/pleroma/html.ex @@ -37,6 +37,21 @@ defmodule Pleroma.HTML.Scrubber.TwitterText do # microformats Meta.allow_tag_with_these_attributes("span", []) + + # allow inline images for custom emoji + @markup Application.get_env(:pleroma, :markup) + @allow_inline_images Keyword.get(@markup, :allow_inline_images) + + if @allow_inline_images do + Meta.allow_tag_with_uri_attributes("img", ["src"], @valid_schemes) + + Meta.allow_tag_with_these_attributes("img", [ + "width", + "height", + "title", + "alt" + ]) + end end defmodule Pleroma.HTML.Scrubber.Default do From e82ce2a4b396e448181c7729db6dd850944db140 Mon Sep 17 00:00:00 2001 From: William Pitcock Date: Mon, 10 Sep 2018 00:28:40 +0000 Subject: [PATCH 5/5] formatting --- lib/pleroma/gopher/server.ex | 6 +----- lib/pleroma/web/twitter_api/views/user_view.ex | 3 +-- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/lib/pleroma/gopher/server.ex b/lib/pleroma/gopher/server.ex index 1ad27ef27..d34037f4f 100644 --- a/lib/pleroma/gopher/server.ex +++ b/lib/pleroma/gopher/server.ex @@ -79,11 +79,7 @@ defmodule Pleroma.Gopher.Server.ProtocolHandler do link("Post ##{activity.id} by #{user.nickname}", "/notices/#{activity.id}") <> info("#{like_count} likes, #{announcement_count} repeats") <> "i\tfake\t(NULL)\t0\r\n" <> - info( - HTML.strip_tags( - String.replace(activity.data["object"]["content"], "<br>", "\r") - ) - ) + info(HTML.strip_tags(String.replace(activity.data["object"]["content"], "<br>", "\r"))) end) |> Enum.join("i\tfake\t(NULL)\t0\r\n") end diff --git a/lib/pleroma/web/twitter_api/views/user_view.ex b/lib/pleroma/web/twitter_api/views/user_view.ex index d67ef5f74..f2641047f 100644 --- a/lib/pleroma/web/twitter_api/views/user_view.ex +++ b/lib/pleroma/web/twitter_api/views/user_view.ex @@ -39,8 +39,7 @@ defmodule Pleroma.Web.TwitterAPI.UserView do data = %{ "created_at" => user.inserted_at |> Utils.format_naive_asctime(), - "description" => - HTML.strip_tags((user.bio || "") |> String.replace("<br>", "\n")), + "description" => HTML.strip_tags((user.bio || "") |> String.replace("<br>", "\n")), "description_html" => HTML.filter_tags(user.bio), "favourites_count" => 0, "followers_count" => user_info[:follower_count],