Rich Media: Do not cache URLs for preview statuses

Closes #1987
This commit is contained in:
rinpatch 2020-09-05 12:37:27 +03:00
parent 2cd2a8fda3
commit e198ba492e
5 changed files with 65 additions and 22 deletions

View file

@ -12,6 +12,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
- Mastodon API: Search parameter `following` now correctly returns the followings rather than the followers
- Mastodon API: Timelines hanging for (`number of posts with links * rich media timeout`) in the worst case.
  Reduced to just rich media timeout.
- Mastodon API: Cards being wrong for preview statuses due to cache key collision
- Password resets no longer processed for deactivated accounts
## [2.1.0] - 2020-08-28

View file

@ -100,21 +100,27 @@ defmodule Pleroma.HTML do
end) end)
end end
def extract_first_external_url(_, nil), do: {:error, "No content"} def extract_first_external_url_from_object(%{data: %{"content" => content}} = object)
when is_binary(content) do
def extract_first_external_url(object, content) do unless object.data["fake"] do
key = "URL|#{object.id}" key = "URL|#{object.id}"
Cachex.fetch!(:scrubber_cache, key, fn _key -> Cachex.fetch!(:scrubber_cache, key, fn _key ->
result = {:commit, {:ok, extract_first_external_url(content)}}
end)
else
{:ok, extract_first_external_url(content)}
end
end
def extract_first_external_url_from_object(_), do: {:error, :no_content}
def extract_first_external_url(content) do
content content
|> Floki.parse_fragment!() |> Floki.parse_fragment!()
|> Floki.find("a:not(.mention,.hashtag,.attachment,[rel~=\"tag\"])") |> Floki.find("a:not(.mention,.hashtag,.attachment,[rel~=\"tag\"])")
|> Enum.take(1) |> Enum.take(1)
|> Floki.attribute("href") |> Floki.attribute("href")
|> Enum.at(0) |> Enum.at(0)
{:commit, {:ok, result}}
end)
end end
end end

View file

@ -58,7 +58,7 @@ defmodule Pleroma.Web.RichMedia.Helpers do
with true <- Config.get([:rich_media, :enabled]), with true <- Config.get([:rich_media, :enabled]),
false <- object.data["sensitive"] || false, false <- object.data["sensitive"] || false,
{:ok, page_url} <- {:ok, page_url} <-
HTML.extract_first_external_url(object, object.data["content"]), HTML.extract_first_external_url_from_object(object),
:ok <- validate_page_url(page_url), :ok <- validate_page_url(page_url),
{:ok, rich_media} <- Parser.parse(page_url) do {:ok, rich_media} <- Parser.parse(page_url) do
%{page_url: page_url, rich_media: rich_media} %{page_url: page_url, rich_media: rich_media}

View file

@ -165,7 +165,7 @@ defmodule Pleroma.HTMLTest do
end end
end end
describe "extract_first_external_url" do describe "extract_first_external_url_from_object" do
test "extracts the url" do test "extracts the url" do
user = insert(:user) user = insert(:user)
@ -176,7 +176,7 @@ defmodule Pleroma.HTMLTest do
}) })
object = Object.normalize(activity) object = Object.normalize(activity)
{:ok, url} = HTML.extract_first_external_url(object, object.data["content"]) {:ok, url} = HTML.extract_first_external_url_from_object(object)
assert url == "https://github.com/komeiji-satori/Dress" assert url == "https://github.com/komeiji-satori/Dress"
end end
@ -191,7 +191,7 @@ defmodule Pleroma.HTMLTest do
}) })
object = Object.normalize(activity) object = Object.normalize(activity)
{:ok, url} = HTML.extract_first_external_url(object, object.data["content"]) {:ok, url} = HTML.extract_first_external_url_from_object(object)
assert url == "https://github.com/syuilo/misskey/blob/develop/docs/setup.en.md" assert url == "https://github.com/syuilo/misskey/blob/develop/docs/setup.en.md"
@ -207,7 +207,7 @@ defmodule Pleroma.HTMLTest do
}) })
object = Object.normalize(activity) object = Object.normalize(activity)
{:ok, url} = HTML.extract_first_external_url(object, object.data["content"]) {:ok, url} = HTML.extract_first_external_url_from_object(object)
assert url == "https://www.pixiv.net/member_illust.php?mode=medium&illust_id=72255140" assert url == "https://www.pixiv.net/member_illust.php?mode=medium&illust_id=72255140"
end end
@ -223,7 +223,7 @@ defmodule Pleroma.HTMLTest do
}) })
object = Object.normalize(activity) object = Object.normalize(activity)
{:ok, url} = HTML.extract_first_external_url(object, object.data["content"]) {:ok, url} = HTML.extract_first_external_url_from_object(object)
assert url == "https://www.pixiv.net/member_illust.php?mode=medium&illust_id=72255140" assert url == "https://www.pixiv.net/member_illust.php?mode=medium&illust_id=72255140"
end end
@ -235,7 +235,7 @@ defmodule Pleroma.HTMLTest do
object = Object.normalize(activity) object = Object.normalize(activity)
assert {:ok, nil} = HTML.extract_first_external_url(object, object.data["content"]) assert {:ok, nil} = HTML.extract_first_external_url_from_object(object)
end end
test "skips attachment links" do test "skips attachment links" do
@ -249,7 +249,7 @@ defmodule Pleroma.HTMLTest do
object = Object.normalize(activity) object = Object.normalize(activity)
assert {:ok, nil} = HTML.extract_first_external_url(object, object.data["content"]) assert {:ok, nil} = HTML.extract_first_external_url_from_object(object)
end end
end end
end end

View file

@ -296,9 +296,45 @@ defmodule Pleroma.Web.MastodonAPI.StatusControllerTest do
assert real_status == fake_status assert real_status == fake_status
end end
# Regression test: two preview statuses posted back to back must each come
# back with the card for their own link, not a card reused from the other one.
test "fake statuses' preview card is not cached", %{conn: conn} do
  clear_config([:rich_media, :enabled], true)

  # Serve the Twitter-card fixture for its URL; every other request is
  # delegated to HttpRequestMock.
  Tesla.Mock.mock(fn
    %{method: :get, url: "https://example.com/twitter-card"} ->
      fixture = File.read!("test/fixtures/rich_media/twitter_card.html")
      %Tesla.Env{status: 200, body: fixture}

    other_env ->
      HttpRequestMock.request(other_env)
  end)

  # Posts `status_text` with "preview" => true and returns the resulting conn.
  post_preview = fn status_text ->
    conn
    |> put_req_header("content-type", "application/json")
    |> post("/api/v1/statuses", %{"status" => status_text, "preview" => true})
  end

  # Both requests are issued before any assertion, as in the original ordering.
  ogp_conn = post_preview.("https://example.com/ogp")
  twitter_conn = post_preview.("https://example.com/twitter-card")

  assert %{"card" => %{"title" => "The Rock"}} =
           json_response_and_validate_schema(ogp_conn, 200)

  assert %{"card" => %{"title" => "Small Island Developing States Photo Submission"}} =
           json_response_and_validate_schema(twitter_conn, 200)
end
test "posting a status with OGP link preview", %{conn: conn} do test "posting a status with OGP link preview", %{conn: conn} do
Tesla.Mock.mock(fn env -> apply(HttpRequestMock, :request, [env]) end) Tesla.Mock.mock(fn env -> apply(HttpRequestMock, :request, [env]) end)
Config.put([:rich_media, :enabled], true) clear_config([:rich_media, :enabled], true)
conn = conn =
conn conn