2020-01-19 19:45:20 +03:00
|
|
|
# Pleroma: A lightweight social networking server
|
2021-01-13 07:49:20 +01:00
|
|
|
# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
|
2020-01-19 19:45:20 +03:00
|
|
|
# SPDX-License-Identifier: AGPL-3.0-only
|
|
|
|
|
|
|
|
defmodule Pleroma.Workers.AttachmentsCleanupWorker do
|
|
|
|
import Ecto.Query
|
|
|
|
|
2024-06-02 21:42:36 +02:00
|
|
|
alias Pleroma.Config
|
2020-01-19 19:45:20 +03:00
|
|
|
alias Pleroma.Object
|
|
|
|
alias Pleroma.Repo
|
|
|
|
|
|
|
|
use Pleroma.Workers.WorkerHelper, queue: "attachments_cleanup"
|
|
|
|
|
2024-06-02 21:42:36 +02:00
|
|
|
@doc """
Decides whether the attachments of a post should be cleaned up and, if so,
hands them to `enqueue/2` as a `"cleanup_attachments"` job.

A job is only requested when all of the following hold:

  * the `[:instance, :cleanup_attachments]` setting is exactly `true`
  * the host of `actor` equals the host of this instance's endpoint
  * the `"attachment"` list is non-empty

In every other case, including data lacking the `"actor"` or `"attachment"`
keys, `{:ok, :skip}` is returned.
"""
@spec enqueue_if_needed(map()) :: {:ok, Oban.Job.t()} | {:ok, :skip} | {:error, any()}
def enqueue_if_needed(%{"actor" => actor, "attachment" => [_ | _] = attachments}) do
  cleanup_enabled? = Config.get([:instance, :cleanup_attachments]) == true

  # `and` short-circuits, so the actor is only parsed when cleanup is enabled
  if cleanup_enabled? and URI.parse(actor).host == Pleroma.Web.Endpoint.host() do
    enqueue("cleanup_attachments", %{"actor" => actor, "attachments" => attachments})
  else
    {:ok, :skip}
  end
end

def enqueue_if_needed(_), do: {:ok, :skip}
|
|
|
|
|
2020-01-19 19:45:20 +03:00
|
|
|
@impl Oban.Worker
# Current job format: the job carries the attachment list and the actor.
# Collects every attachment URL, looks up the objects referencing them and
# deletes whatever is eligible. Always reports success once the steps ran.
def perform(%Job{
      args: %{
        "op" => "cleanup_attachments",
        "attachments" => [_ | _] = attachments,
        "actor" => actor
      }
    }) do
  hrefs = for attachment <- attachments, url <- attachment["url"], do: url["href"]
  names = Enum.map(attachments, fn attachment -> attachment["name"] end)

  hrefs
  |> fetch_objects()
  |> prepare_objects(actor, names)
  |> filter_objects()
  |> do_clean()

  {:ok, :success}
end

# Left over already enqueued jobs in the old format, which carried the whole
# object; they are routed through enqueue_if_needed/1 again.
# This function clause can be deleted once sufficient time passed after 3.14
def perform(%Job{args: %{"op" => "cleanup_attachments", "object" => %{"data" => data}}}) do
  enqueue_if_needed(data)
end

# Old-format jobs whose object has no "data" map: nothing to clean up
def perform(%Job{args: %{"op" => "cleanup_attachments", "object" => _object}}), do: {:ok, :skip}
|
2020-06-14 21:02:57 +03:00
|
|
|
|
|
|
|
# Deletes the file behind every URL in `attachment_urls` through the configured
# uploader, then removes the objects listed in `object_ids` from the database.
#
# Takes the `{object_ids, attachment_urls}` tuple produced by filter_objects/1
# and returns whatever delete_objects/1 returns.
defp do_clean({object_ids, attachment_urls}) do
  uploader = Config.get([Pleroma.Upload, :uploader])
  base_url = String.trim_trailing(Pleroma.Upload.base_url(), "/")

  Enum.each(attachment_urls, fn href ->
    # Strip the upload base URL exactly once, as a prefix, so the uploader
    # receives the remainder of the URL. String.trim_leading/2 would keep
    # stripping if the remainder happened to start with the base URL again.
    # URLs that do not start with the base URL are passed through unchanged.
    href
    |> String.replace_prefix(base_url, "")
    |> uploader.delete_file()
  end)

  delete_objects(object_ids)
end
|
2020-05-15 21:34:46 +03:00
|
|
|
|
2020-06-14 21:02:57 +03:00
|
|
|
# Removes all objects whose id is in `object_ids` with a single query.
defp delete_objects([_ | _] = object_ids) do
  Object
  |> where([o], o.id in ^object_ids)
  |> Repo.delete_all()
end

# Anything other than a non-empty list means there is nothing to delete
defp delete_objects(_), do: :ok
|
|
|
|
|
|
|
|
# we should delete 1 object for any given attachment, but don't delete
# files if there are more than 1 object for it
#
# Takes the `href => %{id: id, count: count}` entries built by prepare_objects/3
# and returns `{object_ids, hrefs}`: every entry contributes its object id, but
# its href (i.e. the file) is only included when exactly one object uses it.
defp filter_objects(objects) do
  # Prepend while folding and reverse once at the end: appending with `++`
  # on every step would copy the accumulated lists each time.
  {ids, hrefs} =
    Enum.reduce(objects, {[], []}, fn
      {href, %{id: id, count: 1}}, {ids, hrefs} ->
        {[id | ids], [href | hrefs]}

      {_href, %{id: id, count: _count}}, {ids, hrefs} ->
        {[id | ids], hrefs}
    end)

  {Enum.reverse(ids), Enum.reverse(hrefs)}
end
|
|
|
|
|
|
|
|
# Groups the fetched objects by attachment href.
#
# Returns a map of `href => %{id: id, count: count}` where `count` is the
# number of objects seen for that href and `id` is the object to delete.
# The first object seen for an href provides the initial id; later objects
# only replace it when they belong to `actor` and their name is in `names`.
# Every object must have exactly one entry in its "url" list.
defp prepare_objects(objects, actor, names) do
  Enum.reduce(objects, %{}, fn %{
                                 id: id,
                                 data: %{
                                   "url" => [%{"href" => href}],
                                   "actor" => obj_actor,
                                   "name" => name
                                 }
                               },
                               acc ->
    case acc do
      %{^href => seen} ->
        updated =
          if obj_actor == actor and name in names do
            # set id of the actor's object that will be deleted
            %{seen | id: id, count: seen.count + 1}
          else
            # another actor's object, just increase count to not delete file
            %{seen | count: seen.count + 1}
          end

        Map.put(acc, href, updated)

      _first_for_href ->
        Map.put(acc, href, %{id: id, count: 1})
    end
  end)
end
|
|
|
|
|
2020-06-15 20:47:02 +03:00
|
|
|
# Loads every object whose "url" array in `data` contains an entry with an
# "href" equal to one of the given `hrefs`. Objects whose "url" is not a JSON
# array are ignored by the query.
defp fetch_objects(hrefs) do
  query =
    from(o in Object,
      where:
        fragment(
          "to_jsonb(array(select jsonb_array_elements((?)#>'{url}') ->> 'href' where jsonb_typeof((?)#>'{url}') = 'array'))::jsonb \\?| (?)",
          o.data,
          o.data,
          ^hrefs
        )
    )

  # This query can be time-consuming on large instances until we refactor
  # how uploads are stored, hence no timeout
  Repo.all(query, timeout: :infinity)
end
|
2020-01-19 19:45:20 +03:00
|
|
|
end
|