2019-04-18 23:34:01 +03:00
|
|
|
|
# Pleroma: A lightweight social networking server
|
2021-01-13 07:49:20 +01:00
|
|
|
|
# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
|
2019-04-18 23:34:01 +03:00
|
|
|
|
# SPDX-License-Identifier: AGPL-3.0-only
|
|
|
|
|
|
|
|
|
|
defmodule Mix.Tasks.Pleroma.Database do
|
2019-05-16 13:14:48 -05:00
|
|
|
|
alias Pleroma.Conversation
|
2020-05-27 16:40:51 -05:00
|
|
|
|
alias Pleroma.Maintenance
|
2020-05-27 17:17:06 -05:00
|
|
|
|
alias Pleroma.Object
|
2019-05-16 20:04:08 +00:00
|
|
|
|
alias Pleroma.Repo
|
|
|
|
|
alias Pleroma.User
|
2020-12-26 22:20:55 +03:00
|
|
|
|
|
2019-04-19 00:17:37 +03:00
|
|
|
|
require Logger
|
2019-07-29 02:43:19 +00:00
|
|
|
|
require Pleroma.Constants
|
2020-12-26 22:20:55 +03:00
|
|
|
|
|
2020-08-08 16:29:40 +04:00
|
|
|
|
import Ecto.Query
|
2019-06-20 02:05:19 +03:00
|
|
|
|
import Mix.Pleroma
|
2020-12-26 22:20:55 +03:00
|
|
|
|
|
2019-04-18 23:34:01 +03:00
|
|
|
|
use Mix.Task
|
|
|
|
|
|
|
|
|
|
@shortdoc "A collection of database related tasks"
|
2022-07-15 12:27:16 +00:00
|
|
|
|
@moduledoc File.read!("docs/docs/administration/CLI_tasks/database.md")
|
2019-04-18 23:34:01 +03:00
|
|
|
|
|
2023-10-23 17:29:02 +02:00
|
|
|
|
# Applies a row limit to `query` when `limit_cnt` is a positive number.
# Any other value (zero, negative, non-numeric) leaves the query unchanged.
defp maybe_limit(query, limit_cnt) when is_number(limit_cnt) and limit_cnt > 0 do
  limit(query, [], ^limit_cnt)
end

defp maybe_limit(query, _limit_cnt), do: query
|
|
|
|
|
|
2024-05-15 02:15:31 +02:00
|
|
|
|
# Builds the SQL `LIMIT` clause for raw queries: "LIMIT <n>" for a positive
# number, an empty string otherwise. Non-numeric input matches no clause.
defp limit_statement(limit) when is_number(limit) and limit > 0, do: "LIMIT #{limit}"
defp limit_statement(limit) when is_number(limit), do: ""
|
2024-05-15 01:20:27 +02:00
|
|
|
|
|
2024-05-15 02:15:31 +02:00
|
|
|
|
# Deletes non-local activities whose `object` field is a plain string that
# matches neither an object id, an activity id nor a user ap_id.
#
# `limit` is turned into an optional LIMIT clause by `limit_statement/1`.
# Returns the number of deleted rows.
defp prune_orphaned_activities_singles(limit) do
  sql = """
  delete from public.activities
  where id in (
  select a.id from public.activities a
  left join public.objects o on a.data ->> 'object' = o.data ->> 'id'
  left join public.activities a2 on a.data ->> 'object' = a2.data ->> 'id'
  left join public.users u on a.data ->> 'object' = u.ap_id
  where not a.local
  and jsonb_typeof(a."data" -> 'object') = 'string'
  and o.id is null
  and a2.id is null
  and u.id is null
  #{limit_statement(limit)}
  )
  """

  # The statement may scan a lot of rows, so it runs without a timeout.
  %{num_rows: deleted_rows} = Repo.query!(sql, [], timeout: :infinity)

  Logger.info("Prune activity singles: deleted #{deleted_rows} rows...")
  deleted_rows
end
|
2024-05-15 01:33:41 +02:00
|
|
|
|
|
2024-05-15 02:15:31 +02:00
|
|
|
|
# Deletes `Flag` activities whose `object` array contains no entry matching
# an object id, an activity id or a user ap_id.
#
# Every array element is expanded with `json_array_elements_text`; an activity
# is selected only if none of its elements matched anything (all `max(...)`
# aggregates are null). `limit` becomes an optional LIMIT clause via
# `limit_statement/1`. Returns the number of deleted rows.
defp prune_orphaned_activities_array(limit) do
  sql = """
  delete from public.activities
  where id in (
  select a.id from public.activities a
  join json_array_elements_text((a."data" -> 'object')::json) as j
  on a.data->>'type' = 'Flag'
  left join public.objects o on j.value = o.data ->> 'id'
  left join public.activities a2 on j.value = a2.data ->> 'id'
  left join public.users u on j.value = u.ap_id
  group by a.id
  having max(o.data ->> 'id') is null
  and max(a2.data ->> 'id') is null
  and max(u.ap_id) is null
  #{limit_statement(limit)}
  )
  """

  # Runs without a timeout, like the other pruning statements.
  %{num_rows: deleted_rows} = Repo.query!(sql, [], timeout: :infinity)

  Logger.info("Prune activity arrays: deleted #{deleted_rows} rows...")
  deleted_rows
end
|
|
|
|
|
|
|
|
|
|
@doc """
Deletes activities that no longer point at anything known to the database.

`limit` is handed unchanged to each pruning query (so it applies per query,
not to the total). `opts` may set `:arrays` and/or `:singles` to a falsy
value to skip the respective query; both default to `true`.

Returns the total number of deleted rows.
"""
def prune_orphaned_activities(limit \\ 0, opts \\ []) when is_number(limit) do
  # An activity refers to its object in one of three ways: a single object id,
  # an array of object ids, or an inlined object (at least after our own
  # normalisation).
  #
  # Flag is the only supported type using an array (and it always does);
  # Update is the only one with inlined objects.
  #
  # Old Delete, Undo, Update and Remove activities as well as rejected Follow
  # requests are already purged regularly, so they need no handling here.
  #
  # There is an index on the activity type and typically only a few Flag
  # activities exist, so using that index for the array case is _much_ faster.
  # The singles query keeps its typeof check so that useful activities are not
  # deleted by accident should more types be added later.

  # Activities linking to an array of objects are pruned first ...
  pruned_arrays =
    if Keyword.get(opts, :arrays, true), do: prune_orphaned_activities_array(limit), else: 0

  # ... followed by activities linking to a single object.
  pruned_singles =
    if Keyword.get(opts, :singles, true), do: prune_orphaned_activities_singles(limit), else: 0

  pruned_singles + pruned_arrays
end
|
|
|
|
|
|
2019-04-18 23:34:01 +03:00
|
|
|
|
# `remove_embedded_objects [--vacuum]`
#
# Replaces every object embedded in an activity by that object's id and,
# when `--vacuum` is given, runs a full vacuum afterwards.
def run(["remove_embedded_objects" | args]) do
  # The match fails on unknown switches and on positional arguments.
  {options, [], []} = OptionParser.parse(args, strict: [vacuum: :boolean])

  start_pleroma()
  Logger.info("Removing embedded objects")

  "update activities set data = safe_jsonb_set(data, '{object}'::text[], data->'object'->'id') where data->'object'->>'id' is not null;"
  |> Repo.query!([], timeout: :infinity)

  if options[:vacuum], do: Maintenance.vacuum("full")
end
|
2019-05-16 13:14:48 -05:00
|
|
|
|
|
|
|
|
|
# `bump_all_conversations`
#
# Boots the application, then delegates to
# `Conversation.bump_for_all_activities/0`.
def run(["bump_all_conversations"]) do
  start_pleroma()

  Conversation.bump_for_all_activities()
end
|
2019-05-16 20:04:08 +00:00
|
|
|
|
|
|
|
|
|
# `update_users_following_followers_counts`
#
# Streams every user and calls `User.update_follower_count/1` on each.
def run(["update_users_following_followers_counts"]) do
  start_pleroma()

  recount_all_users = fn ->
    from(u in User, select: u)
    |> Repo.stream()
    |> Enum.each(&User.update_follower_count/1)
  end

  # The stream is consumed inside a transaction that is allowed to run
  # without a time limit.
  Repo.transaction(recount_all_users, timeout: :infinity)
end
|
2019-05-21 01:21:28 +00:00
|
|
|
|
|
2023-10-23 01:01:07 +02:00
|
|
|
|
# `prune_orphaned_activities [--limit N] [--[no-]singles] [--[no-]arrays]`
#
# Command line entry point for `prune_orphaned_activities/2`; logs what is
# about to happen and how many rows were deleted.
def run(["prune_orphaned_activities" | args]) do
  switches = [limit: :integer, singles: :boolean, arrays: :boolean]

  # The match fails on unknown switches and on positional arguments.
  {options, [], []} = OptionParser.parse(args, strict: switches)

  start_pleroma()

  # `:limit` is passed positionally; the remaining switches go on as opts.
  {limit, prune_opts} = Keyword.pop(options, :limit, 0)

  limit_note = if limit > 0, do: ", limiting deletion to #{limit} rows", else: ""
  Logger.info("Pruning orphaned activities" <> limit_note)

  deleted = prune_orphaned_activities(limit, prune_opts)

  Logger.info("Deleted #{deleted} rows")
end
|
|
|
|
|
|
2019-05-21 01:21:28 +00:00
|
|
|
|
# `prune_objects [--vacuum] [--keep-threads] [--keep-non-public]
#                [--prune-orphaned-activities] [--limit N]`
#
# Deletes objects older than the configured
# `[:instance, :remote_post_retention_days]`, then cleans up what the deletion
# left behind:
#
#   * `--keep-threads`: only delete objects from contexts (threads) that are
#     old as a whole, contain no local activity and no bookmarked activity.
#     Without it, objects are selected individually by age and actor host.
#   * `--keep-non-public`: additionally spare non-public posts.
#   * `--prune-orphaned-activities`: run `prune_orphaned_activities/2` afterwards.
#   * `--vacuum`: finish with a full vacuum.
#   * `--limit N`: cap the number of rows selected by the deletion subquery.
#
# Every database statement runs with `timeout: :infinity`, since each of them
# may touch a large part of the database.
def run(["prune_objects" | args]) do
  # The match fails on unknown switches and on positional arguments.
  {options, [], []} =
    OptionParser.parse(
      args,
      strict: [
        vacuum: :boolean,
        keep_threads: :boolean,
        keep_non_public: :boolean,
        prune_orphaned_activities: :boolean,
        limit: :integer
      ]
    )

  start_pleroma()

  deadline = Pleroma.Config.get([:instance, :remote_post_retention_days])
  # Retention is configured in days; 86_400 seconds per day.
  time_deadline = NaiveDateTime.utc_now() |> NaiveDateTime.add(-(deadline * 86_400))

  limit_cnt = Keyword.get(options, :limit, 0)

  # One note per enabled switch, in a fixed order.
  option_notes =
    [
      keep_non_public: ", keeping non public posts",
      keep_threads: ", keeping threads intact",
      prune_orphaned_activities: ", pruning orphaned activities",
      vacuum: ", doing a full vacuum (you shouldn't do this as a recurring maintanance task)"
    ]
    |> Enum.filter(fn {switch, _note} -> Keyword.get(options, switch) end)
    |> Enum.map_join(fn {_switch, note} -> note end)

  limit_note = if limit_cnt > 0, do: ", limiting to #{limit_cnt} rows", else: ""

  Logger.info("Pruning objects older than #{deadline} days" <> option_notes <> limit_note)

  {del_obj, _} =
    if Keyword.get(options, :keep_threads) do
      # We want to delete objects from threads where
      # 1. the newest post is still old
      # 2. none of the activities is local
      # 3. none of the activities is bookmarked
      # 4. optionally none of the posts is non-public
      deletable_context =
        if Keyword.get(options, :keep_non_public) do
          Pleroma.Activity
          |> join(:left, [a], b in Pleroma.Bookmark, on: a.id == b.activity_id)
          |> group_by([a], fragment("? ->> 'context'::text", a.data))
          |> having(
            [a],
            not fragment(
              # True if any post (checked on its Create activity) is non-public
              "bool_or((not(?->'to' \\? ? OR ?->'cc' \\? ?)) and ? ->> 'type' = 'Create')",
              a.data,
              ^Pleroma.Constants.as_public(),
              a.data,
              ^Pleroma.Constants.as_public(),
              a.data
            )
          )
        else
          Pleroma.Activity
          |> join(:left, [a], b in Pleroma.Bookmark, on: a.id == b.activity_id)
          |> group_by([a], fragment("? ->> 'context'::text", a.data))
        end
        |> having([a], max(a.updated_at) < ^time_deadline)
        |> having([a], not fragment("bool_or(?)", a.local))
        |> having([_, b], fragment("max(?::text) is null", b.id))
        |> maybe_limit(limit_cnt)
        |> select([a], fragment("? ->> 'context'::text", a.data))

      Pleroma.Object
      |> where([o], fragment("? ->> 'context'::text", o.data) in subquery(deletable_context))
    else
      deletable =
        if Keyword.get(options, :keep_non_public) do
          # Only objects addressed to the public collection are deletable.
          Pleroma.Object
          |> where(
            [o],
            fragment(
              "?->'to' \\? ? OR ?->'cc' \\? ?",
              o.data,
              ^Pleroma.Constants.as_public(),
              o.data,
              ^Pleroma.Constants.as_public()
            )
          )
        else
          Pleroma.Object
        end
        |> where([o], o.updated_at < ^time_deadline)
        |> where(
          [o],
          # Skip objects whose actor URL has this instance's host.
          fragment("split_part(?->>'actor', '/', 3) != ?", o.data, ^Pleroma.Web.Endpoint.host())
        )
        |> maybe_limit(limit_cnt)
        |> select([o], o.id)

      Pleroma.Object
      |> where([o], o.id in subquery(deletable))
    end
    |> Repo.delete_all(timeout: :infinity)

  Logger.info("Deleted #{del_obj} objects...")

  if !Keyword.get(options, :keep_threads) do
    # Without the --keep-threads option, it's possible that bookmarked
    # objects have been deleted. We remove the corresponding bookmarks.
    %{:num_rows => del_bookmarks} =
      """
      delete from public.bookmarks
      where id in (
      select b.id from public.bookmarks b
      left join public.activities a on b.activity_id = a.id
      left join public.objects o on a."data" ->> 'object' = o.data ->> 'id'
      where o.id is null
      )
      """
      |> Repo.query!([], timeout: :infinity)

    Logger.info("Deleted #{del_bookmarks} orphaned bookmarks...")
  end

  if Keyword.get(options, :prune_orphaned_activities) do
    del_activities = prune_orphaned_activities()
    Logger.info("Deleted #{del_activities} orphaned activities...")
  end

  # Drop hashtags no object refers to any more. Like every other statement in
  # this task this runs without a timeout: with the default query timeout a
  # large hashtags table could abort the task after the pruning already ran.
  %{:num_rows => del_hashtags} =
    """
    DELETE FROM hashtags AS ht
    WHERE NOT EXISTS (
    SELECT 1 FROM hashtags_objects hto
    WHERE ht.id = hto.hashtag_id)
    """
    |> Repo.query!([], timeout: :infinity)

  Logger.info("Deleted #{del_hashtags} no longer used hashtags...")

  if Keyword.get(options, :vacuum) do
    Logger.info("Starting vacuum...")
    Maintenance.vacuum("full")
  end

  Logger.info("All done!")
end
|
2019-08-10 18:47:40 +00:00
|
|
|
|
|
2022-12-01 15:00:53 +00:00
|
|
|
|
# Runs the database prune worker once from the command line by calling
# `Pleroma.Workers.Cron.PruneDatabaseWorker.perform/1` directly.
def run(["prune_task"]) do
  start_pleroma()

  # `nil` is passed in place of the argument the worker would normally receive.
  Pleroma.Workers.Cron.PruneDatabaseWorker.perform(nil)
end
|
|
|
|
|
|
2019-08-10 18:47:40 +00:00
|
|
|
|
# Resets `data.likes` to an empty JSON array on every object whose stored
# `likes` value decodes to a JSON object (map) rather than a list.
# Objects are processed in batches of 100.
def run(["fix_likes_collections"]) do
  start_pleroma()

  # Only objects that have a `likes` key at all are candidates; the raw JSON
  # text of that key is selected so it can be inspected on the Elixir side.
  candidates =
    from(o in Object,
      where: fragment("(?)->>'likes' is not null", o.data),
      select: %{id: o.id, likes: fragment("(?)->>'likes'", o.data)}
    )

  candidates
  |> Repo.chunk_stream(100, :batches)
  |> Stream.each(fn batch ->
    # Keep the ids of rows whose `likes` decodes to a map.
    broken_ids = for row <- batch, is_map(Jason.decode!(row.likes)), do: row.id

    reset_query =
      from(o in Object,
        where: o.id in ^broken_ids,
        update: [
          set: [data: fragment("safe_jsonb_set(?, '{likes}', '[]'::jsonb, true)", o.data)]
        ]
      )

    Repo.update_all(reset_query, [], timeout: :infinity)
  end)
  |> Stream.run()
end
|
2020-05-27 16:27:07 -05:00
|
|
|
|
|
|
|
|
|
# Vacuums the database. The second CLI argument (e.g. "full") is handed to
# `Maintenance.vacuum/1` unchanged.
def run(["vacuum", vacuum_mode]) do
  start_pleroma()

  Maintenance.vacuum(vacuum_mode)
end
|
2020-08-08 16:29:40 +04:00
|
|
|
|
|
|
|
|
|
# Enqueues a `Pleroma.Workers.PurgeExpiredActivity` entry for every local
# `Create` activity whose object is a `Note`. The expiration time is the
# activity's insertion time plus the number of days configured under
# `[:mrf_activity_expiration, :days]` (365 when unset).
def run(["ensure_expiration"]) do
  start_pleroma()

  days = Pleroma.Config.get([:mrf_activity_expiration, :days], 365)

  # `inserted_at` is a naive datetime; it is interpreted as UTC before shifting.
  schedule_purge = fn activity ->
    inserted_at_utc = DateTime.from_naive!(activity.inserted_at, "Etc/UTC")

    Pleroma.Workers.PurgeExpiredActivity.enqueue(%{
      activity_id: activity.id,
      expires_at: Timex.shift(inserted_at_utc, days: days)
    })
  end

  # The activity's `object` field may be either an embedded object or a bare
  # id string, hence the COALESCE in the join condition.
  local_note_creates =
    from(a in Pleroma.Activity,
      join: o in Object,
      on:
        fragment(
          "(?->>'id') = COALESCE((?)->'object'->> 'id', (?)->>'object')",
          o.data,
          a.data,
          a.data
        ),
      where: [local: true],
      where: fragment("(? ->> 'type'::text) = 'Create'", a.data),
      where: fragment("?->>'type' = 'Note'", o.data)
    )

  local_note_creates
  |> Repo.chunk_stream(100, :batches)
  |> Stream.each(fn batch -> Enum.each(batch, schedule_purge) end)
  |> Stream.run()
end
|
2021-02-07 22:24:12 +03:00
|
|
|
|
|
2021-02-06 09:42:17 +00:00
|
|
|
|
# Sets the database-wide `default_text_search_config` to `tsconfig` and then
# rebuilds the full-text search index so it uses the new configuration.
#
# Steps:
#   1. Print the current setting.
#   2. `ALTER DATABASE <current db> SET default_text_search_config = <tsconfig>`.
#   3. If the server reported any messages, print them as an error and stop.
#   4. Otherwise, depending on `[:database, :rum_enabled]`:
#        * RUM: recreate the `objects_fts_update()` trigger function and touch
#          every row of `objects` so the trigger fires again;
#        * GIN: drop and recreate the `objects_fts` index.
def run(["set_text_search_config", tsconfig]) do
  start_pleroma()

  %{rows: [[tsc]]} = Ecto.Adapters.SQL.query!(Pleroma.Repo, "SHOW default_text_search_config;")
  shell_info("Current default_text_search_config: #{tsc}")

  %{rows: [[db]]} = Ecto.Adapters.SQL.query!(Pleroma.Repo, "SELECT current_database();")
  shell_info("Update default_text_search_config: #{tsconfig}")

  # DDL statements cannot take bind parameters, so the values are embedded in
  # the SQL text. Quote them properly: the database name as a delimited
  # identifier (so names with hyphens, uppercase letters, etc. work) and the
  # configuration name as a string literal with embedded quotes doubled.
  quoted_db = "\"" <> String.replace(db, "\"", "\"\"") <> "\""
  tsconfig_literal = "'" <> String.replace(tsconfig, "'", "''") <> "'"

  %{messages: msg} =
    Ecto.Adapters.SQL.query!(
      Pleroma.Repo,
      "ALTER DATABASE #{quoted_db} SET default_text_search_config = #{tsconfig_literal};"
    )

  # A non-existent config does not raise an exception; it only yields a
  # non-empty list of messages.
  if msg != [] do
    shell_info("Error: #{inspect(msg, pretty: true)}")
  else
    rum_enabled = Pleroma.Config.get([:database, :rum_enabled])
    shell_info("Recreate index, RUM: #{rum_enabled}")

    # Note SQL below needs to be kept up-to-date with latest GIN or RUM index definition in future
    if rum_enabled do
      Ecto.Adapters.SQL.query!(
        Pleroma.Repo,
        "CREATE OR REPLACE FUNCTION objects_fts_update() RETURNS trigger AS $$ BEGIN
        new.fts_content := to_tsvector(new.data->>'content');
        RETURN new;
        END
        $$ LANGUAGE plpgsql",
        [],
        timeout: :infinity
      )

      shell_info("Refresh RUM index")

      # This rewrites every row of `objects`; on a large table it can exceed
      # the default query timeout, so disable the timeout like the other
      # long-running statements here.
      Ecto.Adapters.SQL.query!(
        Pleroma.Repo,
        "UPDATE objects SET updated_at = NOW();",
        [],
        timeout: :infinity
      )
    else
      Ecto.Adapters.SQL.query!(Pleroma.Repo, "DROP INDEX IF EXISTS objects_fts;")

      Ecto.Adapters.SQL.query!(
        Pleroma.Repo,
        "CREATE INDEX CONCURRENTLY objects_fts ON objects USING gin(to_tsvector(#{tsconfig_literal}, data->>'content')); ",
        [],
        timeout: :infinity
      )
    end

    shell_info(~c"Done.")
  end
end
|
2021-02-11 19:31:57 +03:00
|
|
|
|
|
2021-02-07 22:24:12 +03:00
|
|
|
|
# Reverses one specific migration, identified by its version number, while
# leaving all later migrations applied.
# WARNING: this can cause unrecoverable data loss — use at your own risk.
# Approach taken from https://stackoverflow.com/a/53825840
def run(["rollback", version]) do
  prompt = "SEVERE WARNING: this operation may result in unrecoverable data loss. Continue?"
  answer = shell_prompt(prompt, "n")

  if answer in ["Yn", "Y", "y"] do
    # Locates the migration file by its version prefix, compiles it and runs
    # its `down` step. Each stage is tagged so a failure can be reported with
    # a stage-specific message.
    reverse_migration = fn repo ->
      target = String.to_integer(version)
      file_pattern = ~r/^#{target}_.*\.exs/
      migrations_dir = Ecto.Migrator.migrations_path(repo)

      with {_, "" <> file} <-
             {:find, Enum.find(File.ls!(migrations_dir), &Regex.match?(file_pattern, &1))},
           {_, [{migration_module, _} | _]} <-
             {:compile, Code.compile_file(Path.join(migrations_dir, file))},
           {_, :ok} <- {:rollback, Ecto.Migrator.down(repo, target, migration_module)} do
        {:ok, "Reversed migration: #{file}"}
      else
        {:find, _} ->
          {:error, "No migration found with version prefix: #{target}"}

        {:compile, reason} ->
          {:error, "Problem compiling migration module: #{inspect(reason)}"}

        {:rollback, reason} ->
          {:error, "Problem reversing migration: #{inspect(reason)}"}
      end
    end

    {_, outcome, _} = Ecto.Migrator.with_repo(Pleroma.Repo, reverse_migration)

    shell_info(inspect(outcome))
  end
end
|
2019-04-18 23:34:01 +03:00
|
|
|
|
end
|