From c03e8d46e8a30511d614cf498d50773ff9436680 Mon Sep 17 00:00:00 2001 From: FloatingGhost Date: Tue, 14 Dec 2021 13:53:46 +0000 Subject: [PATCH] Search through users and hashtags as well --- lib/mix/tasks/pleroma/search.ex | 2 +- .../elasticsearch/document_mappings/user.ex | 3 +- lib/pleroma/elasticsearch/store.ex | 105 +++++++++++++--- lib/pleroma/search/elasticsearch.ex | 117 ++++++++++-------- .../search/elasticsearch/activity_parser.ex | 38 ++++++ .../search/elasticsearch/hashtag_parser.ex | 30 +++++ .../search/elasticsearch/user_paser.ex | 53 ++++++++ lib/pleroma/user.ex | 1 + lib/pleroma/web/activity_pub/side_effects.ex | 2 +- 9 files changed, 280 insertions(+), 71 deletions(-) create mode 100644 lib/pleroma/search/elasticsearch/activity_parser.ex create mode 100644 lib/pleroma/search/elasticsearch/hashtag_parser.ex create mode 100644 lib/pleroma/search/elasticsearch/user_paser.ex diff --git a/lib/mix/tasks/pleroma/search.ex b/lib/mix/tasks/pleroma/search.ex index 751e0ca11..2324561c1 100644 --- a/lib/mix/tasks/pleroma/search.ex +++ b/lib/mix/tasks/pleroma/search.ex @@ -26,7 +26,7 @@ defmodule Mix.Tasks.Pleroma.Search do def run(["import", "users" | _rest]) do start_pleroma() - from(u in User, where: not ilike(u.ap_id, "%/relay")) + from(u in User, where: u.nickname not in ["internal.fetch", "relay"]) |> get_all(:users) end diff --git a/lib/pleroma/elasticsearch/document_mappings/user.ex b/lib/pleroma/elasticsearch/document_mappings/user.ex index 5d9945c23..0e57438f2 100644 --- a/lib/pleroma/elasticsearch/document_mappings/user.ex +++ b/lib/pleroma/elasticsearch/document_mappings/user.ex @@ -6,7 +6,8 @@ defmodule Pleroma.Elasticsearch.DocumentMappings.User do timestamp: user.inserted_at, instance: URI.parse(user.ap_id).host, nickname: user.nickname, - bio: user.bio + bio: user.bio, + display_name: user.name } end end diff --git a/lib/pleroma/elasticsearch/store.ex b/lib/pleroma/elasticsearch/store.ex index 74c933038..2d8aeabc2 100644 --- a/lib/pleroma/elasticsearch/store.ex +++ b/lib/pleroma/elasticsearch/store.ex @@ -1,24 +1,32 @@ defmodule Pleroma.Elasticsearch do alias Pleroma.Activity + alias Pleroma.User alias Pleroma.Elasticsearch.DocumentMappings alias Pleroma.Config + require Logger defp url do Config.get([:elasticsearch, :url]) end - def put_by_id(id) do + defp enabled? do + Config.get([:search, :provider]) == Pleroma.Search.Elasticsearch + end + + def put_by_id(:activity, id) do id |> Activity.get_by_id_with_object() |> maybe_put_into_elasticsearch() end - def maybe_put_into_elasticsearch({:ok, activity}) do - maybe_put_into_elasticsearch(activity) + def maybe_put_into_elasticsearch({:ok, item}) do + maybe_put_into_elasticsearch(item) end - def maybe_put_into_elasticsearch(%{data: %{"type" => "Create"}, object: %{data: %{"type" => "Note"}}} = activity) do - if Config.get([:search, :provider]) == Pleroma.Search.Elasticsearch do + def maybe_put_into_elasticsearch( + %{data: %{"type" => "Create"}, object: %{data: %{"type" => "Note"}}} = activity + ) do + if enabled?() do actor = Pleroma.Activity.user_actor(activity) activity @@ -27,27 +35,48 @@ defmodule Pleroma.Elasticsearch do end end + def maybe_put_into_elasticsearch(%User{} = user) do + if enabled?() do + put(user) + end + end + def maybe_put_into_elasticsearch(_) do {:ok, :skipped} end def put(%Activity{} = activity) do - Elastix.Document.index( + {:ok, _} = Elastix.Document.index( url(), "activities", "activity", DocumentMappings.Activity.id(activity), DocumentMappings.Activity.encode(activity) ) + {:ok, _} = bulk_post( + activity.object.hashtags, :hashtags + ) + end + + def put(%User{} = user) do + {:ok, _ } = Elastix.Document.index( + url(), + "users", + "user", + DocumentMappings.User.id(user), + DocumentMappings.User.encode(user) + ) end def bulk_post(data, :activities) do d = data |> Enum.filter(fn x -> - t = x.object - |> Map.get(:data, %{}) - |> Map.get("type", "") + t = + x.object + |> Map.get(:data, %{}) + |> Map.get("type", "") + t == "Note" end) |> Enum.map(fn d -> @@ -58,7 +87,7 @@ defmodule Pleroma.Elasticsearch do end) |> List.flatten() - Elastix.Bulk.post( + {:ok, %{body: %{"errors" => false}}} = Elastix.Bulk.post( url(), d, index: "activities", @@ -104,12 +133,54 @@ defmodule Pleroma.Elasticsearch do ) end - def search_activities(q) do - Elastix.Search.search( - url(), - "activities", - ["activity"], - q - ) + def search(:raw, index, type, q) do + with {:ok, raw_results} <- Elastix.Search.search(url(), index, [type], q) do + results = + raw_results + |> Map.get(:body, %{}) + |> Map.get("hits", %{}) + |> Map.get("hits", []) + + {:ok, results} + else + {:error, e} -> + Logger.error(e) + {:error, e} + end + end + + def search(:activities, q) do + with {:ok, results} <- search(:raw, "activities", "activity", q) do + results + |> Enum.map(fn result -> result["_id"] end) + |> Pleroma.Activity.all_by_ids_with_object() + else + e -> + Logger.error(e) + [] + end + end + + def search(:users, q) do + with {:ok, results} <- search(:raw, "users", "user", q) do + results + |> Enum.map(fn result -> result["_id"] end) + |> Pleroma.User.get_all_by_ids() + else + e -> + Logger.error(e) + [] + end + end + + def search(:hashtags, q) do + with {:ok, results} <- search(:raw, "hashtags", "hashtag", q) do + results + |> Enum.map(fn result -> result["_source"]["hashtag"] end) + else + e -> + Logger.error(e) + [] + end end end diff --git a/lib/pleroma/search/elasticsearch.ex b/lib/pleroma/search/elasticsearch.ex index 181009ad6..e770fe536 100644 --- a/lib/pleroma/search/elasticsearch.ex +++ b/lib/pleroma/search/elasticsearch.ex @@ -2,79 +2,94 @@ defmodule Pleroma.Search.Elasticsearch do @behaviour Pleroma.Search alias Pleroma.Web.MastodonAPI.StatusView + alias Pleroma.Web.MastodonAPI.AccountView alias Pleroma.Web.ActivityPub.Visibility + alias Pleroma.Search.Elasticsearch.Parsers + alias Pleroma.Web.Endpoint - defp to_es(term) when is_binary(term) do + defp es_query(:activity, query) do %{ - match: %{ - content: %{ - query: term, - operator: "AND" + query: %{ + bool: %{ + must: Parsers.Activity.parse(query) } } } end - defp to_es({:quoted, term}), do: to_es(term) - - defp to_es({:filter, ["hashtag", query]}) do + defp es_query(:user, query) do %{ - term: %{ - hashtags: %{ - value: query + query: %{ + bool: %{ + must: Parsers.User.parse(query) } } } end - defp to_es({:filter, [field, query]}) do + defp es_query(:hashtag, query) do %{ - term: %{ - field => %{ - value: query + query: %{ + bool: %{ + must: Parsers.Hashtag.parse(query) } } } end - defp parse(query) do - query - |> SearchParser.parse!() - |> Enum.map(&to_es/1) - end - @impl Pleroma.Search def search(%{assigns: %{user: user}} = _conn, %{q: query} = _params, _options) do - q = %{ - query: %{ - bool: %{ - must: parse(String.trim(query)) - } - } + parsed_query = + query + |> String.trim() + |> SearchParser.parse!() + + activity_task = + Task.async(fn -> + q = es_query(:activity, parsed_query) + + Pleroma.Elasticsearch.search(:activities, q) + |> Enum.filter(fn x -> Visibility.visible_for_user?(x, user) end) + end) + + user_task = + Task.async(fn -> + q = es_query(:user, parsed_query) + + Pleroma.Elasticsearch.search(:users, q) + |> Enum.filter(fn x -> Pleroma.User.visible_for(x, user) == :visible end) + end) + + hashtag_task = + Task.async(fn -> + q = es_query(:hashtag, parsed_query) + + Pleroma.Elasticsearch.search(:hashtags, q) + end) + + activity_results = Task.await(activity_task) + user_results = Task.await(user_task) + hashtag_results = Task.await(hashtag_task) + + %{ + "accounts" => + AccountView.render("index.json", + users: user_results, + for: user + ), + "hashtags" => + Enum.map(hashtag_results, fn x -> + %{ + url: Endpoint.url() <> "/tag/" <> x, + name: x + } + end), + "statuses" => + StatusView.render("index.json", + activities: activity_results, + for: user, + as: :activity + ) } - - out = Pleroma.Elasticsearch.search_activities(q) - - with {:ok, raw_results} <- out do - results = - raw_results - |> Map.get(:body, %{}) - |> Map.get("hits", %{}) - |> Map.get("hits", []) - |> Enum.map(fn result -> result["_id"] end) - |> Pleroma.Activity.all_by_ids_with_object() - |> Enum.filter(fn x -> Visibility.visible_for_user?(x, user) end) - - %{ - "accounts" => [], - "hashtags" => [], - "statuses" => - StatusView.render("index.json", - activities: results, - for: user, - as: :activity - ) - } - end end end diff --git a/lib/pleroma/search/elasticsearch/activity_parser.ex b/lib/pleroma/search/elasticsearch/activity_parser.ex new file mode 100644 index 000000000..0c124d537 --- /dev/null +++ b/lib/pleroma/search/elasticsearch/activity_parser.ex @@ -0,0 +1,38 @@ +defmodule Pleroma.Search.Elasticsearch.Parsers.Activity do + defp to_es(term) when is_binary(term) do + %{ + match: %{ + content: %{ + query: term, + operator: "AND" + } + } + } + end + + defp to_es({:quoted, term}), do: to_es(term) + + defp to_es({:filter, ["hashtag", query]}) do + %{ + term: %{ + hashtags: %{ + value: query + } + } + } + end + + defp to_es({:filter, [field, query]}) do + %{ + term: %{ + field => %{ + value: query + } + } + } + end + + def parse(q) do + Enum.map(q, &to_es/1) + end +end diff --git a/lib/pleroma/search/elasticsearch/hashtag_parser.ex b/lib/pleroma/search/elasticsearch/hashtag_parser.ex new file mode 100644 index 000000000..6e2801ed0 --- /dev/null +++ b/lib/pleroma/search/elasticsearch/hashtag_parser.ex @@ -0,0 +1,30 @@ +defmodule Pleroma.Search.Elasticsearch.Parsers.Hashtag do + defp to_es(term) when is_binary(term) do + %{ + term: %{ + hashtag: %{ + value: String.downcase(term), + } + } + } + end + + defp to_es({:quoted, term}), do: to_es(term) + + defp to_es({:filter, ["hashtag", query]}) do + %{ + term: %{ + hashtag: %{ + value: String.downcase(query) + } + } + } + end + + defp to_es({:filter, _}), do: nil + + def parse(q) do + Enum.map(q, &to_es/1) + |> Enum.filter(fn x -> x != nil end) + end +end diff --git a/lib/pleroma/search/elasticsearch/user_paser.ex b/lib/pleroma/search/elasticsearch/user_paser.ex new file mode 100644 index 000000000..96bfdc7d2 --- /dev/null +++ b/lib/pleroma/search/elasticsearch/user_paser.ex @@ -0,0 +1,53 @@ +defmodule Pleroma.Search.Elasticsearch.Parsers.User do + defp to_es(term) when is_binary(term) do + %{ + bool: %{ + minimum_should_match: 1, + should: [ + %{ + match: %{ + bio: %{ + query: term, + operator: "AND" + } + } + }, + %{ + term: %{ + nickname: %{ + value: term + } + } + }, + %{ + match: %{ + display_name: %{ + query: term, + operator: "AND" + } + } + } + ] + } + } + end + + defp to_es({:quoted, term}), do: to_es(term) + + defp to_es({:filter, ["user", query]}) do + %{ + term: %{ + nickname: %{ + value: query + } + } + } + end + + defp to_es({:filter, _}), do: nil + + def parse(q) do + Enum.map(q, &to_es/1) + |> Enum.filter(fn x -> x != nil end) + end +end diff --git a/lib/pleroma/user.ex b/lib/pleroma/user.ex index 8e40dfc0d..a2cf22e55 100644 --- a/lib/pleroma/user.ex +++ b/lib/pleroma/user.ex @@ -1088,6 +1088,7 @@ defmodule Pleroma.User do def update_and_set_cache(changeset) do with {:ok, user} <- Repo.update(changeset, stale_error_field: :id) do + Pleroma.Elasticsearch.maybe_put_into_elasticsearch(user) set_cache(user) end end diff --git a/lib/pleroma/web/activity_pub/side_effects.ex b/lib/pleroma/web/activity_pub/side_effects.ex index 91e9c72e0..a93961922 100644 --- a/lib/pleroma/web/activity_pub/side_effects.ex +++ b/lib/pleroma/web/activity_pub/side_effects.ex @@ -538,7 +538,7 @@ defmodule Pleroma.Web.ActivityPub.SideEffects do @impl true def handle_after_transaction(%Pleroma.Activity{data: %{"type" => "Create"}} = activity) do - Pleroma.Elasticsearch.put_by_id(activity.id) + Pleroma.Elasticsearch.put_by_id(:activity, activity.id) end def handle_after_transaction(%Pleroma.Activity{}) do