mirror of
https://akkoma.dev/AkkomaGang/akkoma.git
synced 2024-10-30 13:33:12 +00:00
16197ff57a
With kilobytes, the resulting numbers got too large and were cut off in the charts, making them useless. However, even an idle Akkoma server’s memory usage is in the low hundreds of megabytes, so we don’t need this much precision to begin with for the dashboard. Other metric users might prefer base units and can handle scaling in a smarter way, so keep this configurable.
155 lines
4.8 KiB
Elixir
155 lines
4.8 KiB
Elixir
defmodule Pleroma.Web.Telemetry do
  @moduledoc """
  Supervisor for the instance's telemetry pipeline.

  It starts a `:telemetry_poller` that periodically emits instance statistics
  (see `instance_stats/0`) and, unless disabled through the
  `[:instance, :export_prometheus_metrics]` setting, the Prometheus metrics
  core together with `Pleroma.PrometheusExporter`.

  Two metric sets are exposed:

    * `prometheus_metrics/0` - common metrics plus distributions
    * `live_dashboard_metrics/0` - common metrics plus summaries
  """

  use Supervisor

  import Telemetry.Metrics

  alias Pleroma.Config
  alias Pleroma.Stats

  # Value handed to `:telemetry_poller` as its `:period` option.
  @poll_period 10_000

  # Histogram bucket boundaries, expressed in the unit of the metric they are attached to.
  @coarse_buckets [0.1, 0.2, 0.5, 1, 2.5, 5, 10, 25, 50, 100, 250, 500, 1000]
  @fine_buckets [0.01, 0.025, 0.05, 0.1, 0.2, 0.5, 1, 2.5, 5, 10]

  @doc """
  Starts the telemetry supervisor, registered under the module name.
  """
  @spec start_link(term()) :: Supervisor.on_start()
  def start_link(arg) do
    Supervisor.start_link(__MODULE__, arg, name: __MODULE__)
  end

  # Builds the child list: the poller is always started, the Prometheus
  # children only when the export is enabled in the configuration.
  @impl true
  def init(_arg) do
    poller = {:telemetry_poller, measurements: periodic_measurements(), period: @poll_period}

    Supervisor.init([poller | prometheus_children()], strategy: :one_for_one)
  end

  # Children required for the Prometheus export; an empty list when the
  # export is switched off. The export defaults to enabled.
  defp prometheus_children do
    if Config.get([:instance, :export_prometheus_metrics], true) do
      [
        {TelemetryMetricsPrometheus.Core, metrics: prometheus_metrics()},
        Pleroma.PrometheusExporter
      ]
    else
      []
    end
  end

  # A separate set of metrics for distributions because phoenix dashboard does NOT handle them well
  defp distribution_metrics do
    [
      # Phoenix request time
      # NOTE(review): the unit is seconds, yet the bucket boundaries are the same
      # ones used for the millisecond-based query time below - confirm this is intended.
      distribution(
        "phoenix.router_dispatch.stop.duration",
        measurement: :duration,
        unit: {:native, :second},
        tags: [:route],
        reporter_options: [buckets: @coarse_buckets]
      ),

      # Database Time Metrics
      distribution(
        "pleroma.repo.query.total_time",
        measurement: :total_time,
        unit: {:native, :millisecond},
        reporter_options: [buckets: @coarse_buckets]
      ),
      distribution(
        "pleroma.repo.query.queue_time",
        measurement: :queue_time,
        unit: {:native, :millisecond},
        reporter_options: [buckets: @fine_buckets]
      ),

      # Background jobs that raised, tagged by worker
      distribution(
        "oban_job_exception",
        event_name: [:oban, :job, :exception],
        measurement: :duration,
        tags: [:worker],
        tag_values: &put_worker_tag/1,
        unit: {:native, :second},
        reporter_options: [buckets: @fine_buckets]
      ),

      # Outgoing HTTP requests, tagged by response status
      distribution(
        "tesla_request_completed",
        event_name: [:tesla, :request, :stop],
        measurement: :duration,
        tags: [:response_code],
        tag_values: &put_response_code_tag/1,
        unit: {:native, :second},
        reporter_options: [buckets: @fine_buckets]
      ),

      # Background jobs that finished, tagged by worker
      distribution(
        "oban_job_completion",
        event_name: [:oban, :job, :stop],
        measurement: :duration,
        tags: [:worker],
        tag_values: &put_worker_tag/1,
        unit: {:native, :second},
        reporter_options: [buckets: @fine_buckets]
      )
    ]
  end

  # Copies the worker of the job found in the event metadata into the `:worker` tag.
  defp put_worker_tag(metadata), do: Map.put(metadata, :worker, metadata.job.worker)

  # Copies the status of the env found in the event metadata into the `:response_code` tag.
  defp put_response_code_tag(metadata) do
    Map.put(metadata, :response_code, metadata.env.status)
  end

  # Summary metrics are currently not (yet) supported by the prometheus exporter
  #
  # `byte_unit` selects the unit the VM memory total is reported in.
  defp summary_metrics(byte_unit) do
    [
      # Phoenix Metrics
      summary("phoenix.endpoint.stop.duration", unit: {:native, :millisecond}),
      summary("phoenix.router_dispatch.stop.duration",
        tags: [:route],
        unit: {:native, :millisecond}
      ),

      # Database Time Metrics
      summary("pleroma.repo.query.total_time", unit: {:native, :millisecond}),
      summary("pleroma.repo.query.decode_time", unit: {:native, :millisecond}),
      summary("pleroma.repo.query.query_time", unit: {:native, :millisecond}),
      summary("pleroma.repo.query.queue_time", unit: {:native, :millisecond}),
      summary("pleroma.repo.query.idle_time", unit: {:native, :millisecond}),

      # VM Metrics
      summary("vm.memory.total", unit: {:byte, byte_unit}),
      summary("vm.total_run_queue_lengths.total"),
      summary("vm.total_run_queue_lengths.cpu"),
      summary("vm.total_run_queue_lengths.io")
    ]
  end

  # Instance statistics shared by both metric sets; the underlying events
  # are emitted by `instance_stats/0`.
  defp common_metrics do
    [
      last_value("pleroma.local_users.total"),
      last_value("pleroma.domains.total"),
      last_value("pleroma.local_statuses.total"),
      last_value("pleroma.remote_users.total")
    ]
  end

  @doc """
  Returns the metrics handed to the Prometheus exporter: the common instance
  metrics followed by the distribution metrics.
  """
  @spec prometheus_metrics() :: [Telemetry.Metrics.t()]
  def prometheus_metrics, do: common_metrics() ++ distribution_metrics()

  @doc """
  Returns the metrics for the live dashboard: the common instance metrics
  followed by the summary metrics.

  VM memory is reported in megabytes; with kilobytes the numbers got too large
  and were cut off in the dashboard charts.
  """
  @spec live_dashboard_metrics() :: [Telemetry.Metrics.t()]
  def live_dashboard_metrics, do: common_metrics() ++ summary_metrics(:megabyte)

  # MFA tuples the poller invokes on every tick.
  defp periodic_measurements do
    [
      {__MODULE__, :instance_stats, []}
    ]
  end

  @doc """
  Emits the current instance statistics as telemetry events.

  Reads the counters from `Pleroma.Stats.get_stats/0` and publishes each of
  them as the `:total` measurement of its own `[:pleroma, _]` event. Public
  because it is referenced by module/function/args from the poller's
  measurement list.
  """
  @spec instance_stats() :: :ok
  def instance_stats do
    stats = Stats.get_stats()

    :telemetry.execute([:pleroma, :local_users], %{total: stats.user_count}, %{})
    :telemetry.execute([:pleroma, :domains], %{total: stats.domain_count}, %{})
    :telemetry.execute([:pleroma, :local_statuses], %{total: stats.status_count}, %{})
    :telemetry.execute([:pleroma, :remote_users], %{total: stats.remote_user_count}, %{})
  end
end
|