feat: new stats embed / api

alyssa 2025-01-05 00:52:45 +00:00
parent e88d6b7e2a
commit 9f8d3d22d2
8 changed files with 215 additions and 100 deletions

Cargo.lock (generated)
View file

@@ -2925,6 +2925,7 @@ dependencies = [
  "num-format",
  "reqwest 0.12.8",
  "serde",
+ "serde_json",
  "sqlx",
  "tokio",
  "tracing",

View file

@@ -3,12 +3,11 @@ using System.Diagnostics;
 using App.Metrics;
 using Myriad.Builders;
-using Myriad.Cache;
-using Myriad.Gateway;
-using Myriad.Rest;
 using Myriad.Rest.Types.Requests;
 using Myriad.Types;
+using Newtonsoft.Json;
 using NodaTime;
 using PluralKit.Core;
@@ -57,78 +56,85 @@ public class Misc
         var timeAfter = SystemClock.Instance.GetCurrentInstant();
         var apiLatency = timeAfter - timeBefore;
-        var embed = new EmbedBuilder();
-        // todo: these will be inaccurate when the bot is actually multi-process
-        var messagesReceived = _metrics.Snapshot.GetForContext("Bot").Meters
-            .FirstOrDefault(m => m.MultidimensionalName == BotMetrics.MessagesReceived.Name)?.Value;
-        if (messagesReceived != null)
-            embed.Field(new Embed.Field("Messages processed",
-                $"{messagesReceived.OneMinuteRate * 60:F1}/m ({messagesReceived.FifteenMinuteRate * 60:F1}/m over 15m)",
-                true));
-        var messagesProxied = _metrics.Snapshot.GetForContext("Bot").Meters
-            .FirstOrDefault(m => m.MultidimensionalName == BotMetrics.MessagesProxied.Name)?.Value;
-        if (messagesProxied != null)
-            embed.Field(new Embed.Field("Messages proxied",
-                $"{messagesProxied.OneMinuteRate * 60:F1}/m ({messagesProxied.FifteenMinuteRate * 60:F1}/m over 15m)",
-                true));
-        var commandsRun = _metrics.Snapshot.GetForContext("Bot").Meters
-            .FirstOrDefault(m => m.MultidimensionalName == BotMetrics.CommandsRun.Name)?.Value;
-        if (commandsRun != null)
-            embed.Field(new Embed.Field("Commands executed",
-                $"{commandsRun.OneMinuteRate * 60:F1}/m ({commandsRun.FifteenMinuteRate * 60:F1}/m over 15m)",
-                true));
-        var isCluster = _botConfig.Cluster != null && _botConfig.Cluster.TotalShards != ctx.Cluster.Shards.Count;
-        var counts = await _repo.GetStats();
+        var process = Process.GetCurrentProcess();
+        var stats = await GetStats(ctx);
         var shards = await _shards.GetShards();
         var shardInfo = shards.Where(s => s.ShardId == ctx.ShardId).FirstOrDefault();
-        // todo: if we're running multiple processes, it is not useful to get the CPU/RAM usage of just the current one
-        var process = Process.GetCurrentProcess();
-        var memoryUsage = process.WorkingSet64;
-        var now = SystemClock.Instance.GetCurrentInstant().ToUnixTimeSeconds();
-        var shardUptime = Duration.FromSeconds(now - shardInfo.LastConnection);
-        var shardTotal = _botConfig.Cluster?.TotalShards ?? shards.Count();
-        int shardClusterTotal = ctx.Cluster.Shards.Count;
-        var shardUpTotal = shards.Where(x => x.Up).Count();
+        var embed = new EmbedBuilder();
         embed
-            .Field(new Embed.Field("Current shard",
-                $"Shard #{ctx.ShardId} (of {shardTotal} total,"
-                + (isCluster ? $" {shardClusterTotal} in this cluster," : "") + $" {shardUpTotal} are up)"
-                , true))
-            .Field(new Embed.Field("Shard uptime",
-                $"{shardUptime.FormatDuration()} ({shardInfo.DisconnectionCount} disconnections)", true))
-            .Field(new Embed.Field("CPU usage", $"{_cpu.LastCpuMeasure:P1}", true))
-            .Field(new Embed.Field("Memory usage", $"{memoryUsage / 1024 / 1024} MiB", true))
-            .Field(new Embed.Field("Latency",
-                $"API: {apiLatency.TotalMilliseconds:F0} ms, shard: {shardInfo.Latency} ms",
-                true));
-        embed.Field(new("Total numbers", $" {counts.SystemCount:N0} systems,"
-            + $" {counts.MemberCount:N0} members,"
-            + $" {counts.GroupCount:N0} groups,"
-            + $" {counts.SwitchCount:N0} switches,"
-            + $" {counts.MessageCount:N0} messages"));
-        embed
-            .Footer(new(String.Join(" \u2022 ", new[] {
-                $"PluralKit {BuildInfoService.Version}",
-                (isCluster ? $"Cluster {_botConfig.Cluster.NodeIndex}" : ""),
-                "https://github.com/PluralKit/PluralKit",
-                "Last restarted:",
-            })))
-            .Timestamp(process.StartTime.ToString("O"));
+            .Field(new("Connection status", $"**{shards.Count()}** shards across **{shards.Select(s => s.ClusterId).Distinct().Count()}** clusters\n"
+                + $"Current server is on **shard {ctx.ShardId} (cluster {shardInfo.ClusterId ?? 0})**\n"
+                + $"Latency: API **{apiLatency.TotalMilliseconds:F0}ms** (p90: {stats.prom.nirn_proxy_latency_p90 * 1000:F0}ms, p99: {stats.prom.nirn_proxy_latency_p99 * 1000:F0}ms), "
+                + $"shard **{shardInfo.Latency}ms** (avg: {stats.prom.shard_latency_average}ms)", true))
+            .Field(new("Resource usage", $"**CPU:** {stats.prom.cpu_used}% used / {stats.prom.cpu_total_cores} total cores ({stats.prom.cpu_total_threads} threads)\n"
+                + $"**Memory:** {(stats.prom.memory_used / 1_000_000_000):N1}GB used / {(stats.prom.memory_total / 1_000_000_000):N1}GB total", true))
+            .Field(new("Usage metrics", $"Messages received: **{stats.prom.messages_1m}/s** ({stats.prom.messages_15m}/s over 15m)\n" +
+                $"Messages proxied: **{stats.prom.proxy_1m}/s** ({stats.prom.proxy_15m}/s over 15m, {stats.db.messages_24h} total in last 24h)\n" +
+                $"Commands executed: **{stats.prom.commands_1m}/m** ({stats.prom.commands_15m}/m over 15m)"));
+        embed.Field(new("Total numbers", $"**{stats.db.systems:N0}** systems, **{stats.db.members:N0}** members, **{stats.db.groups:N0}** groups, "
+            + $"**${stats.db.switches:N0}** switches, **{stats.db.messages:N0}** messages\n" +
+            $"**{stats.db.guilds:N0}** servers with **{stats.db.channels:N0}** channels"));
+        embed.Footer(Help.helpEmbed.Footer);
+        var uptime = ((DateTimeOffset)process.StartTime).ToUnixTimeSeconds();
+        embed.Description($"### PluralKit [{BuildInfoService.Version}](https://github.com/pluralkit/pluralkit/commit/{BuildInfoService.FullVersion})\n" +
+            $"Built on <t:{BuildInfoService.Timestamp}> (<t:{BuildInfoService.Timestamp}:R>)"
+            + (BuildInfoService.IsDev ? ", **development build**" : "")
+            + $"\nLast restart: <t:{uptime}:R>");
         await ctx.Rest.EditMessage(msg.ChannelId, msg.Id,
            new MessageEditRequest { Content = "", Embeds = new[] { embed.Build() } });
     }
+
+    private async Task<Stats> GetStats(Context ctx)
+    {
+        var db = ctx.Redis.Connection.GetDatabase();
+        var data = await db.StringGetAsync("statsapi");
+        return JsonConvert.DeserializeObject<Stats>(data);
+    }
 }
// none of these fields are "assigned to" for some reason
#pragma warning disable CS0649
class Stats
{
public DbStats db;
public PrometheusStats prom;
};
class DbStats
{
public double systems;
public double members;
public double groups;
public double switches;
public double messages;
public double messages_24h;
public double guilds;
public double channels;
};
class PrometheusStats
{
public double messages_1m;
public double messages_15m;
public double proxy_1m;
public double proxy_15m;
public double commands_1m;
public double commands_15m;
public double cpu_total_cores;
public double cpu_total_threads;
public double cpu_used;
public double memory_total;
public double memory_used;
public double nirn_proxy_rps;
public double nirn_proxy_latency_p90;
public double nirn_proxy_latency_p99;
public double shard_latency_average;
};
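
For reference (illustrative values, not part of the diff): the "statsapi" Redis value these classes deserialize is the JSON document written by the update_stats_api job further down, roughly shaped like this:

{
  "db": { "systems": 0, "members": 0, "groups": 0, "switches": 0, "messages": 0,
          "messages_24h": 0, "guilds": 0, "channels": 0 },
  "prom": { "messages_1m": 0.0, "messages_15m": 0.0, "proxy_1m": 0.0, "proxy_15m": 0.0,
            "commands_1m": 0.0, "commands_15m": 0.0, "cpu_total_cores": 0, "cpu_total_threads": 0,
            "cpu_used": 0.0, "memory_total": "0", "memory_used": "0", "nirn_proxy_rps": 0.0,
            "nirn_proxy_latency_p90": 0.0, "nirn_proxy_latency_p99": 0.0, "shard_latency_average": 0.0 }
}

Note that the updater writes memory_total and memory_used as strings (via .to_string()); Json.NET's default conversion coerces those numeric strings into the double fields above.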

View file

@@ -57,7 +57,7 @@
   </ItemGroup>
   <Target Name="SetSourceRevisionId" BeforeTargets="InitializeSourceControlInformation">
-    <Exec Command="git rev-parse HEAD &gt; ../.version" IgnoreExitCode="False">
+    <Exec Command="git rev-parse HEAD &gt; ../.version &amp; git show --no-patch --format=%at $(git rev-parse HEAD) &gt;&gt; ../.version &amp; (git diff-index --quiet HEAD -- &amp;&amp; echo 1) &gt;&gt; ../.version" IgnoreExitCode="True">
     </Exec>
   </Target>
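
For context (not part of the diff): after this change the generated ../.version resource is expected to contain three lines, which BuildInfoService.LoadVersion below splits on newlines:

<full commit hash, i.e. the output of git rev-parse HEAD>
<commit unix timestamp, from git show --no-patch --format=%at>
<"1" if the working tree is clean (git diff-index --quiet HEAD succeeds); empty for a dirty/dev build>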

View file

@@ -4,20 +4,21 @@ public static class BuildInfoService
 {
     public static string Version { get; private set; }
     public static string FullVersion { get; private set; }
+    public static string Timestamp { get; private set; }
+    public static bool IsDev { get; private set; }
     public static async Task LoadVersion()
     {
-        using (var stream = typeof(BuildInfoService).Assembly.GetManifestResourceStream("version"))
-        {
-            // if this happens, something broke
-            if (stream == null) FullVersion = "(unknown version) ";
-            else
-                using (var reader = new StreamReader(stream))
-                    FullVersion = await reader.ReadToEndAsync();
-        }
-        // cheap hack to remove newline
-        FullVersion = FullVersion.Remove(FullVersion.Length - 1);
+        using var stream = typeof(BuildInfoService).Assembly.GetManifestResourceStream("version");
+        if (stream == null) throw new Exception("missing version information");
+
+        using var reader = new StreamReader(stream);
+        var data = (await reader.ReadToEndAsync()).Split("\n");
+        FullVersion = data[0];
+        Timestamp = data[1];
+        IsDev = data[2] == "";
         // show only short commit hash to users
         Version = FullVersion.Remove(7);

View file

@@ -31,29 +31,16 @@ pub async fn discord_state(State(ctx): State<ApiContext>) -> Json<Value> {
 }

 pub async fn meta(State(ctx): State<ApiContext>) -> Json<Value> {
-    let cluster_stats = ctx
-        .redis
-        .hgetall::<HashMap<String, String>, &str>("pluralkit:cluster_stats")
+    let stats = serde_json::from_str::<Value>(
+        ctx.redis
+            .get::<String, &'static str>("statsapi")
         .await
         .unwrap()
-        .values()
-        .map(|v| serde_json::from_str(v).unwrap())
-        .collect::<Vec<ClusterStats>>();
-    let db_stats = libpk::db::repository::get_stats(&ctx.db).await.unwrap();
-    let guild_count: i32 = cluster_stats.iter().map(|v| v.guild_count).sum();
-    let channel_count: i32 = cluster_stats.iter().map(|v| v.channel_count).sum();
-    Json(json!({
-        "system_count": db_stats.system_count,
-        "member_count": db_stats.member_count,
-        "group_count": db_stats.group_count,
-        "switch_count": db_stats.switch_count,
-        "message_count": db_stats.message_count,
-        "guild_count": guild_count,
-        "channel_count": channel_count,
-    }))
+        .as_str(),
+    )
+    .unwrap();
+
+    Json(stats)
 }

 use std::time::Duration;

View file

@@ -12,6 +12,7 @@ fred = { workspace = true }
 metrics = { workspace = true }
 reqwest = { workspace = true }
 serde = { workspace = true }
+serde_json = { workspace = true }
 sqlx = { workspace = true }
 tokio = { workspace = true }
 tracing = { workspace = true }

View file

@@ -74,7 +74,6 @@ async fn real_main() -> anyhow::Result<()> {
         "message stats updater",
         update_db_message_meta
     );
-    // every minute
     doforever!("* * * * *", "discord stats updater", update_discord_stats);
     // on :00 and :30
     doforever!(
@@ -82,6 +81,7 @@ async fn real_main() -> anyhow::Result<()> {
         "queue deleted image cleanup job",
         queue_deleted_image_cleanup
     );
+    doforever!("0,30 * * * * *", "stats api updater", update_stats_api);
     set.join_next()
         .await

View file

@@ -149,3 +149,122 @@ select id, now() from images where
     .await?;
     Ok(())
 }
pub async fn update_stats_api(ctx: AppCtx) -> anyhow::Result<()> {
let client = ClientBuilder::new()
.connect_timeout(Duration::from_secs(3))
.timeout(Duration::from_secs(3))
.build()
.expect("error making client");
#[derive(serde::Deserialize, Debug)]
struct PrometheusResult {
data: PrometheusResultData,
}
#[derive(serde::Deserialize, Debug)]
struct PrometheusResultData {
result: Vec<PrometheusData>,
}
#[derive(serde::Deserialize, Debug)]
struct PrometheusData {
value: Vec<serde_json::Value>,
}
macro_rules! prom_instant_query {
($t:ty, $q:expr) => {{
let resp = client
.get(format!(
"http://vm.svc.pluralkit.net/select/0/prometheus/api/v1/query?query={}",
$q
))
.send()
.await?;
let data = resp.json::<PrometheusResult>().await?;
data.data
.result
.get(0)
.expect("missing data")
.value
.clone()
.get(1)
.expect("missing data")
.as_str()
.expect("invalid data")
.parse::<$t>()?
}};
($t:ty, $q:expr, $wrap:expr) => {{
let val = prom_instant_query!($t, $q);
let val = (val * $wrap).round() / $wrap;
format!("{:.2}", val).parse::<f64>().unwrap()
}};
}
#[derive(serde::Serialize, sqlx::FromRow)]
struct DbStats {
systems: i64,
members: i64,
groups: i64,
switches: i64,
messages: i64,
messages_24h: i64,
guilds: i64,
channels: i64,
}
let db_stats: DbStats = sqlx::query_as(r#"
select
t1.value as systems,
t2.value as members,
t3.value as groups,
t4.value as switches,
t5.value as messages,
(t5.value - t6.value) as messages_24h,
t7.value as guilds,
t8.value as channels
from
(select value from systems order by timestamp desc limit 1) as t1,
(select value from members order by timestamp desc limit 1) as t2,
(select value from groups order by timestamp desc limit 1) as t3,
(select value from switches order by timestamp desc limit 1) as t4,
(select value from messages order by timestamp desc limit 1) as t5,
(select value from messages where timestamp > now() - interval '1 day' order by timestamp asc limit 1) as t6,
(select value from guilds order by timestamp desc limit 1) as t7,
(select value from channels order by timestamp desc limit 1) as t8
"#).fetch_one(&ctx.stats).await?;
let data = serde_json::json!({
"db": db_stats,
"prom": {
"messages_1m": prom_instant_query!(f32, "sum(bot__messages_processed_rate1m)", 10.0),
"messages_15m": prom_instant_query!(f32, "sum(bot__messages_processed_rate15m)", 10.0),
"proxy_1m": prom_instant_query!(f32, "sum(bot__messages_proxied_rate1m)", 10.0),
"proxy_15m": prom_instant_query!(f32, "sum(bot__messages_proxied_rate15m)", 10.0),
"commands_1m": prom_instant_query!(f32, "sum(bot__commands_run_rate1m)", 10.0),
"commands_15m": prom_instant_query!(f32, "sum(bot__commands_run_rate15m)", 10.0),
"cpu_total_cores": prom_instant_query!(usize, "sum(host_physical_cpus)"),
"cpu_total_threads": prom_instant_query!(usize, "sum(host_logical_cpus)"),
"cpu_used": prom_instant_query!(f32, "100 * ((sum(host_logical_cpus) - sum(rate(host_cpu_seconds_total{mode=\"idle\"}[1m]))) / sum(host_logical_cpus)) * sum(host_logical_cpus)", 10.0),
"memory_total": prom_instant_query!(i64, "sum(host_memory_total_bytes)").to_string(),
"memory_used": prom_instant_query!(i64, "sum(host_memory_total_bytes) - sum(host_memory_available_bytes)").to_string(),
"nirn_proxy_rps": prom_instant_query!(f32, "sum(rate(nirn_proxy_requests_count))", 10.0),
"nirn_proxy_latency_p90": prom_instant_query!(f32, "histogram_quantile(0.9, sum(rate(nirn_proxy_requests_bucket[5m])) by (le))", 1000.0),
"nirn_proxy_latency_p99": prom_instant_query!(f32, "histogram_quantile(0.99, sum(rate(nirn_proxy_requests_bucket[5m])) by (le))", 1000.0),
"shard_latency_average": prom_instant_query!(f32, "avg(pluralkit_gateway_shard_latency)", 10.0),
}
});
ctx.redis
.set::<(), &str, String>(
"statsapi",
serde_json::to_string(&data).expect("should not fail"),
// Some(fred::types::Expiration::EX(60)),
None,
None,
false,
)
.await?;
Ok(())
}
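
For context (illustrative, not part of the commit): prom_instant_query! calls the standard Prometheus instant-query HTTP API, whose response wraps each sample in a [timestamp, "value"] pair, roughly:

{ "status": "success", "data": { "resultType": "vector", "result": [ { "metric": {}, "value": [ 1736000000, "42.7" ] } ] } }

which is why the macro reads result[0].value[1] as a string and then parses it into the requested numeric type; the three-argument form additionally rounds the parsed value (for example, a wrap factor of 10.0 keeps one decimal place).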