feat(stats): query http gateway, wait until gateway up to collect discord stats

alyssa 2024-09-27 18:53:04 +09:00
parent e4ed354536
commit 9ff824c37b
7 changed files with 130 additions and 17 deletions
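In short: each gateway node now serves its cache statistics over HTTP on port 5000, and the scheduled-tasks runner discovers the nodes through Consul and refuses to collect stats until every node reports itself as up. Below is a minimal sketch of the per-node exchange, written in the style of the Go collector further down; the node address is hypothetical, while the port, the 302 Found status, and the JSON field names come from the diff itself.

package main

import (
	"encoding/json"
	"fmt"
	"io"
	"net/http"
)

// Shape of the body served by the gateway's new /stats route (see the diff below).
type gatewayStats struct {
	Up           bool `json:"up"`
	GuildCount   int  `json:"guild_count"`
	ChannelCount int  `json:"channel_count"`
}

func main() {
	// 10.0.0.5 is a hypothetical gateway node; the real collector gets the
	// addresses from Consul. Port 5000 matches the diff below.
	resp, err := http.Get("http://10.0.0.5:5000/stats")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// The route answers 302 Found rather than 200 OK, and the collector checks for that.
	if resp.StatusCode != http.StatusFound {
		panic(fmt.Sprintf("unexpected status %v", resp.Status))
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		panic(err)
	}

	var s gatewayStats
	if err := json.Unmarshal(body, &s); err != nil {
		panic(err)
	}

	// "up" only becomes true once the node has seen Ready from all of its shards.
	if !s.Up {
		panic("gateway node has not finished identifying its shards yet")
	}

	fmt.Printf("guilds=%d channels=%d\n", s.GuildCount, s.ChannelCount)
}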

View file

@@ -2,8 +2,9 @@ name: Build scheduled tasks runner Docker image
 on:
   push:
-    branches: [main]
+    branches: [main, gateway-service]
     paths:
       - .github/workflows/scheduled_tasks.yml
       - 'services/scheduled_tasks/**'
 jobs:

View file

@@ -2,5 +2,4 @@ go 1.19
 use (
 	./services/scheduled_tasks
-	./services/web-proxy
 )

View file

@@ -5,12 +5,15 @@ use axum::{
     routing::get,
     Router,
 };
-use serde_json::to_string;
+use serde_json::{json, to_string};
 use tracing::{error, info};
 use twilight_model::guild::Permissions;
 use twilight_model::id::Id;
-use crate::discord::cache::{dm_channel, DiscordCache, DM_PERMISSIONS};
+use crate::discord::{
+    cache::{dm_channel, DiscordCache, DM_PERMISSIONS},
+    gateway::cluster_config,
+};
 use std::sync::Arc;
fn status_code(code: StatusCode, body: String) -> Response {
@@ -156,6 +159,17 @@ pub async fn run_server(cache: Arc<DiscordCache>) -> anyhow::Result<()> {
})
)
+        .route("/stats", get(|State(cache): State<Arc<DiscordCache>>| async move {
+            let cluster = cluster_config();
+            let has_been_up = cache.2.read().await.len() as u32 == if cluster.total_shards > 16 { 16 } else { cluster.total_shards };
+            let stats = json!({
+                "guild_count": cache.0.stats().guilds(),
+                "channel_count": cache.0.stats().channels(),
+                "up": has_been_up,
+            });
+            status_code(StatusCode::FOUND, to_string(&stats).unwrap())
+        }))
.layer(axum::middleware::from_fn(crate::logger::logger))
.with_state(cache);
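For reference, the route above serializes the stats with serde_json's to_string and returns them with status 302 Found, so a response body has roughly this shape (values are illustrative):

{"guild_count":1024,"channel_count":48000,"up":true}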

View file

@@ -1,6 +1,7 @@
 use anyhow::format_err;
 use lazy_static::lazy_static;
 use std::sync::Arc;
+use tokio::sync::RwLock;
 use twilight_cache_inmemory::{
     model::CachedMember,
     permission::{MemberRoles, RootError},
@@ -110,10 +111,14 @@ pub fn new() -> DiscordCache {
             .build(),
     );
-    DiscordCache(cache, client)
+    DiscordCache(cache, client, RwLock::new(Vec::new()))
 }
 
-pub struct DiscordCache(pub Arc<InMemoryCache>, pub Arc<twilight_http::Client>);
+pub struct DiscordCache(
+    pub Arc<InMemoryCache>,
+    pub Arc<twilight_http::Client>,
+    pub RwLock<Vec<u32>>,
+);
 
 impl DiscordCache {
     pub async fn guild_permissions(

View file

@@ -1,3 +1,4 @@
+use libpk::_config::ClusterSettings;
 use std::sync::{mpsc::Sender, Arc};
 use tracing::{info, warn};
 use twilight_gateway::{
@@ -13,6 +14,18 @@ use crate::discord::identify_queue::{self, RedisQueue};
 use super::{cache::DiscordCache, shard_state::ShardStateManager};
 
+pub fn cluster_config() -> ClusterSettings {
+    libpk::config
+        .discord
+        .cluster
+        .clone()
+        .unwrap_or(libpk::_config::ClusterSettings {
+            node_id: 0,
+            total_shards: 1,
+            total_nodes: 1,
+        })
+}
pub fn create_shards(redis: fred::pool::RedisPool) -> anyhow::Result<Vec<Shard<RedisQueue>>> {
let intents = Intents::GUILDS
| Intents::DIRECT_MESSAGES
@@ -23,16 +36,7 @@ pub fn create_shards(redis: fred::pool::RedisPool) -> anyhow::Result<Vec<Shard<RedisQueue>>> {
let queue = identify_queue::new(redis);
-    let cluster_settings =
-        libpk::config
-            .discord
-            .cluster
-            .clone()
-            .unwrap_or(libpk::_config::ClusterSettings {
-                node_id: 0,
-                total_shards: 1,
-                total_nodes: 1,
-            });
+    let cluster_settings = cluster_config();
let (start_shard, end_shard): (u32, u32) = if cluster_settings.total_shards < 16 {
warn!("we have less than 16 shards, assuming single gateway process");
@@ -77,6 +81,11 @@ pub async fn runner(
{
tracing::warn!(?error, "error updating redis state")
}
+        if let Event::Ready(_) = event {
+            if !cache.2.read().await.contains(&shard.id().number()) {
+                cache.2.write().await.push(shard.id().number());
+            }
+        }
cache.0.update(&event);
//if let Err(error) = tx.send((shard.id(), event)) {
// tracing::warn!(?error, "error sending event to global handler: {error}",);

View file

@@ -5,8 +5,93 @@ import (
 	"encoding/json"
 	"fmt"
 	"log"
+	"io"
+	"os"
+	"net/http"
+	"strconv"
 )
+type httpstats struct {
+	Up           bool `json:"up"`
+	GuildCount   int  `json:"guild_count"`
+	ChannelCount int  `json:"channel_count"`
+}
+
+func query_http_cache() []httpstats {
+	var values []httpstats
+
+	url := os.Getenv("CONSUL_URL")
+	if url == "" {
+		panic("missing CONSUL_URL in environment")
+	}
+
+	expected_gateway_count, err := strconv.Atoi(os.Getenv("EXPECTED_GATEWAY_COUNT"))
+	if err != nil {
+		panic(fmt.Sprintf("missing or invalid EXPECTED_GATEWAY_COUNT in environment"))
+	}
+
+	resp, err := http.Get(fmt.Sprintf("%v/v1/health/service/pluralkit-gateway", url))
+	if err != nil {
+		panic(err)
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusOK {
+		panic(fmt.Sprintf("got status %v trying to query consul for all_gateway_instances", resp.Status))
+	}
+
+	var ips []string
+
+	data, err := io.ReadAll(resp.Body)
+	if err != nil {
+		panic(err)
+	}
+
+	var cs []any
+	err = json.Unmarshal(data, &cs)
+	if err != nil {
+		panic(err)
+	}
+
+	if len(cs) != expected_gateway_count {
+		panic(fmt.Sprintf("got unexpected number of gateway instances from consul (expected %v, got %v)", expected_gateway_count, len(cs)))
+	}
+
+	for idx, itm := range cs {
+		if ip, ok := itm.(map[string]any)["Service"].(map[string]any)["Address"].(string); ok {
+			ips = append(ips, ip)
+		} else {
+			panic(fmt.Sprintf("got bad data from consul for all_gateway_instances, at index %v", idx))
+		}
+	}
+
+	log.Printf("querying %v gateway clusters for discord stats\n", len(ips))
+
+	for _, ip := range ips {
+		resp, err := http.Get("http://" + ip + ":5000/stats")
+		if err != nil {
+			panic(err)
+		}
+		defer resp.Body.Close()
+		if resp.StatusCode != http.StatusFound {
+			panic(fmt.Sprintf("got status %v trying to query %v:5000", resp.Status, ip))
+		}
+
+		var s httpstats
+		data, err := io.ReadAll(resp.Body)
+		if err != nil {
+			panic(err)
+		}
+		err = json.Unmarshal(data, &s)
+		if err != nil {
+			panic(err)
+		}
+
+		if s.Up == false {
+			panic("gateway is not up yet, skipping stats collection")
+		}
+
+		values = append(values, s)
+	}
+
+	return values
+}
+
type rstatval struct {
GuildCount int `json:"GuildCount"`
ChannelCount int `json:"ChannelCount"`

View file

@@ -49,7 +49,7 @@ func update_db_message_meta() {
}
func get_discord_counts() (int, int) {
-	redisStats := run_redis_query()
+	redisStats := query_http_cache()
guild_count := 0
channel_count := 0