fix(gateway): log close code, correctly set latency, don't spam redis error on identify

This commit is contained in:
alyssa 2024-12-26 19:18:46 +00:00
parent d1b617f6be
commit 88e136d22a
5 changed files with 91 additions and 48 deletions

View file

@ -1,9 +1,10 @@
use futures::StreamExt;
use libpk::_config::ClusterSettings;
use metrics::counter;
use std::sync::{mpsc::Sender, Arc};
use tracing::{info, warn};
use tracing::{info, warn, error};
use twilight_gateway::{
create_iterator, ConfigBuilder, Event, EventTypeFlags, Shard, ShardId, StreamExt,
create_iterator, ConfigBuilder, Event, EventTypeFlags, Shard, ShardId, Message,
};
use twilight_model::gateway::{
payload::outgoing::update_presence::UpdatePresencePayload,
@ -77,48 +78,94 @@ pub fn create_shards(redis: fred::clients::RedisPool) -> anyhow::Result<Vec<Shar
/// Runs the receive loop for a single gateway shard: takes items from `shard` until it
/// yields `None`, records per-event metrics, and updates `shard_state` and `cache`.
///
/// NOTE(review): this listing is a rendered diff with the +/- markers stripped, so removed
/// and added lines are interleaved (two sender parameters, two `while let` loop heads, two
/// copies of the metrics/cache code). Read it as two versions of one function, not as a
/// single compilable body.
///
/// Parameters, as used in the visible code:
/// - `shard`: the shard that is polled; its id is used for logging and metric labels.
/// - `tx` / `_tx`: channel sender; every `send` call on it here is commented out.
/// - `shard_state`: receives `handle_event`, `socket_closed` and `heartbeated` calls.
/// - `cache`: `cache.0` is updated with every parsed event; `cache.2` collects shard
///   numbers when a `Ready` event arrives.
pub async fn runner(
mut shard: Shard<RedisQueue>,
// presumably the pre-change parameter (parsed events) — the next line is its replacement
tx: Sender<(ShardId, Event)>,
// presumably the post-change parameter (raw text payloads); underscore-prefixed and unused
_tx: Sender<(ShardId, String)>,
shard_state: ShardStateManager,
cache: Arc<DiscordCache>,
) {
//let _span = info_span!("shard_runner", shard_id = shard.id().number()).entered();
// let _span = info_span!("shard_runner", shard_id = shard.id().number()).entered();
info!("waiting for events");
// Loop head that asks the shard for events directly; the `Ok(event)` arm below uses the
// item as an `Event` without a separate parse step.
while let Some(item) = shard.next_event(EventTypeFlags::all()).await {
match item {
Ok(event) => {
// event_type * shard_id is too many labels and prometheus fails to query it
// so we split it into two metrics
counter!(
"pluralkit_gateway_events_type",
"event_type" => serde_variant::to_variant_name(&event.kind()).unwrap(),
)
.increment(1);
counter!(
"pluralkit_gateway_events_shard",
"shard_id" => shard.id().number().to_string(),
)
.increment(1);
if let Err(error) = shard_state
.handle_event(shard.id().number(), event.clone())
.await
{
tracing::warn!(?error, "error updating redis state")
}
if let Event::Ready(_) = event {
if !cache.2.read().await.contains(&shard.id().number()) {
cache.2.write().await.push(shard.id().number());
// Loop head that takes raw websocket messages from the shard; text payloads are parsed
// into an `Event` further down, close frames are handled inline.
while let Some(item) = shard.next().await {
let raw_event = match item {
Ok(evt) => match evt {
// Close frame: log the close code and reason when a frame is present, otherwise
// log "unknown", then report the closure to `shard_state` and move to the next item.
Message::Close(frame) => {
info!(
"shard {} closed: {}",
shard.id().number(),
if let Some(close) = frame {
format!("{} ({})", close.code, close.reason)
} else {
"unknown".to_string()
}
);
if let Err(error) = shard_state.socket_closed(shard.id().number()).await {
error!("failed to update shard state for socket closure: {error}");
}
continue;
}
cache.0.update(&event);
//if let Err(error) = tx.send((shard.id(), event)) {
// tracing::warn!(?error, "error sending event to global handler: {error}",);
//}
}
// Text frame: the payload becomes `raw_event` and is parsed below.
Message::Text(text) => text,
},
// Receive errors are logged and skipped; the loop keeps polling the shard.
Err(error) => {
tracing::warn!(?error, "error receiving event from shard {}", shard.id());
continue;
}
};
// NOTE(review): the clone keeps `raw_event` usable after parsing, but the only later use
// visible here is the commented-out `tx.send` at the end of the loop — confirm it is needed.
let event = match twilight_gateway::parse(raw_event.clone(), EventTypeFlags::all()) {
Ok(Some(parsed)) => Event::from(parsed),
Ok(None) => {
// we received an event type unknown to twilight
// that's fine, we probably don't need it anyway
continue;
}
// Parse failures are logged with the shard number and the item is dropped.
Err(error) => {
error!(
"shard {} failed to parse gateway event: {}",
shard.id().number(),
error
);
continue;
}
};
// log the event in metrics
// event_type * shard_id is too many labels and prometheus fails to query it
// so we split it into two metrics
counter!(
"pluralkit_gateway_events_type",
"event_type" => serde_variant::to_variant_name(&event.kind()).unwrap(),
)
.increment(1);
counter!(
"pluralkit_gateway_events_shard",
"shard_id" => shard.id().number().to_string(),
)
.increment(1);
// update shard state and discord cache
// A failure here is only logged; processing of the event continues.
if let Err(error) = shard_state
.handle_event(shard.id().number(), event.clone())
.await
{
tracing::warn!(?error, "error updating redis state");
}
// need to do heartbeat separately, to get the latency
// The latency value passed along is read from the shard itself via `shard.latency()`.
// NOTE(review): `if let … && let …` is a let chain — verify the project's toolchain/edition
// supports it.
if let Event::GatewayHeartbeatAck = event
&& let Err(error) = shard_state
.heartbeated(shard.id().number(), shard.latency())
.await
{
tracing::warn!(?error, "error updating redis state for latency");
}
// On `Ready`, add this shard's number to `cache.2` unless it is already listed.
// NOTE(review): `read()` and `write()` are two separate acquisitions (presumably an async
// RwLock), so the membership check and the push are not atomic — confirm a duplicate entry
// is impossible or harmless.
if let Event::Ready(_) = event {
if !cache.2.read().await.contains(&shard.id().number()) {
cache.2.write().await.push(shard.id().number());
}
}
cache.0.update(&event);
// okay, we've handled the event internally, let's send it to consumers
// tx.send((shard.id(), raw_event)).unwrap();
}
}