feat: add basic api/gateway metrics

This commit is contained in:
alyssa 2024-11-14 13:39:47 +09:00
parent 8831e8fabe
commit 93f8786da1
8 changed files with 141 additions and 53 deletions

70
Cargo.lock generated
View file

@ -89,7 +89,7 @@ dependencies = [
"anyhow",
"axum 0.7.5",
"fred",
"hyper 1.3.1",
"hyper 1.5.0",
"hyper-util",
"lazy_static",
"libpk",
@ -256,7 +256,7 @@ dependencies = [
"http 1.1.0",
"http-body 1.0.0",
"http-body-util",
"hyper 1.3.1",
"hyper 1.5.0",
"hyper-util",
"itoa",
"matchit",
@ -398,9 +398,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
[[package]]
name = "bytes"
version = "1.6.0"
version = "1.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9"
checksum = "9ac0150caa2ae65ca5bd83f25c7de183dea78d4d366469f148435e2acfbad0da"
[[package]]
name = "bytes-utils"
@ -1022,6 +1022,7 @@ dependencies = [
"metrics",
"prost",
"serde_json",
"serde_variant",
"signal-hook",
"tokio",
"tracing",
@ -1350,9 +1351,9 @@ dependencies = [
[[package]]
name = "hyper"
version = "1.3.1"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fe575dd17d0862a9a33781c8c4696a55c320909004a67a00fb286ba8b1bc496d"
checksum = "bbbff0a806a4728c99295b254c8838933b5b082d75e3cb70c8dab21fdfbcfa9a"
dependencies = [
"bytes",
"futures-channel",
@ -1391,7 +1392,7 @@ checksum = "a0bea761b46ae2b24eb4aef630d8d1c398157b6fc29e6350ecf090a0b70c952c"
dependencies = [
"futures-util",
"http 1.1.0",
"hyper 1.3.1",
"hyper 1.5.0",
"hyper-util",
"rustls 0.22.4",
"rustls-native-certs",
@ -1409,7 +1410,7 @@ checksum = "08afdbb5c31130e3034af566421053ab03787c640246a446327f550d11bcb333"
dependencies = [
"futures-util",
"http 1.1.0",
"hyper 1.3.1",
"hyper 1.5.0",
"hyper-util",
"rustls 0.23.10",
"rustls-pki-types",
@ -1421,20 +1422,19 @@ dependencies = [
[[package]]
name = "hyper-util"
version = "0.1.5"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b875924a60b96e5d7b9ae7b066540b1dd1cbd90d1828f54c92e02a283351c56"
checksum = "df2dcfbe0677734ab2f3ffa7fa7bfd4706bfdc1ef393f2ee30184aed67e631b4"
dependencies = [
"bytes",
"futures-channel",
"futures-util",
"http 1.1.0",
"http-body 1.0.0",
"hyper 1.3.1",
"hyper 1.5.0",
"pin-project-lite",
"socket2 0.5.7",
"tokio",
"tower",
"tower-service",
"tracing",
]
@ -1753,7 +1753,7 @@ checksum = "b4f0c8427b39666bf970460908b213ec09b3b350f20c0c2eabcbba51704a08e6"
dependencies = [
"base64 0.22.1",
"http-body-util",
"hyper 1.3.1",
"hyper 1.5.0",
"hyper-util",
"indexmap",
"ipnet",
@ -2523,7 +2523,7 @@ dependencies = [
"http 1.1.0",
"http-body 1.0.0",
"http-body-util",
"hyper 1.3.1",
"hyper 1.5.0",
"hyper-rustls 0.27.3",
"hyper-util",
"ipnet",
@ -2963,6 +2963,15 @@ dependencies = [
"serde",
]
[[package]]
name = "serde_variant"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0a0068df419f9d9b6488fdded3f1c818522cdea328e02ce9d9f147380265a432"
dependencies = [
"serde",
]
[[package]]
name = "sha-1"
version = "0.10.1"
@ -3584,9 +3593,9 @@ dependencies = [
[[package]]
name = "tokio-stream"
version = "0.1.11"
version = "0.1.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d660770404473ccd7bc9f8b28494a811bc18542b915c0855c51e8f419d5223ce"
checksum = "4f4e6ce100d0eb49a2734f8c0812bcd324cf357d21810932c5df6b96ef2b86f1"
dependencies = [
"futures-core",
"pin-project-lite",
@ -3764,12 +3773,12 @@ dependencies = [
[[package]]
name = "tracing-log"
version = "0.1.3"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78ddad33d2d10b1ed7eb9d1f518a5674713876e97e5bb9b7345a7984fbb4f922"
checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3"
dependencies = [
"lazy_static",
"log",
"once_cell",
"tracing-core",
]
@ -3785,9 +3794,9 @@ dependencies = [
[[package]]
name = "tracing-subscriber"
version = "0.3.16"
version = "0.3.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a6176eae26dd70d0c919749377897b54a9276bd7061339665dd68777926b5a70"
checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b"
dependencies = [
"matchers",
"nu-ansi-term",
@ -3813,11 +3822,12 @@ checksum = "3528ecfd12c466c6f163363caf2d02a71161dd5e1cc6ae7b34207ea2d42d81ed"
[[package]]
name = "twilight-cache-inmemory"
version = "0.16.0-rc.1"
source = "git+https://github.com/pluralkit/twilight#41a71cc74441b54ed10e813e76ad040ce4c366e2"
source = "git+https://github.com/pluralkit/twilight#b346757b3054d461f5652b5ba0148a70eda5697e"
dependencies = [
"bitflags 2.5.0",
"dashmap",
"serde",
"tracing",
"twilight-model",
"twilight-util",
]
@ -3825,7 +3835,7 @@ dependencies = [
[[package]]
name = "twilight-gateway"
version = "0.16.0-rc.1"
source = "git+https://github.com/pluralkit/twilight#41a71cc74441b54ed10e813e76ad040ce4c366e2"
source = "git+https://github.com/pluralkit/twilight#b346757b3054d461f5652b5ba0148a70eda5697e"
dependencies = [
"bitflags 2.5.0",
"fastrand",
@ -3845,7 +3855,7 @@ dependencies = [
[[package]]
name = "twilight-gateway-queue"
version = "0.16.0-rc.1"
source = "git+https://github.com/pluralkit/twilight#41a71cc74441b54ed10e813e76ad040ce4c366e2"
source = "git+https://github.com/pluralkit/twilight#b346757b3054d461f5652b5ba0148a70eda5697e"
dependencies = [
"tokio",
"tracing",
@ -3854,12 +3864,12 @@ dependencies = [
[[package]]
name = "twilight-http"
version = "0.16.0-rc.1"
source = "git+https://github.com/pluralkit/twilight#41a71cc74441b54ed10e813e76ad040ce4c366e2"
source = "git+https://github.com/pluralkit/twilight#b346757b3054d461f5652b5ba0148a70eda5697e"
dependencies = [
"fastrand",
"http 1.1.0",
"http-body-util",
"hyper 1.3.1",
"hyper 1.5.0",
"hyper-rustls 0.26.0",
"hyper-util",
"percent-encoding",
@ -3875,7 +3885,7 @@ dependencies = [
[[package]]
name = "twilight-http-ratelimiting"
version = "0.16.0-rc.1"
source = "git+https://github.com/pluralkit/twilight#41a71cc74441b54ed10e813e76ad040ce4c366e2"
source = "git+https://github.com/pluralkit/twilight#b346757b3054d461f5652b5ba0148a70eda5697e"
dependencies = [
"tokio",
"tracing",
@ -3884,7 +3894,7 @@ dependencies = [
[[package]]
name = "twilight-model"
version = "0.16.0-rc.1"
source = "git+https://github.com/pluralkit/twilight#41a71cc74441b54ed10e813e76ad040ce4c366e2"
source = "git+https://github.com/pluralkit/twilight#b346757b3054d461f5652b5ba0148a70eda5697e"
dependencies = [
"bitflags 2.5.0",
"serde",
@ -3896,7 +3906,7 @@ dependencies = [
[[package]]
name = "twilight-util"
version = "0.16.0-rc.1"
source = "git+https://github.com/pluralkit/twilight#41a71cc74441b54ed10e813e76ad040ce4c366e2"
source = "git+https://github.com/pluralkit/twilight#b346757b3054d461f5652b5ba0148a70eda5697e"
dependencies = [
"twilight-model",
]
@ -3904,7 +3914,7 @@ dependencies = [
[[package]]
name = "twilight-validate"
version = "0.16.0-rc.1"
source = "git+https://github.com/pluralkit/twilight#41a71cc74441b54ed10e813e76ad040ce4c366e2"
source = "git+https://github.com/pluralkit/twilight#b346757b3054d461f5652b5ba0148a70eda5697e"
dependencies = [
"twilight-model",
]

View file

@ -133,11 +133,11 @@ async fn main() -> anyhow::Result<()> {
.route("/v2/members/:member_id/oembed.json", get(rproxy))
.route("/v2/groups/:group_id/oembed.json", get(rproxy))
.layer(axum::middleware::from_fn(middleware::logger))
.layer(middleware::ratelimit::ratelimiter(middleware::ratelimit::do_request_ratelimited)) // this sucks
.layer(axum::middleware::from_fn_with_state(ctx.clone(), middleware::authnz))
.layer(axum::middleware::from_fn(middleware::ignore_invalid_routes))
.layer(axum::middleware::from_fn(middleware::cors))
.layer(axum::middleware::from_fn(middleware::logger))
.layer(tower_http::catch_panic::CatchPanicLayer::custom(util::handle_panic))

View file

@ -8,6 +8,8 @@ use tracing::error;
use crate::ApiContext;
use super::logger::DID_AUTHENTICATE_HEADER;
pub async fn authnz(State(ctx): State<ApiContext>, mut request: Request, next: Next) -> Response {
let headers = request.headers_mut();
headers.remove("x-pluralkit-systemid");
@ -15,6 +17,7 @@ pub async fn authnz(State(ctx): State<ApiContext>, mut request: Request, next: N
.get("authorization")
.map(|h| h.to_str().ok())
.flatten();
let mut authenticated = false;
if let Some(auth_header) = auth_header {
if let Some(system_id) =
match libpk::db::repository::legacy_token_auth(&ctx.db, auth_header).await {
@ -29,7 +32,14 @@ pub async fn authnz(State(ctx): State<ApiContext>, mut request: Request, next: N
"x-pluralkit-systemid",
HeaderValue::from_str(format!("{system_id}").as_str()).unwrap(),
);
authenticated = true;
}
}
next.run(request).await
let mut response = next.run(request).await;
if authenticated {
response
.headers_mut()
.insert(DID_AUTHENTICATE_HEADER, HeaderValue::from_static("1"));
}
response
}

View file

@ -1,7 +1,7 @@
use std::time::Instant;
use axum::{extract::MatchedPath, extract::Request, middleware::Next, response::Response};
use metrics::histogram;
use metrics::{counter, histogram};
use tracing::{info, span, warn, Instrument, Level};
use crate::util::header_or_unknown;
@ -10,11 +10,12 @@ use crate::util::header_or_unknown;
// todo: change as necessary
const MIN_LOG_TIME: u128 = 2_000;
pub const DID_AUTHENTICATE_HEADER: &'static str = "x-pluralkit-didauthenticate";
pub async fn logger(request: Request, next: Next) -> Response {
let method = request.method().clone();
let request_id = header_or_unknown(request.headers().get("Fly-Request-Id"));
let remote_ip = header_or_unknown(request.headers().get("Fly-Client-IP"));
let remote_ip = header_or_unknown(request.headers().get("X-PluralKit-Client-IP"));
let user_agent = header_or_unknown(request.headers().get("User-Agent"));
let endpoint = request
@ -26,10 +27,9 @@ pub async fn logger(request: Request, next: Next) -> Response {
let uri = request.uri().clone();
let request_id_span = span!(
let request_span = span!(
Level::INFO,
"request",
request_id,
remote_ip,
method = method.as_str(),
endpoint = endpoint.clone(),
@ -37,9 +37,37 @@ pub async fn logger(request: Request, next: Next) -> Response {
);
let start = Instant::now();
let response = next.run(request).instrument(request_id_span).await;
let mut response = next.run(request).instrument(request_span).await;
let elapsed = start.elapsed().as_millis();
let authenticated = {
let headers = response.headers_mut();
println!("{:#?}", headers.keys());
if headers.contains_key(DID_AUTHENTICATE_HEADER) {
headers.remove(DID_AUTHENTICATE_HEADER);
true
} else {
false
}
};
counter!(
"pluralkit_api_requests",
"method" => method.to_string(),
"endpoint" => endpoint.clone(),
"status" => response.status().to_string(),
"authenticated" => authenticated.to_string(),
)
.increment(1);
histogram!(
"pluralkit_api_requests_bucket",
"method" => method.to_string(),
"endpoint" => endpoint.clone(),
"status" => response.status().to_string(),
"authenticated" => authenticated.to_string(),
)
.record(elapsed as f64 / 1_000_f64);
info!(
"{} handled request for {} {} in {}ms",
response.status(),
@ -47,15 +75,17 @@ pub async fn logger(request: Request, next: Next) -> Response {
endpoint,
elapsed
);
histogram!(
"pk_http_requests",
"method" => method.to_string(),
"route" => endpoint.clone(),
"status" => response.status().to_string()
)
.record((elapsed as f64) / 1_000_f64);
if elapsed > MIN_LOG_TIME {
counter!(
"pluralkit_api_slow_requests_count",
"method" => method.to_string(),
"endpoint" => endpoint.clone(),
"status" => response.status().to_string(),
"authenticated" => authenticated.to_string(),
)
.increment(1);
warn!(
"request to {} full path {} (endpoint {}) took a long time ({}ms)!",
method,

View file

@ -24,3 +24,5 @@ twilight-cache-inmemory = { workspace = true }
twilight-util = { workspace = true }
twilight-model = { workspace = true }
twilight-http = { workspace = true }
serde_variant = "0.1.3"

View file

@ -1,4 +1,5 @@
use libpk::_config::ClusterSettings;
use metrics::counter;
use std::sync::{mpsc::Sender, Arc};
use tracing::{info, warn};
use twilight_gateway::{
@ -85,6 +86,12 @@ pub async fn runner(
while let Some(item) = shard.next_event(EventTypeFlags::all()).await {
match item {
Ok(event) => {
counter!(
"pluralkit_gateway_events",
"shard_id" => shard.id().number().to_string(),
"event_type" => serde_variant::to_variant_name(&event.kind()).unwrap(),
)
.increment(1);
if let Err(error) = shard_state
.handle_event(shard.id().number(), event.clone())
.await

View file

@ -1,5 +1,6 @@
use bytes::Bytes;
use fred::{clients::RedisPool, interfaces::HashesInterface};
use metrics::{counter, gauge};
use prost::Message;
use tracing::info;
use twilight_gateway::Event;
@ -59,6 +60,13 @@ impl ShardStateManager {
shard_id,
if resumed { "resumed" } else { "ready" }
);
counter!(
"pluralkit_gateway_shard_reconnect",
"shard_id" => shard_id.to_string(),
"resumed" => resumed.to_string(),
)
.increment(1);
gauge!("pluralkit_gateway_shard_up").increment(1);
let mut info = self.get_shard(shard_id).await?;
info.last_connection = chrono::offset::Utc::now().timestamp() as i32;
info.up = true;
@ -68,6 +76,7 @@ impl ShardStateManager {
async fn socket_closed(&self, shard_id: u32) -> anyhow::Result<()> {
info!("shard {} closed", shard_id);
gauge!("pluralkit_gateway_shard_up").decrement(1);
let mut info = self.get_shard(shard_id).await?;
info.up = false;
info.disconnection_count += 1;

View file

@ -1,6 +1,9 @@
use std::time::Instant;
use axum::{extract::MatchedPath, extract::Request, middleware::Next, response::Response};
use axum::{
extract::MatchedPath, extract::Request, http::StatusCode, middleware::Next, response::Response,
};
use metrics::{counter, histogram};
use tracing::{info, span, warn, Instrument, Level};
// log any requests that take longer than 2 seconds
@ -30,13 +33,30 @@ pub async fn logger(request: Request, next: Next) -> Response {
let response = next.run(request).instrument(request_id_span).await;
let elapsed = start.elapsed().as_millis();
info!(
"{} handled request for {} {} in {}ms",
response.status(),
method,
uri.path(),
elapsed
);
counter!(
"pluralkit_gateway_cache_api_requests",
"method" => method.to_string(),
"endpoint" => endpoint.clone(),
"status" => response.status().to_string(),
)
.increment(1);
histogram!(
"pluralkit_gateway_cache_api_requests_bucket",
"method" => method.to_string(),
"endpoint" => endpoint.clone(),
"status" => response.status().to_string(),
)
.record(elapsed as f64 / 1_000_f64);
if response.status() != StatusCode::FOUND {
info!(
"{} handled request for {} {} in {}ms",
response.status(),
method,
uri.path(),
elapsed
);
}
if elapsed > MIN_LOG_TIME {
warn!(