chore: reorganize rust crates

alyssa 2025-01-02 00:50:36 +00:00
parent 357122a892
commit 16ce67e02c
58 changed files with 6 additions and 13 deletions

30
crates/avatars/Cargo.toml Normal file
View file

@@ -0,0 +1,30 @@
[package]
name = "avatars"
version = "0.1.0"
edition = "2021"
[[bin]]
name = "avatar_cleanup"
path = "src/cleanup.rs"
[dependencies]
libpk = { path = "../libpk" }
anyhow = { workspace = true }
axum = { workspace = true }
futures = { workspace = true }
reqwest = { workspace = true }
serde = { workspace = true }
sqlx = { workspace = true }
time = { workspace = true }
tokio = { workspace = true }
tracing = { workspace = true }
uuid = { workspace = true }
data-encoding = "2.5.0"
gif = "0.13.1"
image = { version = "0.24.8", default-features = false, features = ["gif", "jpeg", "png", "webp", "tiff"] }
form_urlencoded = "1.2.1"
rust-s3 = { version = "0.33.0", default-features = false, features = ["tokio-rustls-tls"] }
sha2 = "0.10.8"
thiserror = "1.0.56"
webp = "0.2.6"

146
crates/avatars/src/cleanup.rs Normal file
View file

@@ -0,0 +1,146 @@
use anyhow::Context;
use reqwest::{ClientBuilder, StatusCode};
use sqlx::prelude::FromRow;
use std::{sync::Arc, time::Duration};
use tracing::{error, info};
libpk::main!("avatar_cleanup");
async fn real_main() -> anyhow::Result<()> {
let config = libpk::config
.avatars
.as_ref()
.expect("missing avatar service config");
let bucket = {
let region = s3::Region::Custom {
region: "s3".to_string(),
endpoint: config.s3.endpoint.to_string(),
};
let credentials = s3::creds::Credentials::new(
Some(&config.s3.application_id),
Some(&config.s3.application_key),
None,
None,
None,
)
.unwrap();
let bucket = s3::Bucket::new(&config.s3.bucket, region, credentials)?;
Arc::new(bucket)
};
let pool = libpk::db::init_data_db().await?;
loop {
// sleep each iteration so a permanently-failing job can't spin this loop at full speed
tokio::time::sleep(tokio::time::Duration::from_secs(1)).await;
match cleanup_job(pool.clone(), bucket.clone()).await {
Ok(()) => {}
Err(err) => {
error!("failed to run avatar cleanup job: {}", err);
// sentry
}
}
}
}
#[derive(FromRow)]
struct CleanupJobEntry {
id: String,
}
async fn cleanup_job(pool: sqlx::PgPool, bucket: Arc<s3::Bucket>) -> anyhow::Result<()> {
let mut tx = pool.begin().await?;
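// `for update skip locked` lets multiple cleanup workers run concurrently:
// each claims a different row, and the row lock is held until the tx ends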
let image_id: Option<CleanupJobEntry> = sqlx::query_as(
r#"
select id from image_cleanup_jobs
where ts < now() - interval '1 day'
for update skip locked limit 1;"#,
)
.fetch_optional(&mut *tx)
.await?;
if image_id.is_none() {
info!("no job to run, sleeping for 1 minute");
tokio::time::sleep(tokio::time::Duration::from_secs(60)).await;
return Ok(());
}
let image_id = image_id.unwrap().id;
info!("got image {image_id}, cleaning up...");
let image_data = libpk::db::repository::avatars::get_by_id(&pool, image_id.clone()).await?;
if image_data.is_none() {
info!("image {image_id} was already deleted, skipping");
sqlx::query("delete from image_cleanup_jobs where id = $1")
.bind(image_id)
.execute(&mut *tx)
.await?;
return Ok(());
}
let image_data = image_data.unwrap();
let config = libpk::config
.avatars
.as_ref()
.expect("missing avatar service config");
let path = image_data
.url
.strip_prefix(config.cdn_url.as_str())
.unwrap();
let s3_resp = bucket.delete_object(path).await?;
match s3_resp.status_code() {
204 => {
info!("successfully deleted image {image_id} from s3");
}
_ => {
anyhow::bail!("s3 returned bad error code {}", s3_resp.status_code());
}
}
if let Some(zone_id) = config.cloudflare_zone_id.as_ref() {
let client = ClientBuilder::new()
.connect_timeout(Duration::from_secs(3))
.timeout(Duration::from_secs(3))
.build()
.context("error making client")?;
let cf_resp = client
.post(format!(
"https://api.cloudflare.com/client/v4/zones/{zone_id}/purge_cache"
))
.header(
"Authorization",
format!("Bearer {}", config.cloudflare_token.as_ref().unwrap()),
)
.body(format!(r#"{{"files":["{}"]}}"#, image_data.url))
.send()
.await?;
match cf_resp.status() {
StatusCode::OK => {
info!(
"successfully purged url {} from cloudflare cache",
image_data.url
);
}
_ => {
let status = cf_resp.status();
tracing::info!("raw response from cloudflare: {:#?}", cf_resp.text().await?);
anyhow::bail!("cloudflare returned bad error code {}", status);
}
}
}
sqlx::query("delete from images where id = $1")
.bind(image_id.clone())
.execute(&mut *tx)
.await?;
tx.commit().await?;
Ok(())
}

21
crates/avatars/src/hash.rs Normal file
View file

@@ -0,0 +1,21 @@
use std::fmt::Display;
use sha2::{Digest, Sha256};
#[derive(Debug)]
pub struct Hash([u8; 32]);
impl Hash {
pub fn sha256(data: &[u8]) -> Hash {
let mut hasher = Sha256::new();
hasher.update(data);
Hash(hasher.finalize().into())
}
}
impl Display for Hash {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let encoding = data_encoding::BASE32_NOPAD;
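// only the first 16 of the 32 digest bytes are encoded: 128 bits -> a 26-char
// lowercase base32 id, short enough for urls while still collision-resistant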
write!(f, "{}", encoding.encode(&self.0[..16]).to_lowercase())
}
}

View file

@@ -0,0 +1,26 @@
create table if not exists images
(
id text primary key,
url text not null,
original_url text,
original_file_size int,
original_type text,
original_attachment_id bigint,
file_size int not null,
width int not null,
height int not null,
kind text not null,
uploaded_at timestamptz not null,
uploaded_by_account bigint
);
create index if not exists images_original_url_idx on images (original_url);
create index if not exists images_original_attachment_id_idx on images (original_attachment_id);
create index if not exists images_uploaded_by_account_idx on images (uploaded_by_account);
create table if not exists image_queue (itemid serial primary key, url text not null, kind text not null);
alter table images add column if not exists uploaded_by_system uuid;
alter table images add column if not exists content_type text default 'image/webp';
create table image_cleanup_jobs(id text references images(id) on delete cascade, ts timestamptz not null default now());
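-- deleting a row from images cascades here, so a completed delete also clears
-- any still-pending cleanup job for that image; ts is what the cleanup worker's
-- one-day grace period filters on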

257
crates/avatars/src/main.rs Normal file
View file

@@ -0,0 +1,257 @@
mod hash;
mod migrate;
mod process;
mod pull;
mod store;
use anyhow::Context;
use axum::extract::State;
use axum::routing::get;
use axum::{
http::StatusCode,
response::{IntoResponse, Response},
routing::post,
Json, Router,
};
use libpk::_config::AvatarsConfig;
use libpk::db::repository::avatars as db;
use libpk::db::types::avatars::*;
use reqwest::{Client, ClientBuilder};
use serde::{Deserialize, Serialize};
use sqlx::PgPool;
use std::error::Error;
use std::sync::Arc;
use std::time::Duration;
use thiserror::Error;
use tracing::{error, info};
use uuid::Uuid;
#[derive(Error, Debug)]
pub enum PKAvatarError {
// todo: split off into logical groups (cdn/url error, image format error, etc)
#[error("invalid cdn url")]
InvalidCdnUrl,
#[error("discord cdn responded with status code: {0}")]
BadCdnResponse(reqwest::StatusCode),
#[error("network error: {0}")]
NetworkError(reqwest::Error),
#[error("response is missing header: {0}")]
MissingHeader(&'static str),
#[error("unsupported content type: {0}")]
UnsupportedContentType(String),
#[error("image file size too large ({0} > {1})")]
ImageFileSizeTooLarge(u64, u64),
#[error("unsupported image format: {0:?}")]
UnsupportedImageFormat(image::ImageFormat),
#[error("could not detect image format")]
UnknownImageFormat,
#[error("original image dimensions too large: {0:?} > {1:?}")]
ImageDimensionsTooLarge((u32, u32), (u32, u32)),
#[error("could not decode image, is it corrupted?")]
ImageFormatError(#[from] image::ImageError),
#[error("unknown error")]
InternalError(#[from] anyhow::Error),
}
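// Request body for POST /pull, e.g.:
//   {"url": "https://cdn.discordapp.com/attachments/123/456/avatar.png", "kind": "avatar", "force": false}
// (valid "kind" values and their casing come from ImageKind's serde config in libpk)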
#[derive(Deserialize, Debug)]
pub struct PullRequest {
url: String,
kind: ImageKind,
uploaded_by: Option<u64>, // should be String? serde makes this hard :/
system_id: Option<Uuid>,
#[serde(default)]
force: bool,
}
#[derive(Serialize)]
pub struct PullResponse {
url: String,
new: bool,
}
async fn pull(
State(state): State<AppState>,
Json(req): Json<PullRequest>,
) -> Result<Json<PullResponse>, PKAvatarError> {
let parsed = pull::parse_url(&req.url) // parsing beforehand to "normalize"
.map_err(|_| PKAvatarError::InvalidCdnUrl)?;
if !req.force {
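// dedup fast path: if this attachment was already pulled and stored, reuse
// the existing image instead of re-downloading and re-encoding it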
if let Some(existing) = db::get_by_attachment_id(&state.pool, parsed.attachment_id).await? {
// remove any pending image cleanup
db::remove_deletion_queue(&state.pool, parsed.attachment_id).await?;
return Ok(Json(PullResponse {
url: existing.url,
new: false,
}));
}
}
let result = crate::pull::pull(state.pull_client, &parsed).await?;
let original_file_size = result.data.len();
let encoded = process::process_async(result.data, req.kind).await?;
let store_res = crate::store::store(&state.bucket, &encoded).await?;
let final_url = format!("{}{}", state.config.cdn_url, store_res.path);
let is_new = db::add_image(
&state.pool,
ImageMeta {
id: store_res.id,
url: final_url.clone(),
content_type: encoded.format.mime_type().to_string(),
original_url: Some(parsed.full_url),
original_type: Some(result.content_type),
original_file_size: Some(original_file_size as i32),
original_attachment_id: Some(parsed.attachment_id as i64),
file_size: encoded.data.len() as i32,
width: encoded.width as i32,
height: encoded.height as i32,
kind: req.kind,
uploaded_at: None,
uploaded_by_account: req.uploaded_by.map(|x| x as i64),
uploaded_by_system: req.system_id,
},
)
.await?;
Ok(Json(PullResponse {
url: final_url,
new: is_new,
}))
}
pub async fn stats(State(state): State<AppState>) -> Result<Json<Stats>, PKAvatarError> {
Ok(Json(db::get_stats(&state.pool).await?))
}
#[derive(Clone)]
pub struct AppState {
bucket: Arc<s3::Bucket>,
pull_client: Arc<Client>,
pool: PgPool,
config: Arc<AvatarsConfig>,
}
libpk::main!("avatars");
async fn real_main() -> anyhow::Result<()> {
let config = libpk::config
.avatars
.as_ref()
.expect("missing avatar service config");
let bucket = {
let region = s3::Region::Custom {
region: "s3".to_string(),
endpoint: config.s3.endpoint.to_string(),
};
let credentials = s3::creds::Credentials::new(
Some(&config.s3.application_id),
Some(&config.s3.application_key),
None,
None,
None,
)
.unwrap();
let bucket = s3::Bucket::new(&config.s3.bucket, region, credentials)?;
Arc::new(bucket)
};
let pull_client = Arc::new(
ClientBuilder::new()
.connect_timeout(Duration::from_secs(3))
.timeout(Duration::from_secs(3))
.user_agent("PluralKit-Avatars/0.1")
.build()
.context("error making client")?,
);
let pool = libpk::db::init_data_db().await?;
let state = AppState {
bucket,
pull_client,
pool,
config: Arc::new(config.clone()),
};
// migrations are done, disable this
// migrate::spawn_migrate_workers(Arc::new(state.clone()), state.config.migrate_worker_count);
let app = Router::new()
.route("/pull", post(pull))
.route("/stats", get(stats))
.with_state(state);
let host = &config.bind_addr;
info!("starting server on {}!", host);
let listener = tokio::net::TcpListener::bind(host).await.unwrap();
axum::serve(listener, app).await.unwrap();
Ok(())
}
struct AppError(anyhow::Error);
#[derive(Serialize)]
struct ErrorResponse {
error: String,
}
impl IntoResponse for AppError {
fn into_response(self) -> Response {
error!("error handling request: {}", self.0);
(
StatusCode::INTERNAL_SERVER_ERROR,
Json(ErrorResponse {
error: self.0.to_string(),
}),
)
.into_response()
}
}
impl IntoResponse for PKAvatarError {
fn into_response(self) -> Response {
let status_code = match self {
PKAvatarError::InternalError(_) | PKAvatarError::NetworkError(_) => {
StatusCode::INTERNAL_SERVER_ERROR
}
_ => StatusCode::BAD_REQUEST,
};
// print inner error if otherwise hidden
error!("error: {}", self.source().unwrap_or(&self));
(
status_code,
Json(ErrorResponse {
error: self.to_string(),
}),
)
.into_response()
}
}
impl<E> From<E> for AppError
where
E: Into<anyhow::Error>,
{
fn from(err: E) -> Self {
Self(err.into())
}
}

146
crates/avatars/src/migrate.rs Normal file
View file

@@ -0,0 +1,146 @@
use crate::pull::parse_url;
use crate::{db, process, AppState, PKAvatarError};
use libpk::db::types::avatars::{ImageMeta, ImageQueueEntry};
use reqwest::StatusCode;
use std::error::Error;
use std::sync::Arc;
use std::time::Duration;
use time::Instant;
use tokio::sync::Semaphore;
use tracing::{error, info, instrument, warn};
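// cap concurrent image encodes across all migrate workers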
static PROCESS_SEMAPHORE: Semaphore = Semaphore::const_new(100);
pub async fn handle_item_inner(
state: &AppState,
item: &ImageQueueEntry,
) -> Result<(), PKAvatarError> {
let parsed = parse_url(&item.url).map_err(|_| PKAvatarError::InvalidCdnUrl)?;
if db::get_by_attachment_id(&state.pool, parsed.attachment_id)
.await?
.is_some()
{
info!(
"attachment {} already migrated, skipping",
parsed.attachment_id
);
return Ok(());
}
let pulled = crate::pull::pull(state.pull_client.clone(), &parsed).await?;
let data_len = pulled.data.len();
let encoded = {
// Trying to reduce CPU load/potentially blocking the worker by adding a bottleneck on parallel encodes
// no semaphore on the main api though, that one should ideally be low latency
// todo: configurable?
let time_before_semaphore = Instant::now();
let permit = PROCESS_SEMAPHORE
.acquire()
.await
.map_err(|e| PKAvatarError::InternalError(e.into()))?;
let time_after_semaphore = Instant::now();
let semaphore_time = time_after_semaphore - time_before_semaphore;
if semaphore_time.whole_milliseconds() > 100 {
warn!(
"waited more than {} ms for process semaphore",
semaphore_time.whole_milliseconds()
);
}
let encoded = process::process_async(pulled.data, item.kind).await?;
drop(permit);
encoded
};
let store_res = crate::store::store(&state.bucket, &encoded).await?;
let final_url = format!("{}{}", state.config.cdn_url, store_res.path);
db::add_image(
&state.pool,
ImageMeta {
id: store_res.id,
url: final_url.clone(),
content_type: encoded.format.mime_type().to_string(),
original_url: Some(parsed.full_url),
original_type: Some(pulled.content_type),
original_file_size: Some(data_len as i32),
original_attachment_id: Some(parsed.attachment_id as i64),
file_size: encoded.data.len() as i32,
width: encoded.width as i32,
height: encoded.height as i32,
kind: item.kind,
uploaded_at: None,
uploaded_by_account: None,
uploaded_by_system: None,
},
)
.await?;
info!(
"migrated {} ({}k -> {}k)",
final_url,
data_len / 1024,
encoded.data.len() / 1024
);
Ok(())
}
pub async fn handle_item(state: &AppState) -> Result<(), PKAvatarError> {
// let queue_length = db::get_queue_length(&state.pool).await?;
// info!("migrate queue length: {}", queue_length);
if let Some((mut tx, item)) = db::pop_queue(&state.pool).await? {
match handle_item_inner(state, &item).await {
Ok(_) => {
tx.commit().await.map_err(Into::<anyhow::Error>::into)?;
Ok(())
}
Err(
// Errors that mean the image can't be migrated and doesn't need to be retried
e @ (PKAvatarError::ImageDimensionsTooLarge(_, _)
| PKAvatarError::UnknownImageFormat
| PKAvatarError::UnsupportedImageFormat(_)
| PKAvatarError::UnsupportedContentType(_)
| PKAvatarError::ImageFileSizeTooLarge(_, _)
| PKAvatarError::InvalidCdnUrl
| PKAvatarError::BadCdnResponse(StatusCode::NOT_FOUND | StatusCode::FORBIDDEN)),
) => {
warn!("error migrating {}, skipping: {}", item.url, e);
tx.commit().await.map_err(Into::<anyhow::Error>::into)?;
Ok(())
}
Err(e @ PKAvatarError::ImageFormatError(_)) => {
// will add this item back to the end of the queue
db::push_queue(&mut *tx, &item.url, item.kind).await?;
tx.commit().await.map_err(Into::<anyhow::Error>::into)?;
Err(e)
}
Err(e) => Err(e),
}
} else {
tokio::time::sleep(Duration::from_secs(5)).await;
Ok(())
}
}
#[instrument(skip(state))]
pub async fn worker(worker_id: u32, state: Arc<AppState>) {
info!("spawned migrate worker with id {}", worker_id);
loop {
match handle_item(&state).await {
Ok(()) => {}
Err(e) => {
error!(
"error in migrate worker {}: {}",
worker_id,
e.source().unwrap_or(&e)
);
tokio::time::sleep(Duration::from_secs(5)).await;
}
}
}
}
pub fn spawn_migrate_workers(state: Arc<AppState>, count: u32) {
for i in 0..count {
tokio::spawn(worker(i, state.clone()));
}
}

257
crates/avatars/src/process.rs Normal file
View file

@@ -0,0 +1,257 @@
use image::{DynamicImage, ImageFormat};
use std::borrow::Cow;
use std::io::Cursor;
use time::Instant;
use tracing::{debug, error, info, instrument};
use crate::{hash::Hash, ImageKind, PKAvatarError};
const MAX_DIMENSION: u32 = 4000;
pub struct ProcessOutput {
pub width: u32,
pub height: u32,
pub hash: Hash,
pub format: ProcessedFormat,
pub data: Vec<u8>,
}
#[derive(Copy, Clone, Debug)]
pub enum ProcessedFormat {
Webp,
Gif,
}
impl ProcessedFormat {
pub fn mime_type(&self) -> &'static str {
match self {
ProcessedFormat::Gif => "image/gif",
ProcessedFormat::Webp => "image/webp",
}
}
pub fn extension(&self) -> &'static str {
match self {
ProcessedFormat::Webp => "webp",
ProcessedFormat::Gif => "gif",
}
}
}
// Moving Vec<u8> in here since the thread needs ownership of it now, it's fine, don't need it after
pub async fn process_async(data: Vec<u8>, kind: ImageKind) -> Result<ProcessOutput, PKAvatarError> {
tokio::task::spawn_blocking(move || process(&data, kind))
.await
.map_err(|je| PKAvatarError::InternalError(je.into()))?
}
#[instrument(skip_all)]
pub fn process(data: &[u8], kind: ImageKind) -> Result<ProcessOutput, PKAvatarError> {
let time_before = Instant::now();
let reader = reader_for(data);
match reader.format() {
Some(ImageFormat::Png | ImageFormat::WebP | ImageFormat::Jpeg | ImageFormat::Tiff) => {} // ok :)
Some(ImageFormat::Gif) => {
// animated gifs will need to be handled totally differently
// so split off processing here and come back if it's not applicable
// (non-banner gifs + 1-frame animated gifs still need to be webp'd)
if let Some(output) = process_gif(data, kind)? {
return Ok(output);
}
}
Some(other) => return Err(PKAvatarError::UnsupportedImageFormat(other)),
None => return Err(PKAvatarError::UnknownImageFormat),
}
// want to check dimensions *before* decoding so we don't accidentally end up with a memory bomb
// eg. a 16000x16000 png file is only 31kb and expands to almost a gig of memory
let (width, height) = assert_dimensions(reader.into_dimensions()?)?;
// need to make a new reader??? why can't it just use the same one. reduce duplication?
let reader = reader_for(data);
let time_after_parse = Instant::now();
// apparently `image` sometimes decodes webp images wrong/weird.
// see: https://discord.com/channels/466707357099884544/667795132971614229/1209925940835262464
// instead, for webp, we use libwebp itself to decode, as well.
// (pls no cve)
let image = if reader.format() == Some(ImageFormat::WebP) {
let webp_image = webp::Decoder::new(data).decode().ok_or_else(|| {
PKAvatarError::InternalError(anyhow::anyhow!("webp decode failed").into())
})?;
webp_image.to_image()
} else {
reader.decode().map_err(|e| {
// print the ugly error, return the nice error
error!("error decoding image: {}", e);
PKAvatarError::ImageFormatError(e)
})?
};
let time_after_decode = Instant::now();
let image = resize(image, kind);
let time_after_resize = Instant::now();
let encoded = encode(image);
let time_after = Instant::now();
info!(
"{}: lossy size {}K (parse: {} ms, decode: {} ms, resize: {} ms, encode: {} ms)",
encoded.hash,
encoded.data.len() / 1024,
(time_after_parse - time_before).whole_milliseconds(),
(time_after_decode - time_after_parse).whole_milliseconds(),
(time_after_resize - time_after_decode).whole_milliseconds(),
(time_after - time_after_resize).whole_milliseconds(),
);
debug!(
"processed image {}: {} bytes, {}x{} -> {} bytes, {}x{}",
encoded.hash,
data.len(),
width,
height,
encoded.data.len(),
encoded.width,
encoded.height
);
Ok(encoded)
}
fn assert_dimensions((width, height): (u32, u32)) -> Result<(u32, u32), PKAvatarError> {
if width > MAX_DIMENSION || height > MAX_DIMENSION {
return Err(PKAvatarError::ImageDimensionsTooLarge(
(width, height),
(MAX_DIMENSION, MAX_DIMENSION),
));
}
Ok((width, height))
}
fn process_gif(input_data: &[u8], kind: ImageKind) -> Result<Option<ProcessOutput>, PKAvatarError> {
// gifs only supported for banners, and we can't rescale gifs (i tried :/),
// so the max size below is the real limit
if kind != ImageKind::Banner {
return Ok(None);
}
let reader = gif::Decoder::new(Cursor::new(input_data)).map_err(Into::<anyhow::Error>::into)?;
let (max_width, max_height) = kind.size();
if reader.width() as u32 > max_width || reader.height() as u32 > max_height {
return Err(PKAvatarError::ImageDimensionsTooLarge(
(reader.width() as u32, reader.height() as u32),
(max_width, max_height),
));
}
Ok(process_gif_inner(reader).map_err(Into::<anyhow::Error>::into)?)
}
fn process_gif_inner(
mut reader: gif::Decoder<Cursor<&[u8]>>,
) -> Result<Option<ProcessOutput>, anyhow::Error> {
let time_before = Instant::now();
let (width, height) = (reader.width(), reader.height());
let mut writer = gif::Encoder::new(
Vec::new(),
width as u16,
height as u16,
reader.global_palette().unwrap_or(&[]),
)?;
writer.set_repeat(reader.repeat())?;
let mut frame_buf = Vec::new();
let mut frame_count = 0;
while let Some(frame) = reader.next_frame_info()? {
let mut frame = frame.clone();
assert_dimensions((frame.width as u32, frame.height as u32))?;
frame_buf.clear();
frame_buf.resize(reader.buffer_size(), 0);
reader.read_into_buffer(&mut frame_buf)?;
frame.buffer = Cow::Borrowed(&frame_buf);
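// pre-compress the frame to lzw on our side so the encoder can copy the
// compressed bytes straight through instead of re-encoding each frame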
frame.make_lzw_pre_encoded();
writer.write_lzw_pre_encoded_frame(&frame)?;
frame_count += 1;
}
if frame_count == 1 {
// If there's only one frame, then this doesn't need to be a gif. webp it
// (unfortunately we can't tell if there's only one frame until after the first frame's been decoded...)
return Ok(None);
}
let data = writer.into_inner()?;
let time_after = Instant::now();
let hash = Hash::sha256(&data);
let original_data = reader.into_inner();
info!(
"processed gif {}: {}K -> {}K ({} ms, frames: {})",
hash,
original_data.buffer().len() / 1024,
data.len() / 1024,
(time_after - time_before).whole_milliseconds(),
frame_count
);
Ok(Some(ProcessOutput {
data,
format: ProcessedFormat::Gif,
hash,
width: width as u32,
height: height as u32,
}))
}
fn reader_for(data: &[u8]) -> image::io::Reader<Cursor<&[u8]>> {
image::io::Reader::new(Cursor::new(data))
.with_guessed_format()
.expect("cursor i/o is infallible")
}
#[instrument(skip_all)]
fn resize(image: DynamicImage, kind: ImageKind) -> DynamicImage {
let (target_width, target_height) = kind.size();
if image.width() <= target_width && image.height() <= target_height {
// don't resize if already smaller
return image;
}
// todo: best filter?
image.resize(
target_width,
target_height,
image::imageops::FilterType::Lanczos3,
)
}
#[instrument(skip_all)]
// can't believe this is infallible
fn encode(image: DynamicImage) -> ProcessOutput {
let (width, height) = (image.width(), image.height());
let image_buf = image.to_rgba8();
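// lossy webp at quality 90 (`false` = not lossless, per the webp crate's
// encode_simple(lossless, quality) signature)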
let encoded_lossy = webp::Encoder::new(&*image_buf, webp::PixelLayout::Rgba, width, height)
.encode_simple(false, 90.0)
.expect("encode should be infallible")
.to_vec();
let hash = Hash::sha256(&encoded_lossy);
ProcessOutput {
data: encoded_lossy,
format: ProcessedFormat::Webp,
hash,
width,
height,
}
}

166
crates/avatars/src/pull.rs Normal file
View file

@@ -0,0 +1,166 @@
use std::time::Duration;
use std::{str::FromStr, sync::Arc};
use crate::PKAvatarError;
use anyhow::Context;
use reqwest::{Client, ClientBuilder, StatusCode, Url};
use time::Instant;
use tracing::{error, instrument};
const MAX_SIZE: u64 = 8 * 1024 * 1024;
pub struct PullResult {
pub data: Vec<u8>,
pub content_type: String,
pub last_modified: Option<String>,
}
#[instrument(skip_all)]
pub async fn pull(
client: Arc<Client>,
parsed_url: &ParsedUrl,
) -> Result<PullResult, PKAvatarError> {
let time_before = Instant::now();
let mut trimmed_url = trim_url_query(&parsed_url.full_url)?;
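// media.discordapp.net is discord's resizing/transform proxy; swap to the
// cdn host so we always fetch the original upload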
if trimmed_url.host_str() == Some("media.discordapp.net") {
trimmed_url
.set_host(Some("cdn.discordapp.com"))
.expect("set_host should not fail");
}
let response = client.get(trimmed_url.clone()).send().await.map_err(|e| {
error!("network error for {}: {}", parsed_url.full_url, e);
PKAvatarError::NetworkError(e)
})?;
let time_after_headers = Instant::now();
let status = response.status();
if status != StatusCode::OK {
return Err(PKAvatarError::BadCdnResponse(status));
}
let size = match response.content_length() {
None => return Err(PKAvatarError::MissingHeader("Content-Length")),
Some(size) if size > MAX_SIZE => {
return Err(PKAvatarError::ImageFileSizeTooLarge(size, MAX_SIZE))
}
Some(size) => size,
};
let content_type = response
.headers()
.get(reqwest::header::CONTENT_TYPE)
.and_then(|x| x.to_str().ok()) // treat an invalid (non-unicode) header as missing
.map(|mime| mime.split(';').next().unwrap_or("")) // cut off at ;
.ok_or(PKAvatarError::MissingHeader("Content-Type"))?
.to_owned();
let mime = match content_type.as_str() {
mime @ ("image/jpeg" | "image/png" | "image/gif" | "image/webp" | "image/tiff") => mime,
_ => return Err(PKAvatarError::UnsupportedContentType(content_type)),
};
let last_modified = response
.headers()
.get(reqwest::header::LAST_MODIFIED)
.and_then(|x| x.to_str().ok())
.map(|x| x.to_string());
let body = response.bytes().await.map_err(|e| {
error!("network error for {}: {}", parsed_url.full_url, e);
PKAvatarError::NetworkError(e)
})?;
if body.len() != size as usize {
// ???does this ever happen?
return Err(PKAvatarError::InternalError(anyhow::anyhow!(
"server responded with wrong length"
)));
}
let time_after_body = Instant::now();
let headers_time = time_after_headers - time_before;
let body_time = time_after_body - time_after_headers;
// can't do dynamic log level lmao
if status != StatusCode::OK {
tracing::warn!(
"{}: {} (headers: {}ms, body: {}ms)",
status,
&trimmed_url,
headers_time.whole_milliseconds(),
body_time.whole_milliseconds()
);
} else {
tracing::info!(
"{}: {} (headers: {}ms, body: {}ms)",
status,
&trimmed_url,
headers_time.whole_milliseconds(),
body_time.whole_milliseconds()
);
};
Ok(PullResult {
data: body.to_vec(),
content_type: mime.to_string(),
last_modified,
})
}
#[derive(Debug)]
pub struct ParsedUrl {
pub channel_id: u64,
pub attachment_id: u64,
pub filename: String,
pub full_url: String,
}
pub fn parse_url(url: &str) -> anyhow::Result<ParsedUrl> {
// todo: should this return PKAvatarError::InvalidCdnUrl?
let url = Url::from_str(url).context("invalid url")?;
match (url.scheme(), url.domain()) {
("https", Some("media.discordapp.net" | "cdn.discordapp.com")) => {}
_ => anyhow::bail!("not a discord cdn url"),
}
match url
.path_segments()
.map(|x| x.collect::<Vec<_>>())
.as_deref()
{
Some([_, channel_id, attachment_id, filename]) => {
let channel_id = u64::from_str(channel_id).context("invalid channel id")?;
let attachment_id = u64::from_str(attachment_id).context("invalid attachment id")?;
Ok(ParsedUrl {
channel_id,
attachment_id,
filename: filename.to_string(),
full_url: url.to_string(),
})
}
_ => anyhow::bail!("invaild discord cdn url"),
}
}
fn trim_url_query(url: &str) -> anyhow::Result<Url> {
let mut parsed = Url::parse(url)?;
let mut qs = form_urlencoded::Serializer::new(String::new());
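// keep only discord's url-signing params (ex = expiry, is = issued-at,
// hm = signature); everything else (size/format hints) is dropped so
// equivalent urls normalize to the same string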
for (key, value) in parsed.query_pairs() {
match key.as_ref() {
"ex" | "is" | "hm" => {
qs.append_pair(key.as_ref(), value.as_ref());
}
_ => {}
}
}
let new_query = qs.finish();
parsed.set_query(if !new_query.is_empty() {
Some(&new_query)
} else {
None
});
Ok(parsed)
}

60
crates/avatars/src/store.rs Normal file
View file

@@ -0,0 +1,60 @@
use crate::process::ProcessOutput;
use tracing::error;
pub struct StoreResult {
pub id: String,
pub path: String,
}
pub async fn store(bucket: &s3::Bucket, res: &ProcessOutput) -> anyhow::Result<StoreResult> {
// errors here are all going to be internal
let encoded_hash = res.hash.to_string();
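// content-addressed layout: shard by the first two hash chars so no single
// key prefix accumulates every object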
let path = format!(
"images/{}/{}.{}",
&encoded_hash[..2],
&encoded_hash[2..],
res.format.extension()
);
// todo: something better than these retries
let mut retry_count = 0;
loop {
if retry_count == 2 {
tokio::time::sleep(tokio::time::Duration::new(2, 0)).await;
}
if retry_count > 2 {
anyhow::bail!("error uploading image to cdn, too many retries") // nicer user-facing error?
}
retry_count += 1;
let resp = bucket
.put_object_with_content_type(&path, &res.data, res.format.mime_type())
.await?;
match resp.status_code() {
200 => {
tracing::debug!("uploaded image to {}", &path);
return Ok(StoreResult {
id: encoded_hash,
path,
});
}
500 | 503 => {
tracing::warn!(
"got 503 uploading image to {} ({}), retrying... (try {}/3)",
&path,
resp.as_str()?,
retry_count
);
continue;
}
_ => {
error!(
"storage backend responded status code {}",
resp.status_code()
);
anyhow::bail!("error uploading image to cdn") // nicer user-facing error?
}
}
}
}