PluralKit/crates/command_parser/src/string.rs
2025-10-15 20:39:50 +00:00

178 lines
6.4 KiB
Rust

use std::collections::HashMap;
use smol_str::{SmolStr, ToSmolStr};
lazy_static::lazy_static! {
    // Lookup table from opening quote(s) to the set of accepted closing quotes.
    //
    // Every string is a *set* of characters: a multi-char string means "any one of these".
    // Each entry is registered twice: once under the full opener string, and once per
    // individual opener character so a single leading char can be looked up directly.
    // Opening and closing sets may differ, since some locales use distinct characters.
    pub static ref QUOTE_PAIRS: HashMap<SmolStr, SmolStr> = {
        // (openers, closers)
        const TABLE: &[(&str, &str)] = &[
            // Basic
            ("'", "'"),   // ASCII single quotes
            ("\"", "\""), // ASCII double quotes
            // "Smart quotes"
            // The left/right status is deliberately ignored: any combination matches.
            // The opener sets also contain the "low" quotes, for the low-high style
            // used in some locales.
            ("\u{201C}\u{201D}\u{201F}\u{201E}", "\u{201C}\u{201D}\u{201F}"), // double quotes
            ("\u{2018}\u{2019}\u{201B}\u{201A}", "\u{2018}\u{2019}\u{201B}"), // single quotes
            // Chevrons (normal and "fullwidth" variants)
            ("\u{00AB}\u{300A}", "\u{00BB}\u{300B}"), // double chevrons, pointing away (<<text>>)
            ("\u{00BB}\u{300B}", "\u{00AB}\u{300A}"), // double chevrons, pointing together (>>text<<)
            ("\u{2039}\u{3008}", "\u{203A}\u{3009}"), // single chevrons, pointing away (<text>)
            ("\u{203A}\u{3009}", "\u{2039}\u{3008}"), // single chevrons, pointing together (>text<)
            // Other
            ("\u{300C}\u{300E}", "\u{300D}\u{300F}"), // corner brackets (Japanese/Chinese)
        ];

        let mut table: HashMap<SmolStr, SmolStr> = HashMap::new();
        for &(openers, closers) in TABLE {
            let closers = SmolStr::new_static(closers);
            table.insert(SmolStr::new_static(openers), closers.clone());
            // per-character keys make it easy to look up the closers from one opening char
            for opener in openers.chars() {
                table.insert(opener.to_smolstr(), closers.clone());
            }
        }
        table
    };
}
// very very simple quote matching
// expects match_str to be trimmed (no whitespace, from the start at least)
// returns the position of an end quote if any is found
// quotes need to be at start/end of words, and are ignored if a closing quote is not present
// WTB POSIX quoting: https://pubs.opengroup.org/onlinepubs/9799919799/utilities/V3_chap02.html
fn find_quotes(match_str: &str) -> Option<usize> {
if let Some(right) = QUOTE_PAIRS.get(&match_str[0..match_str.ceil_char_boundary(1)]) {
// try matching end quote
for possible_quote in right.chars() {
for (pos, _) in match_str.match_indices(possible_quote) {
if match_str.len() == pos + 1
|| match_str.chars().nth(pos + 1).unwrap().is_whitespace()
{
return Some(pos);
}
}
}
}
None
}
/// Result of a successful `next_param` call: one parameter sliced out of the input.
#[derive(Debug)]
pub(super) struct MatchedParam<'a> {
/// The parameter text, borrowed from the input; surrounding quotes are not included.
pub(super) value: &'a str,
/// Offset into the input at which the next match attempt should start.
pub(super) next_pos: usize,
/// `true` when the parameter was matched as a quoted string, `false` for a bare word.
#[allow(dead_code)] // this'll prolly be useful sometime later
pub(super) in_quotes: bool,
}
/// Extracts the next parameter from `input`, starting at byte offset `current_pos`.
///
/// Whitespace at `current_pos` is skipped first. The parameter is then either
/// * a quoted string (see `find_quotes`), returned without its quotes, or
/// * the run of characters up to the next whitespace character (or the end of the input).
///
/// Returns `None` when nothing but whitespace is left, or when `current_pos` is not a valid
/// position inside `input` (past the end, or in the middle of a character).
///
/// `next_pos` of the result is the byte offset at which the following match should start:
/// just past the closing quote for quoted parameters, just past the delimiting whitespace
/// character for bare words.
pub(super) fn next_param<'a>(input: &'a str, current_pos: usize) -> Option<MatchedParam<'a>> {
    // `get` instead of indexing: an invalid position yields `None` rather than a panic
    let remaining = input.get(current_pos..)?;
    // the whitespace to skip is the one in front of the *remaining* text, not in the
    // already-consumed prefix
    let substr_to_match = remaining.trim_start();
    if substr_to_match.is_empty() {
        return None;
    }
    // absolute offset of `substr_to_match` inside `input`
    let start = current_pos + (remaining.len() - substr_to_match.len());

    if let Some(end_quote_pos) = find_quotes(substr_to_match) {
        // quotes may be multi-byte (smart quotes, chevrons, ...), so measure them
        let open_len = substr_to_match.chars().next().map_or(0, char::len_utf8);
        let close_len = substr_to_match
            .get(end_quote_pos..)
            .and_then(|rest| rest.chars().next())
            .map_or(0, char::len_utf8);
        // return quoted string, without quotes; if the reported range is not usable
        // (e.g. the "closing" quote is the opening one) fall back to bare-word matching
        if let Some(value) = substr_to_match.get(open_len..end_quote_pos) {
            return Some(MatchedParam {
                value,
                next_pos: start + end_quote_pos + close_len,
                in_quotes: true,
            });
        }
    }

    // bare word: ends at the next whitespace character, or at EOF if there is none
    let delimiter = substr_to_match
        .char_indices()
        .find(|&(_, ch)| ch.is_whitespace());
    let (value, next_pos) = match delimiter {
        // consume the delimiter itself, whatever its encoded width
        Some((pos, ch)) => (&substr_to_match[..pos], start + pos + ch.len_utf8()),
        None => (substr_to_match, start + substr_to_match.len()),
    };
    Some(MatchedParam {
        value,
        next_pos,
        in_quotes: false,
    })
}
/// Result of a successful `next_flag` call: one `-name` or `-name=value` flag.
#[derive(Debug, Clone)]
pub(super) struct MatchedFlag<'a> {
/// The flag name, borrowed from the input, without the leading `-`.
pub(super) name: &'a str,
/// The text after `=`, if the flag has one; `None` when the flag has no `=`.
/// A flag ending in `=` at the end of the input carries `Some("")`.
pub(super) value: Option<&'a str>,
/// Offset into the input at which the next match attempt should start.
pub(super) next_pos: usize,
}
/// Tries to parse a flag (`-name` or `-name=value`) from `input` at byte offset `current_pos`.
///
/// Whitespace at `current_pos` is skipped first. Returns `None` when
/// * nothing but whitespace is left, or `current_pos` is not a valid position in `input`,
/// * the next parameter is quoted (quoted text is never treated as a flag), or
/// * the next parameter does not start with `-`.
///
/// The flag name runs up to the first `=` or whitespace character. With `=`, the value is
/// whatever `next_param` matches right after it (so it may be quoted); if nothing follows the
/// `=` directly, the value is `Some("")`. Without `=`, the value is `None`.
pub(super) fn next_flag<'a>(input: &'a str, mut current_pos: usize) -> Option<MatchedFlag<'a>> {
    // `get` instead of indexing: an invalid position yields `None` rather than a panic
    let remaining = input.get(current_pos..)?;
    // the whitespace to skip is the one in front of the *remaining* text, not in the
    // already-consumed prefix
    let substr_to_match = remaining.trim_start();
    if substr_to_match.is_empty() {
        return None;
    }
    current_pos += remaining.len() - substr_to_match.len();

    // if the param is quoted, it should not be processed as a flag
    if find_quotes(substr_to_match).is_some() {
        return None;
    }

    // strip the -; if it doesn't have one, then it is not a flag
    let substr_to_match = substr_to_match.strip_prefix('-')?;
    current_pos += 1;

    // the name ends at the first `=` or whitespace, whichever comes first
    let Some((pos, delimiter)) = substr_to_match
        .char_indices()
        .find(|&(_, ch)| ch == '=' || ch.is_whitespace())
    else {
        // if eof then no value
        return Some(MatchedFlag {
            name: substr_to_match,
            value: None,
            next_pos: current_pos + substr_to_match.len(),
        });
    };

    let name = &substr_to_match[..pos];
    // whitespace can be multi-byte, so advance by the delimiter's encoded width
    let after_delimiter = current_pos + pos + delimiter.len_utf8();

    if delimiter != '=' {
        // no value if whitespace
        return Some(MatchedFlag {
            name,
            value: None,
            next_pos: after_delimiter,
        });
    }

    // Decide the empty-value cases here, so the outcome does not depend on how `next_param`
    // treats leading whitespace.
    let matched = match input[after_delimiter..].chars().next() {
        // `-name=` at the very end of the input
        None => MatchedFlag {
            name,
            value: Some(""),
            next_pos: after_delimiter,
        },
        // `-name= rest`: empty value, and the separating whitespace is consumed
        Some(ch) if ch.is_whitespace() => MatchedFlag {
            name,
            value: Some(""),
            next_pos: after_delimiter + ch.len_utf8(),
        },
        // try to get the value
        Some(_) => match next_param(input, after_delimiter) {
            Some(param) => MatchedFlag {
                name,
                value: Some(param.value),
                next_pos: param.next_pos,
            },
            None => MatchedFlag {
                name,
                value: Some(""),
                next_pos: after_delimiter,
            },
        },
    };
    Some(matched)
}