Merge remote-tracking branch 'libglfw/rust-command-parser' into rust-command-parser

This commit is contained in:
dusk 2025-01-03 20:18:35 +09:00
commit 7bd50db63e
No known key found for this signature in database
12 changed files with 575 additions and 1 deletions

View file

@ -1,8 +1,8 @@
[workspace]
resolver = "2"
members = [
"./crates/*"
]
resolver = "2"
[workspace.dependencies]
anyhow = "1"

View file

@ -51,6 +51,17 @@ public class Context
Parameters = new Parameters(message.Content?.Substring(commandParseOffset));
Rest = provider.Resolve<DiscordApiClient>();
Cluster = provider.Resolve<Cluster>();
try
{
Parameters = new ParametersFFI(message.Content?.Substring(commandParseOffset));
}
catch (PKError e)
{
// todo: not this
Reply($"{Emojis.Error} {e.Message}");
throw;
}
}
public readonly IDiscordCache Cache;

View file

@ -0,0 +1,66 @@
using uniffi.commands;
namespace PluralKit.Bot;
/// <summary>
/// Command parameters obtained by handing the raw command text to
/// <c>CommandsMethods.ParseCommand</c> from the <c>uniffi.commands</c> bindings.
/// Arguments are consumed front to back through a cursor.
/// </summary>
public class ParametersFFI
{
    // Command reference reported by the parser for the matched command.
    private string _cb { get; init; }

    // Positional arguments, in the order the parser returned them.
    private List<string> _args { get; init; }

    // Index of the most recently popped argument; -1 until the first Pop().
    public int _ptr = -1;

    // Flags reported by the parser.
    private Dictionary<string, string?> _flags { get; init; }

    // just used for errors, temporarily
    public string FullCommand { get; init; }

    /// <summary>Parses <paramref name="cmd"/> and stores the result.</summary>
    /// <exception cref="PKError">Thrown with the parser's message when parsing fails.</exception>
    public ParametersFFI(string cmd)
    {
        FullCommand = cmd;

        var result = CommandsMethods.ParseCommand(cmd);
        if (result is not CommandResult.Ok ok)
            throw new PKError(((CommandResult.Err)result).@error);

        var command = ok.@command;
        _cb = command.@commandRef;
        _args = command.@args;
        _flags = command.@flags;
    }

    /// <summary>
    /// Returns the next argument and advances the cursor, or an empty string
    /// when every argument has already been consumed.
    /// </summary>
    public string Pop()
    {
        var next = _ptr + 1;

        // temporary debug trace
        if (_args.Count > next)
            Console.WriteLine($"pop: {next}, {_args[next]}");
        else
            Console.WriteLine("pop: no more arguments");

        if (_args.Count == next)
            return "";

        _ptr = next;
        return _args[next];
    }

    /// <summary>
    /// Returns the next argument without advancing the cursor, or an empty
    /// string when every argument has already been consumed.
    /// </summary>
    public string Peek()
    {
        var next = _ptr + 1;

        // temporary debug trace
        if (_args.Count > next)
            Console.WriteLine($"peek: {next}, {_args[next]}");
        else
            Console.WriteLine("peek: no more arguments");

        return _args.Count == next ? "" : _args[next];
    }

    // this might not work quite right
    /// <summary>Returns the argument stored at index <paramref name="ptr"/> (no bounds check).</summary>
    public string PeekWithPtr(ref int ptr) => _args[ptr];

    /// <summary>Returns the names of all flags as a new set.</summary>
    public ISet<string> Flags() => new HashSet<string>(_flags.Keys);

    // parsed differently in new commands, does this work right?
    // note: skipFlags here does nothing
    /// <summary>Currently just pops and returns the next argument.</summary>
    public string Remainder(bool skipFlags = false) => Pop();
}

View file

@ -4,6 +4,7 @@
<OutputType>Exe</OutputType>
<TargetFramework>net8.0</TargetFramework>
<Nullable>annotations</Nullable>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
<ImplicitUsings>enable</ImplicitUsings>
</PropertyGroup>

15
lib/commands/Cargo.toml Normal file
View file

@ -0,0 +1,15 @@
[package]
name = "commands"
version = "0.1.0"
edition = "2021"
[lib]
crate-type = ["cdylib"]
[dependencies]
lazy_static = { workspace = true }
uniffi = { version = "0.25" }
[build-dependencies]
uniffi = { version = "0.25", features = [ "build" ] }

3
lib/commands/build.rs Normal file
View file

@ -0,0 +1,3 @@
/// Build script entry point: hands the crate's UDL file to uniffi's
/// scaffolding generator and aborts the build if that fails.
fn main() {
    let udl_path = "src/commands.udl";
    uniffi::generate_scaffolding(udl_path).unwrap();
}

View file

@ -0,0 +1,13 @@
// Free functions exposed by the `commands` crate through uniffi.
namespace commands {
// Parses a raw command string; the outcome (parsed command or error message)
// is reported through CommandResult.
CommandResult parse_command(string input);
};
// Result of parse_command: either the parsed command or an error message.
[Enum]
interface CommandResult {
// Parsing succeeded.
Ok(ParsedCommand command);
// Parsing failed; `error` carries the message.
Err(string error);
};
// A successfully parsed command.
dictionary ParsedCommand {
// Reference identifying which command matched.
string command_ref;
// Positional arguments collected while parsing.
sequence<string> args;
// Flags found in the input, keyed by name; the value is optional.
record<string, string?> flags;
};

288
lib/commands/src/lib.rs Normal file
View file

@ -0,0 +1,288 @@
#![feature(let_chains)]
use core::panic;
use std::{cmp::Ordering, collections::HashMap};
uniffi::include_scaffolding!("commands");
mod string;
mod token;
use token::*;
// todo!: move all this stuff into a different file
// lib.rs should just have exported symbols and command definitions
/// One node of the command tree: each registered command is a path of tokens
/// from the root, one `branches` entry per token.
#[derive(Debug, Clone)]
struct TreeBranch {
/// Key of the command that ends at this node, if any.
/// `register_command` only sets it on the node stored under `Token::Empty`.
current_command_key: Option<String>,
/// branches.keys(), but sorted by specificity
/// (filled in by `sort_tokens`; empty until that has run)
possible_tokens: Vec<Token>,
/// Child nodes, keyed by the token that leads to them.
branches: HashMap<Token, TreeBranch>,
}
impl TreeBranch {
    /// Adds `command` to the tree rooted at `self`.
    ///
    /// One nested branch is created (or reused) per token of the command, and
    /// the command key is stored on a terminating `Token::Empty` branch below
    /// the last token.
    fn register_command(&mut self, command: Command) {
        let mut current_branch = self;
        // walk down the tree, creating missing sub-branches on the way
        for token in command.tokens {
            current_branch = current_branch
                .branches
                .entry(token)
                .or_insert_with(|| TreeBranch {
                    current_command_key: None,
                    possible_tokens: vec![],
                    branches: HashMap::new(),
                });
        }
        // when we're out of tokens, add an Empty branch with the callback and no sub-branches
        current_branch.branches.insert(
            Token::Empty,
            TreeBranch {
                current_command_key: Some(command.cb),
                possible_tokens: vec![],
                branches: HashMap::new(),
            },
        );
    }

    /// Rebuilds `possible_tokens` from `branches` for this node and, recursively,
    /// for every node below it. Call once after all commands are registered.
    ///
    /// `Token::Value` tokens are placed after all other tokens; the relative
    /// order inside each group follows `HashMap` iteration order.
    fn sort_tokens(&mut self) {
        for branch in self.branches.values_mut() {
            branch.sort_tokens();
        }

        self.possible_tokens = self.branches.keys().cloned().collect();

        // put Value tokens at the end
        //
        // The comparator must look at BOTH elements: `sort_by` requires a total
        // order and may panic or produce an unspecified order otherwise.
        //
        // NOTE(review): with this order, catch-all tokens such as `MemberRef`
        // are tried before literal `Value` tokens on the same level -- confirm
        // that this is the intended meaning of "specificity".
        self.possible_tokens.sort_by(|a, b| {
            match (
                matches!(a, Token::Value(_)),
                matches!(b, Token::Value(_)),
            ) {
                (true, false) => Ordering::Greater,
                (false, true) => Ordering::Less,
                _ => Ordering::Equal,
            }
        });
    }
}
/// A command definition: the token path that selects it plus its metadata.
#[derive(Clone)]
struct Command {
    /// Tokens that must match, in order, for this command to be selected.
    tokens: Vec<Token>,
    /// Human-readable description of the command.
    help: String,
    /// Key handed back to the caller as the command reference when this command matches.
    cb: String,
}

/// Builds a [`Command`] from borrowed parts.
///
/// Arguments are, in order: the token path, the command key (`cb`) and the
/// help text. Every definition in this file is written as
/// `command(tokens, "member_show", "Shows information about a member")`, so the
/// key has to be the second argument; the previous parameter order stored the
/// help sentence as the key and vice versa.
fn command(tokens: &[&Token], cb: &str, help: &str) -> Command {
    Command {
        tokens: tokens.iter().map(|&token| token.clone()).collect(),
        help: help.to_string(),
        cb: cb.to_string(),
    }
}
/// Command definitions known to the parser.
mod commands {
    use super::command;
    use super::Token;
    use super::Token::*;

    /// Literal token with a single accepted spelling.
    fn cmd(value: &str) -> Token {
        Value(vec![value.to_owned()])
    }

    /// Literal token that accepts any of the given spellings.
    pub fn cmd_with_alias(value: &[&str]) -> Token {
        let spellings = value.iter().map(|alias| alias.to_string()).collect();
        Value(spellings)
    }

    // todo: this needs to have fewer ampersands -alyssa
    /// Returns every command definition, in registration order.
    pub fn happy() -> Vec<super::Command> {
        // tokens shared by several definitions
        let system = cmd_with_alias(&["system", "s"]);
        let member = cmd_with_alias(&["member", "m"]);
        let description = cmd_with_alias(&["description", "desc"]);
        let privacy = cmd_with_alias(&["privacy", "priv"]);

        vec![
            command(&[&cmd("help")], "help", "Shows the help command"),
            command(
                &[&system],
                "system_show",
                "Shows information about your system",
            ),
            command(
                &[&system, &cmd("new")],
                "system_new",
                "Creates a new system",
            ),
            command(
                &[&member, &cmd_with_alias(&["new", "n"])],
                "member_new",
                "Creates a new system member",
            ),
            command(
                &[&member, &MemberRef],
                "member_show",
                "Shows information about a member",
            ),
            command(
                &[&member, &MemberRef, &description],
                "member_desc_show",
                "Shows a member's description",
            ),
            command(
                &[&member, &MemberRef, &description, &FullString],
                "member_desc_update",
                "Changes a member's description",
            ),
            command(
                &[&member, &MemberRef, &privacy],
                "member_privacy_show",
                "Displays a member's current privacy settings",
            ),
            command(
                &[
                    &member,
                    &MemberRef,
                    &privacy,
                    &MemberPrivacyTarget,
                    &PrivacyLevel,
                ],
                "member_privacy_update",
                "Changes a member's privacy settings",
            ),
        ]
    }
}
lazy_static::lazy_static! {
    // Tree of every registered command, built on first use:
    // all definitions are registered first, then the per-node token lists are sorted.
    static ref COMMAND_TREE: TreeBranch = {
        let mut root = TreeBranch {
            current_command_key: None,
            possible_tokens: vec![],
            branches: HashMap::new(),
        };

        for definition in commands::happy() {
            root.register_command(definition);
        }
        root.sort_tokens();

        root
    };
}
/// Outcome of `parse_command`, shaped to match the `CommandResult` enum
/// declared in `commands.udl`.
pub enum CommandResult {
/// The input matched a registered command.
Ok { command: ParsedCommand },
/// The input could not be parsed; `error` holds the message.
Err { error: String },
}
/// A successfully parsed command, shaped to match the `ParsedCommand`
/// dictionary declared in `commands.udl`.
pub struct ParsedCommand {
/// Key stored on the tree node where parsing ended (`current_command_key`).
pub command_ref: String,
/// Values returned by the matched tokens, in match order.
pub args: Vec<String>,
/// Flags found in the input, keyed by name with the leading dashes removed.
/// `parse_command` currently always stores `None` as the value.
pub flags: HashMap<String, Option<String>>,
}
/// Find the next token from an either raw or partially parsed command string
///
/// `possible_tokens` are tried in order; `current_pos` is the byte offset in
/// `input` up to which tokens have already been matched.
///
/// Returns:
/// - matched token, to move deeper into the tree
/// - matched value (if this command matched an user-provided value such as a member name)
/// - end position of matched token
/// - optionally a short-circuit error (`Err(None)` means that nothing matched)
fn next_token(
    possible_tokens: Vec<Token>,
    input: String,
    current_pos: usize,
) -> Result<(Token, Option<String>, usize), Option<String>> {
    // get next parameter, matching quotes
    let param = crate::string::next_param(input.clone(), current_pos);

    // try checking if this is a flag
    // todo!: this breaks full text matching if the full text starts with a flag
    // (but that's kinda already broken anyway)
    if let Some((value, new_pos)) = &param {
        if value.starts_with('-') {
            return Ok((
                Token::Flag,
                Some(value.trim_start_matches('-').to_string()),
                *new_pos,
            ));
        }
    }

    // Everything that has not been consumed yet. `FullString` must only see this
    // part: handing it the whole input would return the already-matched command
    // words as the value and would match even when nothing is left.
    let remainder = input
        .get(current_pos..)
        .map(str::trim_start)
        .filter(|rest| !rest.is_empty());

    // iterate over tokens and run try_match
    for token in possible_tokens {
        let (candidate, end_pos) = if matches!(token, Token::FullString) {
            // a full-string match consumes the rest of the input
            (remainder.map(str::to_string), input.len())
        } else {
            match &param {
                Some((value, new_pos)) => (Some(value.clone()), *new_pos),
                None => (None, current_pos),
            }
        };

        if let TokenMatchResult::Match(value) = token.try_match(candidate) {
            return Ok((token, value, end_pos));
        }
    }

    Err(None)
}
/// Parses a raw command string against the registered command tree.
///
/// Tokens are matched one at a time with `next_token`. Flags are collected on
/// the side and do not move through the tree; every other match descends one
/// level and records the value it produced (if any) as an argument.
///
/// Returns `CommandResult::Ok` when matching stops on a node that carries a
/// command key, and `CommandResult::Err` otherwise.
///
/// # Panics
///
/// Panics if `next_token` returns a token that has no branch on the current
/// node. Candidate tokens are derived from the branch keys, so that would be a
/// bug in the tree construction.
fn parse_command(input: String) -> CommandResult {
    // Walk the shared tree by reference: cloning it (and every sub-tree on the
    // way down) for each parsed message is pure overhead.
    let mut local_tree: &TreeBranch = &COMMAND_TREE;

    // end position of all currently matched tokens
    let mut current_pos = 0;

    let mut args: Vec<String> = Vec::new();
    let mut flags: HashMap<String, Option<String>> = HashMap::new();

    loop {
        match next_token(
            local_tree.possible_tokens.clone(),
            input.clone(),
            current_pos,
        ) {
            Ok((found_token, arg, new_pos)) => {
                current_pos = new_pos;

                if let Token::Flag = found_token {
                    if let Some(name) = arg {
                        flags.insert(name, None);
                    }
                    // don't try matching flags as tree elements
                    continue;
                }

                if let Some(arg) = arg {
                    args.push(arg);
                }

                let Some(next_tree) = local_tree.branches.get(&found_token) else {
                    panic!("found token could not match tree, at {input}");
                };
                local_tree = next_tree;
            }
            Err(None) => {
                if let Some(command_ref) = local_tree.current_command_key.clone() {
                    return CommandResult::Ok {
                        command: ParsedCommand {
                            command_ref,
                            args,
                            flags,
                        },
                    };
                }
                // todo: check if last token is a common incorrect unquote (multi-member names etc)
                // todo: check if this is a system name in pk;s command
                return CommandResult::Err {
                    error: "Command not found.".to_string(),
                };
            }
            Err(Some(short_circuit)) => {
                return CommandResult::Err {
                    error: short_circuit,
                };
            }
        }
    }
}

View file

@ -0,0 +1,88 @@
use std::collections::HashMap;
lazy_static::lazy_static! {
    // Dictionary of (left, right) quote pairs
    // Each char in the string is an individual quote, multi-char strings imply "one of the following chars"
    // Certain languages can have quote patterns that have a different character for open and close
    //
    // Besides the full left string, every single left character is also a key,
    // which makes it easy to look up the right quotes for one opening quote.
    pub static ref QUOTE_PAIRS: HashMap<String, String> = {
        const PAIRS: [(&str, &str); 9] = [
            // Basic
            ("'", "'"),   // ASCII single quotes
            ("\"", "\""), // ASCII double quotes
            // "Smart quotes"
            // Specifically ignore the left/right status of the quotes and match any combination of them
            // Left string also includes "low" quotes to allow for the low-high style used in some locales
            ("\u{201C}\u{201D}\u{201F}\u{201E}", "\u{201C}\u{201D}\u{201F}"), // double quotes
            ("\u{2018}\u{2019}\u{201B}\u{201A}", "\u{2018}\u{2019}\u{201B}"), // single quotes
            // Chevrons (normal and "fullwidth" variants)
            ("\u{00AB}\u{300A}", "\u{00BB}\u{300B}"), // double chevrons, pointing away (<<text>>)
            ("\u{00BB}\u{300B}", "\u{00AB}\u{300A}"), // double chevrons, pointing together (>>text<<)
            ("\u{2039}\u{3008}", "\u{203A}\u{3009}"), // single chevrons, pointing away (<text>)
            ("\u{203A}\u{3009}", "\u{2039}\u{3008}"), // single chevrons, pointing together (>text<)
            // Other
            ("\u{300C}\u{300E}", "\u{300D}\u{300F}"), // corner brackets (Japanese/Chinese)
        ];

        let mut pairs = HashMap::new();
        for (left, right) in PAIRS {
            pairs.insert(left.to_string(), right.to_string());
            for opener in left.chars() {
                pairs.insert(opener.to_string(), right.to_string());
            }
        }
        pairs
    };
}
/// Extracts the next parameter of `input`, starting at byte offset `current_pos`.
///
/// very very simple quote matching:
/// quotes need to be at start/end of words, and are ignored if a closing quote is not present
/// WTB POSIX quoting: https://pubs.opengroup.org/onlinepubs/9799919799/utilities/V3_chap02.html
///
/// Whitespace in front of the parameter is skipped. Returns the parameter
/// (without its surrounding quotes, if it was quoted) together with the byte
/// offset at which the next call should continue. Returns `None` when nothing
/// but whitespace is left, or when `current_pos` is not a valid offset into
/// `input`.
pub fn next_param(input: String, current_pos: usize) -> Option<(String, usize)> {
    // `get` (instead of indexing) rejects out-of-range and mid-character offsets
    let remaining = input.get(current_pos..)?;
    let to_match = remaining.trim_start();
    // absolute byte offset of `to_match` inside `input`
    let start = current_pos + (remaining.len() - to_match.len());

    // `None` here means that only whitespace (or nothing) was left
    let opener = to_match.chars().next()?;
    let opener_len = opener.len_utf8();

    // try matching end quote
    if let Some(closers) = QUOTE_PAIRS.get(&to_match[..opener_len]) {
        // skip(1): the opening quote must not close itself
        let mut rest = to_match.char_indices().skip(1).peekable();
        while let Some((pos, ch)) = rest.next() {
            // a closing quote only counts at the end of a word
            let at_word_end = rest.peek().map_or(true, |&(_, next)| next.is_whitespace());
            if at_word_end && closers.contains(ch) {
                // return quoted string, without quotes
                return Some((
                    to_match[opener_len..pos].to_string(),
                    start + pos + ch.len_utf8(),
                ));
            }
        }
    }

    // find next whitespace character
    match to_match.char_indices().find(|(_, ch)| ch.is_whitespace()) {
        Some((pos, ch)) => Some((to_match[..pos].to_string(), start + pos + ch.len_utf8())),
        // if we're here, we went to EOF and didn't match any whitespace
        // so we return the whole string
        None => Some((to_match.to_string(), input.len())),
    }
}

84
lib/commands/src/token.rs Normal file
View file

@ -0,0 +1,84 @@
/// One element of a command definition. Also used as the key of the command
/// tree's branches, hence the `Eq`/`Hash` derives.
#[derive(Debug, Clone, Eq, Hash, PartialEq)]
pub enum Token {
/// Token used to represent a finished command (i.e. no more parameters required)
// todo: this is likely not the right way to represent this
Empty,
/// A bot-defined value ("member" in `pk;member MyName`)
/// The vector holds every accepted spelling (name and aliases).
Value(Vec<String>),
/// A command defined by multiple values
// todo!
MultiValue(Vec<Vec<String>>),
/// Free text; `try_match` accepts any input it is given for this token.
FullString,
/// Member reference (hid or member name)
MemberRef,
/// Matches only the names listed in `MEMBER_PRIVACY_TARGETS`.
MemberPrivacyTarget,
/// Matches only the literal inputs `public` and `private`.
PrivacyLevel,
/// A flag; `try_match` never handles this variant (it is `unreachable!()` there).
// currently not included in command definitions
// todo: flags with values
Flag,
}
/// Result of `Token::try_match`.
pub enum TokenMatchResult {
/// The input does not satisfy the token.
NoMatch,
/// Token matched, optionally with a value.
Match(Option<String>),
}
// move this somewhere else
lazy_static::lazy_static! {
    // Names accepted by `Token::MemberPrivacyTarget`.
    static ref MEMBER_PRIVACY_TARGETS: Vec<String> = ["visibility", "name", "todo"]
        .into_iter()
        .map(String::from)
        .collect();
}
impl Token {
    /// Checks whether `input` satisfies this token.
    ///
    /// `input` is `None` when there is nothing left to match; only
    /// [`Token::Empty`] matches in that case. On a match, the returned value is
    /// the text to record for the token (the canonical spelling for
    /// [`Token::Value`], the input itself for the other value-carrying tokens).
    ///
    /// # Panics
    ///
    /// Panics when called with some input on [`Token::Flag`] (flags are matched
    /// upstream) or on [`Token::MultiValue`] (not implemented yet).
    pub fn try_match(&self, input: Option<String>) -> TokenMatchResult {
        // short circuit on empty things
        let Some(input) = input else {
            return if matches!(self, Self::Empty) {
                TokenMatchResult::Match(None)
            } else {
                TokenMatchResult::NoMatch
            };
        };

        // try actually matching stuff
        // note: must not add a _ case to this match
        // instead, conditional matches simply evaluate to None when they fail
        let matched = match self {
            Self::Empty => None,
            Self::Flag => unreachable!(), // matched upstream
            // c# bot currently needs subcommands provided as arguments
            // todo!: remove this
            Self::Value(values) => values
                .iter()
                .find(|candidate| input.trim() == candidate.as_str())
                .cloned(),
            Self::MultiValue(_) => todo!(),
            Self::FullString | Self::MemberRef => Some(input),
            Self::MemberPrivacyTarget => MEMBER_PRIVACY_TARGETS
                .contains(&input.trim().to_string())
                .then_some(input),
            Self::PrivacyLevel => (input == "public" || input == "private").then_some(input),
        };

        match matched {
            Some(value) => TokenMatchResult::Match(Some(value)),
            None => TokenMatchResult::NoMatch,
        }
    }
}

2
lib/commands/uniffi.toml Normal file
View file

@ -0,0 +1,2 @@
[bindings.csharp]
cdylib_name = "commands"

3
rust-toolchain.toml Normal file
View file

@ -0,0 +1,3 @@
[toolchain]
channel = "nightly-2024-08-20"