mirror of
https://github.com/PluralKit/PluralKit.git
synced 2026-02-04 04:56:49 +00:00
fix infinite loop backtracking for valid branches, cleanup matched tokens type
This commit is contained in:
parent
9122e64a41
commit
f9367ea041
3 changed files with 86 additions and 26 deletions
|
|
@ -33,6 +33,15 @@ pub struct ParsedCommand {
|
|||
pub flags: HashMap<String, Option<ParameterValue>>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct MatchedTokenState {
|
||||
tree: Tree,
|
||||
token: Token,
|
||||
match_result: TokenMatchResult,
|
||||
start_pos: usize,
|
||||
filtered_tokens: Vec<Token>,
|
||||
}
|
||||
|
||||
pub fn parse_command(
|
||||
command_tree: Tree,
|
||||
prefix: String,
|
||||
|
|
@ -44,21 +53,16 @@ pub fn parse_command(
|
|||
// end position of all currently matched tokens
|
||||
let mut current_pos: usize = 0;
|
||||
let mut current_token_idx: usize = 0;
|
||||
|
||||
let mut params: HashMap<String, ParameterValue> = HashMap::new();
|
||||
let mut raw_flags: Vec<(usize, MatchedFlag)> = Vec::new();
|
||||
|
||||
let mut matched_tokens: Vec<(Tree, (Token, TokenMatchResult, usize), usize)> = Vec::new();
|
||||
let mut filtered_tokens: Vec<Token> = Vec::new();
|
||||
let mut matched_tokens: Vec<MatchedTokenState> = Vec::new();
|
||||
let mut filtered_tokens: Vec<Token> = Vec::new(); // these are tokens that we've already tried (and failed)
|
||||
|
||||
let mut last_optional_param_error: Option<(SmolStr, SmolStr)> = None;
|
||||
|
||||
// track the best attempt at parsing (deepest matched tokens)
|
||||
// so we can use it for error messages/suggestions even if we backtrack later
|
||||
let mut best_attempt: Option<(
|
||||
Tree,
|
||||
Vec<(Tree, (Token, TokenMatchResult, usize), usize)>,
|
||||
usize,
|
||||
)> = None;
|
||||
let mut best_attempt: Option<(Tree, Vec<MatchedTokenState>, usize)> = None;
|
||||
|
||||
loop {
|
||||
let mut possible_tokens = local_tree
|
||||
|
|
@ -111,18 +115,21 @@ pub fn parse_command(
|
|||
}
|
||||
}
|
||||
|
||||
// add parameter if any
|
||||
if let TokenMatchResult::MatchedParameter { name, value } = result {
|
||||
params.insert(name.to_string(), value.clone());
|
||||
if let TokenMatchResult::MatchedParameter { .. } = result {
|
||||
// we don't add params here, but wait until we matched a full command
|
||||
// then we can use matched_tokens to extract the params
|
||||
// this is so we don't have to keep track of "params" when trying branches
|
||||
}
|
||||
|
||||
// move to the next branch
|
||||
if let Some(next_tree) = local_tree.get_branch(&found_token) {
|
||||
matched_tokens.push((
|
||||
local_tree.clone(),
|
||||
(found_token.clone(), result.clone(), *new_pos),
|
||||
current_pos,
|
||||
));
|
||||
matched_tokens.push(MatchedTokenState {
|
||||
tree: local_tree.clone(),
|
||||
token: found_token.clone(),
|
||||
match_result: result.clone(),
|
||||
start_pos: current_pos,
|
||||
filtered_tokens: filtered_tokens.clone(),
|
||||
});
|
||||
|
||||
// update best attempt if we're deeper
|
||||
if best_attempt.as_ref().map(|x| x.1.len()).unwrap_or(0) < matched_tokens.len()
|
||||
|
|
@ -147,14 +154,15 @@ pub fn parse_command(
|
|||
None => {
|
||||
// redo the previous branches if we didnt match on a parameter
|
||||
// this is a bit of a hack, but its necessary for making parameters on the same depth work
|
||||
if let Some((match_tree, match_next, old_pos)) = matched_tokens
|
||||
if let Some(state) = matched_tokens
|
||||
.pop()
|
||||
.and_then(|m| matches!(m.1, (Token::Parameter(_), _, _)).then_some(m))
|
||||
.and_then(|m| matches!(m.token, Token::Parameter(_)).then_some(m))
|
||||
{
|
||||
println!("redoing previous branch: {:?}", match_next.0);
|
||||
local_tree = match_tree;
|
||||
current_pos = old_pos; // reset position to previous branch's start
|
||||
filtered_tokens.push(match_next.0);
|
||||
println!("redoing previous branch: {:?}", state.token);
|
||||
local_tree = state.tree;
|
||||
current_pos = state.start_pos; // reset position to previous branch's start
|
||||
filtered_tokens = state.filtered_tokens; // reset filtered tokens to the previous branch's
|
||||
filtered_tokens.push(state.token);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
@ -178,8 +186,8 @@ pub fn parse_command(
|
|||
|
||||
// normalize input by replacing parameters with placeholders
|
||||
let mut normalized_input = String::new();
|
||||
for (_, (token, _, _), _) in &matched_tokens {
|
||||
write!(&mut normalized_input, "{token} ").unwrap();
|
||||
for state in &matched_tokens {
|
||||
write!(&mut normalized_input, "{} ", state.token).unwrap();
|
||||
}
|
||||
normalized_input.push_str(&input[current_pos..].trim_start());
|
||||
|
||||
|
|
@ -220,6 +228,13 @@ pub fn parse_command(
|
|||
let mut misplaced_flags: Vec<MatchedFlag> = Vec::new();
|
||||
let mut invalid_flags: Vec<MatchedFlag> = Vec::new();
|
||||
|
||||
let mut params: HashMap<String, ParameterValue> = HashMap::new();
|
||||
for state in &matched_tokens {
|
||||
if let TokenMatchResult::MatchedParameter { name, value } = &state.match_result {
|
||||
params.insert(name.to_string(), value.clone());
|
||||
}
|
||||
}
|
||||
|
||||
for (token_idx, raw_flag) in raw_flags {
|
||||
let Some(matched_flag) = match_flag(command.flags.iter(), raw_flag.clone()) else {
|
||||
invalid_flags.push(raw_flag);
|
||||
|
|
|
|||
45
crates/command_parser/tests/parser.rs
Normal file
45
crates/command_parser/tests/parser.rs
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
use command_parser::{parse_command, Tree, command::Command, parameter::*, tokens};
|
||||
|
||||
/// this checks if we properly keep track of filtered tokens (eg. branches we failed on)
|
||||
/// when we backtrack. a previous parser bug would cause infinite loops since it did not
|
||||
/// (the parser would "flip-flop" between branches) this is here for reference.
|
||||
#[test]
|
||||
fn test_infinite_loop_repro() {
|
||||
let p1 = Optional(("param1", ParameterKind::OpaqueString));
|
||||
let p2 = Optional(("param2", ParameterKind::OpaqueString));
|
||||
|
||||
let cmd1 = Command::new(tokens!("s", p1, "A"), "cmd1");
|
||||
let cmd2 = Command::new(tokens!("s", p2, "B"), "cmd2");
|
||||
|
||||
let mut tree = Tree::default();
|
||||
tree.register_command(cmd1);
|
||||
tree.register_command(cmd2);
|
||||
|
||||
let input = "s foo C";
|
||||
// this should fail and not loop
|
||||
let result = parse_command(tree, "pk;".to_string(), input.to_string());
|
||||
assert!(result.is_err());
|
||||
}
|
||||
|
||||
/// check if we have params from other branches when we trying to match them and they succeeded
|
||||
/// but then we backtracked, making them invalid. this should no longer happen since we just
|
||||
/// extract params from matched tokens when we match the command, but keeping here just for reference.
|
||||
#[test]
|
||||
fn test_dirty_params() {
|
||||
let p1 = Optional(("param1", ParameterKind::OpaqueString));
|
||||
let p2 = Optional(("param2", ParameterKind::OpaqueString));
|
||||
|
||||
let cmd1 = Command::new(tokens!("s", p1, "A"), "cmd1");
|
||||
let cmd2 = Command::new(tokens!("s", p2, "B"), "cmd2");
|
||||
|
||||
let mut tree = Tree::default();
|
||||
tree.register_command(cmd1);
|
||||
tree.register_command(cmd2);
|
||||
|
||||
let input = "s foo B";
|
||||
let result = parse_command(tree, "pk;".to_string(), input.to_string()).unwrap();
|
||||
|
||||
println!("params: {:?}", result.parameters);
|
||||
assert!(!result.parameters.contains_key("param1"), "params should not contain 'param1' from failed branch");
|
||||
assert!(result.parameters.contains_key("param2"), "params should contain 'param2'");
|
||||
}
|
||||
|
|
@ -54,4 +54,4 @@ fn test_typoed_command_with_flags() {
|
|||
assert!(msg.contains("message author"));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue