fix infinite loop backtracking for valid branches, cleanup matched tokens type

This commit is contained in:
dawn 2026-01-19 17:49:06 +03:00
parent 9122e64a41
commit f9367ea041
No known key found for this signature in database
3 changed files with 86 additions and 26 deletions

View file

@ -33,6 +33,15 @@ pub struct ParsedCommand {
pub flags: HashMap<String, Option<ParameterValue>>, pub flags: HashMap<String, Option<ParameterValue>>,
} }
#[derive(Clone, Debug)]
struct MatchedTokenState {
tree: Tree,
token: Token,
match_result: TokenMatchResult,
start_pos: usize,
filtered_tokens: Vec<Token>,
}
pub fn parse_command( pub fn parse_command(
command_tree: Tree, command_tree: Tree,
prefix: String, prefix: String,
@ -44,21 +53,16 @@ pub fn parse_command(
// end position of all currently matched tokens // end position of all currently matched tokens
let mut current_pos: usize = 0; let mut current_pos: usize = 0;
let mut current_token_idx: usize = 0; let mut current_token_idx: usize = 0;
let mut params: HashMap<String, ParameterValue> = HashMap::new();
let mut raw_flags: Vec<(usize, MatchedFlag)> = Vec::new(); let mut raw_flags: Vec<(usize, MatchedFlag)> = Vec::new();
let mut matched_tokens: Vec<(Tree, (Token, TokenMatchResult, usize), usize)> = Vec::new(); let mut matched_tokens: Vec<MatchedTokenState> = Vec::new();
let mut filtered_tokens: Vec<Token> = Vec::new(); let mut filtered_tokens: Vec<Token> = Vec::new(); // these are tokens that we've already tried (and failed)
let mut last_optional_param_error: Option<(SmolStr, SmolStr)> = None; let mut last_optional_param_error: Option<(SmolStr, SmolStr)> = None;
// track the best attempt at parsing (deepest matched tokens) // track the best attempt at parsing (deepest matched tokens)
// so we can use it for error messages/suggestions even if we backtrack later // so we can use it for error messages/suggestions even if we backtrack later
let mut best_attempt: Option<( let mut best_attempt: Option<(Tree, Vec<MatchedTokenState>, usize)> = None;
Tree,
Vec<(Tree, (Token, TokenMatchResult, usize), usize)>,
usize,
)> = None;
loop { loop {
let mut possible_tokens = local_tree let mut possible_tokens = local_tree
@ -111,18 +115,21 @@ pub fn parse_command(
} }
} }
// add parameter if any if let TokenMatchResult::MatchedParameter { .. } = result {
if let TokenMatchResult::MatchedParameter { name, value } = result { // we don't add params here, but wait until we matched a full command
params.insert(name.to_string(), value.clone()); // then we can use matched_tokens to extract the params
// this is so we don't have to keep track of "params" when trying branches
} }
// move to the next branch // move to the next branch
if let Some(next_tree) = local_tree.get_branch(&found_token) { if let Some(next_tree) = local_tree.get_branch(&found_token) {
matched_tokens.push(( matched_tokens.push(MatchedTokenState {
local_tree.clone(), tree: local_tree.clone(),
(found_token.clone(), result.clone(), *new_pos), token: found_token.clone(),
current_pos, match_result: result.clone(),
)); start_pos: current_pos,
filtered_tokens: filtered_tokens.clone(),
});
// update best attempt if we're deeper // update best attempt if we're deeper
if best_attempt.as_ref().map(|x| x.1.len()).unwrap_or(0) < matched_tokens.len() if best_attempt.as_ref().map(|x| x.1.len()).unwrap_or(0) < matched_tokens.len()
@ -147,14 +154,15 @@ pub fn parse_command(
None => { None => {
// redo the previous branches if we didnt match on a parameter // redo the previous branches if we didnt match on a parameter
// this is a bit of a hack, but its necessary for making parameters on the same depth work // this is a bit of a hack, but its necessary for making parameters on the same depth work
if let Some((match_tree, match_next, old_pos)) = matched_tokens if let Some(state) = matched_tokens
.pop() .pop()
.and_then(|m| matches!(m.1, (Token::Parameter(_), _, _)).then_some(m)) .and_then(|m| matches!(m.token, Token::Parameter(_)).then_some(m))
{ {
println!("redoing previous branch: {:?}", match_next.0); println!("redoing previous branch: {:?}", state.token);
local_tree = match_tree; local_tree = state.tree;
current_pos = old_pos; // reset position to previous branch's start current_pos = state.start_pos; // reset position to previous branch's start
filtered_tokens.push(match_next.0); filtered_tokens = state.filtered_tokens; // reset filtered tokens to the previous branch's
filtered_tokens.push(state.token);
continue; continue;
} }
@ -178,8 +186,8 @@ pub fn parse_command(
// normalize input by replacing parameters with placeholders // normalize input by replacing parameters with placeholders
let mut normalized_input = String::new(); let mut normalized_input = String::new();
for (_, (token, _, _), _) in &matched_tokens { for state in &matched_tokens {
write!(&mut normalized_input, "{token} ").unwrap(); write!(&mut normalized_input, "{} ", state.token).unwrap();
} }
normalized_input.push_str(&input[current_pos..].trim_start()); normalized_input.push_str(&input[current_pos..].trim_start());
@ -220,6 +228,13 @@ pub fn parse_command(
let mut misplaced_flags: Vec<MatchedFlag> = Vec::new(); let mut misplaced_flags: Vec<MatchedFlag> = Vec::new();
let mut invalid_flags: Vec<MatchedFlag> = Vec::new(); let mut invalid_flags: Vec<MatchedFlag> = Vec::new();
let mut params: HashMap<String, ParameterValue> = HashMap::new();
for state in &matched_tokens {
if let TokenMatchResult::MatchedParameter { name, value } = &state.match_result {
params.insert(name.to_string(), value.clone());
}
}
for (token_idx, raw_flag) in raw_flags { for (token_idx, raw_flag) in raw_flags {
let Some(matched_flag) = match_flag(command.flags.iter(), raw_flag.clone()) else { let Some(matched_flag) = match_flag(command.flags.iter(), raw_flag.clone()) else {
invalid_flags.push(raw_flag); invalid_flags.push(raw_flag);

View file

@ -0,0 +1,45 @@
use command_parser::{parse_command, Tree, command::Command, parameter::*, tokens};
/// this checks if we properly keep track of filtered tokens (eg. branches we failed on)
/// when we backtrack. a previous parser bug would cause infinite loops since it did not
/// (the parser would "flip-flop" between branches) this is here for reference.
#[test]
fn test_infinite_loop_repro() {
let p1 = Optional(("param1", ParameterKind::OpaqueString));
let p2 = Optional(("param2", ParameterKind::OpaqueString));
let cmd1 = Command::new(tokens!("s", p1, "A"), "cmd1");
let cmd2 = Command::new(tokens!("s", p2, "B"), "cmd2");
let mut tree = Tree::default();
tree.register_command(cmd1);
tree.register_command(cmd2);
let input = "s foo C";
// this should fail and not loop
let result = parse_command(tree, "pk;".to_string(), input.to_string());
assert!(result.is_err());
}
/// check if we have params from other branches when we trying to match them and they succeeded
/// but then we backtracked, making them invalid. this should no longer happen since we just
/// extract params from matched tokens when we match the command, but keeping here just for reference.
#[test]
fn test_dirty_params() {
let p1 = Optional(("param1", ParameterKind::OpaqueString));
let p2 = Optional(("param2", ParameterKind::OpaqueString));
let cmd1 = Command::new(tokens!("s", p1, "A"), "cmd1");
let cmd2 = Command::new(tokens!("s", p2, "B"), "cmd2");
let mut tree = Tree::default();
tree.register_command(cmd1);
tree.register_command(cmd2);
let input = "s foo B";
let result = parse_command(tree, "pk;".to_string(), input.to_string()).unwrap();
println!("params: {:?}", result.parameters);
assert!(!result.parameters.contains_key("param1"), "params should not contain 'param1' from failed branch");
assert!(result.parameters.contains_key("param2"), "params should contain 'param2'");
}

View file

@ -54,4 +54,4 @@ fn test_typoed_command_with_flags() {
assert!(msg.contains("message author")); assert!(msg.contains("message author"));
} }
} }
} }