From f9367ea04141e597d0f41c1e822af0af7d4b5b07 Mon Sep 17 00:00:00 2001 From: dawn <90008@gaze.systems> Date: Mon, 19 Jan 2026 17:49:06 +0300 Subject: [PATCH] fix infinite loop backtracking for valid branches, cleanup matched tokens type --- crates/command_parser/src/lib.rs | 65 ++++++++++++++++---------- crates/command_parser/tests/parser.rs | 45 ++++++++++++++++++ crates/command_parser/tests/ranking.rs | 2 +- 3 files changed, 86 insertions(+), 26 deletions(-) create mode 100644 crates/command_parser/tests/parser.rs diff --git a/crates/command_parser/src/lib.rs b/crates/command_parser/src/lib.rs index 4310ce77..239b9531 100644 --- a/crates/command_parser/src/lib.rs +++ b/crates/command_parser/src/lib.rs @@ -33,6 +33,15 @@ pub struct ParsedCommand { pub flags: HashMap>, } +#[derive(Clone, Debug)] +struct MatchedTokenState { + tree: Tree, + token: Token, + match_result: TokenMatchResult, + start_pos: usize, + filtered_tokens: Vec, +} + pub fn parse_command( command_tree: Tree, prefix: String, @@ -44,21 +53,16 @@ pub fn parse_command( // end position of all currently matched tokens let mut current_pos: usize = 0; let mut current_token_idx: usize = 0; - - let mut params: HashMap = HashMap::new(); let mut raw_flags: Vec<(usize, MatchedFlag)> = Vec::new(); - let mut matched_tokens: Vec<(Tree, (Token, TokenMatchResult, usize), usize)> = Vec::new(); - let mut filtered_tokens: Vec = Vec::new(); + let mut matched_tokens: Vec = Vec::new(); + let mut filtered_tokens: Vec = Vec::new(); // these are tokens that we've already tried (and failed) + let mut last_optional_param_error: Option<(SmolStr, SmolStr)> = None; // track the best attempt at parsing (deepest matched tokens) // so we can use it for error messages/suggestions even if we backtrack later - let mut best_attempt: Option<( - Tree, - Vec<(Tree, (Token, TokenMatchResult, usize), usize)>, - usize, - )> = None; + let mut best_attempt: Option<(Tree, Vec, usize)> = None; loop { let mut possible_tokens = local_tree @@ -111,18 +115,21 @@ pub fn parse_command( } } - // add parameter if any - if let TokenMatchResult::MatchedParameter { name, value } = result { - params.insert(name.to_string(), value.clone()); + if let TokenMatchResult::MatchedParameter { .. } = result { + // we don't add params here, but wait until we matched a full command + // then we can use matched_tokens to extract the params + // this is so we don't have to keep track of "params" when trying branches } // move to the next branch if let Some(next_tree) = local_tree.get_branch(&found_token) { - matched_tokens.push(( - local_tree.clone(), - (found_token.clone(), result.clone(), *new_pos), - current_pos, - )); + matched_tokens.push(MatchedTokenState { + tree: local_tree.clone(), + token: found_token.clone(), + match_result: result.clone(), + start_pos: current_pos, + filtered_tokens: filtered_tokens.clone(), + }); // update best attempt if we're deeper if best_attempt.as_ref().map(|x| x.1.len()).unwrap_or(0) < matched_tokens.len() @@ -147,14 +154,15 @@ pub fn parse_command( None => { // redo the previous branches if we didnt match on a parameter // this is a bit of a hack, but its necessary for making parameters on the same depth work - if let Some((match_tree, match_next, old_pos)) = matched_tokens + if let Some(state) = matched_tokens .pop() - .and_then(|m| matches!(m.1, (Token::Parameter(_), _, _)).then_some(m)) + .and_then(|m| matches!(m.token, Token::Parameter(_)).then_some(m)) { - println!("redoing previous branch: {:?}", match_next.0); - local_tree = match_tree; - current_pos = old_pos; // reset position to previous branch's start - filtered_tokens.push(match_next.0); + println!("redoing previous branch: {:?}", state.token); + local_tree = state.tree; + current_pos = state.start_pos; // reset position to previous branch's start + filtered_tokens = state.filtered_tokens; // reset filtered tokens to the previous branch's + filtered_tokens.push(state.token); continue; } @@ -178,8 +186,8 @@ pub fn parse_command( // normalize input by replacing parameters with placeholders let mut normalized_input = String::new(); - for (_, (token, _, _), _) in &matched_tokens { - write!(&mut normalized_input, "{token} ").unwrap(); + for state in &matched_tokens { + write!(&mut normalized_input, "{} ", state.token).unwrap(); } normalized_input.push_str(&input[current_pos..].trim_start()); @@ -220,6 +228,13 @@ pub fn parse_command( let mut misplaced_flags: Vec = Vec::new(); let mut invalid_flags: Vec = Vec::new(); + let mut params: HashMap = HashMap::new(); + for state in &matched_tokens { + if let TokenMatchResult::MatchedParameter { name, value } = &state.match_result { + params.insert(name.to_string(), value.clone()); + } + } + for (token_idx, raw_flag) in raw_flags { let Some(matched_flag) = match_flag(command.flags.iter(), raw_flag.clone()) else { invalid_flags.push(raw_flag); diff --git a/crates/command_parser/tests/parser.rs b/crates/command_parser/tests/parser.rs new file mode 100644 index 00000000..75df16b7 --- /dev/null +++ b/crates/command_parser/tests/parser.rs @@ -0,0 +1,45 @@ +use command_parser::{parse_command, Tree, command::Command, parameter::*, tokens}; + +/// this checks if we properly keep track of filtered tokens (eg. branches we failed on) +/// when we backtrack. a previous parser bug would cause infinite loops since it did not +/// (the parser would "flip-flop" between branches) this is here for reference. +#[test] +fn test_infinite_loop_repro() { + let p1 = Optional(("param1", ParameterKind::OpaqueString)); + let p2 = Optional(("param2", ParameterKind::OpaqueString)); + + let cmd1 = Command::new(tokens!("s", p1, "A"), "cmd1"); + let cmd2 = Command::new(tokens!("s", p2, "B"), "cmd2"); + + let mut tree = Tree::default(); + tree.register_command(cmd1); + tree.register_command(cmd2); + + let input = "s foo C"; + // this should fail and not loop + let result = parse_command(tree, "pk;".to_string(), input.to_string()); + assert!(result.is_err()); +} + +/// check if we have params from other branches when we trying to match them and they succeeded +/// but then we backtracked, making them invalid. this should no longer happen since we just +/// extract params from matched tokens when we match the command, but keeping here just for reference. +#[test] +fn test_dirty_params() { + let p1 = Optional(("param1", ParameterKind::OpaqueString)); + let p2 = Optional(("param2", ParameterKind::OpaqueString)); + + let cmd1 = Command::new(tokens!("s", p1, "A"), "cmd1"); + let cmd2 = Command::new(tokens!("s", p2, "B"), "cmd2"); + + let mut tree = Tree::default(); + tree.register_command(cmd1); + tree.register_command(cmd2); + + let input = "s foo B"; + let result = parse_command(tree, "pk;".to_string(), input.to_string()).unwrap(); + + println!("params: {:?}", result.parameters); + assert!(!result.parameters.contains_key("param1"), "params should not contain 'param1' from failed branch"); + assert!(result.parameters.contains_key("param2"), "params should contain 'param2'"); +} diff --git a/crates/command_parser/tests/ranking.rs b/crates/command_parser/tests/ranking.rs index 0f185fc7..bcee3a51 100644 --- a/crates/command_parser/tests/ranking.rs +++ b/crates/command_parser/tests/ranking.rs @@ -54,4 +54,4 @@ fn test_typoed_command_with_flags() { assert!(msg.contains("message author")); } } -} \ No newline at end of file +}