diff --git a/src/commands.rs b/src/commands.rs
index 314712b..680664e 100644
--- a/src/commands.rs
+++ b/src/commands.rs
@@ -73,6 +73,35 @@ fn take_until<'a>(sep: char, s: Input) -> Progress {
     Ok((s, vec.into_iter().collect()))
 }
 
+/// Convert a sed replacement string to the regex crate's syntax: every
+/// `$` is doubled so it stays literal, and each `\N` back-reference
+/// becomes `${N}`.
+///
+/// NOTE(review): the back-reference pattern matches a backslash followed
+/// by digits anywhere, so an escaped backslash before digits (`\\1`) and
+/// multi-digit forms (`\10`) are rewritten too -- confirm that is intended.
+pub fn clean_replacement(mut s: String) -> String {
+    let mut dest = String::new();
+
+    // TODO static regexen
+    // Escape `$` first so the `$` introduced for back-references below is
+    // not escaped again.
+    let dollar = Regex::new(r"\$").unwrap();
+    let changed = regex::replace_all(&dollar, &s, &mut dest, "$$$$");
+    if changed {std::mem::swap(&mut s, &mut dest)}
+
+    // After a swap `dest` holds the previous input; empty it so the second
+    // pass starts from a clean buffer.
+    // NOTE(review): `replace_all` is defined outside this patch -- if it
+    // already resets its output buffer, this call is merely redundant.
+    dest.clear();
+    let backslash_digits = Regex::new(r"\\([0-9]+)").unwrap();
+    let changed = regex::replace_all(&backslash_digits, &s, &mut dest, r"$${$1}");
+    if changed {std::mem::swap(&mut s, &mut dest)}
+
+    s
+}
+
 pub fn parse_function<'a>(cmd: Input<'a>) -> Progress {
     let (s, function) = anychar(cmd)?;
     use Function::{*};
@@ -92,7 +121,7 @@ pub fn parse_function<'a>(cmd: Input<'a>) -> Progress {
             let regex = Regex::new(&format!("{}", ast)).unwrap();
             let (s, replacement) = take_until(sep, s)?;
             let (s, _) = char(sep)(s)?;
-            Ok((s, Fs(regex, replacement)))
+            Ok((s, Fs(regex, clean_replacement(replacement))))
         },
         'x' => Ok((s, Fx)),
         _ => fail(cmd)
@@ -216,4 +245,19 @@ pub mod tests {
     fn addr_comma() {
         address_equivalent("\\,foo,", &Context(dummy_regex()))
     }
+
+    #[test]
+    fn clean_noop() {
+        assert_eq!(clean_replacement("foo".to_string()), "foo")
+    }
+
+    #[test]
+    fn clean_ref() {
+        assert_eq!(clean_replacement(r"foo\1".to_string()), "foo${1}")
+    }
+
+    #[test]
+    fn clean_dollar() {
+        assert_eq!(clean_replacement("$foo".to_string()), "$$foo")
+    }
 }
diff --git a/src/regex/equivalent.rs b/src/regex/equivalent.rs
index eca4c00..33b3292 100644
--- a/src/regex/equivalent.rs
+++ b/src/regex/equivalent.rs
@@ -14,7 +14,7 @@ impl Equivalent for Ast {
             (Ast::Assertion(_), Ast::Assertion(_)) => panic!("not implemented"),
             (Ast::Class(_), Ast::Class(_)) => panic!("not implemented"),
             (Ast::Repetition(a), Ast::Repetition(b)) => a.equivalent(b),
-            (Ast::Group(_), Ast::Group(_)) => panic!("not implemented"),
+            (Ast::Group(a), Ast::Group(b)) => a.equivalent(b),
             (Ast::Alternation(_), Ast::Alternation(_)) => panic!("not implemented"),
             (Ast::Concat(a), Ast::Concat(b)) => a.equivalent(b),
             _ => false
@@ -40,6 +40,16 @@ impl Equivalent for RepetitionOp {
     }
 }
 
+// Groups are equivalent when their kinds compare equal and the wrapped
+// sub-expressions are equivalent.
+// NOTE(review): `kind` is compared with `==` rather than `equivalent` --
+// confirm that is right for kinds other than `CaptureIndex`.
+impl Equivalent for Group {
+    fn equivalent(&self, other: &Group) -> bool {
+        self.kind == other.kind && self.ast.equivalent(&other.ast)
+    }
+}
+
 impl Equivalent for Concat {
     fn equivalent(&self, other: &Concat) -> bool {
         self.asts.len() == other.asts.len() && std::iter::zip(&self.asts, &other.asts).all(|(a, b)| a.equivalent(&b))
diff --git a/src/regex/parser.rs b/src/regex/parser.rs
index 5474021..1490c8c 100644
--- a/src/regex/parser.rs
+++ b/src/regex/parser.rs
@@ -1,5 +1,4 @@
 extern crate nom;
-
 use nom::character::complete::{char, none_of, one_of, u32};
 use nom::branch::alt;
 use nom::error::{ Error, ErrorKind};
@@ -9,7 +8,7 @@ use nom::{
     Err, Finish, IResult,
 };
 use nom_locate::{LocatedSpan};
-use regex_syntax::ast::{Ast, Concat, Literal, LiteralKind, Position, Repetition, RepetitionKind, RepetitionOp, RepetitionRange, Span};
+use regex_syntax::ast::{Ast, Concat, Group, GroupKind, Literal, LiteralKind, Position, Repetition, RepetitionKind, RepetitionOp, RepetitionRange, Span};
 
 #[derive(Clone, Copy, Debug, Eq, PartialEq)]
 pub struct ExtraState {
@@ -85,9 +84,30 @@ fn escaped_literal(s: Input<'_>) -> Progress {
     })))
 }
 
+/// Parse a parenthesised group `( branch )` into a numbered capture group.
+///
+/// The capture index is claimed right after the opening parenthesis, before
+/// the nested branch is parsed, so that nested groups are numbered in the
+/// order their `(` appears (outer before inner).
+fn group(s: Input) -> Progress {
+    let start = position(s);
+    let (mut s, _) = char( '(' )(s)?;
+    s.extra.last_regex += 1;
+    let index = s.extra.last_regex;
+    // TODO named & non-capturing
+    let (s, ast) = branch(s)?;
+    let (s, _) = char( ')' )(s)?;
+    let end = position(s);
+    Ok((s, Ast::Group( Group {
+        span: Span{ start: start, end: end},
+        kind: GroupKind::CaptureIndex(index),
+        ast: Box::new(ast),
+    })))
+}
+
 fn atom(s: Input<'_>) -> Progress {
     // TODO () ^ $ \^.[$()|*+?{\ \
-    alt((literal, escaped_literal, dot))(s)
+    alt((group, literal, escaped_literal, dot))(s)
 }
 
 fn char_quantifier(s: Input<'_>) -> IResult {
@@ -240,5 +260,10 @@ pub mod tests {
     fn range() {
         match_modern_syntax("x{2,5}")
     }
+
+    #[test]
+    fn group() {
+        match_modern_syntax("(a*)")
+    }
 }
 