Skip to content

Commit

Permalink
capture groups, replacements
Browse files Browse the repository at this point in the history
  • Loading branch information
bergey committed Dec 27, 2022
1 parent cfb6a94 commit 7ab1700
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 5 deletions.
33 changes: 32 additions & 1 deletion src/commands.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,22 @@ fn take_until<'a>(sep: char, s: Input) -> Progress<String> {
Ok((s, vec.into_iter().collect()))
}

/// Converts a sed-style replacement string into the replacement syntax of the
/// `regex` crate.
///
/// Two rewrites are applied in a single left-to-right pass:
///
/// * every literal `$` becomes `$$`, so the `regex` crate does not read it as
///   the start of a capture-group reference;
/// * a backslash followed by one ASCII digit (`\0`..`\9`) becomes `${N}`.
///   The braces keep the reference from merging with any text that follows.
///
/// Only a single digit is consumed after the backslash, because sed
/// back-references are one digit long: `\10` means group 1 followed by a
/// literal `0`. Every other character, including a backslash that is not
/// followed by a digit, is copied through unchanged.
///
/// The scan uses only the standard library, so no regex is compiled per call.
///
/// NOTE(review): an escaped backslash (`\\`) is not treated as a unit, so
/// `\\1` still yields a backslash followed by `${1}`. Confirm whether the
/// caller resolves escapes before this function runs.
pub fn clean_replacement(s: String) -> String {
    // Nothing to rewrite: hand the input back without allocating.
    if !s.contains(|c: char| matches!(c, '$' | '\\')) {
        return s;
    }

    // Each rewrite only grows the text, so the input length is a lower bound.
    let mut cleaned = String::with_capacity(s.len() + 4);
    let mut chars = s.chars().peekable();
    while let Some(c) = chars.next() {
        match c {
            '$' => cleaned.push_str("$$"),
            // Consume the following character only when it is a digit.
            '\\' => match chars.next_if(|next| next.is_ascii_digit()) {
                Some(digit) => {
                    cleaned.push_str("${");
                    cleaned.push(digit);
                    cleaned.push('}');
                }
                None => cleaned.push('\\'),
            },
            other => cleaned.push(other),
        }
    }
    cleaned
}

pub fn parse_function<'a>(cmd: Input<'a>) -> Progress<Function> {
let (s, function) = anychar(cmd)?;
use Function::{*};
Expand All @@ -92,7 +108,7 @@ pub fn parse_function<'a>(cmd: Input<'a>) -> Progress<Function> {
let regex = Regex::new(&format!("{}", ast)).unwrap();
let (s, replacement) = take_until(sep, s)?;
let (s, _) = char(sep)(s)?;
Ok((s, Fs(regex, replacement)))
Ok((s, Fs(regex, clean_replacement(replacement))))
},
'x' => Ok((s, Fx)),
_ => fail(cmd)
Expand Down Expand Up @@ -216,4 +232,19 @@ pub mod tests {
fn addr_comma() {
    // A context address written with `,` as its custom delimiter (`\,foo,`).
    let expected = Context(dummy_regex());
    address_equivalent("\\,foo,", &expected)
}

#[test]
fn clean_noop() {
    // Input without `$` or a back-reference must come back unchanged.
    let cleaned = clean_replacement(String::from("foo"));
    assert_eq!(cleaned, "foo");
}

#[test]
fn clean_ref() {
    // A sed back-reference `\1` is rewritten to the braced form `${1}`.
    let cleaned = clean_replacement(String::from(r"foo\1"));
    assert_eq!(cleaned, "foo${1}");
}

#[test]
fn clean_dollar() {
    // A literal `$` is doubled to `$$`.
    let cleaned = clean_replacement(String::from("$foo"));
    assert_eq!(cleaned, "$$foo");
}
}
8 changes: 7 additions & 1 deletion src/regex/equivalent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ impl Equivalent for Ast {
(Ast::Assertion(_), Ast::Assertion(_)) => panic!("not implemented"),
(Ast::Class(_), Ast::Class(_)) => panic!("not implemented"),
(Ast::Repetition(a), Ast::Repetition(b)) => a.equivalent(b),
(Ast::Group(_), Ast::Group(_)) => panic!("not implemented"),
(Ast::Group(a), Ast::Group(b)) => a.equivalent(b),
(Ast::Alternation(_), Ast::Alternation(_)) => panic!("not implemented"),
(Ast::Concat(a), Ast::Concat(b)) => a.equivalent(b),
_ => false
Expand All @@ -40,6 +40,12 @@ impl Equivalent for RepetitionOp {
}
}

impl Equivalent for Group {
    /// Two groups are equivalent when their kinds compare equal with `==` and
    /// their inner ASTs are equivalent. The groups' own `span` fields are not
    /// compared here.
    fn equivalent(&self, other: &Group) -> bool {
        // Differing kinds settle the question without looking at the contents.
        if self.kind != other.kind {
            return false;
        }
        self.ast.equivalent(&other.ast)
    }
}

impl Equivalent for Concat {
fn equivalent(&self, other: &Concat) -> bool {
self.asts.len() == other.asts.len() && std::iter::zip(&self.asts, &other.asts).all(|(a, b)| a.equivalent(&b))
Expand Down
25 changes: 22 additions & 3 deletions src/regex/parser.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
extern crate nom;

use nom::character::complete::{char, none_of, one_of, u32};
use nom::branch::alt;
use nom::error::{ Error, ErrorKind};
Expand All @@ -9,7 +8,7 @@ use nom::{
Err, Finish, IResult,
};
use nom_locate::{LocatedSpan};
use regex_syntax::ast::{Ast, Concat, Literal, LiteralKind, Position, Repetition, RepetitionKind, RepetitionOp, RepetitionRange, Span};
use regex_syntax::ast::{Ast, Concat, Group, GroupKind, Literal, LiteralKind, Position, Repetition, RepetitionKind, RepetitionOp, RepetitionRange, Span};

#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct ExtraState {
Expand Down Expand Up @@ -85,9 +84,24 @@ fn escaped_literal(s: Input<'_>) -> Progress {
})))
}

/// Parses a parenthesised capture group: `(`, an inner `branch`, then `)`.
///
/// Groups are numbered in the order their opening parenthesis appears, so the
/// counter in `extra.last_regex` is advanced right after the `(` and before
/// the inner expression is parsed. Numbering at the closing parenthesis would
/// give a nested group a lower index than the group enclosing it.
///
/// The returned `Ast::Group` carries a span from the position before `(` to
/// the position after `)`, and the remaining input carries the updated
/// counter.
fn group(s: Input) -> Progress {
    let start = position(s);
    let (mut s, _) = char('(')(s)?;
    // Claim this group's index now. Assumes the parsers under `branch` thread
    // `extra` through their input and output, as the counter already required.
    s.extra.last_regex += 1;
    let index = s.extra.last_regex;
    // TODO named & non-capturing
    let (s, ast) = branch(s)?;
    let (s, _) = char(')')(s)?;
    let end = position(s);
    Ok((
        s,
        Ast::Group(Group {
            span: Span { start, end },
            kind: GroupKind::CaptureIndex(index),
            ast: Box::new(ast),
        }),
    ))
}

/// Parses a single atom by trying a list of alternative parsers in order
/// with `alt`.
///
/// NOTE(review): this block comes from a diff view and shows both the
/// removed and the added `alt` line; only the second belongs to the
/// post-commit function.
fn atom(s: Input<'_>) -> Progress {
// TODO () ^ $ \^.[$()|*+?{\ \
// Before this commit: literal, escaped literal, or dot.
alt((literal, escaped_literal, dot))(s)
// After this commit: `group` is tried first, then the same three parsers.
alt((group, literal, escaped_literal, dot))(s)
}

fn char_quantifier(s: Input<'_>) -> IResult<Input, RepetitionOp> {
Expand Down Expand Up @@ -240,5 +254,10 @@ pub mod tests {
fn range() {
    // Bounded repetition: `x` repeated between 2 and 5 times.
    let pattern = "x{2,5}";
    match_modern_syntax(pattern)
}

#[test]
fn group() {
    // A capture group wrapping a starred literal.
    let pattern = "(a*)";
    match_modern_syntax(pattern)
}

}

0 comments on commit 7ab1700

Please sign in to comment.