From cad77fc48a621a315e46e6f9f51024451f74772b Mon Sep 17 00:00:00 2001 From: Elie ROUDNINSKI Date: Mon, 24 Feb 2025 10:52:57 +0100 Subject: [PATCH] Only pass parser settings when checking function parameters. Passing the whole parser allows to access too many things, like the scheme and potentially other things we might add in the future. It also makes writing unit tests more cumbersome since we now have to build a scheme first just to get a dummy parser. --- engine/src/ast/field_expr.rs | 10 ++-- engine/src/ast/function_expr.rs | 2 +- engine/src/ast/parse.rs | 63 ++++++++++++++++++++------ engine/src/functions.rs | 6 +-- engine/src/lib.rs | 2 +- engine/src/rhs_types/regex/imp_real.rs | 4 +- engine/src/rhs_types/wildcard.rs | 2 +- engine/src/scheme.rs | 16 ++++++- 8 files changed, 76 insertions(+), 29 deletions(-) diff --git a/engine/src/ast/field_expr.rs b/engine/src/ast/field_expr.rs index faca8987..bbe9dc78 100644 --- a/engine/src/ast/field_expr.rs +++ b/engine/src/ast/field_expr.rs @@ -645,8 +645,10 @@ impl<'s> Expr<'s> for ComparisonExpr<'s> { mod tests { use super::*; use crate::{ - ast::function_expr::{FunctionCallArgExpr, FunctionCallExpr}, - ast::logical_expr::LogicalExpr, + ast::{ + function_expr::{FunctionCallArgExpr, FunctionCallExpr}, + logical_expr::LogicalExpr, + }, execution_context::ExecutionContext, functions::{ FunctionArgKind, FunctionArgs, FunctionDefinition, FunctionDefinitionContext, @@ -658,7 +660,7 @@ mod tests { rhs_types::{IpRange, RegexFormat}, scheme::{FieldIndex, IndexAccessError, Scheme}, types::ExpectedType, - BytesFormat, + BytesFormat, ParserSettings, }; use cidr::IpCidr; use std::sync::LazyLock; @@ -715,7 +717,7 @@ mod tests { impl FunctionDefinition for FilterFunction { fn check_param( &self, - _: &FilterParser<'_>, + _: &ParserSettings, params: &mut dyn ExactSizeIterator>, next_param: &FunctionParam<'_>, _: Option<&mut FunctionDefinitionContext>, diff --git a/engine/src/ast/function_expr.rs b/engine/src/ast/function_expr.rs 
index 00f08c43..0bc4a6d0 100644 --- a/engine/src/ast/function_expr.rs +++ b/engine/src/ast/function_expr.rs @@ -418,7 +418,7 @@ impl<'s> FunctionCallExpr<'s> { definition .check_param( - parser, + parser.settings(), &mut args.iter().map(|arg| arg.into()), &next_param, ctx.as_mut(), diff --git a/engine/src/ast/parse.rs b/engine/src/ast/parse.rs index 6a49ee6b..42060461 100644 --- a/engine/src/ast/parse.rs +++ b/engine/src/ast/parse.rs @@ -96,29 +96,56 @@ impl Display for ParseError<'_> { } } +/// Parser settings. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct ParserSettings { + /// Approximate size of the cache used by the DFA of a regex. + /// Default: 2MB + pub regex_dfa_size_limit: usize, + /// Approximate size limit of the compiled regular expression. + /// Default: 10MB + pub regex_compiled_size_limit: usize, + /// Maximum number of star metacharacters allowed in a wildcard. + /// Default: unlimited + pub wildcard_star_limit: usize, +} + +impl Default for ParserSettings { + #[inline] + fn default() -> Self { + Self { + // Default value extracted from the regex crate. + regex_compiled_size_limit: 10 * (1 << 20), + // Default value extracted from the regex crate. + regex_dfa_size_limit: 2 * (1 << 20), + wildcard_star_limit: usize::MAX, + } + } +} + /// A structure used to drive parsing of an expression into a [`FilterAst`]. #[derive(Clone, Debug, PartialEq, Eq)] pub struct FilterParser<'s> { pub(crate) scheme: &'s Scheme, - pub(crate) regex_dfa_size_limit: usize, - pub(crate) regex_compiled_size_limit: usize, - pub(crate) wildcard_star_limit: usize, + pub(crate) settings: ParserSettings, } impl<'s> FilterParser<'s> { - /// Creates a new parser with default configuration. + /// Creates a new parser with default settings. #[inline] pub fn new(scheme: &'s Scheme) -> Self { Self { scheme, - // Default value extracted from the regex crate. - regex_compiled_size_limit: 10 * (1 << 20), - // Default value extracted from the regex crate. 
- regex_dfa_size_limit: 2 * (1 << 20), - wildcard_star_limit: usize::MAX, + settings: ParserSettings::default(), } } + /// Creates a new parser with the specified settings. + #[inline] + pub fn with_settings(scheme: &'s Scheme, settings: ParserSettings) -> Self { + Self { scheme, settings } + } + /// Returns the [`Scheme`](struct@Scheme) for which this parser has been constructor for. #[inline] pub fn scheme(&self) -> &'s Scheme { @@ -143,39 +170,45 @@ impl<'s> FilterParser<'s> { complete(self.lex_as(input.trim())).map_err(|err| ParseError::new(input, err)) } + /// Retrieve parser settings. + #[inline] + pub fn settings(&self) -> &ParserSettings { + &self.settings + } + /// Set the approximate size limit of the compiled regular expression. #[inline] pub fn regex_set_compiled_size_limit(&mut self, regex_compiled_size_limit: usize) { - self.regex_compiled_size_limit = regex_compiled_size_limit; + self.settings.regex_compiled_size_limit = regex_compiled_size_limit; } /// Get the approximate size limit of the compiled regular expression. #[inline] pub fn regex_get_compiled_size_limit(&self) -> usize { - self.regex_compiled_size_limit + self.settings.regex_compiled_size_limit } /// Set the approximate size of the cache used by the DFA of a regex. #[inline] pub fn regex_set_dfa_size_limit(&mut self, regex_dfa_size_limit: usize) { - self.regex_dfa_size_limit = regex_dfa_size_limit; + self.settings.regex_dfa_size_limit = regex_dfa_size_limit; } /// Get the approximate size of the cache used by the DFA of a regex. #[inline] pub fn regex_get_dfa_size_limit(&self) -> usize { - self.regex_dfa_size_limit + self.settings.regex_dfa_size_limit } /// Set the maximum number of star metacharacters allowed in a wildcard. #[inline] pub fn wildcard_set_star_limit(&mut self, wildcard_star_limit: usize) { - self.wildcard_star_limit = wildcard_star_limit; + self.settings.wildcard_star_limit = wildcard_star_limit; } /// Get the maximum number of star metacharacters allowed in a wildcard. 
#[inline] pub fn wildcard_get_star_limit(&self) -> usize { - self.wildcard_star_limit + self.settings.wildcard_star_limit } } diff --git a/engine/src/functions.rs b/engine/src/functions.rs index 1153c806..476b06d1 100644 --- a/engine/src/functions.rs +++ b/engine/src/functions.rs @@ -1,7 +1,7 @@ use crate::{ filter::CompiledValueResult, types::{ExpectedType, ExpectedTypeList, GetType, LhsValue, RhsValue, Type, TypeMismatchError}, - FilterParser, + ParserSettings, }; use std::any::Any; use std::convert::TryFrom; @@ -371,7 +371,7 @@ pub trait FunctionDefinition: Debug + Send + Sync { /// correct. Return the expected the parameter definition. fn check_param( &self, - parser: &FilterParser<'_>, + settings: &ParserSettings, params: &mut dyn ExactSizeIterator>, next_param: &FunctionParam<'_>, ctx: Option<&mut FunctionDefinitionContext>, @@ -460,7 +460,7 @@ pub struct SimpleFunctionDefinition { impl FunctionDefinition for SimpleFunctionDefinition { fn check_param( &self, - _parser: &FilterParser<'_>, + _settings: &ParserSettings, params: &mut dyn ExactSizeIterator>, next_param: &FunctionParam<'_>, _: Option<&mut FunctionDefinitionContext>, diff --git a/engine/src/lib.rs b/engine/src/lib.rs index 2bf87641..88c9a315 100644 --- a/engine/src/lib.rs +++ b/engine/src/lib.rs @@ -84,7 +84,7 @@ pub use self::{ function_expr::{FunctionCallArgExpr, FunctionCallExpr}, index_expr::IndexExpr, logical_expr::{LogicalExpr, LogicalOp, ParenthesizedExpr, UnaryOp}, - parse::{FilterParser, ParseError}, + parse::{FilterParser, ParseError, ParserSettings}, visitor::{Visitor, VisitorMut}, Expr, FilterAst, FilterValueAst, ValueExpr, }, diff --git a/engine/src/rhs_types/regex/imp_real.rs b/engine/src/rhs_types/regex/imp_real.rs index 8fce3d47..e3a35138 100644 --- a/engine/src/rhs_types/regex/imp_real.rs +++ b/engine/src/rhs_types/regex/imp_real.rs @@ -18,8 +18,8 @@ impl Regex { ) -> Result { ::regex::bytes::RegexBuilder::new(pattern) .unicode(false) - .size_limit(parser.regex_compiled_size_limit) 
- .dfa_size_limit(parser.regex_dfa_size_limit) + .size_limit(parser.settings.regex_compiled_size_limit) + .dfa_size_limit(parser.settings.regex_dfa_size_limit) .build() .map(|r| Regex { compiled_regex: r, diff --git a/engine/src/rhs_types/wildcard.rs b/engine/src/rhs_types/wildcard.rs index dd14d83c..970b1d5c 100644 --- a/engine/src/rhs_types/wildcard.rs +++ b/engine/src/rhs_types/wildcard.rs @@ -126,7 +126,7 @@ impl Serialize for Wildcard { impl<'i, 's, const STRICT: bool> LexWith<'i, &FilterParser<'s>> for Wildcard { fn lex_with(input: &'i str, parser: &FilterParser<'s>) -> LexResult<'i, Wildcard> { lex_quoted_or_raw_string(input).and_then(|(pattern, rest)| { - match Wildcard::new(pattern, parser.wildcard_star_limit) { + match Wildcard::new(pattern, parser.settings.wildcard_star_limit) { Ok(wildcard) => Ok((wildcard, rest)), Err(err) => Err((LexErrorKind::ParseWildcard(err), input)), } diff --git a/engine/src/scheme.rs b/engine/src/scheme.rs index fb7f098b..2924eb5a 100644 --- a/engine/src/scheme.rs +++ b/engine/src/scheme.rs @@ -1,6 +1,8 @@ use crate::{ - ast::parse::{FilterParser, ParseError}, - ast::{FilterAst, FilterValueAst}, + ast::{ + parse::{FilterParser, ParseError, ParserSettings}, + FilterAst, FilterValueAst, + }, functions::FunctionDefinition, lex::{expect, span, take_while, Lex, LexErrorKind, LexResult, LexWith}, list_matcher::ListDefinition, @@ -536,6 +538,16 @@ impl<'s> Scheme { }) } + /// Creates a new parser with default settings. + pub fn parser(&self) -> FilterParser<'_> { + FilterParser::new(self) + } + + /// Creates a new parser with the specified settings. + pub fn parser_with_settings(&self, settings: ParserSettings) -> FilterParser<'_> { + FilterParser::with_settings(self, settings) + } + /// Parses a filter expression into an AST form. pub fn parse<'i>(&'s self, input: &'i str) -> Result, ParseError<'i>> { FilterParser::new(self).parse(input)