Skip to content

Commit

Permalink
Only pass parser settings when checking function parameters.
Browse files Browse the repository at this point in the history
Passing the whole parser allows access to too many things, like the
scheme and potentially other things we might add in the future.
It also makes writing unit tests more cumbersome since we now have
to build a scheme first just to get a dummy parser.
  • Loading branch information
marmeladema committed Feb 24, 2025
1 parent 8e10e4b commit ca9a4a2
Show file tree
Hide file tree
Showing 8 changed files with 76 additions and 29 deletions.
10 changes: 6 additions & 4 deletions engine/src/ast/field_expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -645,8 +645,10 @@ impl<'s> Expr<'s> for ComparisonExpr<'s> {
mod tests {
use super::*;
use crate::{
ast::function_expr::{FunctionCallArgExpr, FunctionCallExpr},
ast::logical_expr::LogicalExpr,
ast::{
function_expr::{FunctionCallArgExpr, FunctionCallExpr},
logical_expr::LogicalExpr,
},
execution_context::ExecutionContext,
functions::{
FunctionArgKind, FunctionArgs, FunctionDefinition, FunctionDefinitionContext,
Expand All @@ -658,7 +660,7 @@ mod tests {
rhs_types::{IpRange, RegexFormat},
scheme::{FieldIndex, IndexAccessError, Scheme},
types::ExpectedType,
BytesFormat,
BytesFormat, ParserSettings,
};
use cidr::IpCidr;
use std::sync::LazyLock;
Expand Down Expand Up @@ -715,7 +717,7 @@ mod tests {
impl FunctionDefinition for FilterFunction {
fn check_param(
&self,
_: &FilterParser<'_>,
_: &ParserSettings,
params: &mut dyn ExactSizeIterator<Item = FunctionParam<'_>>,
next_param: &FunctionParam<'_>,
_: Option<&mut FunctionDefinitionContext>,
Expand Down
2 changes: 1 addition & 1 deletion engine/src/ast/function_expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,7 @@ impl<'s> FunctionCallExpr<'s> {

definition
.check_param(
parser,
parser.settings(),
&mut args.iter().map(|arg| arg.into()),
&next_param,
ctx.as_mut(),
Expand Down
63 changes: 48 additions & 15 deletions engine/src/ast/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,29 +96,56 @@ impl Display for ParseError<'_> {
}
}

/// Parser settings.
///
/// Groups the tunable limits applied while parsing a filter expression.
/// [`ParserSettings::default`] yields the values documented on each field.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ParserSettings {
    /// Approximate size of the cache used by the DFA of a regex.
    /// Default: 2MB
    pub regex_dfa_size_limit: usize,
    /// Approximate size limit of the compiled regular expression.
    /// Default: 10MB
    pub regex_compiled_size_limit: usize,
    /// Maximum number of star metacharacters allowed in a wildcard.
    /// Default: unlimited
    pub wildcard_star_limit: usize,
}

impl Default for ParserSettings {
    /// Returns settings with a 2MB regex DFA cache limit, a 10MB compiled
    /// regex size limit, and no limit on wildcard star metacharacters.
    #[inline]
    fn default() -> Self {
        Self {
            // Default value extracted from the regex crate (2 MiB).
            regex_dfa_size_limit: 2 * (1 << 20),
            // Default value extracted from the regex crate (10 MiB).
            regex_compiled_size_limit: 10 * (1 << 20),
            // `usize::MAX` effectively means "no limit".
            wildcard_star_limit: usize::MAX,
        }
    }
}

/// A structure used to drive parsing of an expression into a [`FilterAst`].
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct FilterParser<'s> {
pub(crate) scheme: &'s Scheme,
pub(crate) regex_dfa_size_limit: usize,
pub(crate) regex_compiled_size_limit: usize,
pub(crate) wildcard_star_limit: usize,
pub(crate) settings: ParserSettings,
}

impl<'s> FilterParser<'s> {
/// Creates a new parser with default configuration.
/// Creates a new parser with default settings.
#[inline]
pub fn new(scheme: &'s Scheme) -> Self {
Self {
scheme,
// Default value extracted from the regex crate.
regex_compiled_size_limit: 10 * (1 << 20),
// Default value extracted from the regex crate.
regex_dfa_size_limit: 2 * (1 << 20),
wildcard_star_limit: usize::MAX,
settings: ParserSettings::default(),
}
}

/// Creates a new parser for `scheme` that uses the specified `settings`.
///
/// The settings are stored in the parser by value.
#[inline]
pub fn with_settings(scheme: &'s Scheme, settings: ParserSettings) -> Self {
Self { scheme, settings }
}

/// Returns the [`Scheme`](struct@Scheme) for which this parser has been constructed.
#[inline]
pub fn scheme(&self) -> &'s Scheme {
Expand All @@ -143,39 +170,45 @@ impl<'s> FilterParser<'s> {
complete(self.lex_as(input.trim())).map_err(|err| ParseError::new(input, err))
}

/// Returns a shared reference to this parser's current [`ParserSettings`].
#[inline]
pub fn settings(&self) -> &ParserSettings {
&self.settings
}

/// Set the approximate size limit of the compiled regular expression.
#[inline]
pub fn regex_set_compiled_size_limit(&mut self, regex_compiled_size_limit: usize) {
self.regex_compiled_size_limit = regex_compiled_size_limit;
self.settings.regex_compiled_size_limit = regex_compiled_size_limit;
}

/// Get the approximate size limit of the compiled regular expression.
#[inline]
pub fn regex_get_compiled_size_limit(&self) -> usize {
self.regex_compiled_size_limit
self.settings.regex_compiled_size_limit
}

/// Set the approximate size of the cache used by the DFA of a regex.
#[inline]
pub fn regex_set_dfa_size_limit(&mut self, regex_dfa_size_limit: usize) {
self.regex_dfa_size_limit = regex_dfa_size_limit;
self.settings.regex_dfa_size_limit = regex_dfa_size_limit;
}

/// Get the approximate size of the cache used by the DFA of a regex.
#[inline]
pub fn regex_get_dfa_size_limit(&self) -> usize {
self.regex_dfa_size_limit
self.settings.regex_dfa_size_limit
}

/// Set the maximum number of star metacharacters allowed in a wildcard.
#[inline]
pub fn wildcard_set_star_limit(&mut self, wildcard_star_limit: usize) {
self.wildcard_star_limit = wildcard_star_limit;
self.settings.wildcard_star_limit = wildcard_star_limit;
}

/// Get the maximum number of star metacharacters allowed in a wildcard.
#[inline]
pub fn wildcard_get_star_limit(&self) -> usize {
self.wildcard_star_limit
self.settings.wildcard_star_limit
}
}
6 changes: 3 additions & 3 deletions engine/src/functions.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::{
filter::CompiledValueResult,
types::{ExpectedType, ExpectedTypeList, GetType, LhsValue, RhsValue, Type, TypeMismatchError},
FilterParser,
ParserSettings,
};
use std::any::Any;
use std::convert::TryFrom;
Expand Down Expand Up @@ -371,7 +371,7 @@ pub trait FunctionDefinition: Debug + Send + Sync {
/// correct. Return the expected parameter definition.
fn check_param(
&self,
parser: &FilterParser<'_>,
settings: &ParserSettings,
params: &mut dyn ExactSizeIterator<Item = FunctionParam<'_>>,
next_param: &FunctionParam<'_>,
ctx: Option<&mut FunctionDefinitionContext>,
Expand Down Expand Up @@ -460,7 +460,7 @@ pub struct SimpleFunctionDefinition {
impl FunctionDefinition for SimpleFunctionDefinition {
fn check_param(
&self,
_parser: &FilterParser<'_>,
_settings: &ParserSettings,
params: &mut dyn ExactSizeIterator<Item = FunctionParam<'_>>,
next_param: &FunctionParam<'_>,
_: Option<&mut FunctionDefinitionContext>,
Expand Down
2 changes: 1 addition & 1 deletion engine/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ pub use self::{
function_expr::{FunctionCallArgExpr, FunctionCallExpr},
index_expr::IndexExpr,
logical_expr::{LogicalExpr, LogicalOp, ParenthesizedExpr, UnaryOp},
parse::{FilterParser, ParseError},
parse::{FilterParser, ParseError, ParserSettings},
visitor::{Visitor, VisitorMut},
Expr, FilterAst, FilterValueAst, ValueExpr,
},
Expand Down
4 changes: 2 additions & 2 deletions engine/src/rhs_types/regex/imp_real.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ impl Regex {
) -> Result<Self, Error> {
::regex::bytes::RegexBuilder::new(pattern)
.unicode(false)
.size_limit(parser.regex_compiled_size_limit)
.dfa_size_limit(parser.regex_dfa_size_limit)
.size_limit(parser.settings.regex_compiled_size_limit)
.dfa_size_limit(parser.settings.regex_dfa_size_limit)
.build()
.map(|r| Regex {
compiled_regex: r,
Expand Down
2 changes: 1 addition & 1 deletion engine/src/rhs_types/wildcard.rs
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ impl<const STRICT: bool> Serialize for Wildcard<STRICT> {
impl<'i, 's, const STRICT: bool> LexWith<'i, &FilterParser<'s>> for Wildcard<STRICT> {
fn lex_with(input: &'i str, parser: &FilterParser<'s>) -> LexResult<'i, Wildcard<STRICT>> {
lex_quoted_or_raw_string(input).and_then(|(pattern, rest)| {
match Wildcard::new(pattern, parser.wildcard_star_limit) {
match Wildcard::new(pattern, parser.settings.wildcard_star_limit) {
Ok(wildcard) => Ok((wildcard, rest)),
Err(err) => Err((LexErrorKind::ParseWildcard(err), input)),
}
Expand Down
16 changes: 14 additions & 2 deletions engine/src/scheme.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
use crate::{
ast::parse::{FilterParser, ParseError},
ast::{FilterAst, FilterValueAst},
ast::{
parse::{FilterParser, ParseError, ParserSettings},
FilterAst, FilterValueAst,
},
functions::FunctionDefinition,
lex::{expect, span, take_while, Lex, LexErrorKind, LexResult, LexWith},
list_matcher::ListDefinition,
Expand Down Expand Up @@ -536,6 +538,16 @@ impl<'s> Scheme {
})
}

/// Creates a new [`FilterParser`] bound to this scheme, with default settings.
///
/// Equivalent to calling `FilterParser::new` with this scheme.
pub fn parser(&self) -> FilterParser<'_> {
FilterParser::new(self)
}

/// Creates a new [`FilterParser`] bound to this scheme, with the specified settings.
///
/// Equivalent to calling `FilterParser::with_settings` with this scheme.
pub fn parser_with_settings(&self, settings: ParserSettings) -> FilterParser<'_> {
FilterParser::with_settings(self, settings)
}

/// Parses a filter expression into an AST form.
pub fn parse<'i>(&'s self, input: &'i str) -> Result<FilterAst<'s>, ParseError<'i>> {
FilterParser::new(self).parse(input)
Expand Down

0 comments on commit ca9a4a2

Please sign in to comment.