Skip to content

Commit

Permalink
Fix issue with regexps not matching empty strings.
Browse files Browse the repository at this point in the history
Some regexps like `/(abc|)/` were not matching the empty string even though they should. Matching an empty string is not very useful, as such a regexp will match at every offset of every file, but this is fixed for correctness.

Also fixes another issue with the `\b` metacharacter in regexps, which was exposed after fixing the main issue.
  • Loading branch information
plusvic committed Jan 15, 2024
1 parent bb28999 commit 79794c4
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 19 deletions.
38 changes: 24 additions & 14 deletions libyara/re.c
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,11 @@ void yr_re_ast_destroy(RE_AST* re_ast)
// Parses a regexp but doesn't emit its code. A further call to
// yr_re_ast_emit_code is required to get the code.
//
int yr_re_parse(const char* re_string, RE_AST** re_ast, RE_ERROR* error, int flags)
int yr_re_parse(
const char* re_string,
RE_AST** re_ast,
RE_ERROR* error,
int flags)
{
return yr_parse_re_string(re_string, re_ast, error, flags);
}
Expand Down Expand Up @@ -1725,6 +1729,9 @@ int yr_re_exec(
int kill;
int action;

bool prev_is_word_char = false;
bool input_is_word_char = false;

#define ACTION_NONE 0
#define ACTION_CONTINUE 1
#define ACTION_KILL 2
Expand Down Expand Up @@ -1940,27 +1947,30 @@ int yr_re_exec(

case RE_OPCODE_WORD_BOUNDARY:
case RE_OPCODE_NON_WORD_BOUNDARY:

if (bytes_matched == 0 && input_backwards_size < character_size)
if (input - input_incr + character_size <= input_data + input_forwards_size &&
input - input_incr >= input_data - input_backwards_size)
{
match = true;
prev_is_word_char = _yr_re_is_word_char(
input - input_incr, character_size);
}
else if (bytes_matched >= max_bytes_matched)
else
{
match = true;
prev_is_word_char = false;
}

if (input + character_size <= input_data + input_forwards_size &&
input >= input_data - input_backwards_size)
{
input_is_word_char = _yr_re_is_word_char(input, character_size);
}
else
{
assert(input < input_data + input_forwards_size);
assert(input >= input_data - input_backwards_size);

assert(input - input_incr < input_data + input_forwards_size);
assert(input - input_incr >= input_data - input_backwards_size);

match = _yr_re_is_word_char(input, character_size) !=
_yr_re_is_word_char(input - input_incr, character_size);
input_is_word_char = false;
}

match = (prev_is_word_char && !input_is_word_char) ||
(!prev_is_word_char && input_is_word_char);

if (*ip == RE_OPCODE_NON_WORD_BOUNDARY)
match = !match;

Expand Down
4 changes: 2 additions & 2 deletions libyara/scan.c
Original file line number Diff line number Diff line change
Expand Up @@ -859,7 +859,7 @@ static int _yr_scan_verify_re_match(
(void*) &callback_args,
NULL));
}
else if (callback_args.forward_matches > 0)
else if (callback_args.forward_matches >= 0)
{
FAIL_ON_ERROR(
_yr_scan_match_callback(data + offset, 0, flags, &callback_args));
Expand Down Expand Up @@ -892,7 +892,7 @@ static int _yr_scan_verify_re_match(
(void*) &callback_args,
NULL));
}
else if (callback_args.forward_matches > 0)
else if (callback_args.forward_matches >= 0)
{
FAIL_ON_ERROR(
_yr_scan_match_callback(data + offset, 0, flags, &callback_args));
Expand Down
8 changes: 5 additions & 3 deletions tests/test-rules.c
Original file line number Diff line number Diff line change
Expand Up @@ -2435,15 +2435,15 @@ void test_re()
"rule test { strings: $a = /abc\\b/ wide condition: $a }",
TEXT_1024_BYTES "a\0b\0c\0b\t");

assert_false_rule_blob(
assert_false_rule(
"rule test { strings: $a = /\\b/ wide condition: $a }",
TEXT_1024_BYTES "abc");

assert_true_rule_blob(
assert_true_rule(
"rule test { condition: \"avb\" matches /a\\vb/ }",
TEXT_1024_BYTES "rule test { condition: \"avb\" matches /a\\vb/ }");

assert_false_rule_blob(
assert_false_rule(
"rule test { condition: \"ab\" matches /a\\vb/ }",
TEXT_1024_BYTES "rule test { condition: \"ab\" matches /a\\vb/ }");

Expand Down Expand Up @@ -2638,8 +2638,10 @@ void test_re()
assert_false_regexp("^(ab|cd)e", "abcde");
assert_true_regexp("(abc|)ef", "abcdef", "ef");
assert_true_regexp("(abc|)ef", "abcef", "abcef");
assert_true_regexp("(abc|)", "foo", "");
assert_true_regexp("\\babc", "abc", "abc");
assert_true_regexp("abc\\b", "abc", "abc");
assert_true_regexp("\\b", "abc", "");
assert_false_regexp("\\babc", "1abc");
assert_false_regexp("abc\\b", "abc1");
assert_true_regexp("abc\\s\\b", "abc x", "abc ");
Expand Down

0 comments on commit 79794c4

Please sign in to comment.