diff --git a/libyara/re.c b/libyara/re.c index e128e3065a..4edf441529 100644 --- a/libyara/re.c +++ b/libyara/re.c @@ -214,7 +214,11 @@ void yr_re_ast_destroy(RE_AST* re_ast) // Parses a regexp but don't emit its code. A further call to // yr_re_ast_emit_code is required to get the code. // -int yr_re_parse(const char* re_string, RE_AST** re_ast, RE_ERROR* error, int flags) +int yr_re_parse( + const char* re_string, + RE_AST** re_ast, + RE_ERROR* error, + int flags) { return yr_parse_re_string(re_string, re_ast, error, flags); } @@ -1725,6 +1729,9 @@ int yr_re_exec( int kill; int action; + bool prev_is_word_char = false; + bool input_is_word_char = false; + #define ACTION_NONE 0 #define ACTION_CONTINUE 1 #define ACTION_KILL 2 @@ -1940,27 +1947,30 @@ int yr_re_exec( case RE_OPCODE_WORD_BOUNDARY: case RE_OPCODE_NON_WORD_BOUNDARY: - - if (bytes_matched == 0 && input_backwards_size < character_size) + if (input - input_incr + character_size <= input_data + input_forwards_size && + input - input_incr >= input_data - input_backwards_size) { - match = true; + prev_is_word_char = _yr_re_is_word_char( + input - input_incr, character_size); } - else if (bytes_matched >= max_bytes_matched) + else { - match = true; + prev_is_word_char = false; + } + + if (input + character_size <= input_data + input_forwards_size && + input >= input_data - input_backwards_size) + { + input_is_word_char = _yr_re_is_word_char(input, character_size); } else { - assert(input < input_data + input_forwards_size); - assert(input >= input_data - input_backwards_size); - - assert(input - input_incr < input_data + input_forwards_size); - assert(input - input_incr >= input_data - input_backwards_size); - - match = _yr_re_is_word_char(input, character_size) != - _yr_re_is_word_char(input - input_incr, character_size); + input_is_word_char = false; } + match = (prev_is_word_char && !input_is_word_char) || + (!prev_is_word_char && input_is_word_char); + if (*ip == RE_OPCODE_NON_WORD_BOUNDARY) match = !match; diff --git a/libyara/scan.c b/libyara/scan.c index b1ff00fcc5..5e4c8dc308 100644 --- a/libyara/scan.c +++ b/libyara/scan.c @@ -859,7 +859,7 @@ static int _yr_scan_verify_re_match( (void*) &callback_args, NULL)); } - else if (callback_args.forward_matches > 0) + else if (callback_args.forward_matches >= 0) { FAIL_ON_ERROR( _yr_scan_match_callback(data + offset, 0, flags, &callback_args)); @@ -892,7 +892,7 @@ static int _yr_scan_verify_re_match( (void*) &callback_args, NULL)); } - else if (callback_args.forward_matches > 0) + else if (callback_args.forward_matches >= 0) { FAIL_ON_ERROR( _yr_scan_match_callback(data + offset, 0, flags, &callback_args)); diff --git a/tests/test-rules.c b/tests/test-rules.c index 5fd928fb1b..5545df330f 100644 --- a/tests/test-rules.c +++ b/tests/test-rules.c @@ -2435,15 +2435,15 @@ void test_re() "rule test { strings: $a = /abc\\b/ wide condition: $a }", TEXT_1024_BYTES "a\0b\0c\0b\t"); - assert_false_rule_blob( + assert_false_rule( "rule test { strings: $a = /\\b/ wide condition: $a }", TEXT_1024_BYTES "abc"); - assert_true_rule_blob( + assert_true_rule( "rule test { condition: \"avb\" matches /a\\vb/ }", TEXT_1024_BYTES "rule test { condition: \"avb\" matches /a\\vb/ }"); - assert_false_rule_blob( + assert_false_rule( "rule test { condition: \"ab\" matches /a\\vb/ }", TEXT_1024_BYTES "rule test { condition: \"ab\" matches /a\\vb/ }"); @@ -2638,8 +2638,10 @@ void test_re() assert_false_regexp("^(ab|cd)e", "abcde"); assert_true_regexp("(abc|)ef", "abcdef", "ef"); assert_true_regexp("(abc|)ef", "abcef", "abcef"); + assert_true_regexp("(abc|)", "foo", ""); assert_true_regexp("\\babc", "abc", "abc"); assert_true_regexp("abc\\b", "abc", "abc"); + assert_true_regexp("\\b", "abc", ""); assert_false_regexp("\\babc", "1abc"); assert_false_regexp("abc\\b", "abc1"); assert_true_regexp("abc\\s\\b", "abc x", "abc ");