Skip to content

Commit

Permalink
Allow adding delimiter to Schema object; Allow using LogParser as Que…
Browse files Browse the repository at this point in the history
…ryParser; Fix wildcard bug in timestamp schema
  • Loading branch information
SharafMohamed committed Aug 9, 2024
1 parent 0b9e45c commit 97731ae
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 38 deletions.
40 changes: 22 additions & 18 deletions src/log_surgeon/LogParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,11 @@ using finite_automata::RegexASTOr;
using finite_automata::RegexDFAByteState;
using finite_automata::RegexNFAByteState;

LogParser::LogParser(string const& schema_file_path)
: LogParser::LogParser(SchemaParser::try_schema_file(schema_file_path)) {}
// Constructs the parser from a schema file: the file is parsed with
// SchemaParser::try_schema_file and the resulting AST, together with query_parser,
// is handed to the SchemaAST-based constructor via constructor delegation.
LogParser::LogParser(string const& schema_file_path, bool query_parser)
: LogParser::LogParser(SchemaParser::try_schema_file(schema_file_path), query_parser) {}

LogParser::LogParser(std::unique_ptr<SchemaAST> schema_ast) {
add_rules(std::move(schema_ast));
// Constructs the parser from an already-parsed schema AST. Ownership of schema_ast is
// moved into add_rules, and query_parser is forwarded to it unchanged.
LogParser::LogParser(std::unique_ptr<SchemaAST> schema_ast, bool query_parser) {
// Register the schema's rules before generating the lexer.
add_rules(std::move(schema_ast), query_parser);
m_lexer.generate();
// The log event view is constructed with a reference to this parser.
m_log_event_view = make_unique<LogEventView>(*this);
}
Expand All @@ -43,7 +43,7 @@ auto LogParser::add_delimiters(unique_ptr<ParserAST> const& delimiters) -> void
}
}

void LogParser::add_rules(std::unique_ptr<SchemaAST> schema_ast) {
void LogParser::add_rules(std::unique_ptr<SchemaAST> schema_ast, bool query_parser) {
for (auto const& delimiters : schema_ast->m_delimiters) {
add_delimiters(delimiters);
}
Expand All @@ -57,10 +57,14 @@ void LogParser::add_rules(std::unique_ptr<SchemaAST> schema_ast) {
if (delimiters.empty()) {
throw runtime_error("When using --schema-path, \"delimiters:\" line must be used.");
}
add_token("newLine", '\n');
if(false == query_parser) {
add_token("newLine", '\n');
}
for (unique_ptr<ParserAST> const& parser_ast : schema_ast->m_schema_vars) {
auto* rule = dynamic_cast<SchemaVarAST*>(parser_ast.get());
if (rule->m_name == "timestamp") {
// transform '.' from any-character into any non-delimiter character
rule->m_regex_ptr->remove_delimiters_from_wildcard(delimiters);
if (rule->m_name == "timestamp" && false == query_parser) {
unique_ptr<RegexAST<RegexNFAByteState>> first_timestamp_regex_ast(
rule->m_regex_ptr->clone()
);
Expand Down Expand Up @@ -88,8 +92,6 @@ void LogParser::add_rules(std::unique_ptr<SchemaAST> schema_ast) {
// prevent timestamps from going into the dictionary
continue;
}
// transform '.' from any-character into any non-delimiter character
rule->m_regex_ptr->remove_delimiters_from_wildcard(delimiters);
// currently, error out if non-timestamp pattern contains a delimiter
// check if regex contains a delimiter
bool is_possible_input[cUnicodeMax] = {false};
Expand All @@ -103,7 +105,7 @@ void LogParser::add_rules(std::unique_ptr<SchemaAST> schema_ast) {
break;
}
}
if (contains_delimiter) {
if (contains_delimiter && "timestamp" != rule->m_name) {
FileReader schema_reader;
ErrorCode error_code = schema_reader.try_open(schema_ast->m_file_path);
if (ErrorCode::Success != error_code) {
Expand Down Expand Up @@ -137,14 +139,16 @@ void LogParser::add_rules(std::unique_ptr<SchemaAST> schema_ast) {
+ arrows + "\n"
);
}
unique_ptr<RegexASTGroup<RegexNFAByteState>> delimiter_group
= make_unique<RegexASTGroup<RegexNFAByteState>>(
RegexASTGroup<RegexNFAByteState>(delimiters)
);
rule->m_regex_ptr = make_unique<RegexASTCat<RegexNFAByteState>>(
std::move(delimiter_group),
std::move(rule->m_regex_ptr)
);
if (false == query_parser) {
unique_ptr<RegexASTGroup<RegexNFAByteState>> delimiter_group
= make_unique<RegexASTGroup<RegexNFAByteState>>(
RegexASTGroup<RegexNFAByteState>(delimiters)
);
rule->m_regex_ptr = make_unique<RegexASTCat<RegexNFAByteState>>(
std::move(delimiter_group),
std::move(rule->m_regex_ptr)
);
}
add_rule(rule->m_name, std::move(rule->m_regex_ptr));
}
}
Expand Down
12 changes: 9 additions & 3 deletions src/log_surgeon/LogParser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,19 +27,24 @@ class LogParser
/**
* Constructs the parser using the given schema file.
* @param schema_file_path
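* @param query_parser Forwarded to the schema-AST constructor. When true, the rules are
* built for parsing queries: no "newLine" token is added, the timestamp-specific handling
* is skipped, and rules are not prefixed with a delimiter group.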
* @throw std::runtime_error from LALR1Parser, RegexAST, or Lexer
* describing the failure parsing the schema file or processing the schema
* AST.
*/
explicit LogParser(std::string const& schema_file_path);
explicit LogParser(std::string const& schema_file_path, bool query_parser = false);

/**
* Constructs the parser using the given schema AST.
* @param schema_ast
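* @param query_parser Forwarded to add_rules. When true, the rules are built for parsing
* queries: no "newLine" token is added, the timestamp-specific handling is skipped, and
* rules are not prefixed with a delimiter group.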
* @throw std::runtime_error from LALR1Parser, RegexAST, or Lexer
* describing the failure processing the schema AST.
*/
explicit LogParser(std::unique_ptr<log_surgeon::SchemaAST> schema_ast);
explicit LogParser(
std::unique_ptr<log_surgeon::SchemaAST> schema_ast,
bool query_parser = false
);

/**
* Returns the parser to its initial state, clearing any existing
Expand Down Expand Up @@ -157,8 +162,9 @@ class LogParser
* specified in the schema AST.
* @param schema_ast The AST from which parsing and lexing rules are
* generated.
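* @param query_parser When true, no "newLine" token is added, the timestamp-specific
* handling is skipped, and rules are not prefixed with a delimiter group.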
*/
auto add_rules(std::unique_ptr<SchemaAST> schema_ast) -> void;
auto add_rules(std::unique_ptr<SchemaAST> schema_ast, bool query_parser = false) -> void;

// TODO: move ownership of the buffer to the lexer
ParserInputBuffer m_input_buffer;
Expand Down
12 changes: 7 additions & 5 deletions src/log_surgeon/Schema.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,12 @@ Schema::Schema() {
Schema::Schema(std::string const& schema_file_path)
: m_schema_ast{SchemaParser::try_schema_file(schema_file_path)} {}

auto Schema::add_variable(std::string const& var_name, std::string const& regex, int priority)
-> void {
std::string unparsed_string = var_name + ":" + regex;
std::unique_ptr<SchemaAST> schema_ast = SchemaParser::try_schema_string(unparsed_string);
m_schema_ast->add_schema_var(std::move(schema_ast->m_schema_vars[0]), priority);
/**
 * Parses schema_line as if it were an entire schema, then moves the first parsed
 * delimiter entry or the first parsed schema variable into m_schema_ast.
 * @param schema_line The line to parse. It is treated as a delimiters line only when it
 * begins with "delimiters" AND parsing actually produced a delimiter entry; otherwise it is
 * treated as a variable line.
 * @param priority Forwarded to add_schema_var for variable lines; unused for delimiter
 * lines.
 */
auto Schema::add_schema_line(std::string const& schema_line, int priority) -> void {
    std::unique_ptr<SchemaAST> schema_ast = SchemaParser::try_schema_string(schema_line);
    // rfind with pos == 0 is a prefix test that avoids the temporary string substr() built.
    bool const has_delimiters_prefix = (0 == schema_line.rfind("delimiters", 0));
    // A variable whose name merely starts with "delimiters" yields no delimiter entry, so
    // the emptiness check keeps it from indexing an empty m_delimiters.
    if (has_delimiters_prefix && false == schema_ast->m_delimiters.empty()) {
        m_schema_ast->add_delimiters(std::move(schema_ast->m_delimiters[0]));
    } else {
        m_schema_ast->add_schema_var(std::move(schema_ast->m_schema_vars[0]), priority);
    }
}
} // namespace log_surgeon
18 changes: 6 additions & 12 deletions src/log_surgeon/Schema.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,15 @@ class Schema {
explicit Schema(std::string const& schema_file_path);

/**
* Parses var_name+":"+regex as if it were its own entire schema file. Then
* extracts the SchemaVarAST from the resulting SchemaAST and adds it to
* m_schema_vars in m_schema_ast. Position in m_schema_vars is determined by
* the priority (priority == -1 to set to lowest).
* @param var_name
* Parses schema line as if it were its own entire schema file. Then extracts the
* SchemaVarAST/DelimiterStringAST from the resulting SchemaAST and adds it to
* m_schema_vars/m_delimiters in m_schema_ast. Position in m_schema_vars is determined by the
* priority (priority == -1 to set to lowest).
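* @param schema_line A single schema line: either a "delimiters" line or a variable line
* (presumably of the form "name:regex" — confirm against SchemaParser).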
* @param priority
*/
auto add_variable(std::string const& var_name, std::string const& regex, int priority) -> void;
auto add_schema_line(std::string const& schema_line, int priority) -> void;

/* Work in progress API to modify a schema object
Expand All @@ -41,18 +41,12 @@ class Schema {
auto set_variables (std::map<std::string, std::string> variables) -> void;
auto add_delimiter (char delimiter) -> void;
auto remove_delimiter (char delimiter) -> void;
auto add_delimiters (std::vector<char> delimiter) -> void;
auto remove_delimiters (std::vector<char> delimiter) -> void;
auto remove_all_delimiters () -> void;
auto set_delimiters (std::vector<char> delimiters) -> void;
auto clear ();
*/

Expand Down

0 comments on commit 97731ae

Please sign in to comment.