Skip to content

Commit

Permalink
Json to IR functionality moved over to branch
Browse files Browse the repository at this point in the history
  • Loading branch information
AVMatthews committed Jan 8, 2025
1 parent e1f70e7 commit 9a9553b
Show file tree
Hide file tree
Showing 4 changed files with 344 additions and 2 deletions.
132 changes: 131 additions & 1 deletion components/core/src/clp_s/CommandLineArguments.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,11 +106,13 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) {
std::cerr << " c - compress" << std::endl;
std::cerr << " x - decompress" << std::endl;
std::cerr << " s - search" << std::endl;
std::cerr << " r - JSON to IR Format" << std::endl;
std::cerr << std::endl;
std::cerr << "Try "
<< " c --help OR"
<< " x --help OR"
<< " s --help for command-specific details." << std::endl;
<< " s --help OR"
<< " r --help for command-specific details." << std::endl;

po::options_description visible_options;
visible_options.add(general_options);
Expand All @@ -125,6 +127,7 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) {
case (char)Command::Compress:
case (char)Command::Extract:
case (char)Command::Search:
case (char)Command::JsonToIr:
m_command = (Command)command_input;
break;
default:
Expand Down Expand Up @@ -727,6 +730,129 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) {
"The --count-by-time and --count options are mutually exclusive."
);
}
} else if ((char)Command::JsonToIr == command_input) {
po::options_description compression_positional_options;
// clang-format off
compression_positional_options.add_options()(
"ir-dir",
po::value<std::string>(&m_archives_dir)->value_name("DIR"),
"output directory"
)(
"input-paths",
po::value<std::vector<std::string>>(&m_file_paths)->value_name("PATHS"),
"input paths"
);
// clang-format on

po::options_description compression_options("Compression options");
std::string metadata_db_config_file_path;
std::string input_path_list_file_path;
// clang-format off
compression_options.add_options()(
"compression-level",
po::value<int>(&m_compression_level)->value_name("LEVEL")->
default_value(m_compression_level),
"1 (fast/low compression) to 9 (slow/high compression)."
)(
"max-document-size",
po::value<size_t>(&m_max_document_size)->value_name("DOC_SIZE")->
default_value(m_max_document_size),
"Maximum allowed size (B) for a single document before ir generation fails."
)(
"max-ir-buffer-size",
po::value<size_t>(&m_max_ir_buffer_size)->value_name("BUFFER_SIZE")->
default_value(m_max_ir_buffer_size),
"Maximum allowed size (B) for an in memory IR buffer befroe being written to file."
)(
"encoding-type",
po::value<int>(&m_encoding_type)->value_name("ENCODING_TYPE")->
default_value(m_encoding_type),
"4 (four byte encoding) or 8 (eight byte encoding)"
)(
"db-config-file",
po::value<std::string>(&metadata_db_config_file_path)->value_name("FILE")->
default_value(metadata_db_config_file_path),
"Global metadata DB YAML config"
)(
"files-from,f",
po::value<std::string>(&input_path_list_file_path)
->value_name("FILE")
->default_value(input_path_list_file_path),
"Compress files specified in FILE"
);
// clang-format on

po::positional_options_description positional_options;
positional_options.add("ir-dir", 1);
positional_options.add("input-paths", -1);

po::options_description all_compression_options;
all_compression_options.add(compression_options);
all_compression_options.add(compression_positional_options);

std::vector<std::string> unrecognized_options
= po::collect_unrecognized(parsed.options, po::include_positional);
unrecognized_options.erase(unrecognized_options.begin());
po::store(
po::command_line_parser(unrecognized_options)
.options(all_compression_options)
.positional(positional_options)
.run(),
parsed_command_line_options
);
po::notify(parsed_command_line_options);

if (parsed_command_line_options.count("help")) {
print_json_to_ir_usage();

std::cerr << "Examples:\n";
std::cerr << " # Parse file1.json and dir1 into irs-dir\n";
std::cerr << " " << m_program_name << " r irs-dir file1.json dir1\n";

po::options_description visible_options;
visible_options.add(general_options);
visible_options.add(compression_options);
std::cerr << visible_options << '\n';
return ParsingResult::InfoCommand;
}

if (m_archives_dir.empty()) {
throw std::invalid_argument("No IRs directory specified.");
}

if (false == input_path_list_file_path.empty()) {
if (false == read_paths_from_file(input_path_list_file_path, m_file_paths)) {
SPDLOG_ERROR("Failed to read paths from {}", input_path_list_file_path);
return ParsingResult::Failure;
}
}

if (m_file_paths.empty()) {
throw std::invalid_argument("No input paths specified.");
}

// Parse and validate global metadata DB config
if (false == metadata_db_config_file_path.empty()) {
clp::GlobalMetadataDBConfig metadata_db_config;
try {
metadata_db_config.parse_config_file(metadata_db_config_file_path);
} catch (std::exception& e) {
SPDLOG_ERROR("Failed to validate metadata database config - {}.", e.what());
return ParsingResult::Failure;
}

if (clp::GlobalMetadataDBConfig::MetadataDBType::MySQL
!= metadata_db_config.get_metadata_db_type())
{
SPDLOG_ERROR(
"Invalid metadata database type for {}; only supported type is MySQL.",
m_program_name
);
return ParsingResult::Failure;
}

m_metadata_db_config = std::move(metadata_db_config);
}
}
} catch (std::exception& e) {
SPDLOG_ERROR("{}", e.what());
Expand Down Expand Up @@ -834,6 +960,10 @@ void CommandLineArguments::print_decompression_usage() const {
std::cerr << "Usage: " << m_program_name << " x [OPTIONS] ARCHIVES_DIR OUTPUT_DIR" << std::endl;
}

void CommandLineArguments::print_json_to_ir_usage() const {
std::cerr << "Usage: " << m_program_name << " r [OPTIONS] IRS_DIR [FILE/DIR ...]\n";
}

void CommandLineArguments::print_search_usage() const {
std::cerr << "Usage: " << m_program_name
<< " s [OPTIONS] ARCHIVES_DIR KQL_QUERY"
Expand Down
11 changes: 10 additions & 1 deletion components/core/src/clp_s/CommandLineArguments.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ class CommandLineArguments {
enum class Command : char {
Compress = 'c',
Extract = 'x',
Search = 's'
Search = 's',
JsonToIr = 'r'
};

enum class OutputHandlerType : uint8_t {
Expand Down Expand Up @@ -65,6 +66,10 @@ class CommandLineArguments {

size_t get_max_document_size() const { return m_max_document_size; }

[[nodiscard]] auto get_max_ir_buffer_size() const -> size_t { return m_max_ir_buffer_size; }

[[nodiscard]] auto get_encoding_type() const -> int { return m_encoding_type; }

[[nodiscard]] bool print_archive_stats() const { return m_print_archive_stats; }

std::string const& get_mongodb_uri() const { return m_mongodb_uri; }
Expand Down Expand Up @@ -170,6 +175,8 @@ class CommandLineArguments {

void print_decompression_usage() const;

void print_json_to_ir_usage() const;

void print_search_usage() const;

// Variables
Expand All @@ -192,6 +199,8 @@ class CommandLineArguments {
size_t m_minimum_table_size{1ULL * 1024 * 1024}; // 1 MB
bool m_disable_log_order{false};
FileType m_file_type{FileType::Json};
int m_encoding_type{8};
size_t m_max_ir_buffer_size{512ULL * 1024 * 1024};

// Metadata db variables
std::optional<clp::GlobalMetadataDBConfig> m_metadata_db_config;
Expand Down
9 changes: 9 additions & 0 deletions components/core/src/clp_s/JsonParser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,15 @@ struct JsonParserOption {
std::shared_ptr<clp::GlobalMySQLMetadataDB> metadata_db;
};

struct JsonToIrParserOption {
std::vector<std::string> file_paths;
std::string irs_dir;
size_t max_document_size;
size_t max_ir_buffer_size;
int compression_level;
int encoding;
};

class JsonParser {
public:
class OperationFailed : public TraceableException {
Expand Down
Loading

0 comments on commit 9a9553b

Please sign in to comment.