Skip to content

Commit

Permalink
Added examples
Browse files Browse the repository at this point in the history
  • Loading branch information
anuragkh committed May 22, 2016
1 parent 419f561 commit 1e4ecba
Show file tree
Hide file tree
Showing 6 changed files with 324 additions and 12 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake-modules)

# Builds all of Succinct's sub-modules
add_subdirectory(core)
add_subdirectory(construct)
add_subdirectory(examples)
add_subdirectory(sharded)
add_subdirectory(sharded-kv)
add_subdirectory(bench)
Expand Down
4 changes: 2 additions & 2 deletions bench/src/regex_benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
#include <time.h>
#include <sys/time.h>

#include "../../core/include/regex/regex.h"
#include "../../core/include/succinct_file.h"
#include "regex/regex.h"
#include "succinct_file.h"

// Debug
void display(RegEx *re) {
Expand Down
10 changes: 7 additions & 3 deletions construct/CMakeLists.txt → examples/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
cmake_minimum_required(VERSION 2.8)
project(succinct-construct CXX)
project(succinct-examples CXX)

include(CheckCXXCompilerFlag)
CHECK_CXX_COMPILER_FLAG("-std=c++11" COMPILER_SUPPORTS_CXX11)
Expand All @@ -21,6 +21,10 @@ file(MAKE_DIRECTORY ${EXECUTABLE_OUTPUT_PATH})

set(INCLUDE include ../core/include)
include_directories(${INCLUDE})
add_executable(construct src/construct.cc)
add_executable(compress src/compress.cc)
add_executable(query_file src/query_file.cc)
add_executable(query_kv src/query_kv.cc)

target_link_libraries(construct succinct)
target_link_libraries(compress succinct)
target_link_libraries(query_file succinct)
target_link_libraries(query_kv succinct)
58 changes: 52 additions & 6 deletions construct/src/construct.cc → examples/src/compress.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,26 @@
#include <unistd.h>

#include "succinct_shard.h"
#include "succinct_file.h"
#include "npa/npa.h"

/**
* Example program that takes an input file and compresses it using Succinct.
*/

/**
* Prints usage
*/
void print_usage(char *exec) {
fprintf(
stderr,
"Usage: %s [-s sa_sampling_rate] [-i isa_sampling_rate] [-x sampling_scheme] [-n npa_sampling_rate] [-r npa_encoding_scheme] [file]\n",
"Usage: %s [-s sa_sampling_rate] [-i isa_sampling_rate] [-x sampling_scheme] [-n npa_sampling_rate] [-r npa_encoding_scheme] [-t input_type] [file]\n",
exec);
}

/**
* Converts integer option to SamplingScheme
*/
SamplingScheme SamplingSchemeFromOption(int opt) {
switch (opt) {
case 0: {
Expand All @@ -38,6 +49,9 @@ SamplingScheme SamplingSchemeFromOption(int opt) {
}
}

/**
* Converts integer option to NPAEncodingScheme
*/
NPA::NPAEncodingScheme EncodingSchemeFromOption(int opt) {
switch (opt) {
case 0: {
Expand All @@ -60,6 +74,9 @@ NPA::NPAEncodingScheme EncodingSchemeFromOption(int opt) {
}

int main(int argc, char **argv) {
// Logic to parse command line arguments

// starts here ==>
if (argc < 2 || argc > 12) {
print_usage(argv[0]);
return -1;
Expand All @@ -72,8 +89,9 @@ int main(int argc, char **argv) {
SamplingScheme sampling_scheme = SamplingScheme::FLAT_SAMPLE_BY_INDEX;
NPA::NPAEncodingScheme npa_encoding_scheme =
NPA::NPAEncodingScheme::ELIAS_GAMMA_ENCODED;
std::string type = "file";

while ((c = getopt(argc, argv, "s:i:x:n:r:")) != -1) {
while ((c = getopt(argc, argv, "s:i:x:n:r:t:")) != -1) {
switch (c) {
case 's': {
sa_sampling_rate = atoi(optarg);
Expand All @@ -95,6 +113,10 @@ int main(int argc, char **argv) {
npa_encoding_scheme = EncodingSchemeFromOption(atoi(optarg));
break;
}
case 't': {
type = optarg;
break;
}
default: {
fprintf(stderr, "Error parsing options\n");
exit(-1);
Expand All @@ -106,17 +128,41 @@ int main(int argc, char **argv) {
print_usage(argv[0]);
return -1;
}

std::string inputpath = std::string(argv[optind]);
// <== ends here

SuccinctShard *fd = new SuccinctShard(0, inputpath,
if (type == "file") {
// The following compresses an input file at "inputpath" in memory
// as a flat file (no structure) using the compression parameters
// passed in (sampling rates, etc.).
// Leave the arguments unspecified to use default values.
SuccinctFile *fd = new SuccinctFile(inputpath,
SuccinctMode::CONSTRUCT_IN_MEMORY,
sa_sampling_rate, isa_sampling_rate,
npa_sampling_rate, sampling_scheme,
sampling_scheme, npa_encoding_scheme);
// Serialize
fd->Serialize();

fprintf(stderr, "Shard Size = %lu\n", fd->StorageSize());
// Serialize the compressed representation to disk at the location <inputpath>.succinct
fd->Serialize();
delete fd;
} else if (type == "kv") {
// The following compresses an input file at "inputpath" in memory
// as a buffer containing key-value pairs. It uses newline '\n' to
// differentiate between successive values, and assigns the line number
// as the key for the corresponding value.
SuccinctShard *fd = new SuccinctShard(0, inputpath,
SuccinctMode::CONSTRUCT_IN_MEMORY,
sa_sampling_rate, isa_sampling_rate,
npa_sampling_rate, sampling_scheme,
sampling_scheme, npa_encoding_scheme);

// Serialize the compressed representation to disk at the location <inputpath>.succinct
fd->Serialize();
delete fd;
} else {
fprintf(stderr, "Invalid type: %s\n", type.c_str());
}

return 0;
}
131 changes: 131 additions & 0 deletions examples/src/query_file.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

#include <sys/time.h>
#include <time.h>
#include <unistd.h>

#include "succinct_file.h"

/**
 * Writes the command-line synopsis for this program to stderr.
 *
 * @param exec Name of the executable, substituted into the synopsis.
 */
void print_usage(char *exec) {
  static const char kUsageFormat[] = "Usage: %s [-m mode] [file]\n";
  fprintf(stderr, kUsageFormat, exec);
}

/**
 * Writes the list of commands understood by the shell to stderr.
 */
void print_valid_cmds() {
  static const char kValidCommands[] =
      "Command must be one of: search [query], count [query], extract [offset] [length]\n";
  std::cerr << kValidCommands;
}

/**
 * Timestamp in microseconds, as produced by get_timestamp().
 */
typedef unsigned long long int timestamp_t;

/**
 * Returns the current time reported by gettimeofday(), expressed in
 * microseconds (seconds * 1000000 + microseconds).
 */
static timestamp_t get_timestamp() {
  struct timeval now;
  gettimeofday(&now, nullptr);

  // Widen the seconds to timestamp_t *before* multiplying: doing the
  // multiplication in time_t overflows where time_t is only 32 bits wide.
  return static_cast<timestamp_t>(now.tv_sec) * 1000000ULL
      + static_cast<timestamp_t>(now.tv_usec);
}

int main(int argc, char **argv) {
if (argc < 2 || argc > 5) {
print_usage(argv[0]);
return -1;
}

int c;
uint32_t mode = 0;
while ((c = getopt(argc, argv, "m:")) != -1) {
switch (c) {
case 'm':
mode = atoi(optarg);
break;
default:
fprintf(stderr, "Invalid option %c\n", c);
exit(0);
}
}

if (optind == argc) {
print_usage(argv[0]);
return -1;
}

std::string filename = std::string(argv[optind]);

SuccinctFile *s_file = NULL;
if (mode == 0) {
// If mode is set to 0, compress the input file.
// Use default parameters.
std::cout << "Constructing Succinct data structures...\n";
s_file = new SuccinctFile(filename);

std::cout << "Serializing Succinct data structures...\n";
s_file->Serialize();
} else {
// If mode is set to 1, read the serialized data structures from disk.
// The serialized data structures must exist at <filename>.succinct.
std::cout << "De-serializing Succinct data structures...\n";
s_file = new SuccinctFile(filename, SuccinctMode::LOAD_IN_MEMORY);
}

std::cout << "Done. Starting Succinct Shell...\n";

print_valid_cmds();
while (true) {
char cmd_line[500];
std::cout << "succinct> ";
std::cin.getline(cmd_line, sizeof(cmd_line));
std::istringstream iss(cmd_line);
std::string cmd, arg;
int64_t offset, length;
if (!(iss >> cmd)) {
std::cerr << "Could not parse command: " << cmd_line << "\n";
continue;
}

if (cmd == "search") {
if (!(iss >> arg)) {
std::cerr << "Could not parse argument: " << cmd_line << "\n";
continue;
}
std::vector<int64_t> results;
timestamp_t start = get_timestamp();
s_file->Search(results, arg);
timestamp_t tot_time = get_timestamp() - start;
std::cout << "Found " << results.size() << " results in " << tot_time
<< "us:\n";
for (auto res : results) {
std::cout << res << ", ";
}
std::cout << std::endl;
} else if (cmd == "count") {
if (!(iss >> arg)) {
std::cerr << "Could not parse argument: " << cmd_line << "\n";
continue;
}
timestamp_t start = get_timestamp();
int64_t count = s_file->Count(arg);
timestamp_t tot_time = get_timestamp() - start;
std::cout << "Count = " << count << "; Time taken: " << tot_time
<< "us\n";
} else if (cmd == "extract") {
if (!(iss >> offset >> length)) {
std::cerr << "Could not parse argument: " << cmd_line << "\n";
continue;
}
timestamp_t start = get_timestamp();
std::string result;
s_file->Extract(result, offset, length);
timestamp_t tot_time = get_timestamp() - start;
std::cout << "Extracted string = " << result << "; Time taken: "
<< tot_time << "us\n";
} else {
std::cerr << "Unsupported command: " << cmd << std::endl;
print_valid_cmds();
}
}

return 0;
}
Loading

0 comments on commit 1e4ecba

Please sign in to comment.