Skip to content

Commit

Permalink
[Annotated Buffers] Improve efficiency (google#7820)
Browse files Browse the repository at this point in the history
* AnnotatedBinaryTextGen switch to ofstream instead of building giant string

* Add --annotate-sparse-vectors to reduce AFB size
  • Loading branch information
dbaileychess authored Feb 7, 2023
1 parent 85aee1f commit 535ead8
Show file tree
Hide file tree
Showing 4 changed files with 107 additions and 85 deletions.
4 changes: 2 additions & 2 deletions include/flatbuffers/flatc.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ struct FlatCOptions {
size_t binary_files_from = std::numeric_limits<size_t>::max();
std::string conform_to_schema;
std::string annotate_schema;
bool annotate_include_vector_contents = true;
bool any_generator = false;
bool print_make_rules = false;
bool raw_binary = false;
Expand Down Expand Up @@ -110,8 +111,7 @@ class FlatCompiler {

void AnnotateBinaries(const uint8_t *binary_schema,
uint64_t binary_schema_size,
const std::string &schema_filename,
const std::vector<std::string> &binary_files);
const FlatCOptions &options);

void ValidateOptions(const FlatCOptions &options);

Expand Down
161 changes: 87 additions & 74 deletions src/annotated_binary_text_gen.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#include "annotated_binary_text_gen.h"

#include <algorithm>
#include <fstream>
#include <ostream>
#include <sstream>
#include <string>

Expand All @@ -21,6 +23,8 @@ struct OutputConfig {
size_t offset_max_char = 4;

char delimiter = '|';

bool include_vector_contents = true;
};

static std::string ToString(const BinarySectionType type) {
Expand Down Expand Up @@ -83,7 +87,7 @@ static std::string ToValueString(const BinaryRegion &region,
if (region.array_length) {
if (region.type == BinaryRegionType::Uint8 ||
region.type == BinaryRegionType::Unknown) {
// Interpet each value as a ASCII to aid debugging
// Interpret each value as an ASCII character to aid debugging
for (uint64_t i = 0; i < region.array_length; ++i) {
const uint8_t c = *(binary + region.offset + i);
s += isprint(c) ? static_cast<char>(c & 0x7F) : '.';
Expand Down Expand Up @@ -257,84 +261,74 @@ static std::string GenerateComment(const BinaryRegionComment &comment,
return s;
}

static std::string GenerateDocumentation(const BinaryRegion &region,
const BinarySection &section,
const uint8_t *binary,
DocContinuation &continuation,
const OutputConfig &output_config) {
std::string s;

static void GenerateDocumentation(std::ostream &os, const BinaryRegion &region,
const BinarySection &section,
const uint8_t *binary,
DocContinuation &continuation,
const OutputConfig &output_config) {
// Check if there is a doc continuation that should be prioritized.
if (continuation.value_start_column) {
s += std::string(continuation.value_start_column - 2, ' ');
s += output_config.delimiter;
s += " ";
os << std::string(continuation.value_start_column - 2, ' ');
os << output_config.delimiter << " ";

s += continuation.value.substr(0, output_config.max_bytes_per_line);
os << continuation.value.substr(0, output_config.max_bytes_per_line);
continuation.value = continuation.value.substr(
std::min(output_config.max_bytes_per_line, continuation.value.size()));
return s;
return;
}

size_t size_of = 0;
{
std::stringstream ss;
ss << std::setw(static_cast<int>(output_config.largest_type_string)) << std::left;
ss << std::setw(static_cast<int>(output_config.largest_type_string))
<< std::left;
ss << GenerateTypeString(region);
s += ss.str();
os << ss.str();
size_of = ss.str().size();
}
s += " ";
s += output_config.delimiter;
s += " ";
os << " " << output_config.delimiter << " ";
if (region.array_length) {
// Record where the value is first being outputted.
continuation.value_start_column = s.size();
continuation.value_start_column = 3 + size_of;

// Get the full-length value, which we will chunk below.
const std::string value = ToValueString(region, binary, output_config);

std::stringstream ss;
ss << std::setw(static_cast<int>(output_config.largest_value_string)) << std::left;
ss << std::setw(static_cast<int>(output_config.largest_value_string))
<< std::left;
ss << value.substr(0, output_config.max_bytes_per_line);
s += ss.str();
os << ss.str();

continuation.value =
value.substr(std::min(output_config.max_bytes_per_line, value.size()));
} else {
std::stringstream ss;
ss << std::setw(static_cast<int>(output_config.largest_value_string)) << std::left;
ss << std::setw(static_cast<int>(output_config.largest_value_string))
<< std::left;
ss << ToValueString(region, binary, output_config);
s += ss.str();
os << ss.str();
}

s += " ";
s += output_config.delimiter;
s += " ";
s += GenerateComment(region.comment, section);

return s;
os << " " << output_config.delimiter << " ";
os << GenerateComment(region.comment, section);
}

static std::string GenerateRegion(const BinaryRegion &region,
const BinarySection &section,
const uint8_t *binary,
const OutputConfig &output_config) {
std::string s;
static void GenerateRegion(std::ostream &os, const BinaryRegion &region,
const BinarySection &section, const uint8_t *binary,
const OutputConfig &output_config) {
bool doc_generated = false;
DocContinuation doc_continuation;
for (uint64_t i = 0; i < region.length; ++i) {
if ((i % output_config.max_bytes_per_line) == 0) {
// Start a new line of output
s += '\n';
s += " ";
s += "+0x";
s += ToHex(region.offset + i, output_config.offset_max_char);
s += " ";
s += output_config.delimiter;
os << std::endl;
os << " +0x" << ToHex(region.offset + i, output_config.offset_max_char);
os << " " << output_config.delimiter;
}

// Add each byte
s += " ";
s += ToHex(binary[region.offset + i]);
os << " " << ToHex(binary[region.offset + i]);

// Check for end of line or end of region conditions.
if (((i + 1) % output_config.max_bytes_per_line == 0) ||
Expand All @@ -344,47 +338,66 @@ static std::string GenerateRegion(const BinaryRegion &region,
// zero those out to align everything globally.
for (uint64_t j = i + 1; (j % output_config.max_bytes_per_line) != 0;
++j) {
s += " ";
os << " ";
}
}
s += " ";
s += output_config.delimiter;
os << " " << output_config.delimiter;
// This is the end of the first line or it's the last byte of the region,
// generate the end-of-line documentation.
if (!doc_generated) {
s += " ";
s += GenerateDocumentation(region, section, binary, doc_continuation,
output_config);
os << " ";
GenerateDocumentation(os, region, section, binary, doc_continuation,
output_config);

// If we have a value in the doc continuation, that means the doc is
// being printed on multiple lines.
doc_generated = doc_continuation.value.empty();
}
}
}

return s;
}

static std::string GenerateSection(const BinarySection &section,
const uint8_t *binary,
const OutputConfig &output_config) {
std::string s;
s += "\n";
s += ToString(section.type);
if (!section.name.empty()) { s += " (" + section.name + ")"; }
s += ":";
static void GenerateSection(std::ostream &os, const BinarySection &section,
const uint8_t *binary,
const OutputConfig &output_config) {
os << std::endl;
os << ToString(section.type);
if (!section.name.empty()) { os << " (" + section.name + ")"; }
os << ":";

// As a space saving measure, skip generating every vector element, just put
// the first and last elements in the output. Skip the whole thing if there
// are only two or fewer elements, as it doesn't save space.
if (section.type == BinarySectionType::Vector &&
!output_config.include_vector_contents && section.regions.size() > 3) {
// Generate the length region which should be first.
GenerateRegion(os, section.regions[0], section, binary, output_config);

// Generate the first element.
GenerateRegion(os, section.regions[1], section, binary, output_config);

// Indicate that we omitted lines.
os << std::endl
<< " <" << section.regions.size() - 2 << " regions omitted>";

// Generate the last element.
GenerateRegion(os, section.regions.back(), section, binary, output_config);
os << std::endl;
return;
}

for (const BinaryRegion &region : section.regions) {
s += GenerateRegion(region, section, binary, output_config);
GenerateRegion(os, region, section, binary, output_config);
}
return s;
os << std::endl;
}
} // namespace

bool AnnotatedBinaryTextGenerator::Generate(
const std::string &filename, const std::string &schema_filename) {
OutputConfig output_config;
output_config.max_bytes_per_line = options_.max_bytes_per_line;
output_config.include_vector_contents = options_.include_vector_contents;

// Given the length of the binary, we can calculate the maximum number of
// characters to display in the offset hex: (i.e. 2 would lead to 0XFF being
Expand Down Expand Up @@ -414,27 +427,27 @@ bool AnnotatedBinaryTextGenerator::Generate(
}
}

// Generate each of the binary sections
std::string s;

s += "// Annotated Flatbuffer Binary\n";
s += "//\n";
s += "// Schema file: " + schema_filename + "\n";
s += "// Binary file: " + filename + "\n";

for (const auto &section : annotations_) {
s += GenerateSection(section.second, binary_, output_config);
s += "\n";
}

// Modify the output filename.
std::string output_filename = StripExtension(filename);
output_filename += options_.output_postfix;
output_filename +=
"." + (options_.output_extension.empty() ? GetExtension(filename)
: options_.output_extension);

return SaveFile(output_filename.c_str(), s, false);
std::ofstream ofs(output_filename.c_str());

ofs << "// Annotated Flatbuffer Binary" << std::endl;
ofs << "//" << std::endl;
ofs << "// Schema file: " << schema_filename << std::endl;
ofs << "// Binary file: " << filename << std::endl;

// Generate each of the binary sections
for (const auto &section : annotations_) {
GenerateSection(ofs, section.second, binary_, output_config);
}

ofs.close();
return true;
}

} // namespace flatbuffers
3 changes: 3 additions & 0 deletions src/annotated_binary_text_gen.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ class AnnotatedBinaryTextGenerator {
//
// Example: binary1.bin -> binary1.afb
std::string output_extension = "afb";

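// Controls whether every region of a vector section is written out. When
// false, a vector section with more than three regions emits only its
// length region, its first element and its last element.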
bool include_vector_contents = true;
};

explicit AnnotatedBinaryTextGenerator(
Expand Down
24 changes: 15 additions & 9 deletions src/flatc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,7 @@ const static FlatCOption flatc_options[] = {
"ts_entry_points." },
{ "", "ts-entry-points", "",
"Generate entry point typescript per namespace. Implies gen-all." },
{ "", "annotate-sparse-vectors", "", "Don't annotate every vector element."},
{ "", "annotate", "SCHEMA",
"Annotate the provided BINARY_FILE with the specified SCHEMA file." },
{ "", "no-leak-private-annotation", "",
Expand Down Expand Up @@ -371,11 +372,12 @@ std::string FlatCompiler::GetUsageString(
return ss.str();
}

void FlatCompiler::AnnotateBinaries(
const uint8_t *binary_schema, const uint64_t binary_schema_size,
const std::string &schema_filename,
const std::vector<std::string> &binary_files) {
for (const std::string &filename : binary_files) {
void FlatCompiler::AnnotateBinaries(const uint8_t *binary_schema,
const uint64_t binary_schema_size,
const FlatCOptions &options) {
const std::string &schema_filename = options.annotate_schema;

for (const std::string &filename : options.filenames) {
std::string binary_contents;
if (!flatbuffers::LoadFile(filename.c_str(), true, &binary_contents)) {
Warn("unable to load binary file: " + filename);
Expand All @@ -391,13 +393,16 @@ void FlatCompiler::AnnotateBinaries(

auto annotations = binary_annotator.Annotate();

flatbuffers::AnnotatedBinaryTextGenerator::Options text_gen_opts;
text_gen_opts.include_vector_contents =
options.annotate_include_vector_contents;

// TODO(dbaileychess): Right now we just support a single text-based
// output of the annotated binary schema, which we generate here. We
// could output the raw annotations instead and have third-party tools
// use them to generate their own output.
flatbuffers::AnnotatedBinaryTextGenerator text_generator(
flatbuffers::AnnotatedBinaryTextGenerator::Options{}, annotations,
binary, binary_size);
text_gen_opts, annotations, binary, binary_size);

text_generator.Generate(filename, schema_filename);
}
Expand Down Expand Up @@ -641,6 +646,8 @@ FlatCOptions FlatCompiler::ParseFromCommandLineArguments(int argc,
opts.ts_no_import_ext = true;
} else if (arg == "--no-leak-private-annotation") {
opts.no_leak_private_annotations = true;
} else if (arg == "--annotate-sparse-vectors") {
options.annotate_include_vector_contents = false;
} else if (arg == "--annotate") {
if (++argi >= argc) Error("missing path following: " + arg, true);
options.annotate_schema = flatbuffers::PosixPath(argv[argi]);
Expand Down Expand Up @@ -939,8 +946,7 @@ int FlatCompiler::Compile(const FlatCOptions &options) {
}

// Annotate the provided files with the binary_schema.
AnnotateBinaries(binary_schema, binary_schema_size, options.annotate_schema,
options.filenames);
AnnotateBinaries(binary_schema, binary_schema_size, options);

// We don't support doing anything else after annotating a binary.
return 0;
Expand Down

0 comments on commit 535ead8

Please sign in to comment.