Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix broken instance locations on orphan parent schemas #1538

Merged
merged 4 commits into from
Feb 11, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions benchmark/jsonschema.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,20 @@ static void Schema_Frame_OMC_Full(benchmark::State &state) {
}
}

static void Schema_Frame_OMC_References(benchmark::State &state) {
const auto schema{
sourcemeta::core::read_json(std::filesystem::path{CURRENT_DIRECTORY} /
"schemas" / "2019_09_omc_json_v2.json")};

for (auto _ : state) {
sourcemeta::core::SchemaFrame frame{
sourcemeta::core::SchemaFrame::Mode::References};
frame.analyse(schema, sourcemeta::core::schema_official_walker,
sourcemeta::core::schema_official_resolver);
benchmark::DoNotOptimize(frame);
}
}

static void Schema_Bundle_Meta_2020_12(benchmark::State &state) {
for (auto _ : state) {
state.PauseTiming();
Expand All @@ -34,4 +48,5 @@ static void Schema_Bundle_Meta_2020_12(benchmark::State &state) {
}

BENCHMARK(Schema_Frame_OMC_Full);
BENCHMARK(Schema_Frame_OMC_References);
BENCHMARK(Schema_Bundle_Meta_2020_12);
2 changes: 1 addition & 1 deletion src/core/jsonschema/bundle.cc
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ auto bundle(sourcemeta::core::JSON &schema, const SchemaWalker &walker,
const auto vocabularies{
sourcemeta::core::vocabularies(schema, resolver, default_dialect)};
sourcemeta::core::SchemaFrame frame{
sourcemeta::core::SchemaFrame::Mode::Full};
sourcemeta::core::SchemaFrame::Mode::References};
bundle_schema(schema, definitions_keyword(vocabularies), schema, frame,
walker, resolver, default_dialect);
}
Expand Down
124 changes: 107 additions & 17 deletions src/core/jsonschema/frame.cc
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,89 @@ struct CacheSubschema {
const std::optional<sourcemeta::core::Pointer> parent;
};

auto internal_analyse(const sourcemeta::core::SchemaFrame::Mode,
// TODO: The fact this lookup algorithm is O(N) is the main
// performance problem of framing
static auto find_subschema_by_pointer(
const sourcemeta::core::SchemaFrame::Locations &locations,
const sourcemeta::core::Pointer &pointer)
-> std::optional<std::reference_wrapper<
const sourcemeta::core::SchemaFrame::LocationsEntry>> {
for (const auto &location : locations) {
if (location.second.type !=
sourcemeta::core::SchemaFrame::LocationType::Resource &&
location.second.type !=
sourcemeta::core::SchemaFrame::LocationType::Subschema) {
continue;
}

if (location.second.pointer == pointer) {
return location.second;
}
}

return std::nullopt;
}

static auto repopulate_instance_locations(
const std::map<sourcemeta::core::Pointer, CacheSubschema> &cache,
sourcemeta::core::SchemaFrame::Locations &locations,
const sourcemeta::core::Pointer &, const CacheSubschema &cache_entry,
// This is the output
const sourcemeta::core::Pointer &output,
const std::optional<sourcemeta::core::PointerTemplate> &accumulator)
-> void {
if (cache_entry.orphan
// TODO: Implement an .empty() method
&& cache_entry.instance_location == sourcemeta::core::PointerTemplate{}) {
return;
} else if (cache_entry.parent.has_value()) {
const auto parent_location{
find_subschema_by_pointer(locations, cache_entry.parent.value())};
assert(parent_location.has_value());
for (const auto &parent_instance_location :
parent_location.value().get().instance_locations) {
// Guard against overly unrolling recursive schemas
if (parent_instance_location == cache_entry.instance_location) {
continue;
}

auto new_accumulator = cache_entry.relative_instance_location;
if (accumulator.has_value()) {
for (const auto &token : accumulator.value()) {
new_accumulator.emplace_back(token);
}
}

auto result = parent_instance_location;
for (const auto &token : new_accumulator) {
result.emplace_back(token);
}

// TODO: Look for the output locations once before calling this function
for (auto &location : locations) {
if (location.second.type !=
sourcemeta::core::SchemaFrame::LocationType::Resource &&
location.second.type !=
sourcemeta::core::SchemaFrame::LocationType::Subschema) {
continue;
}

if (location.second.pointer == output &&
std::find(location.second.instance_locations.cbegin(),
location.second.instance_locations.cend(),
result) == location.second.instance_locations.cend()) {
location.second.instance_locations.push_back(result);
}
}

repopulate_instance_locations(
cache, locations, cache_entry.parent.value(),
cache.at(cache_entry.parent.value()), output, new_accumulator);
}
}
}

auto internal_analyse(const sourcemeta::core::SchemaFrame::Mode mode,
const sourcemeta::core::JSON &schema,
sourcemeta::core::SchemaFrame::Locations &frame,
sourcemeta::core::SchemaFrame::References &references,
Expand Down Expand Up @@ -780,29 +862,37 @@ auto internal_analyse(const sourcemeta::core::SchemaFrame::Mode,
}
}

// We only care about marking reference origins from/to resources and
// subschemas
if (mode == sourcemeta::core::SchemaFrame::Mode::Full) {
// We only care about marking reference origins from/to resources and
// subschemas

for (const auto &entry : frame) {
if (entry.second.type != SchemaFrame::LocationType::Resource) {
continue;
for (const auto &entry : frame) {
if (entry.second.type != SchemaFrame::LocationType::Resource) {
continue;
}

mark_reference_origins_from(frame, references, entry);
}

mark_reference_origins_from(frame, references, entry);
}
for (const auto &entry : frame) {
if (entry.second.type != SchemaFrame::LocationType::Subschema) {
continue;
}

for (const auto &entry : frame) {
if (entry.second.type != SchemaFrame::LocationType::Subschema) {
continue;
mark_reference_origins_from(frame, references, entry);
}

mark_reference_origins_from(frame, references, entry);
}
// Calculate alternative unresolved instance locations
for (auto &entry : frame) {
traverse_origin_instance_locations(frame, entry.second, std::nullopt,
entry.second.instance_locations);
}

// Calculate alternative unresolved instance locations
for (auto &entry : frame) {
traverse_origin_instance_locations(frame, entry.second, std::nullopt,
entry.second.instance_locations);
// This is guaranteed to be top-down
for (auto &entry : subschemas) {
repopulate_instance_locations(subschemas, frame, entry.first,
entry.second, entry.first, std::nullopt);
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaFrame {
public:
/// The mode of framing. More extensive analysis can be compute and memory
/// intensive
enum class Mode { Full };
enum class Mode { References, Full };

SchemaFrame(const Mode mode) : mode_{mode} {}

Expand Down
4 changes: 2 additions & 2 deletions src/core/jsonschema/jsonschema.cc
Original file line number Diff line number Diff line change
Expand Up @@ -567,7 +567,7 @@ auto sourcemeta::core::reference_visit(
const std::optional<std::string> &default_dialect,
const std::optional<std::string> &default_id) -> void {
sourcemeta::core::SchemaFrame frame{
sourcemeta::core::SchemaFrame::Mode::Full};
sourcemeta::core::SchemaFrame::Mode::References};
frame.analyse(schema, walker, resolver, default_dialect, default_id);
for (const auto &entry : frame.locations()) {
if (entry.second.type !=
Expand Down Expand Up @@ -646,7 +646,7 @@ auto sourcemeta::core::unidentify(
const std::optional<std::string> &default_dialect) -> void {
// (1) Re-frame before changing anything
sourcemeta::core::SchemaFrame frame{
sourcemeta::core::SchemaFrame::Mode::Full};
sourcemeta::core::SchemaFrame::Mode::References};
frame.analyse(schema, walker, resolver, default_dialect);

// (2) Remove all identifiers and anchors
Expand Down
2 changes: 1 addition & 1 deletion src/core/jsonschema/resolver.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ auto SchemaMapResolver::add(const JSON &schema,

// Registering the top-level schema is not enough. We need to check
// and register every embedded schema resource too
SchemaFrame frame{SchemaFrame::Mode::Full};
SchemaFrame frame{SchemaFrame::Mode::References};
frame.analyse(schema, schema_official_walker, *this, default_dialect,
default_id);

Expand Down
Loading