diff --git a/benchmark/jsonschema.cc b/benchmark/jsonschema.cc index 7022b92bb..d72ede306 100644 --- a/benchmark/jsonschema.cc +++ b/benchmark/jsonschema.cc @@ -20,6 +20,20 @@ static void Schema_Frame_OMC_Full(benchmark::State &state) { } } +static void Schema_Frame_OMC_References(benchmark::State &state) { + const auto schema{ + sourcemeta::core::read_json(std::filesystem::path{CURRENT_DIRECTORY} / + "schemas" / "2019_09_omc_json_v2.json")}; + + for (auto _ : state) { + sourcemeta::core::SchemaFrame frame{ + sourcemeta::core::SchemaFrame::Mode::References}; + frame.analyse(schema, sourcemeta::core::schema_official_walker, + sourcemeta::core::schema_official_resolver); + benchmark::DoNotOptimize(frame); + } +} + static void Schema_Bundle_Meta_2020_12(benchmark::State &state) { for (auto _ : state) { state.PauseTiming(); @@ -34,4 +48,5 @@ static void Schema_Bundle_Meta_2020_12(benchmark::State &state) { } BENCHMARK(Schema_Frame_OMC_Full); +BENCHMARK(Schema_Frame_OMC_References); BENCHMARK(Schema_Bundle_Meta_2020_12); diff --git a/src/core/jsonschema/bundle.cc b/src/core/jsonschema/bundle.cc index 4ba1babb6..b21fb3992 100644 --- a/src/core/jsonschema/bundle.cc +++ b/src/core/jsonschema/bundle.cc @@ -138,7 +138,7 @@ auto bundle(sourcemeta::core::JSON &schema, const SchemaWalker &walker, const auto vocabularies{ sourcemeta::core::vocabularies(schema, resolver, default_dialect)}; sourcemeta::core::SchemaFrame frame{ - sourcemeta::core::SchemaFrame::Mode::Full}; + sourcemeta::core::SchemaFrame::Mode::References}; bundle_schema(schema, definitions_keyword(vocabularies), schema, frame, walker, resolver, default_dialect); } diff --git a/src/core/jsonschema/frame.cc b/src/core/jsonschema/frame.cc index 2afcc2d6c..703aa851f 100644 --- a/src/core/jsonschema/frame.cc +++ b/src/core/jsonschema/frame.cc @@ -294,7 +294,89 @@ struct CacheSubschema { const std::optional parent; }; -auto internal_analyse(const sourcemeta::core::SchemaFrame::Mode, +// TODO: The fact this lookup algorithm is O(N) is the main +// performance problem of framing +static auto find_subschema_by_pointer( + const sourcemeta::core::SchemaFrame::Locations &locations, + const sourcemeta::core::Pointer &pointer) + -> std::optional> { + for (const auto &location : locations) { + if (location.second.type != + sourcemeta::core::SchemaFrame::LocationType::Resource && + location.second.type != + sourcemeta::core::SchemaFrame::LocationType::Subschema) { + continue; + } + + if (location.second.pointer == pointer) { + return location.second; + } + } + + return std::nullopt; +} + +static auto repopulate_instance_locations( + const std::map &cache, + sourcemeta::core::SchemaFrame::Locations &locations, + const sourcemeta::core::Pointer &, const CacheSubschema &cache_entry, + // This is the output + const sourcemeta::core::Pointer &output, + const std::optional &accumulator) + -> void { + if (cache_entry.orphan + // TODO: Implement an .empty() method + && cache_entry.instance_location == sourcemeta::core::PointerTemplate{}) { + return; + } else if (cache_entry.parent.has_value()) { + const auto parent_location{ + find_subschema_by_pointer(locations, cache_entry.parent.value())}; + assert(parent_location.has_value()); + for (const auto &parent_instance_location : + parent_location.value().get().instance_locations) { + // Guard against overly unrolling recursive schemas + if (parent_instance_location == cache_entry.instance_location) { + continue; + } + + auto new_accumulator = cache_entry.relative_instance_location; + if (accumulator.has_value()) { + for (const auto &token : accumulator.value()) { + new_accumulator.emplace_back(token); + } + } + + auto result = parent_instance_location; + for (const auto &token : new_accumulator) { + result.emplace_back(token); + } + + // TODO: Look for the output locations once before calling this function + for (auto &location : locations) { + if (location.second.type != + sourcemeta::core::SchemaFrame::LocationType::Resource && + location.second.type != + sourcemeta::core::SchemaFrame::LocationType::Subschema) { + continue; + } + + if (location.second.pointer == output && + std::find(location.second.instance_locations.cbegin(), + location.second.instance_locations.cend(), + result) == location.second.instance_locations.cend()) { + location.second.instance_locations.push_back(result); + } + } + + repopulate_instance_locations( + cache, locations, cache_entry.parent.value(), + cache.at(cache_entry.parent.value()), output, new_accumulator); + } + } +} + +auto internal_analyse(const sourcemeta::core::SchemaFrame::Mode mode, const sourcemeta::core::JSON &schema, sourcemeta::core::SchemaFrame::Locations &frame, sourcemeta::core::SchemaFrame::References &references, @@ -780,29 +862,37 @@ auto internal_analyse(const sourcemeta::core::SchemaFrame::Mode, } } - // We only care about marking reference origins from/to resources and - // subschemas + if (mode == sourcemeta::core::SchemaFrame::Mode::Full) { + // We only care about marking reference origins from/to resources and + // subschemas - for (const auto &entry : frame) { - if (entry.second.type != SchemaFrame::LocationType::Resource) { - continue; + for (const auto &entry : frame) { + if (entry.second.type != SchemaFrame::LocationType::Resource) { + continue; + } + + mark_reference_origins_from(frame, references, entry); } - mark_reference_origins_from(frame, references, entry); - } + for (const auto &entry : frame) { + if (entry.second.type != SchemaFrame::LocationType::Subschema) { + continue; + } - for (const auto &entry : frame) { - if (entry.second.type != SchemaFrame::LocationType::Subschema) { - continue; + mark_reference_origins_from(frame, references, entry); } - mark_reference_origins_from(frame, references, entry); - } + // Calculate alternative unresolved instance locations + for (auto &entry : frame) { + traverse_origin_instance_locations(frame, entry.second, std::nullopt, + entry.second.instance_locations); + } - // Calculate alternative unresolved instance locations - for (auto &entry : frame) { - traverse_origin_instance_locations(frame, entry.second, std::nullopt, - entry.second.instance_locations); + // This is guaranteed to be top-down + for (auto &entry : subschemas) { + repopulate_instance_locations(subschemas, frame, entry.first, + entry.second, entry.first, std::nullopt); + } } } diff --git a/src/core/jsonschema/include/sourcemeta/core/jsonschema_frame.h b/src/core/jsonschema/include/sourcemeta/core/jsonschema_frame.h index 0ee6de64c..ddd81f7a2 100644 --- a/src/core/jsonschema/include/sourcemeta/core/jsonschema_frame.h +++ b/src/core/jsonschema/include/sourcemeta/core/jsonschema_frame.h @@ -105,7 +105,7 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaFrame { public: /// The mode of framing. More extensive analysis can be compute and memory /// intensive - enum class Mode { Full }; + enum class Mode { References, Full }; SchemaFrame(const Mode mode) : mode_{mode} {} diff --git a/src/core/jsonschema/jsonschema.cc b/src/core/jsonschema/jsonschema.cc index 01161817d..147b31bda 100644 --- a/src/core/jsonschema/jsonschema.cc +++ b/src/core/jsonschema/jsonschema.cc @@ -567,7 +567,7 @@ auto sourcemeta::core::reference_visit( const std::optional &default_dialect, const std::optional &default_id) -> void { sourcemeta::core::SchemaFrame frame{ - sourcemeta::core::SchemaFrame::Mode::Full}; + sourcemeta::core::SchemaFrame::Mode::References}; frame.analyse(schema, walker, resolver, default_dialect, default_id); for (const auto &entry : frame.locations()) { if (entry.second.type != @@ -646,7 +646,7 @@ auto sourcemeta::core::unidentify( const std::optional &default_dialect) -> void { // (1) Re-frame before changing anything sourcemeta::core::SchemaFrame frame{ - sourcemeta::core::SchemaFrame::Mode::Full}; + sourcemeta::core::SchemaFrame::Mode::References}; frame.analyse(schema, walker, resolver, default_dialect); // (2) Remove all identifiers and anchors diff --git a/src/core/jsonschema/resolver.cc b/src/core/jsonschema/resolver.cc index 74e09d9a0..b29c97ff7 100644 --- a/src/core/jsonschema/resolver.cc +++ b/src/core/jsonschema/resolver.cc @@ -20,7 +20,7 @@ auto SchemaMapResolver::add(const JSON &schema, // Registering the top-level schema is not enough. We need to check // and register every embedded schema resource too - SchemaFrame frame{SchemaFrame::Mode::Full}; + SchemaFrame frame{SchemaFrame::Mode::References}; frame.analyse(schema, schema_official_walker, *this, default_dialect, default_id); diff --git a/test/jsonschema/jsonschema_frame_2020_12_test.cc b/test/jsonschema/jsonschema_frame_2020_12_test.cc index 29c0deef7..b4b1a41de 100644 --- a/test/jsonschema/jsonschema_frame_2020_12_test.cc +++ b/test/jsonschema/jsonschema_frame_2020_12_test.cc @@ -1925,3 +1925,205 @@ TEST(JSONSchema_frame_2020_12, properties_with_refs) { "https://www.sourcemeta.com/schema", "/properties/bar"); } + +TEST(JSONSchema_frame_2020_12, property_ref_defs) { + const sourcemeta::core::JSON document = sourcemeta::core::parse_json(R"JSON({ + "$id": "https://www.sourcemeta.com/schema", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "properties": { + "foo": { + "$ref": "#/$defs/helper" + }, + "bar": { + "$ref": "#/$defs/helper/items" + } + }, + "$defs": { + "helper": { + "items": { + "additionalProperties": { + "type": "string" + } + } + } + } + })JSON"); + + sourcemeta::core::SchemaFrame frame{ + sourcemeta::core::SchemaFrame::Mode::Full}; + frame.analyse(document, sourcemeta::core::schema_official_walker, + sourcemeta::core::schema_official_resolver); + + EXPECT_EQ(frame.locations().size(), 13); + + EXPECT_FRAME_STATIC_2020_12_RESOURCE( + frame, "https://www.sourcemeta.com/schema", + "https://www.sourcemeta.com/schema", "", + "https://www.sourcemeta.com/schema", "", {""}, 0, std::nullopt); + + // JSON Pointers + + EXPECT_FRAME_STATIC_2020_12_POINTER( + frame, "https://www.sourcemeta.com/schema#/$id", + "https://www.sourcemeta.com/schema", "/$id", + "https://www.sourcemeta.com/schema", "/$id", {}, 0, std::nullopt); + EXPECT_FRAME_STATIC_2020_12_POINTER( + frame, "https://www.sourcemeta.com/schema#/$schema", + "https://www.sourcemeta.com/schema", "/$schema", + "https://www.sourcemeta.com/schema", "/$schema", {}, 0, std::nullopt); + + EXPECT_FRAME_STATIC_2020_12_POINTER( + frame, "https://www.sourcemeta.com/schema#/properties", + "https://www.sourcemeta.com/schema", "/properties", + "https://www.sourcemeta.com/schema", "/properties", {}, 0, std::nullopt); + EXPECT_FRAME_STATIC_2020_12_SUBSCHEMA( + frame, "https://www.sourcemeta.com/schema#/properties/foo", + "https://www.sourcemeta.com/schema", "/properties/foo", + "https://www.sourcemeta.com/schema", "/properties/foo", {"/foo"}, 0, ""); + EXPECT_FRAME_STATIC_2020_12_POINTER( + frame, "https://www.sourcemeta.com/schema#/properties/foo/$ref", + "https://www.sourcemeta.com/schema", "/properties/foo/$ref", + "https://www.sourcemeta.com/schema", "/properties/foo/$ref", {}, 0, + std::nullopt); + EXPECT_FRAME_STATIC_2020_12_SUBSCHEMA( + frame, "https://www.sourcemeta.com/schema#/properties/bar", + "https://www.sourcemeta.com/schema", "/properties/bar", + "https://www.sourcemeta.com/schema", "/properties/bar", {"/bar"}, 0, ""); + EXPECT_FRAME_STATIC_2020_12_POINTER( + frame, "https://www.sourcemeta.com/schema#/properties/bar/$ref", + "https://www.sourcemeta.com/schema", "/properties/bar/$ref", + "https://www.sourcemeta.com/schema", "/properties/bar/$ref", {}, 0, + std::nullopt); + + EXPECT_FRAME_STATIC_2020_12_POINTER( + frame, "https://www.sourcemeta.com/schema#/$defs", + "https://www.sourcemeta.com/schema", "/$defs", + "https://www.sourcemeta.com/schema", "/$defs", {}, 0, std::nullopt); + EXPECT_FRAME_STATIC_2020_12_SUBSCHEMA( + frame, "https://www.sourcemeta.com/schema#/$defs/helper", + "https://www.sourcemeta.com/schema", "/$defs/helper", + "https://www.sourcemeta.com/schema", "/$defs/helper", {"/foo"}, 1, ""); + EXPECT_FRAME_STATIC_2020_12_SUBSCHEMA( + frame, "https://www.sourcemeta.com/schema#/$defs/helper/items", + "https://www.sourcemeta.com/schema", "/$defs/helper/items", + "https://www.sourcemeta.com/schema", "/$defs/helper/items", + POINTER_TEMPLATES("/bar", "/foo/~I~"), 1, "/$defs/helper"); + EXPECT_FRAME_STATIC_2020_12_SUBSCHEMA( + frame, + "https://www.sourcemeta.com/schema#/$defs/helper/items/" + "additionalProperties", + "https://www.sourcemeta.com/schema", + "/$defs/helper/items/additionalProperties", + "https://www.sourcemeta.com/schema", + "/$defs/helper/items/additionalProperties", + POINTER_TEMPLATES("/bar/~P~", "/foo/~I~/~P~"), 0, "/$defs/helper/items"); + EXPECT_FRAME_STATIC_2020_12_POINTER( + frame, + "https://www.sourcemeta.com/schema#/$defs/helper/items/" + "additionalProperties/type", + "https://www.sourcemeta.com/schema", + "/$defs/helper/items/additionalProperties/type", + "https://www.sourcemeta.com/schema", + "/$defs/helper/items/additionalProperties/type", {}, 0, std::nullopt); + + // References + + EXPECT_EQ(frame.references().size(), 3); + + EXPECT_STATIC_REFERENCE( + frame, "/$schema", "https://json-schema.org/draft/2020-12/schema", + "https://json-schema.org/draft/2020-12/schema", std::nullopt); + EXPECT_STATIC_REFERENCE(frame, "/properties/foo/$ref", + "https://www.sourcemeta.com/schema#/$defs/helper", + "https://www.sourcemeta.com/schema", "/$defs/helper"); + EXPECT_STATIC_REFERENCE( + frame, "/properties/bar/$ref", + "https://www.sourcemeta.com/schema#/$defs/helper/items", + "https://www.sourcemeta.com/schema", "/$defs/helper/items"); +} + +TEST(JSONSchema_frame_2020_12, property_cross_ref) { + const sourcemeta::core::JSON document = sourcemeta::core::parse_json(R"JSON({ + "$id": "https://www.sourcemeta.com/schema", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "properties": { + "foo": { + "$ref": "#/properties/bar" + }, + "bar": { + "items": { + "additionalProperties": true + } + } + } + })JSON"); + + sourcemeta::core::SchemaFrame frame{ + sourcemeta::core::SchemaFrame::Mode::Full}; + frame.analyse(document, sourcemeta::core::schema_official_walker, + sourcemeta::core::schema_official_resolver); + + EXPECT_EQ(frame.locations().size(), 9); + + EXPECT_FRAME_STATIC_2020_12_RESOURCE( + frame, "https://www.sourcemeta.com/schema", + "https://www.sourcemeta.com/schema", "", + "https://www.sourcemeta.com/schema", "", {""}, 0, std::nullopt); + + // JSON Pointers + + EXPECT_FRAME_STATIC_2020_12_POINTER( + frame, "https://www.sourcemeta.com/schema#/$id", + "https://www.sourcemeta.com/schema", "/$id", + "https://www.sourcemeta.com/schema", "/$id", {}, 0, std::nullopt); + EXPECT_FRAME_STATIC_2020_12_POINTER( + frame, "https://www.sourcemeta.com/schema#/$schema", + "https://www.sourcemeta.com/schema", "/$schema", + "https://www.sourcemeta.com/schema", "/$schema", {}, 0, std::nullopt); + + EXPECT_FRAME_STATIC_2020_12_POINTER( + frame, "https://www.sourcemeta.com/schema#/properties", + "https://www.sourcemeta.com/schema", "/properties", + "https://www.sourcemeta.com/schema", "/properties", {}, 0, std::nullopt); + EXPECT_FRAME_STATIC_2020_12_SUBSCHEMA( + frame, "https://www.sourcemeta.com/schema#/properties/foo", + "https://www.sourcemeta.com/schema", "/properties/foo", + "https://www.sourcemeta.com/schema", "/properties/foo", {"/foo"}, 0, ""); + EXPECT_FRAME_STATIC_2020_12_POINTER( + frame, "https://www.sourcemeta.com/schema#/properties/foo/$ref", + "https://www.sourcemeta.com/schema", "/properties/foo/$ref", + "https://www.sourcemeta.com/schema", "/properties/foo/$ref", {}, 0, + std::nullopt); + EXPECT_FRAME_STATIC_2020_12_SUBSCHEMA( + frame, "https://www.sourcemeta.com/schema#/properties/bar", + "https://www.sourcemeta.com/schema", "/properties/bar", + "https://www.sourcemeta.com/schema", "/properties/bar", + POINTER_TEMPLATES("/bar", "/foo"), 1, ""); + EXPECT_FRAME_STATIC_2020_12_SUBSCHEMA( + frame, "https://www.sourcemeta.com/schema#/properties/bar/items", + "https://www.sourcemeta.com/schema", "/properties/bar/items", + "https://www.sourcemeta.com/schema", "/properties/bar/items", + POINTER_TEMPLATES("/bar/~I~", "/foo/~I~"), 0, "/properties/bar"); + EXPECT_FRAME_STATIC_2020_12_SUBSCHEMA( + frame, + "https://www.sourcemeta.com/schema#/properties/bar/items/" + "additionalProperties", + "https://www.sourcemeta.com/schema", + "/properties/bar/items/additionalProperties", + "https://www.sourcemeta.com/schema", + "/properties/bar/items/additionalProperties", + POINTER_TEMPLATES("/bar/~I~/~P~", "/foo/~I~/~P~"), 0, + "/properties/bar/items"); + + // References + + EXPECT_EQ(frame.references().size(), 2); + + EXPECT_STATIC_REFERENCE( + frame, "/$schema", "https://json-schema.org/draft/2020-12/schema", + "https://json-schema.org/draft/2020-12/schema", std::nullopt); + EXPECT_STATIC_REFERENCE(frame, "/properties/foo/$ref", + "https://www.sourcemeta.com/schema#/properties/bar", + "https://www.sourcemeta.com/schema", + "/properties/bar"); +} diff --git a/test/jsonschema/referencingsuite.cc b/test/jsonschema/referencingsuite.cc index e6afdc2eb..0b67d4244 100644 --- a/test/jsonschema/referencingsuite.cc +++ b/test/jsonschema/referencingsuite.cc @@ -50,7 +50,7 @@ class ReferencingTest : public testing::Test { new_entries; for (const auto &[uri, schema] : this->registry) { sourcemeta::core::SchemaFrame frame{ - sourcemeta::core::SchemaFrame::Mode::Full}; + sourcemeta::core::SchemaFrame::Mode::References}; frame.analyse(schema.first, sourcemeta::core::schema_official_walker, sourcemeta::core::schema_official_resolver, this->dialect, uri);