From 27ef8ece0e4148691b1dfc7d4b68be08c5384a01 Mon Sep 17 00:00:00 2001 From: Santhosh Kumar Tekuri Date: Thu, 28 Mar 2024 09:53:37 +0530 Subject: [PATCH] refactor: better url management --- src/compiler.rs | 245 +++++++++---------------- src/draft.rs | 81 ++++----- src/lib.rs | 18 +- src/root.rs | 138 +++++++------- src/roots.rs | 21 ++- src/util.rs | 473 ++++++++++++++++++++++++++++++++---------------- 6 files changed, 545 insertions(+), 431 deletions(-) diff --git a/src/compiler.rs b/src/compiler.rs index bc7fb23..0f5f594 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -186,10 +186,9 @@ impl Compiler { returns [`CompileError`] if url parsing failed. */ - pub fn add_resource(&mut self, mut url: &str, json: Value) -> Result<(), CompileError> { - (url, _) = split(url); // strip fragment if any - let url = to_url(url)?; - self.roots.loader.add_resource(url, json); + pub fn add_resource(&mut self, url: &str, json: Value) -> Result<(), CompileError> { + let uf = UrlFrag::absolute(url)?; + self.roots.loader.add_resource(uf.url, json); Ok(()) } @@ -208,11 +207,11 @@ impl Compiler { loc: &str, target: &mut Schemas, ) -> Result { - let (url, frag) = split(loc); - let url = to_url(url)?; - let loc = format!("{url}#{frag}"); + let uf = UrlFrag::absolute(loc)?; + // resolve anchor + let up = self.roots.resolve_fragment(uf)?; - let result = self.do_compile(loc, target); + let result = self.do_compile(up, target); if let Err(bug @ CompileError::Bug(_)) = &result { debug_assert!(false, "{bug}"); } @@ -221,68 +220,26 @@ impl Compiler { fn do_compile( &mut self, - loc: String, + up: UrlPtr, target: &mut Schemas, ) -> Result { - debug_assert!(loc.contains('#')); - let mut queue = Queue::new(); let mut compiled = Vec::new(); - // resolve anchor - let (url, _) = split(loc.as_str()); - let root = { - let url = Url::parse(url).map_err(|e| CompileError::LoadUrlError { - url: url.to_owned(), - src: e.into(), - })?; - self.roots.or_load(url.clone())?; - self.roots - .get(&url) - .ok_or(CompileError::Bug("or_load didn't add".into()))? - }; - let loc = root.resolve(&loc)?; - - let index = queue.enqueue_schema(target, loc); + let index = queue.enqueue_schema(target, up); if queue.schemas.is_empty() { // already got compiled return Ok(index); } while queue.schemas.len() > compiled.len() { - let loc = &queue.schemas[compiled.len()]; - let (url, frag) = split(loc); - let url = Url::parse(url).map_err(|e| CompileError::LoadUrlError { - url: url.to_owned(), - src: e.into(), - })?; - debug_assert!(!frag.is_anchor(), "ony non-achors should be in queue"); - let ptr = frag.decode().map_err(|e| CompileError::LoadUrlError { - url: url.to_string(), - src: e.into(), - })?; - - self.roots.or_load(url.clone())?; - let root = self - .roots - .get_mut(&url) - .ok_or(CompileError::Bug("or_load didn't add".into()))?; - if !root.draft.is_subschema(ptr.as_ref()) { - root.add_subschema(ptr.as_ref())?; - } - let root = self - .roots - .get(&url) - .ok_or(CompileError::Bug("or_load didn't add".into()))?; - - let v = root - .lookup_ptr(ptr.as_ref()) - .map_err(|_| CompileError::InvalidJsonPointer(loc.clone()))?; - let Some(v) = v else { - return Err(CompileError::JsonPointerNotFound(loc.to_owned())); + let up = &queue.schemas[compiled.len()]; + self.roots.ensure_subschema(up)?; + let Some(root) = self.roots.get(&up.url) else { + return Err(CompileError::Bug("or_load didn't add".into())); }; - - let sch = self.compile_value(target, v, &loc.to_owned(), root, &mut queue)?; + let v = up.lookup(&root.doc)?; + let sch = self.compile_value(target, v, &up.clone(), root, &mut queue)?; compiled.push(sch); self.roots.insert(&mut queue.roots); } @@ -295,44 +252,38 @@ impl Compiler { &self, schemas: &Schemas, v: &Value, - loc: &str, + up: &UrlPtr, root: &Root, queue: &mut Queue, ) -> Result { - let mut s = Schema::new(loc.to_owned()); + let mut s = Schema::new(up.to_string()); s.draft_version = root.draft.version; // we know it is already in queue, we just want to get its index let len = queue.schemas.len(); - s.idx = queue.enqueue_schema(schemas, loc.to_owned()); - debug_assert_eq!(queue.schemas.len(), len, "{loc} should already be in queue"); + s.idx = queue.enqueue_schema(schemas, up.to_owned()); + debug_assert_eq!(queue.schemas.len(), len, "{up} should already be in queue"); s.resource = { - let (_, frag) = split(loc); - let ptr = frag.decode().map_err(|e| CompileError::LoadUrlError { - url: loc.to_owned(), - src: e.into(), - })?; - let base = root.base_url(ptr.as_ref()); - let base_loc = root.resolve(base.as_str())?; - queue.enqueue_schema(schemas, base_loc) + let base = UrlPtr { + url: up.url.clone(), + ptr: root.resource(&up.ptr).ptr.clone(), + }; + queue.enqueue_schema(schemas, base) }; // if resource, enqueue dynamicAnchors for compilation if s.idx == s.resource && root.draft.version >= 2020 { - let (url, frag) = split(loc); - let ptr = frag.decode().map_err(|e| CompileError::LoadUrlError { - url: loc.to_owned(), - src: e.into(), - })?; - let res = root.resource(ptr.as_ref()); - for danchor in &res.dynamic_anchors { - let danchor_ptr = res.anchors.get(danchor).ok_or(CompileError::Bug( - "dynamicAnchor must be collected in resource".into(), - ))?; - let danchor_sch = queue - .enqueue_schema(schemas, format!("{}#{}", url, percent_encode(danchor_ptr))); - s.dynamic_anchors.insert(danchor.to_owned(), danchor_sch); + let res = root.resource(&up.ptr); + for (anchor, anchor_ptr) in &res.anchors { + if res.dynamic_anchors.contains(anchor) { + let up = UrlPtr { + url: up.url.clone(), + ptr: anchor_ptr.clone(), + }; + let danchor_sch = queue.enqueue_schema(schemas, up); + s.dynamic_anchors.insert(anchor.to_string(), danchor_sch); + } } } @@ -344,7 +295,7 @@ impl Compiler { ObjCompiler { c: self, obj, - loc, + up, schemas, root, queue, @@ -375,7 +326,7 @@ impl Compiler { struct ObjCompiler<'c, 'v, 'l, 's, 'r, 'q> { c: &'c Compiler, obj: &'v Map, - loc: &'l str, + up: &'l UrlPtr, schemas: &'s Schemas, root: &'r Root, queue: &'q mut Queue, @@ -432,17 +383,18 @@ impl<'c, 'v, 'l, 's, 'r, 'q> ObjCompiler<'c, 'v, 'l, 's, 'r, 'q> { for pname in obj.keys() { let ecma = ecma::convert(pname).map_err(|src| CompileError::InvalidRegex { - url: format!("{}/patternProperties", self.loc), + url: self.up.format("patternProperties"), regex: pname.to_owned(), src, })?; let regex = Regex::new(ecma.as_ref()).map_err(|e| CompileError::InvalidRegex { - url: format!("{}/patternProperties", self.loc), + url: self.up.format("patternProperties"), regex: ecma.into_owned(), src: e.into(), })?; - let sch = self.enqueue_path(format!("patternProperties/{}", escape(pname))); + let ptr = self.up.ptr.append2("patternProperties", pname); + let sch = self.enqueue_schema(ptr); v.push((regex, sch)); } } @@ -457,9 +409,10 @@ impl<'c, 'v, 'l, 's, 'r, 'q> ObjCompiler<'c, 'v, 'l, 's, 'r, 'q> { .filter_map(|(k, v)| { let v = match v { Value::Array(_) => Some(Dependency::Props(to_strings(v))), - _ => Some(Dependency::SchemaRef( - self.enqueue_path(format!("dependencies/{}", escape(k))), - )), + _ => { + let ptr = self.up.ptr.append2("dependencies", k); + Some(Dependency::SchemaRef(self.enqueue_schema(ptr))) + } }; v.map(|v| (k.clone(), v)) }) @@ -654,13 +607,14 @@ impl<'c, 'v, 'l, 's, 'r, 'q> ObjCompiler<'c, 'v, 'l, 's, 'r, 'q> { if self.has_vocab("core") { if let Some(sch) = self.enqueue_ref("$dynamicRef")? { if let Some(Value::String(dref)) = self.value("$dynamicRef") { - let (_, frag) = split(dref); - let anchor = frag - .to_anchor() - .map_err(|_| CompileError::ParseAnchorError { - loc: format!("{}/$dynamicRef", self.loc), - })? - .map(|a| a.into_owned()); + let Ok((_, frag)) = Fragment::split(dref) else { + let loc = self.up.format("$dynamicRef"); + return Err(CompileError::ParseAnchorError { loc }); + }; + let anchor = match frag { + Fragment::Anchor(Anchor(s)) => Some(s), + Fragment::JsonPointer(_) => None, + }; s.dynamic_ref = Some(DynamicRef { sch, anchor }); } }; @@ -681,19 +635,18 @@ impl<'c, 'v, 'l, 's, 'r, 'q> ObjCompiler<'c, 'v, 'l, 's, 'r, 'q> { // enqueue helpers impl<'c, 'v, 'l, 's, 'r, 'q> ObjCompiler<'c, 'v, 'l, 's, 'r, 'q> { - fn enqueue_schema(&mut self, loc: String) -> SchemaIndex { - self.queue.enqueue_schema(self.schemas, loc) - } - - fn enqueue_path(&mut self, path: String) -> SchemaIndex { - let loc = format!("{}/{}", self.loc, percent_encode(&path)); - self.enqueue_schema(loc) + fn enqueue_schema(&mut self, ptr: JsonPointer) -> SchemaIndex { + let up = UrlPtr { + url: self.up.url.clone(), + ptr, + }; + self.queue.enqueue_schema(self.schemas, up) } fn enqueue_prop(&mut self, pname: &'static str) -> Option { if self.obj.contains_key(pname) { - let loc = format!("{}/{}", self.loc, percent_encode(&escape(pname))); - Some(self.enqueue_schema(loc)) + let ptr = self.up.ptr.append(pname); + Some(self.enqueue_schema(ptr)) } else { None } @@ -703,8 +656,8 @@ impl<'c, 'v, 'l, 's, 'r, 'q> ObjCompiler<'c, 'v, 'l, 's, 'r, 'q> { if let Some(Value::Array(arr)) = self.obj.get(pname) { (0..arr.len()) .map(|i| { - let loc = format!("{}/{}/{i}", self.loc, percent_encode(&escape(pname))); - self.enqueue_schema(loc) + let ptr = self.up.ptr.append2(pname, &i.to_string()); + self.enqueue_schema(ptr) }) .collect() } else { @@ -720,13 +673,8 @@ impl<'c, 'v, 'l, 's, 'r, 'q> ObjCompiler<'c, 'v, 'l, 's, 'r, 'q> { if let Some(Value::Object(obj)) = self.obj.get(pname) { obj.keys() .map(|k| { - let loc = format!( - "{}/{}/{}", - self.loc, - percent_encode(&escape(pname)), - percent_encode(&escape(k)) - ); - (k.clone(), self.enqueue_schema(loc)) + let ptr = self.up.ptr.append2(pname, k); + (k.clone(), self.enqueue_schema(ptr)) }) .collect() } else { @@ -738,21 +686,15 @@ impl<'c, 'v, 'l, 's, 'r, 'q> ObjCompiler<'c, 'v, 'l, 's, 'r, 'q> { let Some(Value::String(ref_)) = self.obj.get(pname) else { return Ok(None); }; - let (_, frag) = split(self.loc); - let ptr = frag.decode().map_err(|e| CompileError::LoadUrlError { - url: self.loc.to_owned(), - src: e.into(), - })?; - let abs_ref = self.root.base_url(ptr.as_ref()).join(ref_).map_err(|e| { - CompileError::ParseUrlError { - url: ref_.clone(), - src: e.into(), - } - })?; - let mut resolved_ref = self.root.resolve(abs_ref.as_str())?; - resolved_ref = self.queue.resolve_anchor(resolved_ref, &self.c.roots)?; - - Ok(Some(self.enqueue_schema(resolved_ref))) + let base_url = self.root.base_url(&self.up.ptr); + let abs_ref = UrlFrag::join(base_url, ref_)?; + if let Some(resolved_ref) = self.root.resolve(&abs_ref)? { + // local ref + return Ok(Some(self.enqueue_schema(resolved_ref.ptr))); + } + // remote ref + let up = self.queue.resolve_anchor(abs_ref, &self.c.roots)?; + Ok(Some(self.queue.enqueue_schema(self.schemas, up))) } fn enquue_additional(&mut self, pname: &'static str) -> Option { @@ -999,7 +941,7 @@ fn to_strings(v: &Value) -> Vec { } pub(crate) struct Queue { - pub(crate) schemas: Vec, + pub(crate) schemas: Vec, pub(crate) roots: HashMap, } @@ -1013,44 +955,33 @@ impl Queue { pub(crate) fn resolve_anchor( &mut self, - loc: String, + uf: UrlFrag, roots: &Roots, - ) -> Result { - let (url, frag) = split(&loc); - if frag.is_anchor() { - let url = Url::parse(url).map_err(|e| CompileError::ParseUrlError { - url: url.to_owned(), - src: e.into(), - })?; - if let Some(root) = roots.get(&url).or_else(|| self.roots.get(&url)) { - return root.resolve(&loc); + ) -> Result { + match uf.frag { + Fragment::JsonPointer(ptr) => Ok(UrlPtr { url: uf.url, ptr }), + Fragment::Anchor(_) => { + let root = match roots.get(&uf.url).or_else(|| self.roots.get(&uf.url)) { + Some(root) => root, + None => roots.enqueue_root(uf.url.clone(), &mut self.roots)?, + }; + root.resolve_fragment(&uf.frag) } - let root = roots.enqueue_root(url, &mut self.roots)?; - return root.resolve(&loc); } - Ok(loc) } - pub(crate) fn enqueue_schema(&mut self, schemas: &Schemas, mut loc: String) -> SchemaIndex { - if loc.rfind('#').is_none() { - loc.push('#'); - } - - // handle if external anchor - let (_, frag) = split(&loc); - debug_assert!(!frag.is_anchor(), "anchor {loc} should not be enqueued"); - - if let Some(sch) = schemas.get_by_loc(&loc) { + pub(crate) fn enqueue_schema(&mut self, schemas: &Schemas, up: UrlPtr) -> SchemaIndex { + if let Some(sch) = schemas.get_by_loc(&up) { // already got compiled return sch.idx; } - if let Some(qindex) = self.schemas.iter().position(|e| *e == loc) { + if let Some(qindex) = self.schemas.iter().position(|e| *e == up) { // already queued for compilation return SchemaIndex(schemas.size() + qindex); } // new compilation request - self.schemas.push(loc); + self.schemas.push(up); SchemaIndex(schemas.size() + self.schemas.len() - 1) } } diff --git a/src/draft.rs b/src/draft.rs index e94a262..987df84 100644 --- a/src/draft.rs +++ b/src/draft.rs @@ -132,8 +132,8 @@ pub(crate) struct Draft { impl Draft { pub(crate) fn from_url(url: &str) -> Option<&'static Draft> { - let (mut url, fragment) = split(url); - if !fragment.as_str().is_empty() { + let (mut url, frag) = split(url); + if !frag.is_empty() { return None; } if let Some(s) = url.strip_prefix("http://") { @@ -154,16 +154,19 @@ impl Draft { } pub(crate) fn get_schema(&self) -> Option { - let loc = match self.version { - 2020 => Some("https://json-schema.org/draft/2020-12/schema#"), - 2019 => Some("https://json-schema.org/draft/2019-09/schema#"), - 7 => Some("http://json-schema.org/draft-07/schema#"), - 6 => Some("http://json-schema.org/draft-06/schema#"), - 4 => Some("http://json-schema.org/draft-04/schema#"), - _ => None, + let url = match self.version { + 2020 => "https://json-schema.org/draft/2020-12/schema", + 2019 => "https://json-schema.org/draft/2019-09/schema", + 7 => "http://json-schema.org/draft-07/schema", + 6 => "http://json-schema.org/draft-06/schema", + 4 => "http://json-schema.org/draft-04/schema", + _ => return None, + }; + let up = UrlPtr { + url: Url::parse(url).expect(&format!("{url} should be valid url")), + ptr: "".into(), }; - loc.and_then(|loc| STD_METASCHEMAS.get_by_loc(loc)) - .map(|s| s.idx) + STD_METASCHEMAS.get_by_loc(&up).map(|s| s.idx) } fn get_id<'a>(&self, obj: &'a Map) -> Option<&'a Value> { @@ -186,7 +189,7 @@ impl Draft { pub(crate) fn collect_anchors( &self, sch: &Value, - root_ptr: &str, + root_ptr: &JsonPointer, res: &mut Resource, root_url: &Url, ) -> Result<(), CompileError> { @@ -194,7 +197,7 @@ impl Draft { return Ok(()); }; - let mut add_anchor = |anchor: String| match res.anchors.entry(anchor) { + let mut add_anchor = |anchor: Anchor| match res.anchors.entry(anchor) { Entry::Occupied(entry) => { if entry.get() == root_ptr { // anchor with same root_ptr already exists @@ -202,9 +205,9 @@ impl Draft { } return Err(CompileError::DuplicateAnchor { url: root_url.as_str().to_owned(), - anchor: entry.key().to_owned(), - ptr1: entry.get().to_owned(), - ptr2: root_ptr.to_owned(), + anchor: entry.key().to_string(), + ptr1: entry.get().to_string(), + ptr2: root_ptr.to_string(), }); } entry => { @@ -219,27 +222,25 @@ impl Draft { } // anchor is specified in id if let Some(Value::String(id)) = obj.get(self.id) { - let (_, frag) = split(id); - let Ok(anchor) = frag.to_anchor() else { - let mut url = root_url.clone(); - url.set_fragment(Some(root_ptr)); - return Err(CompileError::ParseAnchorError { loc: url.into() }); + let Ok((_, frag)) = Fragment::split(id) else { + let loc = UrlFrag::format(root_url, root_ptr.as_str()); + return Err(CompileError::ParseAnchorError { loc }); }; - if let Some(anchor) = anchor { - add_anchor(anchor.into())?; + if let Fragment::Anchor(anchor) = frag { + add_anchor(anchor)?; }; return Ok(()); } } if self.version >= 2019 { if let Some(Value::String(anchor)) = obj.get("$anchor") { - add_anchor(anchor.into())?; + add_anchor(anchor.as_str().into())?; } } if self.version >= 2020 { if let Some(Value::String(anchor)) = obj.get("$dynamicAnchor") { - add_anchor(anchor.clone())?; - res.dynamic_anchors.insert(anchor.clone()); + add_anchor(anchor.as_str().into())?; + res.dynamic_anchors.insert(anchor.as_str().into()); } } Ok(()) @@ -249,10 +250,10 @@ impl Draft { pub(crate) fn collect_resources( &self, sch: &Value, - base: &Url, // base of json - root_ptr: String, // ptr of json + base: &Url, // base of json + root_ptr: JsonPointer, // ptr of json root_url: &Url, - resources: &mut HashMap, + resources: &mut HashMap, ) -> Result<(), CompileError> { if resources.contains_key(&root_ptr) { // resources are already collected @@ -275,13 +276,11 @@ impl Draft { let mut base = base; let tmp; let res = if let Some(Value::String(id)) = id { - let (id, _) = split(id); - let Ok(id) = base.join(id) else { - let mut url = base.clone(); - url.set_fragment(Some(&root_ptr)); - return Err(CompileError::ParseIdError { loc: url.into() }); + let Ok(id) = UrlFrag::join(base, id) else { + let loc = UrlFrag::format(root_url, root_ptr.as_str()); + return Err(CompileError::ParseIdError { loc }); }; - tmp = id; + tmp = id.url; base = &tmp; Some(Resource::new(root_ptr.clone(), base.clone())) } else if root_ptr.is_empty() { @@ -314,13 +313,13 @@ impl Draft { continue; }; if pos & POS_SELF != 0 { - let ptr = format!("{root_ptr}/{kw}"); + let ptr = root_ptr.append(kw); self.collect_resources(v, base, ptr, root_url, resources)?; } if pos & POS_ITEM != 0 { if let Value::Array(arr) = v { for (i, item) in arr.iter().enumerate() { - let ptr = format!("{root_ptr}/{kw}/{i}"); + let ptr = root_ptr.append2(kw, &i.to_string()); self.collect_resources(item, base, ptr, root_url, resources)?; } } @@ -328,7 +327,7 @@ impl Draft { if pos & POS_PROP != 0 { if let Value::Object(obj) = v { for (pname, pvalue) in obj { - let ptr = format!("{root_ptr}/{kw}/{}", escape(pname)); + let ptr = root_ptr.append2(kw, pname); self.collect_resources(pvalue, base, ptr, root_url, resources)?; } } @@ -463,11 +462,11 @@ mod tests { }; let mut got = HashMap::new(); DRAFT4 - .collect_resources(&json, &url, String::new(), &url, &mut got) + .collect_resources(&json, &url, "".into(), &url, &mut got) .unwrap(); let got = got .iter() - .map(|(k, v)| (k.as_ref(), v.id.as_str())) + .map(|(k, v)| (k.as_str(), v.id.as_str())) .collect::>(); assert_eq!(got, want); } @@ -499,7 +498,7 @@ mod tests { .unwrap(); let mut resources = HashMap::new(); DRAFT2020 - .collect_resources(&json, &url, String::new(), &url, &mut resources) + .collect_resources(&json, &url, "".into(), &url, &mut resources) .unwrap(); assert!(resources.get("").unwrap().anchors.is_empty()); assert_eq!(resources.get("/$defs/s2").unwrap().anchors, { diff --git a/src/lib.rs b/src/lib.rs index ea0ca4d..29fc2ca 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -140,7 +140,7 @@ pub struct SchemaIndex(usize); #[derive(Default)] pub struct Schemas { list: Vec, - map: HashMap, // loc => schema-index + map: HashMap, // loc => schema-index } impl Schemas { @@ -148,11 +148,11 @@ impl Schemas { Self::default() } - fn insert(&mut self, locs: Vec, compiled: Vec) { - for (loc, sch) in locs.into_iter().zip(compiled.into_iter()) { + fn insert(&mut self, locs: Vec, compiled: Vec) { + for (up, sch) in locs.into_iter().zip(compiled.into_iter()) { let i = self.list.len(); self.list.push(sch); - self.map.insert(loc, i); + self.map.insert(up, i); } } @@ -160,14 +160,8 @@ impl Schemas { &self.list[idx.0] // todo: return bug } - fn get_by_loc(&self, loc: &str) -> Option<&Schema> { - let mut loc = Cow::from(loc); - if loc.rfind('#').is_none() { - let mut s = loc.into_owned(); - s.push('#'); - loc = Cow::from(s); - } - self.map.get(loc.as_ref()).and_then(|&i| self.list.get(i)) + fn get_by_loc(&self, up: &UrlPtr) -> Option<&Schema> { + self.map.get(up).and_then(|&i| self.list.get(i)) } /// Returns true if `sch_index` is generated for this instance. diff --git a/src/root.rs b/src/root.rs index ee64441..2561544 100644 --- a/src/root.rs +++ b/src/root.rs @@ -1,17 +1,13 @@ use std::collections::{HashMap, HashSet}; -use crate::{ - compiler::CompileError, - draft::*, - util::{self, *}, -}; +use crate::{compiler::CompileError, draft::*, util::*}; use serde_json::Value; use url::Url; pub(crate) struct Root { pub(crate) draft: &'static Draft, - pub(crate) resources: HashMap, // ptr => _ + pub(crate) resources: HashMap, // ptr => _ pub(crate) url: Url, pub(crate) doc: Value, pub(crate) meta_vocabs: Option>, @@ -28,50 +24,72 @@ impl Root { self.draft.default_vocabs.contains(&name) } + fn resolve_fragment_in(&self, frag: &Fragment, res: &Resource) -> Result { + let ptr = match frag { + Fragment::Anchor(anchor) => { + let Some(ptr) = res.anchors.get(anchor) else { + return Err(CompileError::AnchorNotFound { + url: self.url.to_string(), + reference: UrlFrag::format(&self.url, frag.as_str()), + }); + }; + ptr + } + Fragment::JsonPointer(ptr) => ptr, + }; + Ok(UrlPtr { + url: self.url.clone(), + ptr: ptr.clone(), + }) + } + + pub(crate) fn resolve_fragment(&self, frag: &Fragment) -> Result { + let res = self.resources.get("").ok_or(CompileError::Bug( + format!("no root resource found for {}", self.url).into(), + ))?; + self.resolve_fragment_in(frag, res) + } + // resolves `loc` to root-url#json-pointer - pub(crate) fn resolve(&self, loc: &str) -> Result { - let (url, frag) = split(loc); - - let (res_ptr, res) = { - if url == self.url.as_str() { - let res = self.resources.get("").ok_or(CompileError::Bug( - format!("no root resource found for {url}").into(), - ))?; - ("", res) + pub(crate) fn resolve(&self, loc: &UrlFrag) -> Result, CompileError> { + let res = { + if loc.url == self.url { + self.resources.get("").ok_or(CompileError::Bug( + format!("no root resource found for {}", self.url).into(), + ))? } else { // look for resource with id==url - let entry = self - .resources - .iter() - .find(|(_res_ptr, res)| res.id.as_str() == url); - - match entry { - Some((ptr, res)) => (ptr.as_str(), res), - _ => return Ok(loc.to_owned()), // external url + let res = self.resources.values().find(|res| res.id == loc.url); + match res { + Some(res) => res, + _ => return Ok(None), // external url } } }; - let anchor = frag.to_anchor().map_err(|e| CompileError::ParseUrlError { - url: loc.to_owned(), - src: e.into(), - })?; - - if let Some(anchor) = anchor { - if let Some(anchor_ptr) = res.anchors.get(anchor.as_ref()) { - Ok(format!("{}#{}", self.url, percent_encode(anchor_ptr))) - } else { - Err(CompileError::AnchorNotFound { - url: self.url.as_str().to_owned(), - reference: loc.to_owned(), - }) + let up = match &loc.frag { + Fragment::JsonPointer(ptr) => UrlPtr { + url: self.url.clone(), + ptr: res.ptr.concat(ptr), + }, + Fragment::Anchor(anchor) => { + let Some(anchor_ptr) = res.anchors.get(anchor) else { + return Err(CompileError::AnchorNotFound { + url: self.url.as_str().to_owned(), + reference: loc.to_string(), + }); + }; + UrlPtr { + url: self.url.clone(), + ptr: anchor_ptr.clone(), + } } - } else { - Ok(format!("{}#{}{}", self.url, percent_encode(res_ptr), frag)) - } + }; + Ok(Some(up)) } - pub(crate) fn resource(&self, mut ptr: &str) -> &Resource { + pub(crate) fn resource(&self, ptr: &JsonPointer) -> &Resource { + let mut ptr = ptr.as_str(); loop { if let Some(res) = self.resources.get(ptr) { return res; @@ -84,14 +102,10 @@ impl Root { self.resources.get("").expect("root resource should exist") } - pub(crate) fn base_url(&self, ptr: &str) -> &Url { + pub(crate) fn base_url(&self, ptr: &JsonPointer) -> &Url { &self.resource(ptr).id } - pub(crate) fn lookup_ptr(&self, ptr: &str) -> Result, ()> { - util::lookup_ptr(&self.doc, ptr) - } - pub(crate) fn get_reqd_vocabs(&self) -> Result>, CompileError> { if self.draft.version < 2019 { return Ok(None); @@ -123,27 +137,17 @@ impl Root { Ok(Some(vocabs)) } - pub(crate) fn add_subschema(&mut self, ptr: &str) -> Result<(), CompileError> { - let v = util::lookup_ptr(&self.doc, ptr).map_err(|_| { - CompileError::InvalidJsonPointer(format!("{}#{}", self.url, percent_encode(ptr))) - })?; - let Some(v) = v else { - let loc = format!("{}#{}", self.url, percent_encode(ptr)); - return Err(CompileError::JsonPointerNotFound(loc))?; - }; + pub(crate) fn add_subschema(&mut self, ptr: &JsonPointer) -> Result<(), CompileError> { + let v = ptr.lookup(&self.doc, &self.url)?; let base_url = self.base_url(ptr).clone(); - self.draft.collect_resources( - v, - &base_url, - ptr.to_string(), - &self.url, - &mut self.resources, - )?; + self.draft + .collect_resources(v, &base_url, ptr.clone(), &self.url, &mut self.resources)?; + + // collect anchors if !self.resources.contains_key(ptr) { let res = self.resource(ptr); - if let Some(res) = self.resources.get_mut(&res.ptr.to_string()) { - self.draft - .collect_anchors(v, ptr.as_ref(), res, &self.url)?; + if let Some(res) = self.resources.get_mut(&res.ptr.clone()) { + self.draft.collect_anchors(v, ptr, res, &self.url)?; } } Ok(()) @@ -152,14 +156,14 @@ impl Root { #[derive(Debug)] pub(crate) struct Resource { - pub(crate) ptr: String, // from root + pub(crate) ptr: JsonPointer, // from root pub(crate) id: Url, - pub(crate) anchors: HashMap, // anchor => ptr - pub(crate) dynamic_anchors: HashSet, + pub(crate) anchors: HashMap, // anchor => ptr + pub(crate) dynamic_anchors: HashSet, } impl Resource { - pub(crate) fn new(ptr: String, id: Url) -> Self { + pub(crate) fn new(ptr: JsonPointer, id: Url) -> Self { Self { ptr, id, diff --git a/src/roots.rs b/src/roots.rs index ab056b0..ba110b0 100644 --- a/src/roots.rs +++ b/src/roots.rs @@ -40,8 +40,23 @@ impl Roots { self.map.get(url) } - pub(crate) fn get_mut(&mut self, url: &Url) -> Option<&mut Root> { - self.map.get_mut(url) + pub(crate) fn resolve_fragment(&mut self, uf: UrlFrag) -> Result { + self.or_load(uf.url.clone())?; + let Some(root) = self.map.get(&uf.url) else { + return Err(CompileError::Bug("or_load didn't add".into())); + }; + root.resolve_fragment(&uf.frag) + } + + pub(crate) fn ensure_subschema(&mut self, up: &UrlPtr) -> Result<(), CompileError> { + self.or_load(up.url.clone())?; + let Some(root) = self.map.get_mut(&up.url) else { + return Err(CompileError::Bug("or_load didn't add".into())); + }; + if !root.draft.is_subschema(up.ptr.as_str()) { + root.add_subschema(&up.ptr)?; + } + Ok(()) } pub(crate) fn or_load(&mut self, url: Url) -> Result<(), CompileError> { @@ -104,7 +119,7 @@ impl Roots { let resources = { let mut m = HashMap::default(); - draft.collect_resources(&doc, &url, String::new(), &url, &mut m)?; + draft.collect_resources(&doc, &url, "".into(), &url, &mut m)?; m }; diff --git a/src/util.rs b/src/util.rs index a88e933..cbc4e8c 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1,4 +1,12 @@ -use std::{borrow::Cow, env, fmt::Display, hash::Hash, hash::Hasher, str::FromStr, str::Utf8Error}; +use std::{ + borrow::{Borrow, Cow}, + env, + error::Error, + fmt::Display, + hash::Hash, + hash::Hasher, + str::FromStr, +}; use ahash::AHasher; use percent_encoding::{percent_decode_str, AsciiSet, CONTROLS}; @@ -7,161 +15,349 @@ use url::Url; use crate::CompileError; -pub(crate) fn is_integer(v: &Value) -> bool { - match v { - Value::Number(n) => { - n.is_i64() || n.is_u64() || n.as_f64().filter(|n| n.fract() == 0.0).is_some() - } - _ => false, - } -} +// -- -fn starts_with_windows_drive(p: &str) -> bool { - p.chars().next().filter(char::is_ascii_uppercase).is_some() && p[1..].starts_with(":\\") -} +#[derive(Debug, Clone, Eq, PartialEq, Hash)] +pub(crate) struct JsonPointer(pub(crate) String); -pub(crate) fn to_url(s: &str) -> Result { - debug_assert!(!s.contains('#')); +impl JsonPointer { + pub(crate) fn escape(token: &str) -> Cow { + const SPECIAL: [char; 2] = ['~', '/']; + if token.contains(SPECIAL) { + token.replace('~', "~0").replace('/', "~1").into() + } else { + token.into() + } + } - // note: windows drive letter is treated as url scheme by url parser - #[cfg(not(target_arch = "wasm32"))] - if std::env::consts::OS == "windows" && starts_with_windows_drive(s) { - return Url::from_file_path(s) - .map_err(|_| CompileError::Bug(format!("failed to convert {s} into url").into())); + pub(crate) fn unescape(mut token: &str) -> Result, ()> { + let Some(mut tilde) = token.find('~') else { + return Ok(Cow::Borrowed(token)); + }; + let mut s = String::with_capacity(token.len()); + loop { + s.push_str(&token[..tilde]); + token = &token[tilde + 1..]; + match token.chars().next() { + Some('1') => s.push('/'), + Some('0') => s.push('~'), + _ => return Err(()), + } + token = &token[1..]; + let Some(i) = token.find('~') else { + s.push_str(token); + break; + }; + tilde = i; + } + Ok(Cow::Owned(s)) } - match Url::parse(s) { - Ok(url) => Ok(url), - #[cfg(not(target_arch = "wasm32"))] - Err(url::ParseError::RelativeUrlWithoutBase) => { - use std::path::Path; - let mut path = Path::new(s); - let tmp; - if !path.is_absolute() { - tmp = env::current_dir() - .map_err(|e| CompileError::ParseUrlError { - url: s.to_owned(), - src: e.into(), - })? - .join(path); - path = tmp.as_path(); + + pub(crate) fn lookup<'a>( + &self, + mut v: &'a Value, + v_url: &Url, + ) -> Result<&'a Value, CompileError> { + for tok in self.0.split('/').skip(1) { + let Ok(tok) = Self::unescape(tok) else { + let loc = UrlFrag::format(v_url, self.as_str()); + return Err(CompileError::InvalidJsonPointer(loc)); + }; + match v { + Value::Object(obj) => { + if let Some(pvalue) = obj.get(tok.as_ref()) { + v = pvalue; + continue; + } + } + Value::Array(arr) => { + if let Ok(i) = usize::from_str(tok.as_ref()) { + if let Some(item) = arr.get(i) { + v = item; + continue; + } + }; + } + _ => {} } - Url::from_file_path(path) - .map_err(|_| CompileError::Bug(format!("failed to convert {s} into url").into())) + let loc = UrlFrag::format(v_url, self.as_str()); + return Err(CompileError::JsonPointerNotFound(loc)); } - Err(e) => Err(CompileError::ParseUrlError { - url: s.to_owned(), - src: e.into(), - }), + Ok(v) + } + + pub(crate) fn as_str(&self) -> &str { + &self.0 + } + + pub(crate) fn concat(&self, next: &Self) -> Self { + JsonPointer(format!("{}{}", self.0, next.0)) + } + + pub(crate) fn is_empty(&self) -> bool { + self.0.is_empty() + } + + pub(crate) fn append(&self, tok: &str) -> Self { + Self(format!("{}/{}", self, Self::escape(tok))) + } + + pub(crate) fn append2(&self, tok1: &str, tok2: &str) -> Self { + Self(format!( + "{}/{}/{}", + self, + Self::escape(tok1), + Self::escape(tok2) + )) } } -/// returns single-quoted string -pub(crate) fn quote(s: &T) -> String -where - T: AsRef + std::fmt::Debug + ?Sized, -{ - let s = format!("{s:?}").replace(r#"\""#, "\"").replace('\'', r"\'"); - format!("'{}'", &s[1..s.len() - 1]) +impl Display for JsonPointer { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) + } } -pub(crate) fn join_iter(iterable: T, sep: &str) -> String -where - T: IntoIterator, - T::Item: Display, -{ - iterable - .into_iter() - .map(|e| e.to_string()) - .collect::>() - .join(sep) +impl Borrow for JsonPointer { + fn borrow(&self) -> &str { + &self.0 + } } -pub(crate) fn escape(token: &str) -> Cow { - const SPECIAL: [char; 2] = ['~', '/']; - if token.contains(SPECIAL) { - token.replace('~', "~0").replace('/', "~1").into() - } else { - token.into() +impl From<&str> for JsonPointer { + fn from(value: &str) -> Self { + Self(value.into()) } } -pub(crate) fn unescape(mut token: &str) -> Result, ()> { - let Some(mut tilde) = token.find('~') else { - return Ok(Cow::Borrowed(token)); - }; - let mut s = String::with_capacity(token.len()); - loop { - s.push_str(&token[..tilde]); - token = &token[tilde + 1..]; - match token.chars().next() { - Some('1') => s.push('/'), - Some('0') => s.push('~'), - _ => return Err(()), - } - token = &token[1..]; - let Some(i) = token.find('~') else { - s.push_str(token); - break; - }; - tilde = i; +// -- + +#[derive(Debug, Clone, Eq, PartialEq, Hash)] +pub(crate) struct Anchor(pub(crate) String); + +impl Display for Anchor { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) } - Ok(Cow::Owned(s)) } -pub(crate) fn percent_encode(frag: &str) -> String { - // https://url.spec.whatwg.org/#fragment-percent-encode-set - const FRAGMENT: &AsciiSet = &CONTROLS - .add(b'%') - .add(b' ') - .add(b'"') - .add(b'<') - .add(b'>') - .add(b'`'); - percent_encoding::utf8_percent_encode(frag, FRAGMENT).to_string() +impl Borrow for Anchor { + fn borrow(&self) -> &str { + &self.0 + } } -pub(crate) struct Fragment<'a>(&'a str); +impl From<&str> for Anchor { + fn from(value: &str) -> Self { + Self(value.into()) + } +} + +// -- +#[derive(Debug, Clone, Eq, PartialEq)] +pub(crate) enum Fragment { + Anchor(Anchor), + JsonPointer(JsonPointer), +} -impl<'a> Fragment<'a> { - pub(crate) fn as_str(&self) -> &'a str { - self.0 +impl Fragment { + pub(crate) fn split(s: &str) -> Result<(&str, Fragment), Box> { + let (u, frag) = if let Some(i) = s.find('#') { + (&s[..i], &s[i + 1..]) + } else { + (s, "") + }; + + let frag = percent_decode_str(frag).decode_utf8()?.to_string(); + let frag = if frag.is_empty() || frag.starts_with('/') { + Fragment::JsonPointer(JsonPointer(frag)) + } else { + Fragment::Anchor(Anchor(frag)) + }; + Ok((u, frag)) } - fn is_json_pointer(&self) -> bool { - self.0.is_empty() - || self.0.starts_with('/') - || self.0.starts_with("%2F") - || self.0.starts_with("%2f") + pub(crate) fn encode(frag: &str) -> String { + // https://url.spec.whatwg.org/#fragment-percent-encode-set + const FRAGMENT: &AsciiSet = &CONTROLS + .add(b'%') + .add(b' ') + .add(b'"') + .add(b'<') + .add(b'>') + .add(b'`'); + percent_encoding::utf8_percent_encode(frag, FRAGMENT).to_string() } - pub(crate) fn is_anchor(&self) -> bool { - !self.is_json_pointer() + pub(crate) fn as_str(&self) -> &str { + match self { + Fragment::Anchor(s) => &s.0, + Fragment::JsonPointer(s) => &s.0, + } } +} + +// -- - pub(crate) fn decode(&self) -> Result, Utf8Error> { - return percent_decode_str(self.0).decode_utf8(); +#[derive(Clone)] +pub(crate) struct UrlFrag { + pub(crate) url: Url, + pub(crate) frag: Fragment, +} + +impl UrlFrag { + pub(crate) fn absolute(input: &str) -> Result { + let (u, frag) = Fragment::split(input).map_err(|e| CompileError::ParseUrlError { + url: input.to_string(), + src: e, + })?; + + // note: windows drive letter is treated as url scheme by url parser + #[cfg(not(target_arch = "wasm32"))] + if std::env::consts::OS == "windows" && starts_with_windows_drive(u) { + let url = Url::from_file_path(u) + .map_err(|_| CompileError::Bug(format!("failed to convert {u} into url").into()))?; + return Ok(UrlFrag { url, frag }); + } + + match Url::parse(u) { + Ok(url) => Ok(UrlFrag { url, frag }), + #[cfg(not(target_arch = "wasm32"))] + Err(url::ParseError::RelativeUrlWithoutBase) => { + // TODO(unstable): replace with `path::absolute` once it is stabilized + use std::path::Path; + let mut path = Path::new(u); + let tmp; + if !path.is_absolute() { + tmp = env::current_dir() + .map_err(|e| CompileError::ParseUrlError { + url: u.to_owned(), + src: e.into(), + })? + .join(path); + path = tmp.as_path(); + } + + let url = Url::from_file_path(path).map_err(|_| { + CompileError::Bug(format!("failed to convert {u} into url").into()) + })?; + Ok(UrlFrag { url, frag }) + } + Err(e) => Err(CompileError::ParseUrlError { + url: u.to_owned(), + src: e.into(), + }), + } } - pub(crate) fn to_anchor(&self) -> Result>, Utf8Error> { - if self.is_json_pointer() { - Ok(None) // json-pointer + pub(crate) fn join(url: &Url, input: &str) -> Result { + let (input, frag) = Fragment::split(input).map_err(|e| CompileError::ParseUrlError { + url: input.to_string(), + src: e, + })?; + if input.is_empty() { + return Ok(UrlFrag { + url: url.clone(), + frag, + }); + } + let url = url.join(input).map_err(|e| CompileError::ParseUrlError { + url: input.to_string(), + src: e.into(), + })?; + + Ok(UrlFrag { url, frag }) + } + + pub(crate) fn format(url: &Url, frag: &str) -> String { + if frag.is_empty() { + url.to_string() } else { - Ok(Some(self.decode()?)) // anchor + format!("{}#{}", url, Fragment::encode(frag)) } } } -impl<'a> Display for Fragment<'a> { +impl Display for UrlFrag { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - self.0.fmt(f) + write!(f, "{}#{}", self.url, Fragment::encode(self.frag.as_str())) } } -pub(crate) fn split(url: &str) -> (&str, Fragment) { +// -- + +#[derive(Debug, Clone, Eq, PartialEq, Hash)] +pub(crate) struct UrlPtr { + pub(crate) url: Url, + pub(crate) ptr: JsonPointer, +} + +impl UrlPtr { + pub(crate) fn lookup<'a>(&self, doc: &'a Value) -> Result<&'a Value, CompileError> { + self.ptr.lookup(doc, &self.url) + } + + pub(crate) fn format(&self, tok: &str) -> String { + format!( + "{}#{}/{}", + self.url, + Fragment::encode(self.ptr.as_str()), + Fragment::encode(JsonPointer::escape(tok).as_ref()), + ) + } +} + +impl Display for UrlPtr { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}#{}", self.url, Fragment::encode(self.ptr.as_str())) + } +} + +// -- + +pub(crate) fn is_integer(v: &Value) -> bool { + match v { + Value::Number(n) => { + n.is_i64() || n.is_u64() || n.as_f64().filter(|n| n.fract() == 0.0).is_some() + } + _ => false, + } +} + +fn starts_with_windows_drive(p: &str) -> bool { + p.chars().next().filter(char::is_ascii_uppercase).is_some() && p[1..].starts_with(":\\") +} + +/// returns single-quoted string +pub(crate) fn quote(s: &T) -> String +where + T: AsRef + std::fmt::Debug + ?Sized, +{ + let s = format!("{s:?}").replace(r#"\""#, "\"").replace('\'', r"\'"); + format!("'{}'", &s[1..s.len() - 1]) +} + +pub(crate) fn join_iter(iterable: T, sep: &str) -> String +where + T: IntoIterator, + T::Item: Display, +{ + iterable + .into_iter() + .map(|e| e.to_string()) + .collect::>() + .join(sep) +} + +pub(crate) fn escape(token: &str) -> Cow { + JsonPointer::escape(token) +} + +pub(crate) fn split(url: &str) -> (&str, &str) { if let Some(i) = url.find('#') { - (&url[..i], Fragment(&url[i + 1..])) + (&url[..i], &url[i + 1..]) } else { - (url, Fragment("")) + (url, "") } } @@ -208,34 +404,6 @@ pub(crate) fn equals(v1: &Value, v2: &Value) -> bool { } } -pub(crate) fn lookup_ptr<'a>(mut v: &'a Value, ptr: &str) -> Result, ()> { - debug_assert!( - ptr.is_empty() || ptr.starts_with('/'), - "lookup_ptr: {ptr} is not json-pointer" - ); - for tok in ptr.split('/').skip(1) { - let tok = unescape(tok)?; - match v { - Value::Object(obj) => { - if let Some(pvalue) = obj.get(tok.as_ref()) { - v = pvalue; - continue; - } - } - Value::Array(arr) => { - if let Ok(i) = usize::from_str(tok.as_ref()) { - if let Some(item) = arr.get(i) { - v = item; - continue; - } - }; - } - _ => {} - } - return Ok(None); - } - Ok(Some(v)) -} // HashedValue -- // Based on implementation proposed by Sven Marnach: @@ -292,13 +460,16 @@ mod tests { #[test] fn test_fragment_to_anchor() { - assert_eq!(Fragment("").to_anchor(), Ok(None)); - assert_eq!(Fragment("/a/b").to_anchor(), Ok(None)); - assert_eq!(Fragment("abcd").to_anchor(), Ok(Some(Cow::from("abcd")))); - assert_eq!( - Fragment("%61%62%63%64").to_anchor(), - Ok(Some(Cow::from("abcd"))) - ); + let tests = [ + ("#", Fragment::JsonPointer("".into())), + ("#/a/b", Fragment::JsonPointer("/a/b".into())), + ("#abcd", Fragment::Anchor("abcd".into())), + ("#%61%62%63%64", Fragment::Anchor("abcd".into())), + ]; + for test in tests { + let (_, got) = Fragment::split(test.0).unwrap(); + assert_eq!(got, test.1, "Fragment::split({:?})", test.0); + } } #[test] @@ -311,7 +482,7 @@ mod tests { ("bar~~", None), ]; for (tok, want) in tests { - let res = unescape(tok).ok(); + let res = JsonPointer::unescape(tok).ok(); let got = res.as_ref().map(|c| c.as_ref()); assert_eq!(got, want, "unescape({:?})", tok) }