diff --git a/CHANGELOG.md b/CHANGELOG.md index 0ee4f91..c4ec93b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,13 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm The structure and content of this file follows [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). +## [1.24.0] - 2024-08-09 +### Added +- Added the `jp.PathMatch` function that compares a normalized JSONPath with a target JSONPath. +- Added `jp.MatchHandler` a TokenHandler that can be used to + build a path and data while processing a JSON document. +- Added `oj.Match` and `sen.Match` functions. + ## [1.23.0] - 2024-07-07 ### Added - New script functions can now be added with `jp.RegisterUnaryFunction()` and `jp.RegisterBinaryFunction()`. diff --git a/cmd/oj/main.go b/cmd/oj/main.go index 1ff7d45..221161a 100644 --- a/cmd/oj/main.go +++ b/cmd/oj/main.go @@ -5,7 +5,7 @@ package main import ( "flag" "fmt" - "io/ioutil" + "io" "os" "path/filepath" "sort" @@ -38,6 +38,8 @@ var ( safe = false mongo = false omit = false + dig = false + annotate = false // If true wrap extracts with an array. 
wrapExtract = false @@ -72,13 +74,16 @@ func init() { flag.BoolVar(&lazy, "z", lazy, "lazy mode accepts Simple Encoding Notation (quotes and commas mostly optional)") flag.BoolVar(&senOut, "sen", senOut, "output in Simple Encoding Notation") flag.BoolVar(&tab, "t", tab, "indent with tabs") + flag.BoolVar(&annotate, "annotate", annotate, "annotate dig extracts with a path comment") flag.Var(&exValue{}, "x", "extract path") flag.Var(&matchValue{}, "m", "match equation/script") flag.Var(&delValue{}, "d", "delete path") + flag.BoolVar(&dig, "dig", dig, "dig into a large document using the tokenizer") flag.BoolVar(&showVersion, "version", showVersion, "display version and exit") flag.StringVar(&planDef, "a", planDef, "assembly plan or plan file using @") flag.BoolVar(&showRoot, "r", showRoot, "print root if an assemble plan provided") - flag.StringVar(&prettyOpt, "p", prettyOpt, `pretty print with the width, depth, and align as ..`) + flag.StringVar(&prettyOpt, "p", prettyOpt, + `pretty print with the width, depth, and align as ..`) flag.BoolVar(&html, "html", html, "output colored output as HTML") flag.BoolVar(&safe, "safe", safe, "escape &, <, and > for HTML inclusion") flag.StringVar(&confFile, "f", confFile, "configuration file (see -help-config), - indicates no file") @@ -271,7 +276,7 @@ func run() (err error) { if 0 < len(planDef) { if planDef[0] != '[' { var b []byte - if b, err = ioutil.ReadFile(planDef); err != nil { + if b, err = os.ReadFile(planDef); err != nil { return err } planDef = string(b) @@ -290,7 +295,11 @@ func run() (err error) { var f *os.File for _, file := range files { if f, err = os.Open(file); err == nil { - _, err = p.ParseReader(f, write) + if dig { + err = digParse(f) + } else { + _, err = p.ParseReader(f, write) + } _ = f.Close() } if err != nil { @@ -304,7 +313,12 @@ func run() (err error) { } } if len(files) == 0 && len(input) == 0 { - if _, err = p.ParseReader(os.Stdin, write); err != nil { + if dig { + err = digParse(os.Stdin) + } 
else { + _, err = p.ParseReader(os.Stdin, write) + } + if err != nil { panic(err) } } @@ -317,6 +331,79 @@ func run() (err error) { return } +func digParse(r io.Reader) error { + var fn func(path jp.Expr, data any) + annotateColor := "" + + if color { + annotateColor = ojg.Gray + } + // Pick a function that satisfies omit, annotate, and senOut + // values. Determining the function before the actual calling means few + // conditional paths during the repeated calls later. + if omit { + if annotate { + if senOut { + fn = func(path jp.Expr, data any) { + if data != nil && data != "" { + fmt.Printf("%s// %s\n", annotateColor, path) + writeSEN(data) + } + } + } else { + fn = func(path jp.Expr, data any) { + if data != nil && data != "" { + fmt.Printf("%s// %s\n", annotateColor, path) + writeJSON(data) + } + } + } + } else { + if senOut { + fn = func(path jp.Expr, data any) { + if data != nil && data != "" { + writeSEN(data) + } + } + } else { + fn = func(path jp.Expr, data any) { + if data != nil && data != "" { + writeJSON(data) + } + } + } + } + } else { + if annotate { + if senOut { + fn = func(path jp.Expr, data any) { + fmt.Printf("%s// %s\n", annotateColor, path) + writeSEN(data) + } + } else { + fn = func(path jp.Expr, data any) { + fmt.Printf("%s// %s\n", annotateColor, path) + writeJSON(data) + } + } + } else { + if senOut { + fn = func(path jp.Expr, data any) { + writeSEN(data) + } + } else { + fn = func(path jp.Expr, data any) { + writeJSON(data) + } + } + } + } + if lazy { + return sen.MatchLoad(r, fn, extracts...) + } + return oj.MatchLoad(r, fn, extracts...) +} + func write(v any) bool { if conv != nil { v = conv.Convert(v) diff --git a/jp/match.go b/jp/match.go new file mode 100644 index 0000000..92ab703 --- /dev/null +++ b/jp/match.go @@ -0,0 +1,86 @@ +// Copyright (c) 2024, Peter Ohler, All rights reserved. + +package jp + +// PathMatch returns true if the provided path would match the target +// expression. 
The path argument is expected to be a normalized path with only +// elements of Root ($), At (@), Child (string), or Nth (int). A Filter +// fragment in the target expression will match any value in path since it +// requires data from a JSON document to be evaluated. Slice fragments always +// return true as long as the path element is an Nth. +func PathMatch(target, path Expr) bool { + if 0 < len(target) { + switch target[0].(type) { + case Root, At: + target = target[1:] + } + } + if 0 < len(path) { + switch path[0].(type) { + case Root, At: + path = path[1:] + } + } + for i, f := range target { + if len(path) == 0 { + return false + } + switch path[0].(type) { + case Child, Nth: + default: + return false + } + switch tf := f.(type) { + case Child, Nth: + if tf != path[0] { + return false + } + path = path[1:] + case Bracket: + // ignore and don't advance path + case Wildcard: + path = path[1:] + case Union: + var ok bool + for _, u := range tf { + check: + switch tu := u.(type) { + case string: + if Child(tu) == path[0] { + ok = true + break check + } + case int64: + if Nth(tu) == path[0] { + ok = true + break check + } + } + } + if !ok { + return false + } + path = path[1:] + case Slice: + if _, ok := path[0].(Nth); !ok { + return false + } + path = path[1:] + case *Filter: + // Assume a match since there is no data for comparison. + path = path[1:] + case Descent: + rest := target[i+1:] + for 0 < len(path) { + if PathMatch(rest, path) { + return true + } + path = path[1:] + } + return false + default: + return false + } + } + return true +} diff --git a/jp/match_test.go b/jp/match_test.go new file mode 100644 index 0000000..cd619b2 --- /dev/null +++ b/jp/match_test.go @@ -0,0 +1,56 @@ +// copyright (c) 2024, Peter Ohler, All rights reserved. 
+ +package jp_test + +import ( + "testing" + + "github.com/ohler55/ojg/jp" + "github.com/ohler55/ojg/tt" +) + +type matchData struct { + target string + path string + expect bool +} + +func TestPathMatchCheck(t *testing.T) { + for i, md := range []*matchData{ + {target: "$.a", path: "a", expect: true}, + {target: "@.a", path: "a", expect: true}, + {target: "a", path: "a", expect: true}, + {target: "a", path: "$.a", expect: true}, + {target: "a", path: "@.a", expect: true}, + {target: "[1]", path: "[1]", expect: true}, + {target: "[1]", path: "[0]", expect: false}, + {target: "*", path: "[1]", expect: true}, + {target: "[*]", path: "[1]", expect: true}, + {target: "*", path: "a", expect: true}, + {target: "[1,'a']", path: "a", expect: true}, + {target: "[1,'a']", path: "[1]", expect: true}, + {target: "[1,'a']", path: "b", expect: false}, + {target: "[1,'a']", path: "[0]", expect: false}, + {target: "$.x[1,'a']", path: "x[1]", expect: true}, + {target: "..x", path: "a.b.x", expect: true}, + {target: "..x", path: "a.b.c", expect: false}, + {target: "x[1:5:2]", path: "x[2]", expect: true}, + {target: "x[1:5:2]", path: "x.y", expect: false}, + {target: "x[?@.a == 2]", path: "x[2]", expect: true}, + {target: "x.y.z", path: "x.y", expect: false}, + } { + tt.Equal(t, md.expect, jp.PathMatch(jp.MustParseString(md.target), jp.MustParseString(md.path)), + "%d: %s %s", i, md.target, md.path) + } +} + +func TestPathMatchDoubleRoot(t *testing.T) { + tt.Equal(t, false, jp.PathMatch(jp.R().R().C("a"), jp.C("a"))) + tt.Equal(t, false, jp.PathMatch(jp.A().A().C("a"), jp.C("a"))) + tt.Equal(t, false, jp.PathMatch(jp.C("a"), jp.R().R().C("a"))) + tt.Equal(t, false, jp.PathMatch(jp.C("a"), jp.A().A().C("a"))) +} + +func TestPathMatchSkipBracket(t *testing.T) { + tt.Equal(t, true, jp.PathMatch(jp.B().C("a"), jp.C("a"))) +} diff --git a/jp/matchhandler.go b/jp/matchhandler.go new file mode 100644 index 0000000..f2b176c --- /dev/null +++ b/jp/matchhandler.go @@ -0,0 +1,187 @@ +// 
Copyright (c) 2024, Peter Ohler, All rights reserved. + +package jp + +import ( + "encoding/json" +) + +// TargetRest is used by the MatchHandler to associate a Target and Rest of a +// match search. +type TargetRest struct { + Target Expr + // Rest is set when a Filter is included in the initializing target. Since + // Filters can only be evaluated when there is data for the evaluation a + // target with a Filter is split with the pre-filter portion and the rest + // starting with the filter. + Rest Expr +} + +// MatchHandler is a TokenHandler compatible with both the oj.TokenHandler and +// the sen.TokenHandler. Fields are public to allow derived types to access +// those fields. +type MatchHandler struct { + Targets []*TargetRest + Path Expr + Stack []any + OnData func(path Expr, data any) +} + +// NewMatchHandler creates a new MatchHandler. +func NewMatchHandler(onData func(path Expr, data any), targets ...Expr) *MatchHandler { + h := MatchHandler{ + Path: R(), + OnData: onData, + } + for _, target := range targets { + tr := TargetRest{Target: target} + for i, f := range target { + if _, ok := f.(*Filter); ok { + tr.Rest = target[i:] + tr.Target = target[:i] + break + } + } + h.Targets = append(h.Targets, &tr) + } + return &h +} + +// Null is called when a JSON null is encountered. +func (h *MatchHandler) Null() { + h.AddValue(nil) +} + +// Bool is called when a JSON true or false is encountered. +func (h *MatchHandler) Bool(v bool) { + h.AddValue(v) +} + +// Int is called when a JSON integer is encountered. +func (h *MatchHandler) Int(v int64) { + h.AddValue(v) +} + +// Float is called when a JSON decimal is encountered that fits into a +// float64. +func (h *MatchHandler) Float(v float64) { + h.AddValue(v) +} + +// Number is called when a JSON number is encountered that does not fit +// into an int64 or float64. +func (h *MatchHandler) Number(num string) { + h.AddValue(json.Number(num)) +} + +// String is called when a JSON string is encountered. 
+func (h *MatchHandler) String(v string) { + h.AddValue(v) +} + +// ObjectStart is called when a JSON object start '{' is encountered. +func (h *MatchHandler) ObjectStart() { + h.objArrayStart(map[string]any{}, Child("")) +} + +// ObjectEnd is called when a JSON object end '}' is encountered. +func (h *MatchHandler) ObjectEnd() { + h.objArrayEnd() +} + +// Key is called when a JSON object key is encountered. +func (h *MatchHandler) Key(k string) { + h.Path[len(h.Path)-1] = Child(k) +} + +// ArrayStart is called when a JSON array start '[' is encountered. +func (h *MatchHandler) ArrayStart() { + h.objArrayStart([]any{}, Nth(0)) +} + +// ArrayEnd is called when a JSON array end ']' is encountered. +func (h *MatchHandler) ArrayEnd() { + h.objArrayEnd() +} + +// AddValue is called when a leaf value is encountered. +func (h *MatchHandler) AddValue(v any) { + if 0 < len(h.Stack) { + switch ts := h.Stack[len(h.Stack)-1].(type) { + case map[string]any: + ts[string(h.Path[len(h.Path)-1].(Child))] = v + case []any: + h.Stack[len(h.Stack)-1] = append(ts, v) + } + } else if h.pathMatch(true) { + h.OnData(h.Path, v) + } + h.incNth() +} + +func (h *MatchHandler) objArrayStart(v any, frag Frag) { + if 0 < len(h.Stack) { + switch ts := h.Stack[len(h.Stack)-1].(type) { + case map[string]any: + ts[string(h.Path[len(h.Path)-1].(Child))] = v + case []any: + h.Stack[len(h.Stack)-1] = append(ts, v) + } + h.Stack = append(h.Stack, v) + } else if h.pathMatch(false) { + h.Stack = append(h.Stack, v) + } + h.Path = append(h.Path, frag) +} + +func (h *MatchHandler) objArrayEnd() { + h.Path = h.Path[:len(h.Path)-1] + if 0 < len(h.Stack) { + if len(h.Stack) == 1 { + if v, p, ok := h.checkRest(h.Stack[0]); ok { + h.OnData(p, v) + } + } + h.Stack = h.Stack[:len(h.Stack)-1] + } + h.incNth() +} + +func (h *MatchHandler) incNth() { + if last := len(h.Path) - 1; 0 <= last { + if nth, ok := h.Path[last].(Nth); ok { + h.Path[last] = nth + 1 + } + } +} + +func (h *MatchHandler) checkRest(v any) (any, 
Expr, bool) { + var tr *TargetRest + for _, t := range h.Targets { + if PathMatch(t.Target, h.Path) { + tr = t + break + } + } + p := h.Path + if tr != nil && tr.Rest != nil { + locs := tr.Rest.Locate(v, 1) + if len(locs) == 0 { + return nil, p, false + } + p = append(p, locs[0]...) + v = tr.Rest.First(v) + } + return v, p, true +} + +func (h *MatchHandler) pathMatch(leaf bool) bool { + for _, tr := range h.Targets { + if PathMatch(tr.Target, h.Path) { + if !leaf || tr.Rest == nil { + return true + } + } + } + return false +} diff --git a/jp/matchhandler_test.go b/jp/matchhandler_test.go new file mode 100644 index 0000000..f81cc9c --- /dev/null +++ b/jp/matchhandler_test.go @@ -0,0 +1,86 @@ +// copyright (c) 2024, Peter Ohler, All rights reserved. + +package jp_test + +import ( + "fmt" + "testing" + + "github.com/ohler55/ojg/jp" + "github.com/ohler55/ojg/pretty" + "github.com/ohler55/ojg/sen" + "github.com/ohler55/ojg/tt" +) + +type matchHandlerData struct { + target string + src string + expect string +} + +func (md *matchHandlerData) runTest(t *testing.T, i int) { + var buf []byte + h := jp.NewMatchHandler(func(path jp.Expr, data any) { + buf = fmt.Appendf(buf, "%s: %v\n", path, pretty.SEN(data)) + }, jp.MustParseString(md.target)) + err := sen.TokenizeString(md.src, h) + tt.Nil(t, err) + tt.Equal(t, md.expect, string(buf), "%d: %s - %s", i, md.target, md.src) +} + +func TestMatchHandlerRoot(t *testing.T) { + for i, md := range []*matchHandlerData{ + {target: "$", src: "123", expect: "$: 123\n"}, + {target: "$", src: "2.5", expect: "$: 2.5\n"}, + {target: "$", src: "abc", expect: "$: abc\n"}, + {target: "$", src: "null", expect: "$: null\n"}, + {target: "$", src: "true", expect: "$: true\n"}, + {target: "$", src: "123456789012345678901234567890", expect: "$: \"123456789012345678901234567890\"\n"}, + } { + md.runTest(t, i) + } +} + +func TestMatchHandlerChild(t *testing.T) { + for i, md := range []*matchHandlerData{ + {target: "$.a", src: "{a:1 b:2}", expect: 
"$.a: 1\n"}, + {target: "$.a.b", src: "{a:{b:1} b:2}", expect: "$.a.b: 1\n"}, + } { + md.runTest(t, i) + } +} + +func TestMatchHandlerNth(t *testing.T) { + for i, md := range []*matchHandlerData{ + {target: "$[1]", src: "[1 2 3 4]", expect: "$[1]: 2\n"}, + {target: "$[1][2]", src: "[1 [2 4 8] 3 4]", expect: "$[1][2]: 8\n"}, + } { + md.runTest(t, i) + } +} + +func TestMatchHandlerObjectChild(t *testing.T) { + for i, md := range []*matchHandlerData{ + {target: "$.a", src: "{a:{b:2}}", expect: "$.a: {b: 2}\n"}, + {target: "$.a", src: "{a:{b:{c: 2}}}", expect: "$.a: {b: {c: 2}}\n"}, + } { + md.runTest(t, i) + } +} + +func TestMatchHandlerArrayNth(t *testing.T) { + for i, md := range []*matchHandlerData{ + {target: "$[1]", src: "[1 [2 3 4] 5]", expect: "$[1]: [2 3 4]\n"}, + } { + md.runTest(t, i) + } +} + +func TestMatchHandlerFilter(t *testing.T) { + for i, md := range []*matchHandlerData{ + {target: "$[?@.x == 1]", src: "[{x:0 y:0} {x:1 y:1}]", expect: "$[1]: {x: 1 y: 1}\n"}, + {target: "$[?@.x == 2]", src: "[{x:0 y:0} {x:1 y:1}]", expect: ""}, + } { + md.runTest(t, i) + } +} diff --git a/notes b/notes index cc794be..7787f2b 100644 --- a/notes +++ b/notes @@ -1,5 +1,4 @@ - - @.foo without a comparison indicates existance - parse diff --git a/oj/match_test.go b/oj/match_test.go new file mode 100644 index 0000000..a9432a3 --- /dev/null +++ b/oj/match_test.go @@ -0,0 +1,41 @@ +// Copyright (c) 2024, Peter Ohler, All rights reserved. 
+ +package oj_test + +import ( + "fmt" + "strings" + "testing" + + "github.com/ohler55/ojg/jp" + "github.com/ohler55/ojg/oj" + "github.com/ohler55/ojg/pretty" + "github.com/ohler55/ojg/tt" +) + +func TestMatch(t *testing.T) { + var buf []byte + err := oj.Match([]byte(`{"a":1, "b":2}`), func(path jp.Expr, data any) { + buf = fmt.Appendf(buf, "%s: %v", path, pretty.SEN(data)) + }, jp.C("a")) + tt.Nil(t, err) + tt.Equal(t, "$.a: 1", string(buf)) +} + +func TestMatchString(t *testing.T) { + var buf []byte + err := oj.MatchString(`{"a":1, "b":2}`, func(path jp.Expr, data any) { + buf = fmt.Appendf(buf, "%s: %v", path, pretty.SEN(data)) + }, jp.C("a")) + tt.Nil(t, err) + tt.Equal(t, "$.a: 1", string(buf)) +} + +func TestMatchLoad(t *testing.T) { + var buf []byte + err := oj.MatchLoad(strings.NewReader(`{"a":1, "b":2}`), func(path jp.Expr, data any) { + buf = fmt.Appendf(buf, "%s: %v", path, pretty.SEN(data)) + }, jp.C("a")) + tt.Nil(t, err) + tt.Equal(t, "$.a: 1", string(buf)) +} diff --git a/oj/oj.go b/oj/oj.go index 2811db4..02d8f9a 100644 --- a/oj/oj.go +++ b/oj/oj.go @@ -8,6 +8,7 @@ import ( "github.com/ohler55/ojg" "github.com/ohler55/ojg/alt" + "github.com/ohler55/ojg/jp" ) // Options is an alias for ojg.Options @@ -238,3 +239,21 @@ func pickWriter(arg any, strict bool) (wr *Writer) { } return } + +// Match parses a JSON document and calls onData when a data element that +// matches the target path is encountered. +func Match(data []byte, onData func(path jp.Expr, data any), targets ...jp.Expr) error { + return Tokenize(data, jp.NewMatchHandler(onData, targets...)) +} + +// MatchString parses a JSON document and calls onData when a data element that +// matches the target path is encountered. 
+func MatchString(data string, onData func(path jp.Expr, data any), targets ...jp.Expr) error { + return Tokenize([]byte(data), jp.NewMatchHandler(onData, targets...)) +} + +// MatchLoad parses a JSON document from an io.Reader and calls onData when a +// data element that matches the target path is encountered. +func MatchLoad(r io.Reader, onData func(path jp.Expr, data any), targets ...jp.Expr) error { + return TokenizeLoad(r, jp.NewMatchHandler(onData, targets...)) +} diff --git a/sen/match_test.go b/sen/match_test.go new file mode 100644 index 0000000..c612632 --- /dev/null +++ b/sen/match_test.go @@ -0,0 +1,40 @@ +// Copyright (c) 2024, Peter Ohler, All rights reserved. +package sen_test + +import ( + "fmt" + "strings" + "testing" + + "github.com/ohler55/ojg/jp" + "github.com/ohler55/ojg/pretty" + "github.com/ohler55/ojg/sen" + "github.com/ohler55/ojg/tt" +) + +func TestMatch(t *testing.T) { + var buf []byte + err := sen.Match([]byte(`{a:1 b:2}`), func(path jp.Expr, data any) { + buf = fmt.Appendf(buf, "%s: %v", path, pretty.SEN(data)) + }, jp.C("a")) + tt.Nil(t, err) + tt.Equal(t, "$.a: 1", string(buf)) +} + +func TestMatchString(t *testing.T) { + var buf []byte + err := sen.MatchString(`{a:1 b:2}`, func(path jp.Expr, data any) { + buf = fmt.Appendf(buf, "%s: %v", path, pretty.SEN(data)) + }, jp.C("a")) + tt.Nil(t, err) + tt.Equal(t, "$.a: 1", string(buf)) +} + +func TestMatchLoad(t *testing.T) { + var buf []byte + err := sen.MatchLoad(strings.NewReader(`{a:1 b:2}`), func(path jp.Expr, data any) { + buf = fmt.Appendf(buf, "%s: %v", path, pretty.SEN(data)) + }, jp.C("a")) + tt.Nil(t, err) + tt.Equal(t, "$.a: 1", string(buf)) +} diff --git a/sen/sen.go b/sen/sen.go index 03b954f..4781a98 100644 --- a/sen/sen.go +++ b/sen/sen.go @@ -9,6 +9,7 @@ import ( "github.com/ohler55/ojg" "github.com/ohler55/ojg/alt" + "github.com/ohler55/ojg/jp" ) // Options is an alias for ojg.Options @@ -200,3 +201,21 @@ func pickWriter(arg any) (wr *Writer) { } return } + +// Match 
parses a SEN document and calls onData when a data element that +// matches the target path is encountered. +func Match(data []byte, onData func(path jp.Expr, data any), targets ...jp.Expr) error { + return Tokenize(data, jp.NewMatchHandler(onData, targets...)) +} + +// MatchString parses a SEN document and calls onData when a data element that +// matches the target path is encountered. +func MatchString(data string, onData func(path jp.Expr, data any), targets ...jp.Expr) error { + return Tokenize([]byte(data), jp.NewMatchHandler(onData, targets...)) +} + +// MatchLoad parses a SEN document from an io.Reader and calls onData when a +// data element that matches the target path is encountered. +func MatchLoad(r io.Reader, onData func(path jp.Expr, data any), targets ...jp.Expr) error { + return TokenizeLoad(r, jp.NewMatchHandler(onData, targets...)) +}