From 4ccfb3024c8493aebc5569cd64e2b525fc6914b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Mart=C3=AD?= Date: Wed, 31 Jul 2024 22:43:39 +0100 Subject: [PATCH] recover parser errors squashed --- cmd/shfmt/main.go | 4 ++ syntax/nodes.go | 7 +++ syntax/parser.go | 45 +++++++++++++++++- syntax/parser_test.go | 106 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 160 insertions(+), 2 deletions(-) diff --git a/cmd/shfmt/main.go b/cmd/shfmt/main.go index 78a6b35c..f5aaa293 100644 --- a/cmd/shfmt/main.go +++ b/cmd/shfmt/main.go @@ -57,6 +57,8 @@ var ( toJSON = &multiFlag[bool]{"tojson", "to-json", false} // TODO(v4): remove "tojson" for consistency fromJSON = &multiFlag[bool]{"", "from-json", false} + expRecover = &multiFlag[int]{"", "exp.recover", 0} + // useEditorConfig will be false if any parser or printer flags were used. useEditorConfig = true @@ -198,6 +200,8 @@ For more information, see 'man shfmt' and https://github.com/mvdan/sh. parser = syntax.NewParser(syntax.KeepComments(true)) printer = syntax.NewPrinter(syntax.Minify(minify.val)) + syntax.RecoverErrors(expRecover.val)(parser) + if !useEditorConfig { if posix.val { // -p equals -ln=posix diff --git a/syntax/nodes.go b/syntax/nodes.go index 7f0c9aa5..0630ac29 100644 --- a/syntax/nodes.go +++ b/syntax/nodes.go @@ -151,11 +151,18 @@ func (p Pos) String() string { // will only be valid if a statement contained a closing token such as ';'. func (p Pos) IsValid() bool { return p != Pos{} } +const recoveredOffs = math.MaxUint32 + +func (p Pos) IsRecovered() bool { return p == Pos{offs: recoveredOffs} } + // After reports whether the position p is after p2. It is a more expressive // version of p.Offset() > p2.Offset(). 
func (p Pos) After(p2 Pos) bool { return p.offs > p2.offs } func posAddCol(p Pos, n int) Pos { + if !p.IsValid() || p.IsRecovered() { + return p + } // TODO: guard against overflows p.lineCol += uint32(n) p.offs += uint32(n) diff --git a/syntax/parser.go b/syntax/parser.go index 59a45b18..76493311 100644 --- a/syntax/parser.go +++ b/syntax/parser.go @@ -144,6 +144,14 @@ func StopAt(word string) ParserOption { return func(p *Parser) { p.stopAt = []byte(word) } } +// RecoverErrors allows the parser to skip up to a maximum number of +// errors in the given input. +// +// Currently, this only implies inserting missing closing tokens, such as a closing quote, parenthesis, or reserved word. +func RecoverErrors(maximum int) ParserOption { + return func(p *Parser) { p.recoverErrorsMax = maximum } +} + // NewParser allocates a new Parser and applies any number of options. func NewParser(options ...ParserOption) *Parser { p := &Parser{} @@ -365,6 +373,9 @@ type Parser struct { stopAt []byte + recoveredErrors int + recoverErrorsMax int + forbidNested bool // list of pending heredoc bodies @@ -423,6 +434,7 @@ func (p *Parser) reset() { p.err, p.readErr = nil, nil p.quote, p.forbidNested = noState, false p.openStmts = 0 + p.recoveredErrors = 0 p.heredocs, p.buriedHdocs = p.heredocs[:0], 0 p.hdocStops = nil p.parsingDoc = false @@ -653,6 +665,14 @@ func (p *Parser) gotRsrv(val string) (Pos, bool) { return pos, false } +func (p *Parser) recoverError() bool { + if p.recoveredErrors < p.recoverErrorsMax { + p.recoveredErrors++ + return true + } + return false +} + func readableStr(s string) string { // don't quote tokens like & or } if s != "" && s[0] >= 'a' && s[0] <= 'z' { @@ -679,6 +699,9 @@ func (p *Parser) follow(lpos Pos, left string, tok token) { func (p *Parser) followRsrv(lpos Pos, left, val string) Pos { pos, ok := p.gotRsrv(val) if !ok { + // if p.recoverError() { + // return Pos{offs: recoveredOffs} + // } p.followErr(lpos, left, fmt.Sprintf("%q", val)) } return pos @@ -707,6 +730,9 @@ func (p *Parser) followWordTok(tok token, pos Pos) 
*Word { func (p *Parser) stmtEnd(n Node, start, end string) Pos { pos, ok := p.gotRsrv(end) if !ok { + if p.recoverError() { + return Pos{offs: recoveredOffs} + } p.posErr(n.Pos(), "%s statement must end with %q", start, end) } return pos @@ -725,6 +751,9 @@ func (p *Parser) matchingErr(lpos Pos, left, right any) { func (p *Parser) matched(lpos Pos, left, right token) Pos { pos := p.pos if !p.got(right) { + if p.recoverError() { + return Pos{offs: recoveredOffs} + } p.matchingErr(lpos, left, right) } return pos @@ -1111,6 +1140,10 @@ func (p *Parser) wordPart() WordPart { p.litBs = append(p.litBs, '\\', '\n') case utf8.RuneSelf: p.tok = _EOF + if p.recoverError() { + sq.Right = Pos{offs: recoveredOffs} + return sq + } p.quoteErr(sq.Pos(), sglQuote) return nil } @@ -1148,7 +1181,11 @@ func (p *Parser) wordPart() WordPart { // Like above, the lexer didn't call p.rune for us. p.rune() if !p.got(bckQuote) { - p.quoteErr(cs.Pos(), bckQuote) + if p.recoverError() { + cs.Right = Pos{offs: recoveredOffs} + } else { + p.quoteErr(cs.Pos(), bckQuote) + } } return cs case globQuest, globStar, globPlus, globAt, globExcl: @@ -1198,7 +1235,11 @@ func (p *Parser) dblQuoted() *DblQuoted { p.quote = old q.Right = p.pos if !p.got(dblQuote) { - p.quoteErr(q.Pos(), dblQuote) + if p.recoverError() { + q.Right = Pos{offs: recoveredOffs} + } else { + p.quoteErr(q.Pos(), dblQuote) + } } return q } diff --git a/syntax/parser_test.go b/syntax/parser_test.go index ecfe6c97..6cd097a1 100644 --- a/syntax/parser_test.go +++ b/syntax/parser_test.go @@ -10,6 +10,7 @@ import ( "io" "os" "os/exec" + "reflect" "regexp" "strings" "sync" @@ -2512,3 +2513,108 @@ func TestBackquotesPos(t *testing.T) { qt.Assert(t, qt.Equals(lit.ValuePos.String(), "1:2")) qt.Assert(t, qt.Equals(lit.ValueEnd.String(), "1:7")) } + +func TestParseRecoverErrors(t *testing.T) { + t.Parallel() + + tests := []struct { + src string + + wantErr bool + wantRecoveredPos int + }{ + {src: "foo;"}, + {src: "foo"}, + { + src: "'incomp", 
+ wantRecoveredPos: 1, + }, + { + src: "foo; 'incomp", + wantRecoveredPos: 1, + }, + { + src: "(incomp", + wantRecoveredPos: 1, + }, + { + src: "(incomp; foo", + wantRecoveredPos: 1, + }, + { + src: "$(incomp", + wantRecoveredPos: 1, + }, + // { + // src: "((incomp", + // wantRecoveredPos: 1, + // }, + { + src: "if foo; then bar", + wantRecoveredPos: 1, + }, + { + src: `"incomp`, + wantRecoveredPos: 1, + }, + { + src: "`incomp", + wantRecoveredPos: 1, + }, + // { + // src: "incomp >", + // wantRecoveredPos: 1, + // }, + { + src: "badsyntax)", + wantErr: true, + }, + } + p := NewParser(RecoverErrors(3)) + for _, tc := range tests { + t.Run("", func(t *testing.T) { + r := strings.NewReader(tc.src) + f, err := p.Parse(r, "") + if tc.wantErr && err == nil { + t.Fatalf("Expected error in %q with RecoverErrors(3), found none", tc.src) + } else if !tc.wantErr && err != nil { + t.Fatalf("Unexpected error in %q with RecoverErrors(3): %v", tc.src, err) + } + gotRecoveredPos := countRecoveredPositions(reflect.ValueOf(f)) + if gotRecoveredPos != tc.wantRecoveredPos { + t.Fatalf("want %d recovered positions in %q, got %d", tc.wantRecoveredPos, tc.src, gotRecoveredPos) + } + + }) + } +} + +func countRecoveredPositions(x reflect.Value) int { + switch x.Kind() { + case reflect.Interface: + return countRecoveredPositions(x.Elem()) + case reflect.Ptr: + if !x.IsNil() { + return countRecoveredPositions(x.Elem()) + } + case reflect.Slice: + n := 0 + for i := 0; i < x.Len(); i++ { + n += countRecoveredPositions(x.Index(i)) + } + return n + case reflect.Struct: + if pos, ok := x.Interface().(Pos); ok { + if pos.IsRecovered() { + return 1 + } + return 0 + } + n := 0 + for i := 0; i < x.NumField(); i++ { + n += countRecoveredPositions(x.Field(i)) + } + return n + } + return 0 +}