diff --git a/.github/labels.yaml b/.github/labels.yaml index deb49ebc1..62389dc93 100644 --- a/.github/labels.yaml +++ b/.github/labels.yaml @@ -20,10 +20,14 @@ - vscode_extension/**/* 'P: Core': - packages/Core/**/* +'P: Csv': + - packages/Csv/**/* 'P: Examples': - packages/Examples/**/* 'P: Http': - packages/Http/**/* +'P: Parser': + - packages/Parser/**/* 'P: ProgrammingLanguageBenchmarks': - packages/ProgrammingLanguageBenchmarks/**/* 'P: Random': diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index f6ba90dd6..feca1f429 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -28,7 +28,11 @@ jobs: - uses: technote-space/assign-author@v1.6.2 automerge-pr: - if: (github.event.action == 'opened' && !github.event.pull_request.draft) || github.event.action == 'ready_for_review' + if: > + github.base_ref == 'main' + && ( + (github.event.action == 'opened' && !github.event.pull_request.draft) + || github.event.action == 'ready_for_review') runs-on: ubuntu-latest permissions: contents: write diff --git a/compiler/frontend/src/mir_optimize/pure.rs b/compiler/frontend/src/mir_optimize/pure.rs index 3f2452992..757978eb8 100644 --- a/compiler/frontend/src/mir_optimize/pure.rs +++ b/compiler/frontend/src/mir_optimize/pure.rs @@ -340,6 +340,7 @@ impl PurenessInsights { #[derive(Clone, Debug, Default, Eq, PartialEq)] pub struct IdSet(BitVec); impl IdSet { + #[must_use] pub fn contains(&self, id: Id) -> bool { if id.to_usize() >= self.0.len() { false @@ -348,6 +349,7 @@ impl IdSet { } } + #[must_use] pub fn iter(&self) -> IdSetIter { self.into_iter() } diff --git a/packages/Core/_.candy b/packages/Core/_.candy index 69bd28a43..4803bef40 100644 --- a/packages/Core/_.candy +++ b/packages/Core/_.candy @@ -1,5 +1,5 @@ bool := use ".bool" -[check] := use ".check" +[check, checkEquals] := use ".check" [if, ifElse, loop, recursive, repeat] := use ".controlFlow" [equals] := use ".equality" fixedDecimal := use ".fixedDecimal" diff --git 
a/packages/Core/iterator.candy b/packages/Core/iterator.candy index 01984db51..47ecd356f 100644 --- a/packages/Core/iterator.candy +++ b/packages/Core/iterator.candy @@ -174,6 +174,16 @@ joinToText iterator := iterator | wrapSafe { needs False "`joinToText` needs an needs (text.is item) result | text.concatenate item } +joinToTextWithSeparator iterator separator := + iterator = iterator | wrapSafe { needs False "`joinToTextWithSeparator` needs an iterator." } + needs (text.is separator) + + iterator | reduceLeft { a b -> + needs (text.is a) + needs (text.is b) + a | text.concatenate separator | text.concatenate b + } + | result.unwrapOr "" ## Filtering diff --git a/packages/Core/list.candy b/packages/Core/list.candy index a08562e7a..301516bc9 100644 --- a/packages/Core/list.candy +++ b/packages/Core/list.candy @@ -12,7 +12,7 @@ is value := type.is value List length := builtins.listLength isEmpty list := needs (is list) - equals (length list) 0 + equals (list | length) 0 lastIndex list := needs (is list) @@ -55,7 +55,7 @@ prepend list item := list | insert 0 item append list item := needs (is list) - list | insert (length list) item + list | insert (list | length) item replace := builtins.listReplace update list index updater := diff --git a/packages/Csv/_.candy b/packages/Csv/_.candy new file mode 100644 index 000000000..ae2e4492a --- /dev/null +++ b/packages/Csv/_.candy @@ -0,0 +1,2 @@ +[decode] := use ".decode" +[encode] := use ".encode" diff --git a/packages/Csv/_package.candy b/packages/Csv/_package.candy new file mode 100644 index 000000000..e69de29bb diff --git a/packages/Csv/decode.candy b/packages/Csv/decode.candy new file mode 100644 index 000000000..546f02539 --- /dev/null +++ b/packages/Csv/decode.candy @@ -0,0 +1,104 @@ +[bool, checkEquals, equals, ifElse, int, list, recursive, text] = use "Core" +[cursor, parser] = use "Parser" + +decode csv := + needs (text.is csv) + # TODO(JonasWanke): Error on lines with different field counts + recursive [Lines: 
(,), Fields: (,), FieldStartOffset: 0, P: parser.new csv] { + recurse [lines, fields, fieldStartOffset, p] -> + Parser (Cursor [source, Offset: oldOffset]) = p + p | parser.next % + Ok [Parser: p, character] -> + Parser c = p + Cursor [Offset: newOffset] = c + character % + "," -> + recurse [ + lines, + Fields: fields | list.append (source | text.getRange fieldStartOffset oldOffset), + FieldStartOffset: newOffset, + p, + ] + " + + + " -> + fields = fields | list.append (source | text.getRange fieldStartOffset oldOffset) + lines = lines | list.append fields + # CSV files can have a trailing newline. + ifElse + (c | cursor.isAtEnd) + { + ifElse (oldOffset | equals 0) { Ok (,) } { Ok lines } + } + { + recurse [lines, Fields: (,), FieldStartOffset: newOffset, p] + } + # TODO(JonasWanke): handle quoted field + _ -> recurse [lines, fields, fieldStartOffset, p] + Error Empty -> + finalField = source | text.getRange fieldStartOffset oldOffset + Ok + ifElse + finalField | text.isEmpty | bool.lazyAnd { fields | list.isEmpty } + { lines } + { lines | list.append (fields | list.append finalField) } + } + +test = + checkEquals (decode "") (Ok (,)) + checkEquals (decode "{text.newline}") (Ok (,)) + + checkEquals (decode "aaa") (Ok (("aaa",),)) + checkEquals (decode "aaa{text.newline}") (Ok (("aaa",),)) + + checkEquals (decode " aaa ") (Ok ((" aaa ",),)) + checkEquals (decode " aaa {text.newline}") (Ok ((" aaa ",),)) + + checkEquals (decode "aaa,bbb") (Ok (("aaa", "bbb"),)) + checkEquals (decode "aaa,bbb{text.newline}") (Ok (("aaa", "bbb"),)) + checkEquals (decode "aaa,bbb{text.newline}ccc,ddd") (Ok (("aaa", "bbb"), ("ccc", "ddd"))) + checkEquals + decode "aaa,bbb{text.newline}ccc,ddd{text.newline}" + Ok (("aaa", "bbb"), ("ccc", "ddd")) + # Parser is broken, hence this verbose formatting: https://github.com/candy-lang/candy/issues/896 + checkEquals + decode " + aaa , bbb {text.newline} ccc , ddd {text.newline} + " + Ok ( + ( + " + aaa + ", + " + bbb + ", + ), + ( + " + ccc + ", + 
" + ddd + ", + ), + ) + +testRfcExamples = + # From https://datatracker.ietf.org/doc/html/rfc4180#section-2 + checkEquals (decode "aaa,bbb,ccc") (Ok (("aaa", "bbb", "ccc"),)) + + checkEquals + decode "aaa,bbb,ccc{text.newline}zzz,yyy,xxx{text.newline}" + Ok (("aaa", "bbb", "ccc"), ("zzz", "yyy", "xxx")) + checkEquals + decode " + field_name,field_name,field_name{text.newline}aaa,bbb,ccc{text.newline}zzz,yyy,xxx{text.newline} + " + Ok (("field_name", "field_name", "field_name"), ("aaa", "bbb", "ccc"), ("zzz", "yyy", "xxx")) + # TODO(JonasWanke): handle quoted field + # checkEquals + # decode '"aaa,"b{{text.newline}}bb",ccc{{text.newline}}zzz,yyy,xxx{{text.newline}}"' + # Ok (("aaa", "b{text.newline}bb", "ccc"), ("zzz", "yyy", "xxx")) + # checkEquals (decode '"aaa,"b""bb",ccc{{text.newline}}"') (Ok (("aaa", '"b"bb"', "ccc"),)) diff --git a/packages/Csv/encode.candy b/packages/Csv/encode.candy new file mode 100644 index 000000000..9b54f278d --- /dev/null +++ b/packages/Csv/encode.candy @@ -0,0 +1,62 @@ +[bool, checkEquals, equals, ifElse, iterator, list, result, text] = use "Core" + +encodeField field = + needs (text.is field) + ifElse + field | text.contains '"""' | bool.lazyOr { field | text.contains text.newline } | bool.lazyOr { field | text.contains "," } + { + encoded = field | text.characters | iterator.fromList + | iterator.map { char -> ifElse (char | equals '"""') { '""""' } { char } } + | iterator.joinToText + '""{{encoded}}""' + } + { field } + +encodeLine line = + needs (list.is line) + needs (line | iterator.fromList | iterator.all text.is) + line | iterator.fromList | iterator.map encodeField | iterator.joinToTextWithSeparator "," + +encode lines := + needs (list.is lines) + needs (lines | iterator.fromList | iterator.all list.is) + fieldsPerLine = lines | list.first | result.map list.length | result.unwrapOr 0 + lines | iterator.fromList | iterator.map { line -> + needs (line | list.length | equals fieldsPerLine) + "{line | encodeLine}{text.newline}" + } + | iterator.joinToText + +testEncodeLine 
= + # TODO(JonasWanke): Add test cases for leading/trailing whitespace when our parser is fixed, + # https://github.com/candy-lang/candy/issues/896 + checkEquals (encodeLine (,)) "" + checkEquals (encodeLine ("aaa",)) "aaa" + checkEquals (encodeLine ("aaa", "bbb")) "aaa,bbb" + +testEncodeLineWithSpecialCharacters = + checkEquals (encodeLine ('"aa"a"',)) '""aa""a""' + checkEquals (encodeLine ("aa{text.newline}a",)) '""aa{{text.newline}}a""' + +testEncode = + checkEquals (encode (,)) "" + checkEquals (encode (("aaa",),)) "aaa{text.newline}" + checkEquals (encode (("aaa", "bbb"),)) "aaa,bbb{text.newline}" + checkEquals (encode (("aaa",), ("bbb",))) "aaa{text.newline}bbb{text.newline}" + +testRfcExamples = + # From https://datatracker.ietf.org/doc/html/rfc4180#section-2 + checkEquals (encodeLine ("aaa", "bbb", "ccc")) "aaa,bbb,ccc" + + checkEquals + encode (("aaa", "bbb", "ccc"), ("zzz", "yyy", "xxx")) + "aaa,bbb,ccc{text.newline}zzz,yyy,xxx{text.newline}" + checkEquals + encode (("field_name", "field_name", "field_name"), ("aaa", "bbb", "ccc"), ("zzz", "yyy", "xxx")) + " + field_name,field_name,field_name{text.newline}aaa,bbb,ccc{text.newline}zzz,yyy,xxx{text.newline} + " + checkEquals + encode (("aaa", "b{text.newline}bb", "ccc"), ("zzz", "yyy", "xxx")) + '"aaa,"b{{text.newline}}bb",ccc{{text.newline}}zzz,yyy,xxx{{text.newline}}"' + checkEquals (encode (("aaa", '"b"bb"', "ccc"),)) '"aaa,"b""bb",ccc{{text.newline}}"' diff --git a/packages/Parser/_.candy b/packages/Parser/_.candy new file mode 100644 index 000000000..f58aec955 --- /dev/null +++ b/packages/Parser/_.candy @@ -0,0 +1,2 @@ +cursor := use ".cursor" +parser := use ".parser" diff --git a/packages/Parser/_package.candy b/packages/Parser/_package.candy new file mode 100644 index 000000000..e69de29bb diff --git a/packages/Parser/cursor.candy b/packages/Parser/cursor.candy new file mode 100644 index 000000000..416dd549f --- /dev/null +++ b/packages/Parser/cursor.candy @@ -0,0 +1,25 @@ +[bool, equals, int, text] = 
use "Core" + +is cursor := cursor % + Cursor [source, offset] -> + text.is source | bool.lazyAnd { int.is offset } | bool.lazyAnd { int.isNonNegative offset } + | bool.lazyAnd { offset | int.isLessThanOrEqualTo (source | text.length) } + _ -> False + +newAtStart source := + needs (text.is source) + Cursor [source, Offset: 0] + +isAtEnd cursor := + needs (is cursor) + Cursor [source, offset] = cursor + offset | equals (source | text.length) + +add cursor length := + needs (is cursor) + needs (int.is length) + needs (int.isNonNegative length) + Cursor [source, offset] = cursor + offset = offset | int.add length + needs (offset | int.isLessThanOrEqualTo (source | text.length)) + Cursor [source, offset] diff --git a/packages/Parser/parser.candy b/packages/Parser/parser.candy new file mode 100644 index 000000000..d550d4d7a --- /dev/null +++ b/packages/Parser/parser.candy @@ -0,0 +1,48 @@ +[bool, equals, ifElse, int, text] = use "Core" +cursor = use "..cursor" + +is parser := parser % + Parser c -> cursor.is c + _ -> False + +new source := + needs (text.is source) + Parser (cursor.newAtStart source) + +peek parser := + needs (is parser) + Parser c = parser + ifElse (c | cursor.isAtEnd) { Error Empty } { + Cursor [source, offset] = c + Ok (source | text.getRange offset (offset | int.add 1)) + } +next parser := + needs (is parser) + Parser c = parser + ifElse (c | cursor.isAtEnd) { Error Empty } { + Cursor [source, offset] = c + c = c | cursor.add 1 + Cursor [Offset: newOffset] = c + Ok [Parser: Parser c, Character: source | text.getRange offset newOffset] + } + +matches parser expectedText := + needs (is parser) + needs (text.is expectedText) + Parser (Cursor [source, offset]) = parser + endExclusive = offset | int.add (expectedText | text.length) + endExclusive | int.isLessThanOrEqualTo (source | text.length) + | bool.lazyAnd { source | text.getRange offset endExclusive | equals expectedText } +require parser expectedText := + needs (is parser) + needs (text.is 
expectedText) + ifElse + parser | matches expectedText + { + Parser c = parser + Ok (Parser (c | cursor.add (expectedText | text.length))) + } + { + Parser (Cursor [offset]) = parser + Error '"Expected "{{expectedText}}" at offset {{offset}}."' + } diff --git a/packages/candy.code-workspace b/packages/candy.code-workspace index 82b93988c..da72918b7 100644 --- a/packages/candy.code-workspace +++ b/packages/candy.code-workspace @@ -22,8 +22,8 @@ "editor.defaultFormatter": "redhat.vscode-yaml" }, "editor.codeActionsOnSave": { - "source.fixAll": true, - "source.organizeImports": true + "source.fixAll": "explicit", + "source.organizeImports": "explicit" }, "editor.formatOnPaste": true, "editor.formatOnSave": true,