From e102be835f08b328bc63c5d33cdecb563cf8a29a Mon Sep 17 00:00:00 2001 From: Jonas Wanke Date: Thu, 14 Dec 2023 18:50:10 +0100 Subject: [PATCH 01/13] Export checkEquals --- packages/Core/_.candy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/Core/_.candy b/packages/Core/_.candy index ad7a60ac0..5719f5d21 100644 --- a/packages/Core/_.candy +++ b/packages/Core/_.candy @@ -1,5 +1,5 @@ bool := use ".bool" -[check] := use ".check" +[check, checkEquals] := use ".check" [if, ifElse, loop, recursive, repeat] := use ".controlFlow" [equals] := use ".equality" fixedDecimal := use ".fixedDecimal" From 09cd664c9ecdfa437574c086c8b2a7d95ab054bc Mon Sep 17 00:00:00 2001 From: Jonas Wanke Date: Thu, 14 Dec 2023 18:50:23 +0100 Subject: [PATCH 02/13] Add iterable.joinToTextWithSeparator --- packages/Core/iterable.candy | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/packages/Core/iterable.candy b/packages/Core/iterable.candy index 63bca3d01..122185672 100644 --- a/packages/Core/iterable.candy +++ b/packages/Core/iterable.candy @@ -142,6 +142,16 @@ joinToText iterable := needs (text.is item) result | text.concatenate item } +joinToTextWithSeparator iterable separator := + needs (is iterable) + needs (text.is separator) + + iterable | reduceLeft { a b -> + needs (text.is a) + needs (text.is b) + a | text.concatenate separator | text.concatenate b + } + | result.unwrapOr "" ## Filtering From 6d08d349a76e84b8881775fe0238a53f2855e8c1 Mon Sep 17 00:00:00 2001 From: Jonas Wanke Date: Thu, 14 Dec 2023 18:50:36 +0100 Subject: [PATCH 03/13] Add CSV encoder --- packages/Csv/_.candy | 1 + packages/Csv/_package.candy | 0 packages/Csv/encode.candy | 47 +++++++++++++++++++++++++++++++++++++ 3 files changed, 48 insertions(+) create mode 100644 packages/Csv/_.candy create mode 100644 packages/Csv/_package.candy create mode 100644 packages/Csv/encode.candy diff --git a/packages/Csv/_.candy b/packages/Csv/_.candy new file mode 100644 index 000000000..f3d73bac6 
--- /dev/null +++ b/packages/Csv/_.candy @@ -0,0 +1 @@ +[encode] := use ".encode" diff --git a/packages/Csv/_package.candy b/packages/Csv/_package.candy new file mode 100644 index 000000000..e69de29bb diff --git a/packages/Csv/encode.candy b/packages/Csv/encode.candy new file mode 100644 index 000000000..3f3539664 --- /dev/null +++ b/packages/Csv/encode.candy @@ -0,0 +1,47 @@ +[bool, checkEquals, equals, ifElse, iterable, list, result, text] = use "Core" + +encodeField field = + needs (text.is field) + ifElse + field | text.contains '"""' | bool.lazyOr { field | text.contains text.newline } + { + encoded = field | text.characters | iterable.fromList + | iterable.map { char -> ifElse (char | equals '"""') { '""""' } { char } } + | iterable.joinToText + '""{{encoded}}""' + } + { field } + +encodeLine line = + needs (list.is line) + needs (line | iterable.fromList | iterable.all text.is) + line | iterable.fromList | iterable.map encodeField | iterable.joinToTextWithSeparator "," + +encode lines := + needs (list.is lines) + needs (lines | iterable.fromList | iterable.all list.is) + fieldsPerLine = lines | list.first | result.map list.length | result.unwrapOr 0 + lines | iterable.fromList | iterable.map { line -> + needs (line | list.length | equals fieldsPerLine) + "{line | encodeLine}{text.newline}" + } + | iterable.joinToText + +test = + # From https://datatracker.ietf.org/doc/html/rfc4180#section-2 + checkEquals (encodeLine ("aaa", "bbb", "ccc")) "aaa,bbb,ccc" + + checkEquals + encode (("aaa", "bbb", "ccc"), ("zzz", "yyy", "xxx")) + "aaa,bbb,ccc{text.newline}zzz,yyy,xxx{text.newline}" + checkEquals + encode (("field_name", "field_name", "field_name"), ("aaa", "bbb", "ccc"), ("zzz", "yyy", "xxx")) + " + field_name,field_name,field_name{text.newline}aaa,bbb,ccc{text.newline}zzz,yyy,xxx{text.newline} + " + checkEquals + encode (("aaa", "b{text.newline}bb", "ccc"), ("zzz", "yyy", "xxx")) + '"aaa,"b{{text.newline}}bb",ccc{{text.newline}}zzz,yyy,xxx{{text.newline}}"' + 
checkEquals (encode (("aaa", '"b"bb"', "ccc"),)) '"aaa,"b""bb",ccc{{text.newline}}"' + +main environment := "Abc" \ No newline at end of file From 4c99667e6567fbc914fc363c52696f85eb45e484 Mon Sep 17 00:00:00 2001 From: Jonas Wanke Date: Thu, 18 Jan 2024 18:23:37 +0100 Subject: [PATCH 04/13] Add iterator.joinToTextWithSeparator --- packages/Core/iterator.candy | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/packages/Core/iterator.candy b/packages/Core/iterator.candy index 01984db51..47ecd356f 100644 --- a/packages/Core/iterator.candy +++ b/packages/Core/iterator.candy @@ -174,6 +174,16 @@ joinToText iterator := iterator | wrapSafe { needs False "`joinToText` needs an needs (text.is item) result | text.concatenate item } +joinToTextWithSeparator iterator separator := + iterator = iterator | wrapSafe { needs False "`joinToTextWithSeparator` needs an iterator." } + needs (text.is separator) + + iterator | reduceLeft { a b -> + needs (text.is a) + needs (text.is b) + a | text.concatenate separator | text.concatenate b + } + | result.unwrapOr "" ## Filtering From 34aa7b384bf8284858ad53b9deee37c2fae5310d Mon Sep 17 00:00:00 2001 From: Jonas Wanke Date: Thu, 18 Jan 2024 18:37:42 +0100 Subject: [PATCH 05/13] Update CSV encoder to iterator (vs. 
iterable) --- packages/Csv/encode.candy | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/packages/Csv/encode.candy b/packages/Csv/encode.candy index 3f3539664..1f0bfb809 100644 --- a/packages/Csv/encode.candy +++ b/packages/Csv/encode.candy @@ -1,31 +1,31 @@ -[bool, checkEquals, equals, ifElse, iterable, list, result, text] = use "Core" +[bool, checkEquals, equals, ifElse, iterator, list, result, text] = use "Core" encodeField field = needs (text.is field) ifElse field | text.contains '"""' | bool.lazyOr { field | text.contains text.newline } { - encoded = field | text.characters | iterable.fromList - | iterable.map { char -> ifElse (char | equals '"""') { '""""' } { char } } - | iterable.joinToText + encoded = field | text.characters | iterator.fromList + | iterator.map { char -> ifElse (char | equals '"""') { '""""' } { char } } + | iterator.joinToText '""{{encoded}}""' } { field } encodeLine line = needs (list.is line) - needs (line | iterable.fromList | iterable.all text.is) - line | iterable.fromList | iterable.map encodeField | iterable.joinToTextWithSeparator "," + needs (line | iterator.fromList | iterator.all text.is) + line | iterator.fromList | iterator.map encodeField | iterator.joinToTextWithSeparator "," encode lines := needs (list.is lines) - needs (lines | iterable.fromList | iterable.all list.is) + needs (lines | iterator.fromList | iterator.all list.is) fieldsPerLine = lines | list.first | result.map list.length | result.unwrapOr 0 - lines | iterable.fromList | iterable.map { line -> + lines | iterator.fromList | iterator.map { line -> needs (line | list.length | equals fieldsPerLine) "{line | encodeLine}{text.newline}" } - | iterable.joinToText + | iterator.joinToText test = # From https://datatracker.ietf.org/doc/html/rfc4180#section-2 From 734b4b5b2a88388a61d84de36f6d813f44b6e3c5 Mon Sep 17 00:00:00 2001 From: Jonas Wanke Date: Thu, 18 Jan 2024 18:37:56 +0100 Subject: [PATCH 06/13] Remove CSV encoder's main 
function --- packages/Csv/encode.candy | 2 -- 1 file changed, 2 deletions(-) diff --git a/packages/Csv/encode.candy b/packages/Csv/encode.candy index 1f0bfb809..9b418caf3 100644 --- a/packages/Csv/encode.candy +++ b/packages/Csv/encode.candy @@ -43,5 +43,3 @@ test = encode (("aaa", "b{text.newline}bb", "ccc"), ("zzz", "yyy", "xxx")) '"aaa,"b{{text.newline}}bb",ccc{{text.newline}}zzz,yyy,xxx{{text.newline}}"' checkEquals (encode (("aaa", '"b"bb"', "ccc"),)) '"aaa,"b""bb",ccc{{text.newline}}"' - -main environment := "Abc" \ No newline at end of file From 3d8a2aaecdd80e359b562ed35b9706cdd8a9e01d Mon Sep 17 00:00:00 2001 From: Jonas Wanke Date: Thu, 18 Jan 2024 18:44:38 +0100 Subject: [PATCH 07/13] Set CSV package label automatically --- .github/labels.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/labels.yaml b/.github/labels.yaml index deb49ebc1..598d43f32 100644 --- a/.github/labels.yaml +++ b/.github/labels.yaml @@ -20,6 +20,8 @@ - vscode_extension/**/* 'P: Core': - packages/Core/**/* +'P: Csv': + - packages/Csv/**/* 'P: Examples': - packages/Examples/**/* 'P: Http': From 90113459f2b8eaee1c02beb6be72fa5eb94062a9 Mon Sep 17 00:00:00 2001 From: Jonas Wanke Date: Thu, 18 Jan 2024 22:10:46 +0100 Subject: [PATCH 08/13] Add Parser package --- .github/labels.yaml | 2 ++ packages/Parser/_.candy | 2 ++ packages/Parser/_package.candy | 0 packages/Parser/cursor.candy | 25 ++++++++++++++++++ packages/Parser/parser.candy | 48 ++++++++++++++++++++++++++++++++++ 5 files changed, 77 insertions(+) create mode 100644 packages/Parser/_.candy create mode 100644 packages/Parser/_package.candy create mode 100644 packages/Parser/cursor.candy create mode 100644 packages/Parser/parser.candy diff --git a/.github/labels.yaml b/.github/labels.yaml index 598d43f32..62389dc93 100644 --- a/.github/labels.yaml +++ b/.github/labels.yaml @@ -26,6 +26,8 @@ - packages/Examples/**/* 'P: Http': - packages/Http/**/* +'P: Parser': + - packages/Parser/**/* 'P: 
ProgrammingLanguageBenchmarks': - packages/ProgrammingLanguageBenchmarks/**/* 'P: Random': diff --git a/packages/Parser/_.candy b/packages/Parser/_.candy new file mode 100644 index 000000000..f58aec955 --- /dev/null +++ b/packages/Parser/_.candy @@ -0,0 +1,2 @@ +cursor := use ".cursor" +parser := use ".parser" diff --git a/packages/Parser/_package.candy b/packages/Parser/_package.candy new file mode 100644 index 000000000..e69de29bb diff --git a/packages/Parser/cursor.candy b/packages/Parser/cursor.candy new file mode 100644 index 000000000..416dd549f --- /dev/null +++ b/packages/Parser/cursor.candy @@ -0,0 +1,25 @@ +[bool, equals, int, text] = use "Core" + +is cursor := cursor % + Cursor [source, offset] -> + text.is source | bool.lazyAnd { int.is offset } | bool.lazyAnd { int.isNonNegative offset } + | bool.lazyAnd { offset | int.isLessThanOrEqualTo (source | text.length) } + _ -> False + +newAtStart source := + needs (text.is source) + Cursor [source, Offset: 0] + +isAtEnd cursor := + needs (is cursor) + Cursor [source, offset] = cursor + offset | equals (source | text.length) + +add cursor length := + needs (is cursor) + needs (int.is length) + needs (int.isNonNegative length) + Cursor [source, offset] = cursor + offset = offset | int.add length + needs (offset | int.isLessThanOrEqualTo (source | text.length)) + Cursor [source, offset] diff --git a/packages/Parser/parser.candy b/packages/Parser/parser.candy new file mode 100644 index 000000000..d550d4d7a --- /dev/null +++ b/packages/Parser/parser.candy @@ -0,0 +1,48 @@ +[bool, equals, ifElse, int, text] = use "Core" +cursor = use "..cursor" + +is parser := parser % + Parser c -> cursor.is c + _ -> False + +new source := + needs (text.is source) + Parser (cursor.newAtStart source) + +peek parser := + needs (is parser) + Parser c = parser + ifElse (c | cursor.isAtEnd) { Error Empty } { + Cursor [source, offset] = c + Ok (source | text.getRange offset (offset | int.add 1)) + } +next parser := + needs (is parser) + 
Parser c = parser + ifElse (c | cursor.isAtEnd) { Error Empty } { + Cursor [source, offset] = c + c = c | cursor.add 1 + Cursor [Offset: newOffset] = c + Ok [Parser: Parser c, Character: source | text.getRange offset newOffset] + } + +matches parser expectedText := + needs (is parser) + needs (text.is expectedText) + Parser (Cursor [source, offset]) = parser + endExclusive = offset | int.add (expectedText | text.length) + endExclusive | int.isLessThanOrEqualTo (source | text.length) + | bool.lazyAnd { source | text.getRange offset endExclusive | equals expectedText } +require parser expectedText := + needs (is parser) + needs (text.is expectedText) + ifElse + parser | matches expectedText + { + Parser c = parser + Ok (Parser (c | cursor.add (expectedText | text.length))) + } + { + Parser (Cursor [offset]) = parser + Error '"Expected "{{expectedText}}" at offset {offset}."' + } From a042ecded9ce31a93e93c29f7d43655d9f70e2de Mon Sep 17 00:00:00 2001 From: Jonas Wanke Date: Thu, 18 Jan 2024 22:11:09 +0100 Subject: [PATCH 09/13] Add CSV decoder --- packages/Core/list.candy | 4 +-- packages/Csv/_.candy | 1 + packages/Csv/decode.candy | 55 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 58 insertions(+), 2 deletions(-) create mode 100644 packages/Csv/decode.candy diff --git a/packages/Core/list.candy b/packages/Core/list.candy index a08562e7a..301516bc9 100644 --- a/packages/Core/list.candy +++ b/packages/Core/list.candy @@ -12,7 +12,7 @@ is value := type.is value List length := builtins.listLength isEmpty list := needs (is list) - equals (length list) 0 + equals (list | length) 0 lastIndex list := needs (is list) @@ -55,7 +55,7 @@ prepend list item := list | insert 0 item append list item := needs (is list) - list | insert (length list) item + list | insert (list | length) item replace := builtins.listReplace update list index updater := diff --git a/packages/Csv/_.candy b/packages/Csv/_.candy index f3d73bac6..ae2e4492a 100644 --- a/packages/Csv/_.candy +++ 
b/packages/Csv/_.candy @@ -1 +1,2 @@ +[decode] := use ".decode" [encode] := use ".encode" diff --git a/packages/Csv/decode.candy b/packages/Csv/decode.candy new file mode 100644 index 000000000..378be905c --- /dev/null +++ b/packages/Csv/decode.candy @@ -0,0 +1,55 @@ +[bool, checkEquals, ifElse, int, list, recursive, text] = use "Core" +[parser] = use "Parser" + +decodeFile csv := + needs (text.is csv) + recursive [Lines: (,), Fields: (,), FieldStartOffset: 0, P: parser.new csv] { + recurse [lines, fields, fieldStartOffset, p] -> + Parser (Cursor [source, Offset: oldOffset]) = p + p | parser.next % + Ok [Parser: p, character] -> + Parser (Cursor [Offset: newOffset]) = p + character % + "," -> + recurse [ + lines, + Fields: fields | list.append (source | text.getRange fieldStartOffset oldOffset), + FieldStartOffset: newOffset, + p, + ] + " + + + " -> + fields = fields | list.append (source | text.getRange fieldStartOffset oldOffset) + recurse [Lines: lines | list.append fields, Fields: (,), FieldStartOffset: newOffset, p] + # TODO(JonasWanke): handle quoted field + _ -> recurse [lines, fields, fieldStartOffset, p] + Error Empty -> + finalField = source | text.getRange fieldStartOffset oldOffset + Ok + ifElse + finalField | text.isEmpty | bool.lazyAnd { fields | list.isEmpty } + { lines } + { lines | list.append (fields | list.append finalField) } + } + +test = + # From https://datatracker.ietf.org/doc/html/rfc4180#section-2 + checkEquals + decodeFile "aaa,bbb,ccc" + Ok (("aaa", "bbb", "ccc"),) + + checkEquals + decodeFile "aaa,bbb,ccc{text.newline}zzz,yyy,xxx{text.newline}" + Ok (("aaa", "bbb", "ccc"), ("zzz", "yyy", "xxx")) + checkEquals + decodeFile " + field_name,field_name,field_name{text.newline}aaa,bbb,ccc{text.newline}zzz,yyy,xxx{text.newline} + " + Ok (("field_name", "field_name", "field_name"), ("aaa", "bbb", "ccc"), ("zzz", "yyy", "xxx")) + # TODO(JonasWanke): handle quoted field + # checkEquals + # decodeFile 
'"aaa,"b{{text.newline}}bb",ccc{{text.newline}}zzz,yyy,xxx{{text.newline}}"' + # Ok (("aaa", "b{text.newline}bb", "ccc"), ("zzz", "yyy", "xxx")) + # checkEquals (decodeFile '"aaa,"b""bb",ccc{{text.newline}}"') (Ok (("aaa", '"b"bb"', "ccc"),)) From 3721b20c0af0684c5e18e9e746f61d544b543566 Mon Sep 17 00:00:00 2001 From: Jonas Wanke Date: Thu, 18 Jan 2024 22:34:00 +0100 Subject: [PATCH 10/13] Enable automerge only for PRs to main --- .github/workflows/pr.yaml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index f6ba90dd6..feca1f429 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -28,7 +28,11 @@ jobs: - uses: technote-space/assign-author@v1.6.2 automerge-pr: - if: (github.event.action == 'opened' && !github.event.pull_request.draft) || github.event.action == 'ready_for_review' + if: > + github.base_ref == 'main' + && ( + (github.event.action == 'opened' && !github.event.pull_request.draft) + || github.event.action == 'ready_for_review') runs-on: ubuntu-latest permissions: contents: write From 382ee108008749d26b28256a4985e5ea0edab769 Mon Sep 17 00:00:00 2001 From: Jonas Wanke Date: Thu, 18 Jan 2024 22:40:42 +0100 Subject: [PATCH 11/13] Fix compiler errors after merge --- compiler/frontend/src/mir_optimize/call_tracing.rs | 2 +- compiler/frontend/src/mir_optimize/pure.rs | 12 +++++++----- packages/candy.code-workspace | 4 ++-- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/compiler/frontend/src/mir_optimize/call_tracing.rs b/compiler/frontend/src/mir_optimize/call_tracing.rs index 2803265c3..d01b47c25 100644 --- a/compiler/frontend/src/mir_optimize/call_tracing.rs +++ b/compiler/frontend/src/mir_optimize/call_tracing.rs @@ -29,7 +29,7 @@ pub fn remove_unnecessary_call_tracing( } } _ => { - if !pureness.pure_definitions().contains(id) { + if !pureness.pure_definitions().contains(*id) { for (_, only_pure_since_then) in &mut trace_call_starts { 
*only_pure_since_then = false; } diff --git a/compiler/frontend/src/mir_optimize/pure.rs b/compiler/frontend/src/mir_optimize/pure.rs index e091a8158..757978eb8 100644 --- a/compiler/frontend/src/mir_optimize/pure.rs +++ b/compiler/frontend/src/mir_optimize/pure.rs @@ -153,7 +153,7 @@ impl PurenessInsights { } #[must_use] - pub const fn pure_definitions(&self) -> &FxHashSet { + pub const fn pure_definitions(&self) -> &IdSet { &self.pure_definitions } #[must_use] @@ -338,9 +338,10 @@ impl PurenessInsights { /// case: We store a [`BitVec`] where each index corresponds to an [`Id`] /// because our [`Id`]s are numbered sequentially. #[derive(Clone, Debug, Default, Eq, PartialEq)] -struct IdSet(BitVec); +pub struct IdSet(BitVec); impl IdSet { - fn contains(&self, id: Id) -> bool { + #[must_use] + pub fn contains(&self, id: Id) -> bool { if id.to_usize() >= self.0.len() { false } else { @@ -348,11 +349,12 @@ impl IdSet { } } + #[must_use] pub fn iter(&self) -> IdSetIter { self.into_iter() } - fn insert(&mut self, id: Id) { + pub fn insert(&mut self, id: Id) { let additional_length_to_reserve = (id.to_usize() + 1).saturating_sub(self.0.len()); if additional_length_to_reserve > 0 { self.0 @@ -360,7 +362,7 @@ impl IdSet { } self.0.set(id.to_usize(), true); } - fn remove(&mut self, id: Id) { + pub fn remove(&mut self, id: Id) { if id.to_usize() >= self.0.len() { return; } diff --git a/packages/candy.code-workspace b/packages/candy.code-workspace index 82b93988c..da72918b7 100644 --- a/packages/candy.code-workspace +++ b/packages/candy.code-workspace @@ -22,8 +22,8 @@ "editor.defaultFormatter": "redhat.vscode-yaml" }, "editor.codeActionsOnSave": { - "source.fixAll": true, - "source.organizeImports": true + "source.fixAll": "explicit", + "source.organizeImports": "explicit" }, "editor.formatOnPaste": true, "editor.formatOnSave": true, From aec4db94907d1bce6c8f0a193a1a9a3c158d7463 Mon Sep 17 00:00:00 2001 From: Jonas Wanke Date: Thu, 18 Jan 2024 22:44:05 +0100 Subject: [PATCH 
12/13] Fix csv.decode's export --- packages/Csv/decode.candy | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/packages/Csv/decode.candy b/packages/Csv/decode.candy index 378be905c..4e7a1c9d7 100644 --- a/packages/Csv/decode.candy +++ b/packages/Csv/decode.candy @@ -1,7 +1,7 @@ [bool, checkEquals, ifElse, int, list, recursive, text] = use "Core" [parser] = use "Parser" -decodeFile csv := +decode csv := needs (text.is csv) recursive [Lines: (,), Fields: (,), FieldStartOffset: 0, P: parser.new csv] { recurse [lines, fields, fieldStartOffset, p] -> @@ -36,20 +36,18 @@ decodeFile csv := test = # From https://datatracker.ietf.org/doc/html/rfc4180#section-2 - checkEquals - decodeFile "aaa,bbb,ccc" - Ok (("aaa", "bbb", "ccc"),) + checkEquals (decode "aaa,bbb,ccc") (Ok (("aaa", "bbb", "ccc"),)) checkEquals - decodeFile "aaa,bbb,ccc{text.newline}zzz,yyy,xxx{text.newline}" + decode "aaa,bbb,ccc{text.newline}zzz,yyy,xxx{text.newline}" Ok (("aaa", "bbb", "ccc"), ("zzz", "yyy", "xxx")) checkEquals - decodeFile " + decode " field_name,field_name,field_name{text.newline}aaa,bbb,ccc{text.newline}zzz,yyy,xxx{text.newline} " Ok (("field_name", "field_name", "field_name"), ("aaa", "bbb", "ccc"), ("zzz", "yyy", "xxx")) # TODO(JonasWanke): handle quoted field # checkEquals - # decodeFile '"aaa,"b{{text.newline}}bb",ccc{{text.newline}}zzz,yyy,xxx{{text.newline}}"' + # decode '"aaa,"b{{text.newline}}bb",ccc{{text.newline}}zzz,yyy,xxx{{text.newline}}"' # Ok (("aaa", "b{text.newline}bb", "ccc"), ("zzz", "yyy", "xxx")) - # checkEquals (decodeFile '"aaa,"b""bb",ccc{{text.newline}}"') (Ok (("aaa", '"b"bb"', "ccc"),)) + # checkEquals (decode '"aaa,"b""bb",ccc{{text.newline}}"') (Ok (("aaa", '"b"bb"', "ccc"),)) From 74b5426eeb817c05c69a0d4efbd3a54abd369f30 Mon Sep 17 00:00:00 2001 From: Jonas Wanke Date: Thu, 18 Jan 2024 23:27:57 +0100 Subject: [PATCH 13/13] Add more CSV test cases --- packages/Csv/decode.candy | 59 ++++++++++++++++++++++++++++++++++++--- 
packages/Csv/encode.candy | 19 ++++++++++++- 2 files changed, 73 insertions(+), 5 deletions(-) diff --git a/packages/Csv/decode.candy b/packages/Csv/decode.candy index 4e7a1c9d7..546f02539 100644 --- a/packages/Csv/decode.candy +++ b/packages/Csv/decode.candy @@ -1,14 +1,16 @@ -[bool, checkEquals, ifElse, int, list, recursive, text] = use "Core" -[parser] = use "Parser" +[bool, checkEquals, equals, ifElse, int, list, recursive, text] = use "Core" +[cursor, parser] = use "Parser" decode csv := needs (text.is csv) + # TODO(JonasWanke): Error on lines with different field counts recursive [Lines: (,), Fields: (,), FieldStartOffset: 0, P: parser.new csv] { recurse [lines, fields, fieldStartOffset, p] -> Parser (Cursor [source, Offset: oldOffset]) = p p | parser.next % Ok [Parser: p, character] -> - Parser (Cursor [Offset: newOffset]) = p + Parser c = p + Cursor [Offset: newOffset] = c character % "," -> recurse [ @@ -22,7 +24,16 @@ decode csv := " -> fields = fields | list.append (source | text.getRange fieldStartOffset oldOffset) - recurse [Lines: lines | list.append fields, Fields: (,), FieldStartOffset: newOffset, p] + lines = lines | list.append fields + # CSV files can have a trailing newline. 
+ ifElse + (c | cursor.isAtEnd) + { + ifElse (oldOffset | equals 0) { Ok (,) } { Ok lines } + } + { + recurse [lines, Fields: (,), FieldStartOffset: newOffset, p] + } # TODO(JonasWanke): handle quoted field _ -> recurse [lines, fields, fieldStartOffset, p] Error Empty -> @@ -35,6 +46,46 @@ decode csv := } test = + checkEquals (decode "") (Ok (,)) + checkEquals (decode "{text.newline}") (Ok (,)) + + checkEquals (decode "aaa") (Ok (("aaa",),)) + checkEquals (decode "aaa{text.newline}") (Ok (("aaa",),)) + + checkEquals (decode " aaa ") (Ok ((" aaa ",),)) + checkEquals (decode " aaa {text.newline}") (Ok ((" aaa ",),)) + + checkEquals (decode "aaa,bbb") (Ok (("aaa", "bbb"),)) + checkEquals (decode "aaa,bbb{text.newline}") (Ok (("aaa", "bbb"),)) + checkEquals (decode "aaa,bbb{text.newline}ccc,ddd") (Ok (("aaa", "bbb"), ("ccc", "ddd"))) + checkEquals + decode "aaa,bbb{text.newline}ccc,ddd{text.newline}" + Ok (("aaa", "bbb"), ("ccc", "ddd")) + # Parser is broken, hence this verbose formatting: https://github.com/candy-lang/candy/issues/896 + checkEquals + decode " + aaa , bbb {text.newline} ccc , ddd {text.newline} + " + Ok ( + ( + " + aaa + ", + " + bbb + ", + ), + ( + " + ccc + ", + " + ddd + ", + ), + ) + +testRfcExamples = # From https://datatracker.ietf.org/doc/html/rfc4180#section-2 checkEquals (decode "aaa,bbb,ccc") (Ok (("aaa", "bbb", "ccc"),)) diff --git a/packages/Csv/encode.candy b/packages/Csv/encode.candy index 9b418caf3..9b54f278d 100644 --- a/packages/Csv/encode.candy +++ b/packages/Csv/encode.candy @@ -27,7 +27,24 @@ encode lines := } | iterator.joinToText -test = +testEncodeLine = + # TODO(JonasWanke): Add test cases for leading/trailing whitespace when our parser is fixed, + # https://github.com/candy-lang/candy/issues/896 + checkEquals (encodeLine (,)) "" + checkEquals (encodeLine ("aaa",)) "aaa" + checkEquals (encodeLine ("aaa", "bbb")) "aaa,bbb" + +testEncodeLineWithSpecialCharacters = + checkEquals (encodeLine ('"aa"a"',)) '""aa""a""' + checkEquals 
(encodeLine ("aa{text.newline}a",)) '""aa{{text.newline}}a""' + +testEncode = + checkEquals (encode (,)) "" + checkEquals (encode (("aaa",),)) "aaa{text.newline}" + checkEquals (encode (("aaa", "bbb"),)) "aaa,bbb{text.newline}" + checkEquals (encode (("aaa",), ("bbb",))) "aaa{text.newline}bbb{text.newline}" + +testRfcExamples = # From https://datatracker.ietf.org/doc/html/rfc4180#section-2 checkEquals (encodeLine ("aaa", "bbb", "ccc")) "aaa,bbb,ccc"