Merge pull request #885 from candy-lang/csv

CSV Package
candy-lang · Jan 25, 2024 · 85bbecd · 85bbecd · jwbot · Jan 25, 2024
2 parents bb55714 + 512d13a
commit 85bbecd
Show file tree

Hide file tree

Showing 15 changed files with 269 additions and 6 deletions.
diff --git a/.github/labels.yaml b/.github/labels.yaml
@@ -20,10 +20,14 @@
   - vscode_extension/**/*
 'P: Core':
   - packages/Core/**/*
+'P: Csv':
+  - packages/Csv/**/*
 'P: Examples':
   - packages/Examples/**/*
 'P: Http':
   - packages/Http/**/*
+'P: Parser':
+  - packages/Parser/**/*
 'P: ProgrammingLanguageBenchmarks':
   - packages/ProgrammingLanguageBenchmarks/**/*
 'P: Random':

diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
@@ -28,7 +28,11 @@ jobs:
       - uses: technote-space/[email protected]
 
   automerge-pr:
-    if: (github.event.action == 'opened' && !github.event.pull_request.draft) || github.event.action == 'ready_for_review'
+    if: >
+      github.head_ref == 'refs/heads/main'
+      && (
+        (github.event.action == 'opened' && !github.event.pull_request.draft)
+        || github.event.action == 'ready_for_review')
     runs-on: ubuntu-latest
     permissions:
       contents: write

diff --git a/compiler/frontend/src/mir_optimize/pure.rs b/compiler/frontend/src/mir_optimize/pure.rs
@@ -340,6 +340,7 @@ impl PurenessInsights {
 #[derive(Clone, Debug, Default, Eq, PartialEq)]
 pub struct IdSet(BitVec);
 impl IdSet {
+    #[must_use]
     pub fn contains(&self, id: Id) -> bool {
         if id.to_usize() >= self.0.len() {
             false
@@ -348,6 +349,7 @@ impl IdSet {
         }
     }
 
+    #[must_use]
     pub fn iter(&self) -> IdSetIter {
         self.into_iter()
     }

diff --git a/packages/Core/_.candy b/packages/Core/_.candy
@@ -1,5 +1,5 @@
 bool := use ".bool"
-[check] := use ".check"
+[check, checkEquals] := use ".check"
 [if, ifElse, loop, recursive, repeat] := use ".controlFlow"
 [equals] := use ".equality"
 fixedDecimal := use ".fixedDecimal"

diff --git a/packages/Core/iterator.candy b/packages/Core/iterator.candy
@@ -174,6 +174,16 @@ joinToText iterator := iterator | wrapSafe { needs False "`joinToText` needs an
   needs (text.is item)
   result | text.concatenate item
 }
+# Joins the text items of `iterator` into a single text, placing `separator` between each pair of
+# neighboring items. `separator` and every item must be texts. If the reduction produces no
+# result, the empty text `""` is returned instead.
+joinToTextWithSeparator iterator separator :=
+  iterator = iterator | wrapSafe { needs False "`joinToTextWithSeparator` needs an iterator." }
+  needs (text.is separator)
+
+  iterator | reduceLeft { a b ->
+    needs (text.is a)
+    needs (text.is b)
+    a | text.concatenate separator | text.concatenate b
+  }
+  | result.unwrapOr ""
 
 ## Filtering
 

diff --git a/packages/Core/list.candy b/packages/Core/list.candy
@@ -12,7 +12,7 @@ is value := type.is value List
 length := builtins.listLength
 isEmpty list :=
   needs (is list)
-  equals (length list) 0
+  equals (list | length) 0
 
 lastIndex list :=
   needs (is list)
@@ -55,7 +55,7 @@ prepend list item :=
   list | insert 0 item
 append list item :=
   needs (is list)
-  list | insert (length list) item
+  list | insert (list | length) item
 
 replace := builtins.listReplace
 update list index updater :=

diff --git a/packages/Csv/_.candy b/packages/Csv/_.candy
@@ -0,0 +1,2 @@
+[decode] := use ".decode"
+[encode] := use ".encode"
diff --git a/packages/Csv/_package.candy b/packages/Csv/_package.candy
diff --git a/packages/Csv/decode.candy b/packages/Csv/decode.candy
@@ -0,0 +1,104 @@
+[bool, checkEquals, equals, ifElse, int, list, recursive, text] = use "Core"
+[cursor, parser] = use "Parser"
+
+# Decodes the CSV text `csv` into `Ok lines`, where `lines` is a list of lines and each line is a
+# list of field texts.
+#
+# Fields are split at `,` and taken verbatim from the source (surrounding whitespace is kept).
+# A trailing line break at the very end of the input doesn't start another line, and an input
+# consisting only of that line break decodes to `Ok (,)`. Quoted fields are not handled yet.
+#
+# The loop state tracks the finished lines, the fields of the current line, the offset at which
+# the current field started, and the parser positioned at the next character.
+decode csv :=
+  needs (text.is csv)
+  # TODO(JonasWanke): Error on lines with different field counts
+  recursive [Lines: (,), Fields: (,), FieldStartOffset: 0, P: parser.new csv] {
+    recurse [lines, fields, fieldStartOffset, p] ->
+    Parser (Cursor [source, Offset: oldOffset]) = p
+    p | parser.next %
+      Ok [Parser: p, character] ->
+        Parser c = p
+        Cursor [Offset: newOffset] = c
+        character %
+          "," ->
+            recurse [
+              lines,
+              Fields: fields | list.append (source | text.getRange fieldStartOffset oldOffset),
+              FieldStartOffset: newOffset,
+              p,
+            ]
+          "


+          " ->
+            # Finish the current field (it ends before the character just consumed) and the line.
+            fields = fields | list.append (source | text.getRange fieldStartOffset oldOffset)
+            lines = lines | list.append fields
+            # CSV files can have a trailing newline.
+            ifElse
+              (c | cursor.isAtEnd)
+              {
+                ifElse (oldOffset | equals 0) { Ok (,) } { Ok lines }
+              }
+              {
+                recurse [lines, Fields: (,), FieldStartOffset: newOffset, p]
+              }
+          # TODO(JonasWanke): handle quoted field
+          _ -> recurse [lines, fields, fieldStartOffset, p]
+      Error Empty ->
+        # End of input: the pending field is appended as a final line unless it is empty and the
+        # current line has no fields yet.
+        finalField = source | text.getRange fieldStartOffset oldOffset
+        Ok
+          ifElse
+            finalField | text.isEmpty | bool.lazyAnd { fields | list.isEmpty }
+            { lines }
+            { lines | list.append (fields | list.append finalField) }
+  }
+
+# Checks `decode` on empty input, single and multiple fields and lines, inputs with and without a
+# trailing newline, and that whitespace around fields is preserved.
+test =
+  checkEquals (decode "") (Ok (,))
+  checkEquals (decode "{text.newline}") (Ok (,))
+
+  checkEquals (decode "aaa") (Ok (("aaa",),))
+  checkEquals (decode "aaa{text.newline}") (Ok (("aaa",),))
+
+  checkEquals (decode " aaa ") (Ok ((" aaa ",),))
+  checkEquals (decode " aaa {text.newline}") (Ok ((" aaa ",),))
+
+  checkEquals (decode "aaa,bbb") (Ok (("aaa", "bbb"),))
+  checkEquals (decode "aaa,bbb{text.newline}") (Ok (("aaa", "bbb"),))
+  checkEquals (decode "aaa,bbb{text.newline}ccc,ddd") (Ok (("aaa", "bbb"), ("ccc", "ddd")))
+  checkEquals
+    decode "aaa,bbb{text.newline}ccc,ddd{text.newline}"
+    Ok (("aaa", "bbb"), ("ccc", "ddd"))
+  # Parser is broken, hence this verbose formatting: https://github.com/candy-lang/candy/issues/896
+  checkEquals
+    decode "
+       aaa , bbb {text.newline} ccc , ddd {text.newline}
+    "
+    Ok (
+      (
+        "
+           aaa 
+        ",
+        "
+           bbb 
+        ",
+      ),
+      (
+        "
+           ccc 
+        ",
+        "
+           ddd 
+        ",
+      ),
+    )
+
+# Checks `decode` against the unquoted examples from RFC 4180, section 2. The quoted-field
+# examples at the end stay commented out until `decode` handles quoted fields.
+testRfcExamples =
+  # From https://datatracker.ietf.org/doc/html/rfc4180#section-2
+  checkEquals (decode "aaa,bbb,ccc") (Ok (("aaa", "bbb", "ccc"),))
+
+  checkEquals
+    decode "aaa,bbb,ccc{text.newline}zzz,yyy,xxx{text.newline}"
+    Ok (("aaa", "bbb", "ccc"), ("zzz", "yyy", "xxx"))
+  checkEquals
+    decode "
+      field_name,field_name,field_name{text.newline}aaa,bbb,ccc{text.newline}zzz,yyy,xxx{text.newline}
+    "
+    Ok (("field_name", "field_name", "field_name"), ("aaa", "bbb", "ccc"), ("zzz", "yyy", "xxx"))
+  # TODO(JonasWanke): handle quoted field
+  # checkEquals
+  #   decode '"aaa,"b{{text.newline}}bb",ccc{{text.newline}}zzz,yyy,xxx{{text.newline}}"'
+  #   Ok (("aaa", "b{text.newline}bb", "ccc"), ("zzz", "yyy", "xxx"))
+  # checkEquals (decode '"aaa,"b""bb",ccc{{text.newline}}"') (Ok (("aaa", '"b"bb"', "ccc"),))
diff --git a/packages/Csv/encode.candy b/packages/Csv/encode.candy
@@ -0,0 +1,62 @@
+[bool, checkEquals, equals, ifElse, iterator, list, result, text] = use "Core"
+
+# Encodes a single field for use inside a CSV line.
+#
+# A field that contains a double quote, a comma, or a newline is wrapped in double quotes, and
+# each double quote inside it is doubled (RFC 4180, section 2). Without quoting, a comma inside
+# the field would be read back as a field separator. All other fields are returned unchanged.
+encodeField field =
+  needs (text.is field)
+  containsQuote = field | text.contains '"""'
+  containsComma = field | text.contains ","
+  containsNewline = field | text.contains text.newline
+  ifElse
+    containsQuote | bool.lazyOr { containsComma } | bool.lazyOr { containsNewline }
+    {
+      # Escape by doubling every double quote, then wrap the whole field in double quotes.
+      encoded = field | text.characters | iterator.fromList
+      | iterator.map { char -> ifElse (char | equals '"""') { '""""' } { char } }
+      | iterator.joinToText
+      '""{{encoded}}""'
+    }
+    { field }
+
+# Encodes `line`, a list of field texts, as one CSV line without a trailing newline: every field
+# is passed through `encodeField` and the results are joined with `,`.
+encodeLine line =
+  needs (list.is line)
+  needs (line | iterator.fromList | iterator.all text.is)
+  line | iterator.fromList | iterator.map encodeField | iterator.joinToTextWithSeparator ","
+
+# Encodes `lines`, a list of lists of field texts, as CSV text.
+#
+# Every line must have as many fields as the first line. Each encoded line is followed by
+# `text.newline`, so non-empty output always ends with a newline.
+encode lines :=
+  needs (list.is lines)
+  needs (lines | iterator.fromList | iterator.all list.is)
+  # The field count of the first line (or 0 if there is none) is the reference for all lines.
+  fieldsPerLine = lines | list.first | result.map list.length | result.unwrapOr 0
+  lines | iterator.fromList | iterator.map { line ->
+    needs (line | list.length | equals fieldsPerLine)
+    "{line | encodeLine}{text.newline}"
+  }
+  | iterator.joinToText
+
+# Checks `encodeLine` for lines with zero, one, and two plain fields.
+testEncodeLine =
+  # TODO(JonasWanke): Add test cases for leading/trailing whitespace when our parser is fixed,
+  # https://github.com/candy-lang/candy/issues/896
+  checkEquals (encodeLine (,)) ""
+  checkEquals (encodeLine ("aaa",)) "aaa"
+  checkEquals (encodeLine ("aaa", "bbb")) "aaa,bbb"
+
+# Checks that fields containing a double quote or a newline are wrapped in double quotes and that
+# contained double quotes are doubled.
+testEncodeLineWithSpecialCharacters =
+  checkEquals (encodeLine ('"aa"a"',)) '""aa""a""'
+  checkEquals (encodeLine ("aa{text.newline}a",)) '""aa{{text.newline}}a""'
+
+# Checks `encode` for no lines, a single line, and multiple lines; every encoded line ends with a
+# newline.
+testEncode =
+  checkEquals (encode (,)) ""
+  checkEquals (encode (("aaa",),)) "aaa{text.newline}"
+  checkEquals (encode (("aaa", "bbb"),)) "aaa,bbb{text.newline}"
+  checkEquals (encode (("aaa",), ("bbb",))) "aaa{text.newline}bbb{text.newline}"
+
+# Checks `encodeLine` and `encode` against the examples from RFC 4180, section 2, including fields
+# that contain a newline or a double quote.
+testRfcExamples =
+  # From https://datatracker.ietf.org/doc/html/rfc4180#section-2
+  checkEquals (encodeLine ("aaa", "bbb", "ccc")) "aaa,bbb,ccc"
+
+  checkEquals
+    encode (("aaa", "bbb", "ccc"), ("zzz", "yyy", "xxx"))
+    "aaa,bbb,ccc{text.newline}zzz,yyy,xxx{text.newline}"
+  checkEquals
+    encode (("field_name", "field_name", "field_name"), ("aaa", "bbb", "ccc"), ("zzz", "yyy", "xxx"))
+    "
+      field_name,field_name,field_name{text.newline}aaa,bbb,ccc{text.newline}zzz,yyy,xxx{text.newline}
+    "
+  checkEquals
+    encode (("aaa", "b{text.newline}bb", "ccc"), ("zzz", "yyy", "xxx"))
+    '"aaa,"b{{text.newline}}bb",ccc{{text.newline}}zzz,yyy,xxx{{text.newline}}"'
+  checkEquals (encode (("aaa", '"b"bb"', "ccc"),)) '"aaa,"b""bb",ccc{{text.newline}}"'
diff --git a/packages/Parser/_.candy b/packages/Parser/_.candy
@@ -0,0 +1,2 @@
+cursor := use ".cursor"
+parser := use ".parser"
diff --git a/packages/Parser/_package.candy b/packages/Parser/_package.candy
diff --git a/packages/Parser/cursor.candy b/packages/Parser/cursor.candy
@@ -0,0 +1,25 @@
+[bool, equals, int, text] = use "Core"
+
+# Returns whether `cursor` is a valid cursor: a `Cursor [source, offset]` where `source` is a text
+# and `offset` is a non-negative int that is at most the length of `source`. Everything else
+# yields `False`.
+is cursor := cursor %
+  Cursor [source, offset] ->
+    text.is source | bool.lazyAnd { int.is offset } | bool.lazyAnd { int.isNonNegative offset }
+    | bool.lazyAnd { offset | int.isLessThanOrEqualTo (source | text.length) }
+  _ -> False
+
+# Creates a cursor for the text `source` that is positioned at offset 0.
+newAtStart source :=
+  needs (text.is source)
+  Cursor [source, Offset: 0]
+
+# Returns whether `cursor` has reached the end of its source, i.e., its offset equals the length
+# of the source text.
+isAtEnd cursor :=
+  needs (is cursor)
+  Cursor [source, offset] = cursor
+  offset | equals (source | text.length)
+
+# Returns a cursor over the same source that is advanced by `length`.
+#
+# `length` must be a non-negative int, and the resulting offset must not exceed the length of the
+# source text.
+add cursor length :=
+  needs (is cursor)
+  needs (int.is length)
+  needs (int.isNonNegative length)
+  Cursor [source, offset] = cursor
+  offset = offset | int.add length
+  needs (offset | int.isLessThanOrEqualTo (source | text.length))
+  Cursor [source, offset]
diff --git a/packages/Parser/parser.candy b/packages/Parser/parser.candy
@@ -0,0 +1,48 @@
+[bool, equals, ifElse, int, text] = use "Core"
+cursor = use "..cursor"
+
+# Returns whether `parser` is a valid parser: a `Parser` tag wrapping a valid cursor.
+is parser := parser %
+  Parser c -> cursor.is c
+  _ -> False
+
+# Creates a parser for the text `source` that is positioned at its start.
+new source :=
+  needs (text.is source)
+  Parser (cursor.newAtStart source)
+
+# Returns the text between the current offset and the offset one past it as `Ok`, without
+# advancing the parser. Returns `Error Empty` if the parser is at the end of its source.
+peek parser :=
+  needs (is parser)
+  Parser c = parser
+  ifElse (c | cursor.isAtEnd) { Error Empty } {
+    Cursor [source, offset] = c
+    Ok (source | text.getRange offset (offset | int.add 1))
+  }
+# Consumes the next character of the source.
+#
+# Returns `Ok [Parser, Character]` with the parser advanced by one and the text between the old
+# and the new offset, or `Error Empty` if the parser is at the end of its source.
+next parser :=
+  needs (is parser)
+  Parser c = parser
+  ifElse (c | cursor.isAtEnd) { Error Empty } {
+    Cursor [source, offset] = c
+    c = c | cursor.add 1
+    Cursor [Offset: newOffset] = c
+    Ok [Parser: Parser c, Character: source | text.getRange offset newOffset]
+  }
+
+# Returns whether the source contains `expectedText` starting at the parser's current offset.
+#
+# The length check comes first so that the range is only read when it lies within the source.
+matches parser expectedText :=
+  needs (is parser)
+  needs (text.is expectedText)
+  Parser (Cursor [source, offset]) = parser
+  endExclusive = offset | int.add (expectedText | text.length)
+  endExclusive | int.isLessThanOrEqualTo (source | text.length)
+  | bool.lazyAnd { source | text.getRange offset endExclusive | equals expectedText }
+# Expects `expectedText` at the parser's current offset.
+#
+# Returns `Ok` with a parser advanced past `expectedText` if it matches, and `Error` with a
+# message text otherwise.
+require parser expectedText :=
+  needs (is parser)
+  needs (text.is expectedText)
+  ifElse
+    parser | matches expectedText
+    {
+      Parser c = parser
+      Ok (Parser (c | cursor.add (expectedText | text.length)))
+    }
+    {
+      Parser (Cursor [offset]) = parser
+      # NOTE(review): This text is delimited with single quotes and interpolates `expectedText`
+      # using double braces, but `offset` uses single braces. It looks like `{offset}` stays
+      # literal instead of being interpolated; confirm and adjust if the offset should appear.
+      Error '"Expected "{{expectedText}}" at offset {offset}."'
+    }
diff --git a/packages/candy.code-workspace b/packages/candy.code-workspace
@@ -22,8 +22,8 @@
       "editor.defaultFormatter": "redhat.vscode-yaml"
     },
     "editor.codeActionsOnSave": {
-      "source.fixAll": true,
-      "source.organizeImports": true
+      "source.fixAll": "explicit",
+      "source.organizeImports": "explicit"
     },
     "editor.formatOnPaste": true,
     "editor.formatOnSave": true,
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		[decode] := use ".decode"
		[encode] := use ".encode"
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		cursor := use ".cursor"
		parser := use ".parser"
Benchmark suite	Current: `85bbecd`	Previous: `bb55714`	Ratio
`Time: Compiler/hello_world`	`37817577` ns/iter (`± 401562`)	`37726636` ns/iter (`± 581259`)	`1.00`
`Time: Compiler/fibonacci`	`192516036` ns/iter (`± 555334`)	`191835875` ns/iter (`± 698916`)	`1.00`
`Time: VM Runtime/hello_world`	`36117` ns/iter (`± 2221`)	`43466` ns/iter (`± 5701`)	`0.83`
`Time: VM Runtime/fibonacci/15`	`296468610` ns/iter (`± 1728444`)	`296425323` ns/iter (`± 1677685`)	`1.00`
`Time: VM Runtime/PLB/binarytrees/6`	`1593328858` ns/iter (`± 7402098`)	`1613548766` ns/iter (`± 21582512`)	`0.99`