Skip to content

Commit

Permalink
Merge pull request #885 from candy-lang/csv
Browse files Browse the repository at this point in the history
CSV Package
  • Loading branch information
MarcelGarus authored Jan 25, 2024
2 parents bb55714 + 512d13a commit 85bbecd
Show file tree
Hide file tree
Showing 15 changed files with 269 additions and 6 deletions.
4 changes: 4 additions & 0 deletions .github/labels.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,14 @@
- vscode_extension/**/*
'P: Core':
- packages/Core/**/*
'P: Csv':
- packages/Csv/**/*
'P: Examples':
- packages/Examples/**/*
'P: Http':
- packages/Http/**/*
'P: Parser':
- packages/Parser/**/*
'P: ProgrammingLanguageBenchmarks':
- packages/ProgrammingLanguageBenchmarks/**/*
'P: Random':
Expand Down
6 changes: 5 additions & 1 deletion .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,11 @@ jobs:
- uses: technote-space/[email protected]

automerge-pr:
if: (github.event.action == 'opened' && !github.event.pull_request.draft) || github.event.action == 'ready_for_review'
# NOTE(review): per the GitHub Actions `github` context documentation, `github.head_ref` holds the
# pull request's source branch name without a `refs/heads/` prefix, so the first comparison below
# looks like it can never be true – confirm whether e.g. `github.base_ref == 'main'` was intended.
if: >
github.head_ref == 'refs/heads/main'
&& (
(github.event.action == 'opened' && !github.event.pull_request.draft)
|| github.event.action == 'ready_for_review')
runs-on: ubuntu-latest
permissions:
contents: write
Expand Down
2 changes: 2 additions & 0 deletions compiler/frontend/src/mir_optimize/pure.rs
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,7 @@ impl PurenessInsights {
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct IdSet(BitVec);
impl IdSet {
#[must_use]
pub fn contains(&self, id: Id) -> bool {
if id.to_usize() >= self.0.len() {
false
Expand All @@ -348,6 +349,7 @@ impl IdSet {
}
}

#[must_use]
pub fn iter(&self) -> IdSetIter {
self.into_iter()
}
Expand Down
2 changes: 1 addition & 1 deletion packages/Core/_.candy
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
bool := use ".bool"
[check] := use ".check"
[check, checkEquals] := use ".check"
[if, ifElse, loop, recursive, repeat] := use ".controlFlow"
[equals] := use ".equality"
fixedDecimal := use ".fixedDecimal"
Expand Down
10 changes: 10 additions & 0 deletions packages/Core/iterator.candy
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,16 @@ joinToText iterator := iterator | wrapSafe { needs False "`joinToText` needs an
needs (text.is item)
result | text.concatenate item
}
# Joins the text items of `iterator` into one text, placing `separator` between each pair of
# neighboring items. If `reduceLeft` yields no value, the result falls back to `""`.
#
# Needs `iterator` to be an iterator and `separator` to be a text.
# NOTE(review): the `text.is` checks on the items live inside the reducer. Presumably `reduceLeft`
# does not call the reducer for a single-item iterator, in which case that lone item would be
# returned without being checked – confirm against `reduceLeft`.
joinToTextWithSeparator iterator separator :=
iterator = iterator | wrapSafe { needs False "`joinToTextWithSeparator` needs an iterator." }
needs (text.is separator)

iterator | reduceLeft { a b ->
needs (text.is a)
needs (text.is b)
a | text.concatenate separator | text.concatenate b
}
| result.unwrapOr ""

## Filtering

Expand Down
4 changes: 2 additions & 2 deletions packages/Core/list.candy
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ is value := type.is value List
length := builtins.listLength
isEmpty list :=
needs (is list)
equals (length list) 0
equals (list | length) 0

lastIndex list :=
needs (is list)
Expand Down Expand Up @@ -55,7 +55,7 @@ prepend list item :=
list | insert 0 item
append list item :=
needs (is list)
list | insert (length list) item
list | insert (list | length) item

replace := builtins.listReplace
update list index updater :=
Expand Down
2 changes: 2 additions & 0 deletions packages/Csv/_.candy
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[decode] := use ".decode"
[encode] := use ".encode"
Empty file added packages/Csv/_package.candy
Empty file.
104 changes: 104 additions & 0 deletions packages/Csv/decode.candy
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
[bool, checkEquals, equals, ifElse, int, list, recursive, text] = use "Core"
[cursor, parser] = use "Parser"

# Decodes the text `csv` into `Ok lines`, where `lines` is a list of lines and each line is a list
# of field texts.
#
# The source is walked one character at a time. Field contents are taken verbatim from the source
# via `text.getRange`, so surrounding whitespace is preserved. Quoted fields are not handled yet
# (see the TODO further down).
decode csv :=
needs (text.is csv)
# TODO(JonasWanke): Error on lines with different field counts
# State carried through the recursion: the finished lines, the fields of the current line, the
# offset at which the current field starts, and the parser for the remaining input.
recursive [Lines: (,), Fields: (,), FieldStartOffset: 0, P: parser.new csv] {
recurse [lines, fields, fieldStartOffset, p] ->
# `oldOffset` is the offset before the next character is consumed.
Parser (Cursor [source, Offset: oldOffset]) = p
p | parser.next %
Ok [Parser: p, character] ->
Parser c = p
# `newOffset` is the offset right after the consumed character.
Cursor [Offset: newOffset] = c
character %
"," ->
# A comma ends the current field; the next field starts right after it.
recurse [
lines,
Fields: fields | list.append (source | text.getRange fieldStartOffset oldOffset),
FieldStartOffset: newOffset,
p,
]
"


" ->
# The line separator ends both the current field and the current line.
fields = fields | list.append (source | text.getRange fieldStartOffset oldOffset)
lines = lines | list.append fields
# CSV files can have a trailing newline.
ifElse
(c | cursor.isAtEnd)
{
# If the separator was the very first character (offset 0), the result is an empty list.
ifElse (oldOffset | equals 0) { Ok (,) } { Ok lines }
}
{
recurse [lines, Fields: (,), FieldStartOffset: newOffset, p]
}
# TODO(JonasWanke): handle quoted field
# Any other character belongs to the current field; continue with unchanged field state.
_ -> recurse [lines, fields, fieldStartOffset, p]
Error Empty ->
# End of input without a trailing line separator: finish the last field and line, unless the
# last field is empty and the current line has no fields.
finalField = source | text.getRange fieldStartOffset oldOffset
Ok
ifElse
finalField | text.isEmpty | bool.lazyAnd { fields | list.isEmpty }
{ lines }
{ lines | list.append (fields | list.append finalField) }
}

# Checks `decode` for empty input, single fields, fields with surrounding whitespace, and multiple
# fields and lines – each with and without a trailing `text.newline`.
test =
checkEquals (decode "") (Ok (,))
checkEquals (decode "{text.newline}") (Ok (,))

checkEquals (decode "aaa") (Ok (("aaa",),))
checkEquals (decode "aaa{text.newline}") (Ok (("aaa",),))

checkEquals (decode " aaa ") (Ok ((" aaa ",),))
checkEquals (decode " aaa {text.newline}") (Ok ((" aaa ",),))

checkEquals (decode "aaa,bbb") (Ok (("aaa", "bbb"),))
checkEquals (decode "aaa,bbb{text.newline}") (Ok (("aaa", "bbb"),))
checkEquals (decode "aaa,bbb{text.newline}ccc,ddd") (Ok (("aaa", "bbb"), ("ccc", "ddd")))
checkEquals
decode "aaa,bbb{text.newline}ccc,ddd{text.newline}"
Ok (("aaa", "bbb"), ("ccc", "ddd"))
# Parser is broken, hence this verbose formatting: https://github.com/candy-lang/candy/issues/896
checkEquals
decode "
aaa , bbb {text.newline} ccc , ddd {text.newline}
"
Ok (
(
"
aaa
",
"
bbb
",
),
(
"
ccc
",
"
ddd
",
),
)

# Checks `decode` against the unquoted examples from RFC 4180, section 2. The examples with quoted
# fields are commented out until `decode` handles quoted fields.
testRfcExamples =
# From https://datatracker.ietf.org/doc/html/rfc4180#section-2
checkEquals (decode "aaa,bbb,ccc") (Ok (("aaa", "bbb", "ccc"),))

checkEquals
decode "aaa,bbb,ccc{text.newline}zzz,yyy,xxx{text.newline}"
Ok (("aaa", "bbb", "ccc"), ("zzz", "yyy", "xxx"))
checkEquals
decode "
field_name,field_name,field_name{text.newline}aaa,bbb,ccc{text.newline}zzz,yyy,xxx{text.newline}
"
Ok (("field_name", "field_name", "field_name"), ("aaa", "bbb", "ccc"), ("zzz", "yyy", "xxx"))
# TODO(JonasWanke): handle quoted field
# checkEquals
# decode '"aaa,"b{{text.newline}}bb",ccc{{text.newline}}zzz,yyy,xxx{{text.newline}}"'
# Ok (("aaa", "b{text.newline}bb", "ccc"), ("zzz", "yyy", "xxx"))
# checkEquals (decode '"aaa,"b""bb",ccc{{text.newline}}"') (Ok (("aaa", '"b"bb"', "ccc"),))
62 changes: 62 additions & 0 deletions packages/Csv/encode.candy
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
[bool, checkEquals, equals, ifElse, iterator, list, result, text] = use "Core"

# Encodes a single field for use inside a CSV line.
#
# A field that contains a double quote, a newline, or a comma is wrapped in double quotes, and
# every double quote inside it is doubled (RFC 4180, section 2). Any other field is returned
# unchanged.
#
# Fields containing a comma must be quoted as well: otherwise the comma would be read back as a
# field separator and the line would gain an extra field.
encodeField field =
  needs (text.is field)
  containsQuoteOrNewline =
    field | text.contains '"""' | bool.lazyOr { field | text.contains text.newline }
  needsQuoting = containsQuoteOrNewline | bool.lazyOr { field | text.contains "," }
  ifElse
    needsQuoting
    {
      characters = field | text.characters | iterator.fromList
      # Escape each double quote by doubling it; all other characters are kept as they are.
      escaped = characters | iterator.map { char ->
        ifElse (char | equals '"""') { '""""' } { char }
      }
      encoded = escaped | iterator.joinToText
      '""{{encoded}}""'
    }
    { field }

# Encodes one line: each field is passed through `encodeField` and the results are joined with
# `","`. No line terminator is appended here.
#
# Needs `line` to be a list whose items are all texts.
encodeLine line =
needs (list.is line)
needs (line | iterator.fromList | iterator.all text.is)
line | iterator.fromList | iterator.map encodeField | iterator.joinToTextWithSeparator ","

# Encodes `lines` (a list of lists of field texts) as CSV text. Every line is encoded with
# `encodeLine` and followed by `text.newline`, so non-empty output ends with a newline.
#
# Needs `lines` to be a list of lists, and every line to have as many fields as the first line.
encode lines :=
needs (list.is lines)
needs (lines | iterator.fromList | iterator.all list.is)
# The field count of the first line is the reference; 0 is used when there is no first line.
fieldsPerLine = lines | list.first | result.map list.length | result.unwrapOr 0
lines | iterator.fromList | iterator.map { line ->
needs (line | list.length | equals fieldsPerLine)
"{line | encodeLine}{text.newline}"
}
| iterator.joinToText

# Checks `encodeLine` for lines with zero, one, and two plain fields.
testEncodeLine =
# TODO(JonasWanke): Add test cases for leading/trailing whitespace when our parser is fixed,
# https://github.com/candy-lang/candy/issues/896
checkEquals (encodeLine (,)) ""
checkEquals (encodeLine ("aaa",)) "aaa"
checkEquals (encodeLine ("aaa", "bbb")) "aaa,bbb"

# Checks that fields containing a double quote or a newline are wrapped in double quotes and that
# inner double quotes are doubled.
testEncodeLineWithSpecialCharacters =
checkEquals (encodeLine ('"aa"a"',)) '""aa""a""'
checkEquals (encodeLine ("aa{text.newline}a",)) '""aa{{text.newline}}a""'

# Checks `encode` for no lines, a single line, and multiple lines; every encoded line ends with
# `text.newline`.
testEncode =
checkEquals (encode (,)) ""
checkEquals (encode (("aaa",),)) "aaa{text.newline}"
checkEquals (encode (("aaa", "bbb"),)) "aaa,bbb{text.newline}"
checkEquals (encode (("aaa",), ("bbb",))) "aaa{text.newline}bbb{text.newline}"

# Checks `encodeLine` and `encode` against the examples from RFC 4180, section 2, including fields
# that contain a newline or double quotes.
testRfcExamples =
# From https://datatracker.ietf.org/doc/html/rfc4180#section-2
checkEquals (encodeLine ("aaa", "bbb", "ccc")) "aaa,bbb,ccc"

checkEquals
encode (("aaa", "bbb", "ccc"), ("zzz", "yyy", "xxx"))
"aaa,bbb,ccc{text.newline}zzz,yyy,xxx{text.newline}"
checkEquals
encode (("field_name", "field_name", "field_name"), ("aaa", "bbb", "ccc"), ("zzz", "yyy", "xxx"))
"
field_name,field_name,field_name{text.newline}aaa,bbb,ccc{text.newline}zzz,yyy,xxx{text.newline}
"
checkEquals
encode (("aaa", "b{text.newline}bb", "ccc"), ("zzz", "yyy", "xxx"))
'"aaa,"b{{text.newline}}bb",ccc{{text.newline}}zzz,yyy,xxx{{text.newline}}"'
checkEquals (encode (("aaa", '"b"bb"', "ccc"),)) '"aaa,"b""bb",ccc{{text.newline}}"'
2 changes: 2 additions & 0 deletions packages/Parser/_.candy
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
cursor := use ".cursor"
parser := use ".parser"
Empty file added packages/Parser/_package.candy
Empty file.
25 changes: 25 additions & 0 deletions packages/Parser/cursor.candy
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
[bool, equals, int, text] = use "Core"

# Returns whether `cursor` matches `Cursor [source, offset]` with `source` being a text and
# `offset` being a non-negative integer that is at most the length of `source`. Anything else
# yields `False`.
is cursor := cursor %
Cursor [source, offset] ->
text.is source | bool.lazyAnd { int.is offset } | bool.lazyAnd { int.isNonNegative offset }
| bool.lazyAnd { offset | int.isLessThanOrEqualTo (source | text.length) }
_ -> False

# Creates a cursor over the text `source`, positioned at offset 0.
newAtStart source :=
needs (text.is source)
Cursor [source, Offset: 0]

# Returns whether the cursor's offset equals the length of its source.
isAtEnd cursor :=
needs (is cursor)
Cursor [source, offset] = cursor
offset | equals (source | text.length)

# Returns a cursor over the same source whose offset is advanced by `length`.
#
# Needs `length` to be a non-negative integer and the resulting offset to be at most the length of
# the source.
add cursor length :=
needs (is cursor)
needs (int.is length)
needs (int.isNonNegative length)
Cursor [source, offset] = cursor
offset = offset | int.add length
needs (offset | int.isLessThanOrEqualTo (source | text.length))
Cursor [source, offset]
48 changes: 48 additions & 0 deletions packages/Parser/parser.candy
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
[bool, equals, ifElse, int, text] = use "Core"
cursor = use "..cursor"

# Returns whether `parser` matches `Parser c` with `c` being a valid cursor (per `cursor.is`).
# Anything else yields `False`.
is parser := parser %
Parser c -> cursor.is c
_ -> False

# Creates a parser for the text `source`, with its cursor at the start of the source.
new source :=
needs (text.is source)
Parser (cursor.newAtStart source)

# Looks at the next character without advancing the parser.
#
# Returns `Error Empty` if the cursor is at the end; otherwise `Ok` of the source range from the
# current offset to the current offset plus one.
peek parser :=
needs (is parser)
Parser c = parser
ifElse (c | cursor.isAtEnd) { Error Empty } {
Cursor [source, offset] = c
Ok (source | text.getRange offset (offset | int.add 1))
}
# Consumes the next character.
#
# Returns `Error Empty` if the cursor is at the end; otherwise `Ok [Parser, Character]`, where
# `Parser` is a parser whose cursor is advanced by one and `Character` is the source range between
# the old and the new offset.
next parser :=
needs (is parser)
Parser c = parser
ifElse (c | cursor.isAtEnd) { Error Empty } {
Cursor [source, offset] = c
c = c | cursor.add 1
Cursor [Offset: newOffset] = c
Ok [Parser: Parser c, Character: source | text.getRange offset newOffset]
}

# Returns whether the source contains `expectedText` starting at the parser's current offset.
# The parser itself is not advanced.
matches parser expectedText :=
needs (is parser)
needs (text.is expectedText)
Parser (Cursor [source, offset]) = parser
endExclusive = offset | int.add (expectedText | text.length)
# Check the bounds first so that `text.getRange` is only called with a range inside the source.
endExclusive | int.isLessThanOrEqualTo (source | text.length)
| bool.lazyAnd { source | text.getRange offset endExclusive | equals expectedText }
# Consumes `expectedText` if the source contains it at the parser's current offset.
#
# Returns `Ok` of a parser advanced by the length of `expectedText` on a match; otherwise `Error`
# with a message text naming the expected text and the current offset.
require parser expectedText :=
needs (is parser)
needs (text.is expectedText)
ifElse
parser | matches expectedText
{
Parser c = parser
Ok (Parser (c | cursor.add (expectedText | text.length)))
}
{
Parser (Cursor [offset]) = parser
Error '"Expected "{{expectedText}}" at offset {offset}."'
}
4 changes: 2 additions & 2 deletions packages/candy.code-workspace
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@
"editor.defaultFormatter": "redhat.vscode-yaml"
},
"editor.codeActionsOnSave": {
"source.fixAll": true,
"source.organizeImports": true
"source.fixAll": "explicit",
"source.organizeImports": "explicit"
},
"editor.formatOnPaste": true,
"editor.formatOnSave": true,
Expand Down

1 comment on commit 85bbecd

@jwbot
Copy link
Collaborator

@jwbot jwbot commented on 85bbecd Jan 25, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Compiler

Benchmark suite Current: 85bbecd Previous: bb55714 Ratio
Time: Compiler/hello_world 37817577 ns/iter (± 401562) 37726636 ns/iter (± 581259) 1.00
Time: Compiler/fibonacci 192516036 ns/iter (± 555334) 191835875 ns/iter (± 698916) 1.00
Time: VM Runtime/hello_world 36117 ns/iter (± 2221) 43466 ns/iter (± 5701) 0.83
Time: VM Runtime/fibonacci/15 296468610 ns/iter (± 1728444) 296425323 ns/iter (± 1677685) 1.00
Time: VM Runtime/PLB/binarytrees/6 1593328858 ns/iter (± 7402098) 1613548766 ns/iter (± 21582512) 0.99

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.