Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add column name/utility accessors for Legolas.Schema #52

Closed
wants to merge 10 commits into from
72 changes: 53 additions & 19 deletions src/rows.jl
Original file line number Diff line number Diff line change
Expand Up @@ -105,22 +105,25 @@ struct UnknownSchemaError <: Exception
end

function Base.showerror(io::IO, e::UnknownSchemaError)
print(io, """
encountered unknown `Legolas.Schema` type: $(e.schema)

This generally indicates that this schema has not been defined (i.e.
the schema's corresponding `@row` statement has not been executed) in
the current Julia session.

In practice, this can arise if you try to read a Legolas table with a
prescribed schema, but haven't actually loaded the schema definition
(or commonly, haven't loaded the dependency that contains the schema
definition - check the versions of loaded packages/modules to confirm
your environment is as expected).

Note that if you're in this particular situation, you can still load
the raw table as-is without Legolas; e.g., to load an Arrow table, call `Arrow.Table(path)`.
""")
print(
io,
"""
encountered unknown `Legolas.Schema` type: $(e.schema)

This generally indicates that this schema has not been defined (i.e.
the schema's corresponding `@row` statement has not been executed) in
the current Julia session.

In practice, this can arise if you try to read a Legolas table with a
prescribed schema, but haven't actually loaded the schema definition
(or commonly, haven't loaded the dependency that contains the schema
definition - check the versions of loaded packages/modules to confirm
your environment is as expected).

Note that if you're in this particular situation, you can still load
the raw table as-is without Legolas; e.g., to load an Arrow table, call `Arrow.Table(path)`.
"""
)
Comment on lines +108 to +126
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fix style?

return nothing
end

Expand Down Expand Up @@ -227,6 +230,28 @@ function Base.show(io::IO, row::Row)
return nothing
end

"""
    schema_field_names(::Type{<:Legolas.Schema})
    schema_field_names(::Legolas.Schema)
    schema_field_names(::Legolas.Row)
    schema_field_names(::Type{<:Legolas.Row})

Return a tuple of the field names declared for the given `Legolas.Schema`, including the
names inherited from its parent schema.

A schema instance, a `Legolas.Row`, or a `Legolas.Row` type may be passed instead of the
schema type; each of these forwards to the method for the underlying schema type.

Throws an `UnknownSchemaError` when no method has been defined for the schema type (the
`@row` macro is what defines one).
"""
function schema_field_names(::Type{S}) where {S<:Legolas.Schema}
    # Generic fallback: reaching this method means no specialized method exists for `S`.
    throw(UnknownSchemaError(S()))
end

function schema_field_names(schema::Legolas.Schema)
    return schema_field_names(typeof(schema))
end

function schema_field_names(::Legolas.Row{S}) where {S}
    return schema_field_names(S)
end

function schema_field_names(::Type{<:Legolas.Row{S}}) where {S}
    return schema_field_names(S)
end
OTDE marked this conversation as resolved.
Show resolved Hide resolved

"""
    schema_field_types(::Type{<:Legolas.Schema})
    schema_field_types(::Legolas.Schema)
    schema_field_types(::Legolas.Row)
    schema_field_types(::Type{<:Legolas.Row})

Return a tuple of the field types declared for the given `Legolas.Schema`, including the
types of fields inherited from its parent schema.

A schema instance, a `Legolas.Row`, or a `Legolas.Row` type may be passed instead of the
schema type; each of these forwards to the method for the underlying schema type.

Throws an `UnknownSchemaError` when no method has been defined for the schema type (the
`@row` macro is what defines one).
"""
schema_field_types(::Type{S}) where {S<:Legolas.Schema} = throw(UnknownSchemaError(S()))
# Convenience forms: each one resolves the schema type and forwards to the method above
# (or to the specialization defined for that schema type).
schema_field_types(s::Legolas.Schema) = schema_field_types(typeof(s))
schema_field_types(::Legolas.Row{S}) where {S} = schema_field_types(S)
schema_field_types(::Type{<:Legolas.Row{S}}) where {S} = schema_field_types(S)
OTDE marked this conversation as resolved.
Show resolved Hide resolved

function _parse_schema_expr(x)
if x isa Expr && x.head == :call && x.args[1] == :> && length(x.args) == 3
child, _ = _parse_schema_expr(x.args[2])
Expand Down Expand Up @@ -277,27 +302,36 @@ macro row(schema_expr, fields...)
name, type = f.args[1].args
return :(validate_expected_field(tables_schema, $(Base.Meta.quot(name)), $(esc(type))))
end
field_names = [esc(f.args[1].args[1]) for f in fields]
field_exprs = [f.args[1] for f in fields]
field_names = [e.args[1] for e in field_exprs]
field_types = [e.args[2] for e in field_exprs]
escaped_field_names = map(esc, field_names)
schema_type = Base.Meta.quot(typeof(schema))
quoted_parent = Base.Meta.quot(parent)
schema_qualified_string = string(schema_name(schema), '@', schema_version(schema))
schema_field_names = Expr(:tuple, map(QuoteNode, field_names)...)
schema_field_types = Expr(:tuple, field_types...)
parent_transform = nothing
parent_validate = nothing
if !isnothing(parent)
schema_qualified_string = :(string($schema_qualified_string, '>', Legolas.schema_qualified_string($quoted_parent)))
schema_field_names = :(($schema_field_names..., Legolas.schema_field_names($quoted_parent)...))
schema_field_types = :(($schema_field_types..., Legolas.schema_field_types($quoted_parent)...))
parent_transform = :(fields = transform($quoted_parent; fields...))
parent_validate = :(validate(tables_schema, $quoted_parent))
end

legolas_row_arrow_name = :(Symbol("JuliaLang.", $schema_qualified_string))
return quote
Legolas.schema_qualified_string(::$schema_type) = $schema_qualified_string
Legolas.schema_field_names(::Type{$schema_type}) = $schema_field_names
Legolas.schema_field_types(::Type{$schema_type}) = $schema_field_types

Legolas.schema_parent(::Type{<:$schema_type}) = $quoted_parent

function Legolas._transform(::$schema_type; $([Expr(:kw, f, :missing) for f in field_names]...), other...)
function Legolas._transform(::$schema_type; $([Expr(:kw, f, :missing) for f in escaped_field_names]...), other...)
$(map(esc, fields)...)
return (; $([Expr(:kw, f, f) for f in field_names]...), other...)
return (; $([Expr(:kw, f, f) for f in escaped_field_names]...), other...)
end

function Legolas._validate(tables_schema::Tables.Schema, legolas_schema::$schema_type)
Expand Down
153 changes: 96 additions & 57 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -42,64 +42,64 @@ end

@testset "Legolas.location" begin
collections = (['a', 'b', 'c', 'f', 'b'],
['d', 'c', 'e', 'b'],
['f', 'a', 'f'])
['d', 'c', 'e', 'b'],
['f', 'a', 'f'])
expected = Dict('f' => ([4], [], [1, 3]),
'a' => ([1], [], [2]),
'c' => ([3], [2], []),
'd' => ([], [1], []),
'e' => ([], [3], []),
'b' => ([2, 5], [4], []))
'a' => ([1], [], [2]),
'c' => ([3], [2], []),
'd' => ([], [1], []),
'e' => ([], [3], []),
'b' => ([2, 5], [4], []))
@test Legolas.locations(collections) == expected
end

@testset "Legolas.gather" begin
a = [(x=1, y="a", z="k"),
(x=2, y="b", z="j"),
(x=4, y="c", z="i"),
(x=4, y="d", z="h"),
(x=2, y="e", z="g"),
(x=5, y="f", z="f"),
(x=4, y="g", z="e"),
(x=3, y="h", z="d"),
(x=1, y="i", z="c"),
(x=5, y="j", z="b"),
(x=4, y="k", z="a")]
(x=2, y="b", z="j"),
(x=4, y="c", z="i"),
(x=4, y="d", z="h"),
(x=2, y="e", z="g"),
(x=5, y="f", z="f"),
(x=4, y="g", z="e"),
(x=3, y="h", z="d"),
(x=1, y="i", z="c"),
(x=5, y="j", z="b"),
(x=4, y="k", z="a")]
b = [(x=1, m=1),
(x=2, m=2),
(x=2, m=5),
(x=5, m=4),
(x=4, m=6)]
(x=2, m=2),
(x=2, m=5),
(x=5, m=4),
(x=4, m=6)]
c = [(test="a", x=1, z=1.0),
(test="b", x=2, z=1.0),
(test="d", x=4, z=1.0),
(test="e", x="gotcha", z=1.0),
(test="f", x=5, z=1.0),
(test="h", x=3, z=1.0),
(test="i", x=1, z=1.0),
(test="j", x=5, z=1.0),
(test="k", x=4, z=1.0)]
(test="b", x=2, z=1.0),
(test="d", x=4, z=1.0),
(test="e", x="gotcha", z=1.0),
(test="f", x=5, z=1.0),
(test="h", x=3, z=1.0),
(test="i", x=1, z=1.0),
(test="j", x=5, z=1.0),
(test="k", x=4, z=1.0)]
dfa, dfb, dfc = DataFrame(a), DataFrame(b), DataFrame(c)
g = Legolas.gather(:x, a, b, c; extract=(t, i) -> t[i])
dfg = Legolas.gather(:x, dfa, dfb, dfc)
expected = Dict(1 => ([(x=1, y="a", z="k"), (x=1, y="i", z="c")],
[(x=1, m=1)],
[(test="a", x=1, z=1.0), (test="i", x=1, z=1.0)]),
2 => ([(x=2, y="b", z="j"), (x=2, y="e", z="g")],
[(x=2, m=2), (x=2, m=5)],
[(test="b", x=2, z=1.0)]),
3 => ([(x=3, y="h", z="d")],
NamedTuple{(:x, :m),Tuple{Int64,Int64}}[],
[(test="h", x=3, z=1.0)]),
4 => ([(x=4, y="c", z="i"), (x=4, y="d", z="h"), (x=4, y="g", z="e"), (x=4, y="k", z="a")],
[(x=4, m=6)],
[(test="d", x=4, z=1.0), (test="k", x=4, z=1.0)]),
5 => ([(x=5, y="f", z="f"), (x=5, y="j", z="b")],
[(x=5, m=4)],
[(test="f", x=5, z=1.0), (test="j", x=5, z=1.0)]),
"gotcha" => (NamedTuple{(:x, :y, :z),NTuple{3,Any}}[],
NamedTuple{(:x, :m),NTuple{2,Any}}[],
[(test="e", x="gotcha", z=1.0)]))
[(x=1, m=1)],
[(test="a", x=1, z=1.0), (test="i", x=1, z=1.0)]),
2 => ([(x=2, y="b", z="j"), (x=2, y="e", z="g")],
[(x=2, m=2), (x=2, m=5)],
[(test="b", x=2, z=1.0)]),
3 => ([(x=3, y="h", z="d")],
NamedTuple{(:x, :m),Tuple{Int64,Int64}}[],
[(test="h", x=3, z=1.0)]),
4 => ([(x=4, y="c", z="i"), (x=4, y="d", z="h"), (x=4, y="g", z="e"), (x=4, y="k", z="a")],
[(x=4, m=6)],
[(test="d", x=4, z=1.0), (test="k", x=4, z=1.0)]),
5 => ([(x=5, y="f", z="f"), (x=5, y="j", z="b")],
[(x=5, m=4)],
[(test="f", x=5, z=1.0), (test="j", x=5, z=1.0)]),
"gotcha" => (NamedTuple{(:x, :y, :z),NTuple{3,Any}}[],
NamedTuple{(:x, :m),NTuple{2,Any}}[],
[(test="e", x="gotcha", z=1.0)]))
@test g == expected
@test keys(dfg) == keys(expected)
@test all(all(dfg[k] .== DataFrame.(expected[k])) for k in keys(dfg))
Expand All @@ -123,14 +123,14 @@ end
@test t == Baz.(Tables.rows(Legolas.read(path)))
tbl = Arrow.Table(Legolas.tobuffer(t, Schema("baz", 1); metadata=("a" => "b", "c" => "d")))
@test Set(Arrow.getmetadata(tbl)) == Set((Legolas.LEGOLAS_SCHEMA_QUALIFIED_METADATA_KEY => "baz@1",
"a" => "b", "c" => "d"))
"a" => "b", "c" => "d"))

struct Foo
meta
end
Legolas.Arrow.getmetadata(foo::Foo) = foo.meta
foo = Foo(Dict("a" => "b", "b" => "b",
Legolas.LEGOLAS_SCHEMA_QUALIFIED_METADATA_KEY => "baz@1"))
Legolas.LEGOLAS_SCHEMA_QUALIFIED_METADATA_KEY => "baz@1"))
@test Legolas.Schema("baz", 1) == Legolas.extract_schema(foo)

t = [(a="a", c=1, b="b"), Baz(a=1, b=2)] # not a valid Tables.jl table
Expand All @@ -154,7 +154,7 @@ end
@test propertynames(r) == (:z, :x, :y)
@test r === Row(Schema("bar", 1), r)
@test r === Row(Schema("bar", 1); x=1, y=2, z=3)
@test r === Row(Schema("bar", 1), first(Tables.rows(Arrow.Table(Arrow.tobuffer((x=[1],y=[2],z=[3]))))))
@test r === Row(Schema("bar", 1), first(Tables.rows(Arrow.Table(Arrow.tobuffer((x=[1], y=[2], z=[3]))))))
@test r[1] === 3
@test string(r) == "Row(Schema(\"bar@1\"), (z = 3, x = 1, y = 2))"

Expand All @@ -164,7 +164,7 @@ end
long_row = Row(Schema("bar", 1), (x=1, y=2, z=zeros(100, 100)))
@test length(sprint(show, long_row; context=(:limit => true))) < 200

@test_throws Legolas.UnknownSchemaError Legolas.transform(Legolas.Schema("imadethisup@3"); a = 1, b = 2)
@test_throws Legolas.UnknownSchemaError Legolas.transform(Legolas.Schema("imadethisup@3"); a=1, b=2)
@test_throws Legolas.UnknownSchemaError Legolas.validate(Tables.Schema((:a, :b), (Int, Int)), Legolas.Schema("imadethisup@3"))
@test_throws Legolas.UnknownSchemaError Legolas.schema_qualified_string(Legolas.Schema("imadethisup@3"))

Expand All @@ -176,26 +176,65 @@ end
@test all(tbl.schema .== schemas)
end

@testset "schema field name and type tests" begin
    # Root schema: two typed fields, no parent.
    Parent = @row("parent@1",
                  first_parent_field::Int=1,
                  second_parent_field::String="second")

    parent_fields = (:first_parent_field, :second_parent_field)
    parent_field_types = (Int, String)

    # The accessors must agree for every accepted argument form:
    # schema type, schema instance, row instance, and row type.
    for target in (Schema{:parent,1}, Schema("parent@1"), Parent(), Parent)
        @test Legolas.schema_field_names(target) == parent_fields
        @test Legolas.schema_field_types(target) == parent_field_types
    end

    # Child schema: its own fields are expected first, followed by the parent's.
    Child = @row("child@1" > "parent@1",
                 first_child_field::Symbol=:first,
                 second_child_field="I can be anything")

    child_fields = (:first_child_field, :second_child_field, parent_fields...)
    # The child field declared without a type annotation is expected to report `Any`.
    child_field_types = (Symbol, Any, parent_field_types...)

    for target in (Schema{:child,1}, Schema("child@1"), Child(), Child)
        @test Legolas.schema_field_names(target) == child_fields
        @test Legolas.schema_field_types(target) == child_field_types
    end

    # A schema with no `@row` definition must raise from both accessors.
    unknown = Legolas.Schema("imadethisup@3")
    @test_throws Legolas.UnknownSchemaError Legolas.schema_field_names(unknown)
    @test_throws Legolas.UnknownSchemaError Legolas.schema_field_types(unknown)
end

@testset "isequal, hash" begin
TestRow = @row("testrow@1", x, y)

foo = TestRow(; x = [1])
foo2 = TestRow(; x = [1])
foo = TestRow(; x=[1])
foo2 = TestRow(; x=[1])
@test isequal(foo, foo2)
@test hash(foo) == hash(foo2)

foo3 = TestRow(; x = [3])
foo3 = TestRow(; x=[3])
@test !isequal(foo, foo3)
@test hash(foo) != hash(foo3)
end

const MyInnerRow = @row("my-inner-schema@1", b::Int=1)
const MyInnerRow = @row("my-inner-schema@1", b::Int = 1)
const MyOuterRow = @row("my-outer-schema@1",
a::String,
x::MyInnerRow=MyInnerRow(x))
a::String,
x::MyInnerRow = MyInnerRow(x))

@testset "Nested arrow serialization" begin
table = [MyOuterRow(; a="outer_a", x = MyInnerRow())]
table = [MyOuterRow(; a="outer_a", x=MyInnerRow())]
roundtripped_table = Legolas.read(Legolas.tobuffer(table, Legolas.Schema("my-outer-schema@1")))
@test table == MyOuterRow.(Tables.rows(roundtripped_table))
end