Skip to content

Commit

Permalink
CI: Activate doctests for javelin-frames
Browse files Browse the repository at this point in the history
  • Loading branch information
LaurentRDC committed Jan 26, 2025
1 parent a077b5e commit 38ce587
Show file tree
Hide file tree
Showing 4 changed files with 150 additions and 48 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/haskell-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,4 +57,5 @@ jobs:
run: |
cabal install doctest
cabal repl javelin --with-ghc=doctest
cabal repl javelin-io --with-ghc=doctest
cabal repl javelin-io --with-ghc=doctest
cabal repl javelin-frames --with-ghc=doctest
1 change: 1 addition & 0 deletions javelin-frames/javelin-frames.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ library
import: common
exposed-modules: Data.Frame
build-depends: base >=4.15.0.0 && <4.22,
containers >=0.6 && <0.8,
vector >=0.12.3.0 && <0.14,
hs-source-dirs: src
default-language: GHC2021
Expand Down
163 changes: 130 additions & 33 deletions javelin-frames/src/Data/Frame.hs
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,16 @@
-- To define a data frame, create a record type using @data@
-- and derive an instance of `Generic` and `Frameable`:
--
-- data User f =
-- MkUser { userName :: Column f String
-- , userAge :: Column f Int
-- }
-- deriving (Generic, Frameable)
-- @
-- data User f =
-- MkUser { userName :: `Column` f `String`
-- , userAge :: `Column` f `Int`
-- }
-- deriving (`Generic`, `Frameable`)
-- @
--
-- There are three special things with this type definition:
--
-- * @User@ is a higher-kinded type, and admits a type constructor @f@. This
-- type constructor @f@ is used to distinguish between single rows and data frames, as we
-- will see in a second
Expand All @@ -42,35 +45,68 @@
-- To make it more obvious, the type synonyms `Row` and `Frame` are provided, where
-- @`Row` User@ is equivalent to:
--
-- data User =
-- MkUser { userName :: String
-- , userAge :: Int
-- }
-- @
-- data User =
-- MkUser { userName :: `String`
-- , userAge :: `Int`
-- }
-- @
--
-- Each field (e.g. @userName@) must involve the type family `Column` because
-- @`Column` `Identity` a@ simplifies to @a@. This is why @`Row` User@ is exactly
-- like a normal, non-higher-kinded record type.
--
-- On the other hand, `Frame User` is equivalent to:
--
-- data User =
-- MkUser { userName :: Vector String
-- , userAge :: Vector Int
-- }
-- @
-- data User =
-- MkUser { userName :: `Vector` `String`
-- , userAge :: `Vector` `Int`
-- }
-- @
--
-- One small annoyance we must put up with is that deriving instances of `Show`, `Eq`, etc.
-- for @User@ is now a little different:
--
-- deriving instance Show (Row User)
-- deriving instance Eq (Row User)
-- @
-- deriving instance `Show` (`Row` User)
-- deriving instance `Eq` (`Row` User)
-- @
--
-- Finally, we are ready to do some data processing. First, we must build a dataframe
-- using `fromRows`:
-- Let's look at a real example. First, let's get some setup out of the way. We must
-- activate @-XDeriveAnyClass@ to automatically derive `Frameable`:
--
-- >>> :set -XDeriveAnyClass
--
-- and we'll import the "Data.Vector" module as well:
--
-- >>> import Data.Vector as Vector
-- >>> users = fromRows $ Vector.fromList [MkUser "Albert" 12, MkUser "Beatrice" 35, MkUser "Clara" 24]
--
-- We define
--
-- >>> :{
-- data Student f
-- = MkStudent { studentName :: Column f String
-- , studentAge :: Column f Int
-- , studentMathGrade :: Column f Char
-- }
-- deriving (Generic, Frameable)
-- :}
--
-- We use `fromRows` to pack individual students into a dataframe:
--
-- >>> students = fromRows $ Vector.fromList [MkStudent "Albert" 12 'C', MkStudent "Beatrice" 13 'B', MkStudent "Clara" 12 'A']
--
-- We can render the dataframe @students@ into a nice string using `display`
-- (and print it out using using `putStrLn`):
--
-- TODO: complete the tutorial
-- >>> putStrLn (display students)
-- studentName | studentAge | studentMathGrade
-- ----------- | ---------- | ----------------
-- "Albert" | 12 | 'C'
-- "Beatrice" | 13 | 'B'
-- "Clara" | 12 | 'A'
--
module Data.Frame (
-- * Defining dataframe types
Column, Frameable, Row, Frame,
Expand All @@ -94,16 +130,21 @@ import Data.Bifunctor (second)
import qualified Data.Foldable
import Data.Functor.Identity (Identity(..))
import Data.Kind (Type)
import Data.List ( intersperse )
import qualified Data.List as List ( intersperse, foldl' )
import Data.Maybe (catMaybes)
import Data.Semigroup (Max(..))
import qualified Data.Set as Set
import Data.Vector (Vector)
import qualified Data.Vector
import Prelude hiding (lookup, null, length)
import qualified Prelude
import GHC.Generics ( Selector, Generic(..), S, D, C, K1(..), Rec0, M1(..), type (:*:)(..), selName )


-- $setup
-- >>> import qualified Data.Vector as Vector


-- | Build a dataframe from a container of rows.
--
-- For the inverse operation, see `toRows`.
Expand Down Expand Up @@ -406,6 +447,7 @@ class ( Frameable t
index :: Frame t -> Vector (Key t)


-- | Control how `displayWith` behaves.
data DisplayOptions t
= DisplayOptions
{ maximumNumberOfRows :: Int
Expand All @@ -425,13 +467,63 @@ defaultDisplayOptions
}


-- | Display a 'Series' using default 'DisplayOptions'.
-- | Display a @`Frame` t@ using default 'DisplayOptions'.
--
-- Example:
--
-- >>> :{
-- data Student f
-- = MkStudent { studentName :: Column f String
-- , studentAge :: Column f Int
-- , studentMathGrade :: Column f Char
-- }
-- deriving (Generic, Frameable)
-- :}
--
-- >>> students = fromRows $ Vector.fromList [MkStudent "Albert" 12 'C', MkStudent "Beatrice" 13 'B', MkStudent "Clara" 12 'A']
-- >>> putStrLn (display students)
-- studentName | studentAge | studentMathGrade
-- ----------- | ---------- | ----------------
-- "Albert" | 12 | 'C'
-- "Beatrice" | 13 | 'B'
-- "Clara" | 12 | 'A'
display :: Frameable t
=> Frame t
-> String
display = displayWith defaultDisplayOptions


-- | Display a @`Frame` t@ using custom 'DisplayOptions'.
--
-- Example:
--
-- >>> :{
-- data Student f
-- = MkStudent { studentName :: Column f String
-- , studentAge :: Column f Int
-- , studentMathGrade :: Column f Char
-- }
-- deriving (Generic, Frameable)
-- :}
--
-- >>> :{
-- students = fromRows
-- $ Vector.fromList
-- [ MkStudent "Albert" 12 'C'
-- , MkStudent "Beatrice" 13 'B'
-- , MkStudent "Clara" 12 'A'
-- , MkStudent "David" 13 'A'
-- , MkStudent "Erika" 13 'D'
-- , MkStudent "Frank" 11 'C'
-- ]
-- :}
--
-- >>> putStrLn (displayWith (defaultDisplayOptions{maximumNumberOfRows=2}) students)
-- studentName | studentAge | studentMathGrade
-- ----------- | ---------- | ----------------
-- "Albert" | 12 | 'C'
-- ... | ... | ...
-- "Frank" | 11 | 'C'
displayWith :: (Frameable t)
=> DisplayOptions t
-> Frame t
Expand All @@ -445,8 +537,12 @@ displayWith DisplayOptions{..} df
where
len = length df
n = max 1 (maximumNumberOfRows `div` 2)
headRows = catMaybes [ilookup i df | i <- [0 .. n - 1]]
tailRows = catMaybes [ilookup j df | j <- [len - n ..len]]
-- We prevent overlap between the 'head' rows and 'tail' rows
-- by favoring removing duplicate integer indices from the tail rows
headIxs = Set.fromList [0 .. n - 1]
tailIxs = Set.fromList [len - n ..len] `Set.difference` headIxs
headRows = catMaybes [ilookup i df | i <- Set.toList headIxs]
tailRows = catMaybes [ilookup j df | j <- Set.toList tailIxs]

firstRow = case headRows of
[] -> error "Impossible!" -- We already checked that `df` won't be empty
Expand All @@ -462,25 +558,26 @@ displayWith DisplayOptions{..} df
(headerLengths :: [(String, Int)]) = (map (\(k, _) -> (k, Prelude.length k)) (fields firstRow))
(colWidths :: [(String, Int)])
= map (second getMax)
$ foldl' (\acc mp -> zipWith (\(k1, v1) (k2, v2) -> ((assert (k1 == k2) k1, v1 <> v2))) acc (map (second (Max . Prelude.length)) mp))
(map (second Max) headerLengths)
rows
$ List.foldl'
(\acc mp -> zipWith (\(k1, v1) (k2, v2) -> ((assert (k1 == k2) k1, v1 <> v2))) acc (map (second (Max . Prelude.length)) mp))
(map (second Max) headerLengths)
rows

-- | Format a grid represented by a list of rows, where every row is a list of items
-- All columns will have a fixed width
formatGrid :: [ [(String, String)]] -- List of rows
-> String
formatGrid rs = unlines
$ [ mconcat $ intersperse " | " [ (pad w k) | (k, w) <- colWidths]]
++ [ mconcat $ intersperse " | " [ (pad w (replicate w '-')) | (_, w) <- colWidths]]
++ [ mconcat $ intersperse " | " [ (pad w v)
| ((_, v), (_, w)) <- zip mp colWidths
]
formatGrid rs = mconcat $ List.intersperse "\n"
$ [ mconcat $ List.intersperse " | " [ (pad w k) | (k, w) <- colWidths]]
++ [ mconcat $ List.intersperse " | " [ (pad w (replicate w '-')) | (_, w) <- colWidths]]
++ [ mconcat $ List.intersperse " | " [ (pad w v)
| ((_, v), (_, w)) <- zip mp colWidths
]
| mp <- rs
]
where
-- | Pad a string to a minimum of @n@ characters wide.
pad :: Int -> String -> String
pad minNumChars s
| minNumChars <= Prelude.length s = s
| otherwise = replicate (minNumChars - Prelude.length s) ' ' <> s
| otherwise = replicate (minNumChars - Prelude.length s) ' ' <> s
31 changes: 17 additions & 14 deletions javelin-frames/test/Test/Data/Frame.hs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ module Test.Data.Frame (tests) where

import Control.Monad (guard, forM_)
import Data.Frame as Frame hiding (length)
import qualified Data.List as List (intersperse)
import qualified Data.Set as Set
import qualified Data.Vector as Vector

Expand Down Expand Up @@ -89,23 +90,25 @@ testDisplay =
in testGroup "displaytWith" [
testCase "Appropriately displaying all rows" $ do
let displayed = Frame.displayWith (Frame.defaultDisplayOptions {maximumNumberOfRows = 4}) frame
expectation = unlines [ "userName | userAge"
, "-------- | -------"
, " \"Alice\" | 37"
, " \"Bob\" | 38"
, " \"Clara\" | 39"
, " \"David\" | 40"
]
expectation = unlines' [ "userName | userAge"
, "-------- | -------"
, " \"Alice\" | 37"
, " \"Bob\" | 38"
, " \"Clara\" | 39"
, " \"David\" | 40"
]

assertEqual mempty expectation displayed,
testCase "Appropriately eliding some rows" $ do
let displayed = Frame.displayWith (Frame.defaultDisplayOptions {maximumNumberOfRows = 2}) frame
expectation = unlines [ "userName | userAge"
, "-------- | -------"
, " \"Alice\" | 37"
, " ... | ..."
, " \"David\" | 40"
]
expectation = unlines' [ "userName | userAge"
, "-------- | -------"
, " \"Alice\" | 37"
, " ... | ..."
, " \"David\" | 40"
]

assertEqual mempty expectation displayed
]
]
where
unlines' = mconcat . List.intersperse "\n"

0 comments on commit 38ce587

Please sign in to comment.