Skip to content

Commit

Permalink
Rename functions to specify that they operate on rows
Browse files Browse the repository at this point in the history
  • Loading branch information
LaurentRDC committed Jan 31, 2025
1 parent 787bcae commit 29d7489
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 38 deletions.
80 changes: 48 additions & 32 deletions javelin-frames/src/Data/Frame.hs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ module Data.Frame (
fromRows, toRows, fields,

-- * Operations on rows
null, length, mapFrame, mapFrameM, filterFrame, foldlFrame,
null, length, mapRows, mapRowsM, filterRows, foldlRows,
-- ** Sorting rows in frames
sortRowsBy, sortRowsByUnique, sortRowsByKey, sortRowsByKeyUnique,

Expand Down Expand Up @@ -89,6 +89,7 @@ fromRows :: (Frameable t, Foldable f)
fromRows = pack . Data.Vector.fromList . Data.Foldable.toList
{-# INLINE[~2] fromRows #-}


-- | Deconstruct a dataframe into its rows.
--
-- For the inverse operation, see `fromRows`.
Expand All @@ -98,7 +99,8 @@ toRows :: Frameable t
toRows = unpack
{-# INLINE[~2] toRows #-}

-- TODO: Chaining operations such as `mapFrame` and `filterFrame`

-- TODO: Chaining operations such as `mapRows` and `filterRows`
-- should benefit from optimizing as `toRows . fromRows = id`
-- ( and `fromRows . toRows = id` as well).
-- See the rules below.
Expand Down Expand Up @@ -130,12 +132,12 @@ length = Data.Vector.length . toRows

-- | Map a function over each row individually.
--
-- For mapping with a monadic action, see `mapFrameM`.
mapFrame :: (Frameable t1, Frameable t2)
=> (Row t1 -> Row t2)
-> Frame t1
-> Frame t2
mapFrame f = fromRows
-- For mapping with a monadic action, see `mapRowsM`.
mapRows :: (Frameable t1, Frameable t2)
=> (Row t1 -> Row t2)
-> Frame t1
-> Frame t2
mapRows f = fromRows
. Data.Vector.map f
. toRows

Expand All @@ -144,23 +146,23 @@ mapFrame f = fromRows
-- these actions from left to right, and collect the result
-- in a new dataframe.
--
-- For mapping without a monadic action, see `mapFrame`.
mapFrameM :: (Frameable t1, Frameable t2, Monad m)
=> (Row t1 -> m (Row t2))
-> Frame t1
-> m (Frame t2)
mapFrameM f = fmap fromRows
-- For mapping without a monadic action, see `mapRows`.
mapRowsM :: (Frameable t1, Frameable t2, Monad m)
=> (Row t1 -> m (Row t2))
-> Frame t1
-> m (Frame t2)
mapRowsM f = fmap fromRows
. Data.Vector.mapM f
. toRows


-- | Filter rows from a @`Frame` t@, only keeping
-- the rows where the predicate is `True`.
filterFrame :: (Frameable t)
=> (Row t -> Bool)
-> Frame t
-> Frame t
filterFrame f = fromRows
filterRows :: (Frameable t)
=> (Row t -> Bool)
-> Frame t
-> Frame t
filterRows f = fromRows
. Data.Vector.filter f
. toRows

Expand All @@ -182,12 +184,12 @@ zipFramesWith f xs ys


-- | Left-associative fold of a structure but with strict application of the operator.
foldlFrame :: Frameable t
=> (b -> Row t -> b) -- ^ Reduction function that takes in individual rows
-> b -- ^ Initial value for the accumulator
-> Frame t -- ^ Data frame
-> b
foldlFrame f start
foldlRows :: Frameable t
=> (b -> Row t -> b) -- ^ Reduction function that takes in individual rows
-> b -- ^ Initial value for the accumulator
-> Frame t -- ^ Data frame
-> b
foldlRows f start
= Data.Vector.foldl' f start . toRows


Expand Down Expand Up @@ -473,8 +475,6 @@ fr `iat` (rowIx, col) = (col fr) Data.Vector.!? rowIx
-- left and/or right dataframes. Merge strategies can be user-defined,
-- or you can use predefined strategies (e.g. `matchedStrategy`).
--
--
--
-- Note that (@`Key` t1 ~ `Key` t2@) means that the type of keys in
-- in both dataframes must be the same.
mergeWithStrategy :: ( Indexable t1, Indexable t2, Indexable t3
Expand Down Expand Up @@ -503,11 +503,13 @@ mergeWithStrategy strat df1Unsorted df2Unsorted
fullRight

where
asThese :: (k, Maybe a) -> (k, Maybe b) -> (k, These a b)
asThese (k, Just a) (_, Nothing) = (k, This a)
asThese (k, Nothing) (_, Just b) = (k, That b)
asThese (k, Just a) (_, Just b) = (k, These a b)
asThese _ _ = error "impossible"
asThese :: Eq k => (k, Maybe a) -> (k, Maybe b) -> (k, These a b)
asThese (k1, Just a) (k2, Nothing) = assert (k1==k2) (k1, This a)
asThese (k1, Nothing) (k2, Just b) = assert (k1==k2) (k1, That b)
asThese (k1, Just a) (k2, Just b) = assert (k1==k2) (k1, These a b)
-- The following line is unreachable since we know that the key `k`
-- will be present in at least one of the two rows.
asThese _ _ = error "impossible"

reindex :: Ord k => Set.Set k -> Vector (k, Row t) -> Vector (k, Maybe (Row t))
reindex fullix vs = Data.Vector.fromListN (Set.size fullix)
Expand All @@ -533,9 +535,23 @@ mergeWithStrategy strat df1Unsorted df2Unsorted
-- the following case
GT -> error "impossible"


-- | A `MergeStrategy` is a function that describes how to
-- merge two rows together.
--
-- A merge strategy must handle three cases:
--
-- * Only the left row;
-- * Only the right row;
-- * Both the left and right rows.
--
-- The simplest merge strategy is `matchedStrategy`.
--
-- See examples in the documentation of `mergeWithStrategy`.
type MergeStrategy k t1 t2 t3
= (k -> These (Row t1) (Row t2) -> Maybe (Row t3))


-- | Merge strategy which only works if both the left and right
-- rows are found.
--
Expand Down
12 changes: 6 additions & 6 deletions javelin-frames/src/Data/Frame/Tutorial.hs
Original file line number Diff line number Diff line change
Expand Up @@ -109,12 +109,12 @@ module to operate on columns.
Many operations that treat a dataframe as an array
of rows are provided.
There's `mapFrame` to map each row to a new structure:
There's `mapRows` to map each row to a new structure:
>>> :{
putStrLn
$ display
$ mapFrame
$ mapRows
(\(MkStudent name age grade) -> MkStudent name (2*age) grade)
students
:}
Expand All @@ -124,12 +124,12 @@ studentName | studentAge | studentMathGrade
"Beatrice" | 26 | 'B'
"Clara" | 24 | 'A'
There's `filterFrame` to keep specific rows:
There's `filterRows` to keep specific rows:
>>> :{
putStrLn
$ display
$ filterFrame
$ filterRows
(\(MkStudent _ _ grade) -> grade < 'C')
students
:}
Expand All @@ -138,11 +138,11 @@ studentName | studentAge | studentMathGrade
"Beatrice" | 13 | 'B'
"Clara" | 12 | 'A'
Finally, there's `foldlFrame` to summarize a dataframe by using whole rows:
Finally, there's `foldlRows` to summarize a dataframe by using whole rows:
>>> import Data.Char (ord)
>>> :{
foldlFrame
foldlRows
(\acc (MkStudent _ age grade) -> acc + age + ord grade)
(0 :: Int)
students
Expand Down

0 comments on commit 29d7489

Please sign in to comment.