Skip to content

Commit

Permalink
Added a mechanism to look through dataframe via an index
Browse files Browse the repository at this point in the history
  • Loading branch information
LaurentRDC committed Jan 24, 2025
1 parent 74b6dce commit ef50645
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 4 deletions.
1 change: 1 addition & 0 deletions javelin-frames/javelin-frames.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ test-suite javelin-frames-test
main-is: Main.hs
other-modules: Test.Data.Frame
build-depends: base >=4.15.0.0 && <4.22,
containers,
hedgehog,
javelin-frames,
tasty,
Expand Down
35 changes: 34 additions & 1 deletion javelin-frames/src/Data/Frame.hs
Original file line number Diff line number Diff line change
Expand Up @@ -74,13 +74,17 @@ module Data.Frame (
Column, Frameable(fromRows, toRows), Row, Frame,
-- * Basic interface
mapFrame, filterFrame, zipFramesWith, foldlFrame,

-- * Indexing operations
Indexable(Key, index), lookup
) where

import Data.Functor.Identity (Identity(..))
import Data.Kind (Type)
import Data.Vector (Vector)
import qualified Data.Vector
import GHC.Generics ( Generic(..), K1(..), Rec0, M1(..), type (:*:)(..) )
import Prelude hiding (lookup)


-- | Type family which allows for higher-kinded record types
Expand Down Expand Up @@ -210,4 +214,33 @@ foldlFrame :: Frameable t
-> Frame t -- ^ Data frame
-> b
foldlFrame f start
= Data.Vector.foldl' f start . toRows
= Data.Vector.foldl' f start . toRows


-- | Typeclass for dataframes with an index, a column or set of columns that can
-- be used to search through rows.
--
-- An index need not be unique, but the type of its keys must be an instance of `Eq`.
class ( Frameable t
, Eq (Key t) -- Effectively required for lookups
) => Indexable t where

-- | A type representing a lookup key for a dataframe.
-- This can be a single field, or a compound key composed
-- of multiple fields
type Key t

-- | How to create an index from a @`Frame` t@. This is generally
-- done by using record selectors.
index :: Frame t -> Vector (Key t)


-- | Look up a row in a data frame by key.
lookup :: (Indexable t)
=> Key t
-> Frame t
-> Maybe (Row t)
lookup key fr
= do
mix <- Data.Vector.findIndex (==key) (index fr)
pure $ (toRows fr) Data.Vector.! mix
37 changes: 34 additions & 3 deletions javelin-frames/test/Test/Data/Frame.hs
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE TypeFamilies #-}
module Test.Data.Frame (tests) where

import Data.Frame
import Control.Monad (guard, forM_)
import Data.Frame as Frame
import qualified Data.Set as Set
import qualified Data.Vector as Vector

import GHC.Generics (Generic)
Expand All @@ -14,7 +17,9 @@ import Test.Tasty ( testGroup, TestTree )
import Test.Tasty.Hedgehog ( testProperty )

tests :: TestTree
tests = testGroup "Data.Frame" [ testToFromRowsTripping ]
tests = testGroup "Data.Frame" [ testToFromRowsTripping
, testLookup
]

data User f
= MkUser { userName :: Column f String
Expand All @@ -33,4 +38,30 @@ testToFromRowsTripping = testProperty "Ensure that `toRows` and `fromRows` are i
(MkUser <$> Gen.string (Range.linear 0 100) Gen.alpha
<*> Gen.integral (Range.linear 10 25)
)
users === toRows (fromRows users)
users === toRows (fromRows users)


instance Indexable User where
type Key User = String
index = userName


testLookup :: TestTree
testLookup = testProperty "Ensure that `lookup` works" $ property $ do
users <- forAll $ Vector.fromList <$>
Gen.list (Range.linear 0 100)
(MkUser <$> Gen.string (Range.linear 0 100) Gen.alpha
<*> Gen.integral (Range.linear 10 25)
)

-- This property only makes sense for a unique index
guard (unique (Vector.map userName users))

let df = fromRows users

forM_ users $ \user -> do
Frame.lookup (userName user) df === Just user

where
unique :: Ord a => Vector.Vector a -> Bool
unique vs = length (Set.fromList (Vector.toList vs)) == Vector.length vs

0 comments on commit ef50645

Please sign in to comment.