Skip to content

Commit

Permalink
Documenentation updates.
Browse files Browse the repository at this point in the history
  • Loading branch information
ondrap committed Apr 16, 2015
1 parent 993f896 commit 3b9fa38
Show file tree
Hide file tree
Showing 4 changed files with 84 additions and 40 deletions.
97 changes: 74 additions & 23 deletions Data/JsonStream/Parser.hs
Original file line number Diff line number Diff line change
@@ -1,22 +1,41 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE BangPatterns #-}

-- |
-- Module : Data.JsonStream.Parser
-- License : BSD-style
--
-- Maintainer : [email protected]
-- Stability : experimental
-- Portability : portable
--
-- An incremental applicative-style JSON parser, suitable for high-performance,
-- memory-efficient stream parsing.
--
-- The parser uses "Data.Aeson" types and the 'FromJSON' class, so it can be
-- easily combined with aeson monadic parsing instances when appropriate.

module Data.JsonStream.Parser (
-- * How to use this library
-- $use

-- * The @Parser@ type
Parser
, ParseOutput(..)
-- * Parsing functions
, runParser
, runParser'
, parseByteString
, parseLazyByteString

-- * Basic JSON parsers
, value
, objectWithKey
, objectItems
, objectValues
, array
, arrayWithIndex
, indexedArray

-- * Parsing modifiers
, filterI
, toList
, defaultValue
Expand All @@ -33,6 +52,7 @@ import qualified Data.Vector as Vec

import Data.JsonStream.TokenParser

-- | Private parsing result
data ParseResult v = MoreData (Parser v, BS.ByteString -> TokenResult)
| Failed String
| Done TokenResult
Expand All @@ -47,6 +67,11 @@ instance Functor ParseResult where
fmap f (Yield v np) = Yield (f v) (fmap f np)
fmap _ (UnexpectedEnd el tok) = UnexpectedEnd el tok

-- | A representation of the parser.
--
-- A 'Parser' wraps a single step function: given a 'TokenResult' it produces
-- a 'ParseResult' carrying values of type @a@.
newtype Parser a = Parser {
callParse :: TokenResult -> ParseResult a
-- ^ Unwrap the parser and apply its step function to a 'TokenResult'.
}

-- | Mapping over a 'Parser' maps the function over the 'ParseResult' that the
-- wrapped step function produces.
instance Functor Parser where
  fmap f (Parser step) = Parser (fmap f . step)

Expand Down Expand Up @@ -92,10 +117,6 @@ instance Alternative Parser where
process (UnexpectedEnd el ntok) (UnexpectedEnd _ _) = UnexpectedEnd el ntok
process _ _ = error "Unexpected error in parallel processing <|>"

newtype Parser a = Parser {
callParse :: TokenResult -> ParseResult a
}

array' :: (Int -> Parser a) -> Parser a
array' valparse = Parser $ \tp ->
case tp of
Expand All @@ -113,19 +134,19 @@ array' valparse = Parser $ \tp ->
arrcontent _ (UnexpectedEnd ArrayEnd ntp) = Done ntp
arrcontent _ (UnexpectedEnd el _) = Failed ("Array - UnexpectedEnd: " ++ show el)

-- | Match all items of an array: the given parser is used for every element,
-- whatever its index.
array :: Parser a -> Parser a
array = array' . const

-- | Match only the array item whose index equals @idx@. Every other item is
-- handed to 'ignoreVal' instead of the supplied parser.
arrayWithIndex :: Int -> Parser a -> Parser a
arrayWithIndex idx valparse = array' $ \pos ->
  if pos == idx then valparse else ignoreVal

-- | Match all items of an array and pair every result with the index of the
-- item it was parsed from.
indexedArray :: Parser a -> Parser (Int, a)
indexedArray valparse = array' withIndex
  where
    -- The bang pattern keeps the index strict, as in the original lambda.
    withIndex !pos = fmap ((,) pos) valparse

Expand Down Expand Up @@ -154,15 +175,15 @@ object' valparse = Parser $ \tp ->
| otherwise = Failed ("Array - unexpected token: " ++ show el)


-- | Match all key-value pairs of an object. Each result of the value parser
-- is returned as a tuple together with the key it was found under.
objectItems :: Parser a -> Parser (T.Text, a)
objectItems valparse = object' withKey
  where
    -- The bang pattern keeps the key strict, as in the original lambda.
    withKey !name = fmap ((,) name) valparse

-- | Match all key-value pairs of an object, discarding the keys and returning
-- only what the value parser yields.
objectValues :: Parser a -> Parser a
objectValues = object' . const

-- | Match only specific key of an object
-- | Match only specific key of an object.
objectWithKey :: T.Text -> Parser a -> Parser a
objectWithKey name valparse = object' itemFn
where
Expand All @@ -185,7 +206,7 @@ aeValue = Parser value'
| el == ArrayEnd || el == ObjectEnd = UnexpectedEnd el ntok
| otherwise = Failed ("aeValue - unexpected token: " ++ show el)

-- | Convert a value fromjson, fail with Failed if it doesn't work
-- | Match 'FromJSON' value.
value :: AE.FromJSON a => Parser a
value = Parser $ \ntok -> loop (callParse aeValue ntok)
where
Expand Down Expand Up @@ -220,7 +241,7 @@ ignoreVal = Parser $ handleTok 0
| elm == ArrayEnd || elm == ObjectEnd = handleTok (level - 1) ntok
handleTok _ _ = Failed "UnexpectedEnd "

-- | Fetch yields of a function and return them as list
-- | Fetch yields of a function and return them as list.
toList :: Parser a -> Parser [a]
toList f = Parser $ \ntok -> loop [] (callParse f ntok)
where
Expand All @@ -242,7 +263,7 @@ filterI cond valparse = Parser $ \ntok -> loop (callParse valparse ntok)
| cond v = Yield v (loop np)
| otherwise = loop np

-- | Returns a value if none is found upstream
-- | Returns a value if none is found upstream.
defaultValue :: a -> Parser a -> Parser a
defaultValue defvalue valparse = Parser $ \ntok -> loop False (callParse valparse ntok)
where
Expand All @@ -253,7 +274,7 @@ defaultValue defvalue valparse = Parser $ \ntok -> loop False (callParse valpars
loop found (MoreData (Parser np, ntok)) = MoreData (Parser (loop found . np), ntok)
loop _ (Yield v np) = Yield v (loop True np)

-- | Tries to catch an error in underlying parser
-- | Catch an error in underlying parser.
catchFail :: Parser a -> Parser a
catchFail valparse = Parser $ \tok -> process (callParse valparse tok) (callParse ignoreVal tok)
where -- Call ignoreVal in parallel, switch to it if the first parser fails
Expand All @@ -269,12 +290,13 @@ catchFail valparse = Parser $ \tok -> process (callParse valparse tok) (callPars
MoreData (Parser (process p1 . callParse np2), ntok2)
process _ _ = Failed "Unexpected error in parallel processing catchFail."

data ParseOutput a = ParseYield a (ParseOutput a)
| ParseNeedData (BS.ByteString -> ParseOutput a)
| ParseFailed String
| ParseDone BS.ByteString
-- | Result of parsing, as returned by 'runParser' and 'runParser''.
-- Contains continuations to continue parsing.
data ParseOutput a = ParseYield a (ParseOutput a) -- ^ A value yielded by the parser, followed by the rest of the output.
| ParseNeedData (BS.ByteString -> ParseOutput a) -- ^ Parser needs more data; apply the function to the next chunk to continue.
| ParseFailed String -- ^ Parsing failed; the string is the reported error.
| ParseDone BS.ByteString -- ^ Parsing finished; the unparsed data is returned.

-- | Run streaming parser with initial input
-- | Run streaming parser with initial input.
runParser' :: Parser a -> BS.ByteString -> ParseOutput a
runParser' parser startdata = parse $ callParse parser (tokenParser startdata)
where
Expand All @@ -286,7 +308,7 @@ runParser' parser startdata = parse $ callParse parser (tokenParser startdata)
parse (Done (TokFailed rest)) = ParseDone rest
parse (Done (TokMoreData _ rest)) = ParseDone rest

-- | Run streaming parser without any initial input, immediately returns
-- 'ParseNeedData'. This is 'runParser'' started on an empty chunk.
runParser :: Parser a -> ParseOutput a
runParser = flip runParser' BS.empty

Expand All @@ -309,3 +331,32 @@ parseLazyByteString parser input = loop chunks (runParser parser)
loop _ (ParseDone _) = []
loop _ (ParseFailed err) = error err
loop rest (ParseYield v np) = v : loop rest np


-- $use
--
-- > >>> parseByteString value "[1,2,3]" :: [[Int]]
-- > [[1,2,3]]
-- The 'value' parser matches any 'AE.FromJSON' value. The above command is essentially
-- identical to the aeson decode function; however, the parsing process can yield more
-- than one value, therefore the result is a list (@[a]@).
--
-- json-stream style parsing would rather look like this:
--
-- > >>> parseByteString (array value) "[1,2,3]" :: [Int]
-- > [1,2,3]
--
-- Parsers can be combined using the '<*>' and '<|>' operators. These operators cause
-- parallel parsing and yield some combination of the parsed values.
--
-- > JSON: text = [{"name": "John", "age": 20}, {"age": 30, "name": "Frank"} ]
-- > >>> let parser = array $ (,) <$> objectWithKey "name" value
-- > <*> objectWithKey "age" value
-- > >>> parseByteString parser text :: [(Text,Int)]
-- > [("John",20),("Frank",30)]
--
-- When parsing larger values, it is advisable to use lazy ByteStrings, as the chunking
-- of the ByteStrings lets the parsing proceed more efficiently because less state
-- needs to be held in memory with parallel parsers.
--
-- More examples are available on <https://github.com/ondrap/json-stream>.
2 changes: 1 addition & 1 deletion Data/JsonStream/TokenParser.hs
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,7 @@ mainParser = do
_| isDigit chr -> parseNumber
| otherwise -> failTok

-- | Incremental lexer
-- | Incremental lexer
tokenParser :: BS.ByteString -> TokenResult
tokenParser dta = handle $ runTokParser mainParser (State dta dta)
where
Expand Down
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
# json-stream - Applicative incremental JSON parser for Haskell

> Current state: the library should be normally usable, the parsing is
> from 40% faster to 30% slower than aeson (depending on the parser grammer).
> In general if you use the applicative parser grammer, it will have lower
> from 40% faster to 30% slower than aeson (depending on the parser grammar).
> In general if you use the applicative parser grammar, it will have lower
> memory consumption and it will be faster. When you use streaming, the lower
> memory consumption becomes significant.
>
> Counting number of array elements in 120MB
> JSON file needed 1.7GB in aeson, 1.5GB with json-stream in the aeson mode
> (the grammer being just `value`). It needed 700MB when json-stream grammar
> (the grammar being just `value`). It needed 700MB when json-stream grammar
> was used and only 2MB in streaming mode when parsed data was discarded.
Standard aeson parsing library reads the whole input, creates an object in memory representing
Expand All @@ -21,7 +21,7 @@ The parsing process uses the least amount of memory possible and is completely l
check for JSON syntax and the behaviour on incorrect JSON input is undefined (it cheats quite a lot).
**The result on badly formed input is undefined.**

- E.g. if the parser expects an array and a number is found, it is reported as an error.
- If the parser expects an array and a number is found, it is reported as an error.
Not finding a particular key in an object (`objectWithKey`) will not be reported as an error.
- The ',' character in the lexer is treated as white-space.
- When a value is not needed to be parsed, it is parsed by a parser counting braces and brackets.
Expand Down
17 changes: 5 additions & 12 deletions json-stream.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,14 @@ name: json-stream
version: 0.1.0.0
synopsis: Incremental applicative JSON parser
description: Easy to use JSON parser fully supporting incremental parsing.
Parsing rule is specified simply using a few functions in
applicative form.
Parsing grammar in applicative form.

Although the parser is applicative, it is compatibile with
aeson and its FromJSON class (in fact, json-stream internally
parses the values into aeson types). It is perfectly possible
to combine the parser with aeson monadic style.
The parser is compatible with
aeson and its FromJSON class. It is possible to use aeson
monadic parsing when appropriate.

The parser supports incremental parsing while using as little
memory as possible. The performance is comparable to aeson,
it is mostly faster when used correctly.

The parser is easy on JSON grammer, it does not necesarilly
report error when badly-formed JSON is encountered. This
allows higher parsing performance.
memory as possible with performance comparable to aeson.

homepage: https://github.com/ondrap/json-stream
license: BSD3
Expand Down

0 comments on commit 3b9fa38

Please sign in to comment.