{-# LANGUAGE BangPatterns, CPP, DeriveFunctor #-}

-- | This module allows for streaming decoding of CSV data. This is
-- useful if you need to parse large amounts of input in constant
-- space. The API also allows you to ignore type conversion errors on
-- a per-record basis.
module Data.Csv.Streaming
    (
    -- * Usage example
    -- $example

    -- * Stream representation
    -- $stream-representation
      Records(..)

    -- * Decoding records
    -- $typeconversion

    -- ** Index-based record conversion
    -- $indexbased
    , HasHeader(..)
    , decode
    , decodeWith
    , decodeWithP

    -- ** Name-based record conversion
    -- $namebased
    , decodeByName
    , decodeByNameWith
    , decodeByNameWithP
    ) where

import Control.DeepSeq (NFData(rnf))
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as BL
import qualified Data.ByteString.Lazy.Char8 as BL8
import Data.Foldable (Foldable(..))
import Prelude hiding (foldr)

import Data.Csv.Conversion
import qualified Data.Csv.Conversion as Conversion
import Data.Csv.Incremental hiding (decode, decodeByName, decodeByNameWith,
                                    decodeByNameWithP, decodeWith, decodeWithP)
import qualified Data.Csv.Incremental as I
import Data.Csv.Parser
import Data.Csv.Types

-- $example
--
-- A short usage example:
--
-- > for_ (decode NoHeader "John,27\r\nJane,28\r\n") $ \ (name, age :: Int) ->
-- >     putStrLn $ name ++ " is " ++ show age ++ " years old"
--
-- N.B. The 'Foldable' instance, which is used above, skips records
-- that failed to convert. If you don't want this behavior, work
-- directly with the 'Cons' and 'Nil' constructors.

-- $stream-representation
--
-- A stream of records is represented as a (lazy) list that may
-- contain errors.

-- $typeconversion
--
-- Just like in the case of non-streaming decoding, there are two ways
-- to convert CSV records to and from user-defined data types:
-- index-based conversion and name-based conversion.

-- $indexbased
--
-- See documentation on index-based conversion in "Data.Csv" for more
-- information.

-- $namebased
--
-- See documentation on name-based conversion in "Data.Csv" for more
-- information.

-- | A stream of parsed records. If type conversion failed for the
-- record, the error is returned as @'Left' errMsg@.
data Records a
    = -- | A record or an error message, followed by more records.
      Cons (Either String a) (Records a)

      -- | End of stream, potentially due to a parse error. If a parse
      -- error occurred, the first field contains the error message.
      -- The second field contains any unconsumed input.
    | Nil (Maybe String) BL.ByteString
    deriving (Eq, Functor, Show)

-- | Skips records that failed to convert. Both folds only visit the
-- @'Right'@ payloads of 'Cons' cells; @'Left'@ entries and the
-- contents of the terminating 'Nil' are ignored.
instance Foldable Records where
    foldr  = foldrRecords
    foldl' = foldlRecords'

-- Lazy right fold over the successfully converted records of a
-- stream. Records that failed to convert (@Left@) are skipped, and
-- the fold yields the seed once the terminating 'Nil' is reached,
-- regardless of whether that 'Nil' carries a parse error.
foldrRecords :: (a -> b -> b) -> b -> Records a -> b
foldrRecords f = go
  where
    go z (Cons r rs) = case r of
        Right x -> f x (go z rs)
        Left _  -> go z rs          -- conversion failure: skip it
    go z (Nil _ _) = z
{-# INLINE foldrRecords #-}

-- Strict left fold over the successfully converted records of a
-- stream. Records that failed to convert (@Left@) are skipped. The
-- accumulator is forced to weak head normal form after every step so
-- that no chain of thunks builds up while walking the stream.
foldlRecords' :: (a -> b -> a) -> a -> Records b -> a
foldlRecords' f = go
  where
    go acc (Cons r rs) = case r of
        Right x -> let acc' = f acc x in acc' `seq` go acc' rs
        Left _  -> go acc rs        -- conversion failure: skip it
    go acc (Nil _ _) = acc
{-# INLINE foldlRecords' #-}

-- | Applies the effectful function to every successfully converted
-- record, in stream order. Conversion errors (@'Left'@) and the
-- terminating 'Nil' are carried over to the result unchanged.
instance Traversable Records where
    traverse _ (Nil merr rest) = pure $ Nil merr rest
    traverse f (Cons x xs)     = Cons <$> traverseElem x <*> traverse f xs
      where
        -- Only 'Right' payloads are passed to @f@; errors are
        -- re-wrapped without running any effect.
        traverseElem (Left err) = pure $ Left err
        traverseElem (Right y)  = Right <$> f y

-- | Fully evaluates the stream: every record (or error message), and
-- finally the optional parse error and the unconsumed input held by
-- the terminating 'Nil'.
instance NFData a => NFData (Records a) where
    rnf (Cons r rs)       = rnf r `seq` rnf rs
    rnf (Nil errMsg rest) = rnf errMsg `seq` rnf rest

-- | Efficiently deserialize CSV records in a streaming fashion.
-- Equivalent to @'decodeWith' 'defaultDecodeOptions'@.
decode :: FromRecord a
       => HasHeader      -- ^ Data contains header that should be
                         -- skipped
       -> BL.ByteString  -- ^ CSV data
       -> Records a
decode = decodeWith defaultDecodeOptions

-- | Like 'decode', but lets you customize how the CSV data is parsed.
-- Records are converted with 'parseRecord'; see 'decodeWithP' to
-- supply a different conversion function.
decodeWith :: FromRecord a
           => DecodeOptions  -- ^ Decoding options
           -> HasHeader      -- ^ Data contains header that should be
                             -- skipped
           -> BL.ByteString  -- ^ CSV data
           -> Records a
decodeWith = decodeWithP parseRecord

-- | Like 'decodeWith', but lets you specify a parser function.
--
-- The lazy input is fed to the incremental decoder one strict chunk
-- at a time, and the records produced after each chunk are prepended
-- to the (lazily computed) remainder of the stream.
--
-- @since 0.5.4.0
decodeWithP :: (Record -> Conversion.Parser a)
                             -- ^ Custom parser function
            -> DecodeOptions  -- ^ Decoding options
            -> HasHeader      -- ^ Data contains header that should be
                              -- skipped
            -> BL.ByteString  -- ^ CSV data
            -> Records a
decodeWithP _parseRecord !opts hasHeader s0 =
    go (BL.toChunks s0) (I.decodeWithP _parseRecord opts hasHeader)
  where
    -- Parser finished: emit the last records, then end the stream
    -- with whatever chunks were never handed to the parser.
    go ss (Done xs)       = foldr Cons (Nil Nothing (BL.fromChunks ss)) xs
    -- Parse error: end the stream with the message and the input
    -- that was left over (parser leftovers plus unfed chunks).
    go ss (Fail rest err) = Nil (Just err) (BL.fromChunks (rest:ss))
    -- Parser wants more input but none is left: feed an empty chunk
    -- (presumably how end of input is signalled to
    -- "Data.Csv.Incremental" -- confirm against that module).
    go [] (Many xs k)     = foldr Cons (go [] (k B.empty)) xs
    -- Parser wants more input: feed it the next chunk.
    go (s:ss) (Many xs k) = foldr Cons (go ss (k s)) xs

-- | Efficiently deserialize CSV in a streaming fashion. The data is
-- assumed to be preceded by a header. Returns @'Left' errMsg@ if
-- parsing the header fails. Equivalent to @'decodeByNameWith'
-- 'defaultDecodeOptions'@.
decodeByName :: FromNamedRecord a
             => BL.ByteString  -- ^ CSV data
             -> Either String (Header, Records a)
decodeByName = decodeByNameWith defaultDecodeOptions

-- TODO: Include something more in error messages?

-- | Like 'decodeByName', but lets you customize how the CSV data is
-- parsed. Records are converted with 'parseNamedRecord'; see
-- 'decodeByNameWithP' to supply a different conversion function.
decodeByNameWith :: FromNamedRecord a
                 => DecodeOptions  -- ^ Decoding options
                 -> BL.ByteString  -- ^ CSV data
                 -> Either String (Header, Records a)
decodeByNameWith = decodeByNameWithP parseNamedRecord

-- | Like 'decodeByNameWith', but lets you specify a parser function.
--
-- The header is parsed first; if that fails the result is
-- @'Left' errMsg@, where the message also shows the input that was
-- left over. Otherwise the header is returned together with the
-- lazily produced stream of records.
--
-- @since 0.5.4.0
decodeByNameWithP :: (NamedRecord -> Conversion.Parser a)
                                   -- ^ Custom parser function
                  -> DecodeOptions  -- ^ Decoding options
                  -> BL.ByteString  -- ^ CSV data
                  -> Either String (Header, Records a)
decodeByNameWithP _parseNamedRecord !opts s0 =
    go (BL.toChunks s0) (I.decodeByNameWithP _parseNamedRecord opts)
  where
    -- Phase 1: drive the header parser chunk by chunk.
    --
    -- Header parsed: continue with the record parser on the chunks
    -- that have not been fed yet.
    go ss (DoneH hdr p)    = Right (hdr, go2 ss p)
    -- Header parse error: report the message plus the remaining input.
    go ss (FailH rest err) = Left $ err ++ " at " ++
                             show (BL8.unpack . BL.fromChunks $ rest : ss)
    -- More input wanted but none is left: feed an empty chunk
    -- (presumably how end of input is signalled to
    -- "Data.Csv.Incremental" -- confirm against that module).
    go [] (PartialH k)     = go [] (k B.empty)
    -- More input wanted: feed the next chunk.
    go (s:ss) (PartialH k) = go ss (k s)

    -- Phase 2: drive the record parser, prepending the records
    -- produced after each chunk to the rest of the stream.
    --
    -- Parser finished: emit the last records, then end the stream
    -- with whatever chunks were never handed to the parser.
    go2 ss (Done xs)       = foldr Cons (Nil Nothing (BL.fromChunks ss)) xs
    -- Parse error: end the stream with the message and the input
    -- that was left over (parser leftovers plus unfed chunks).
    go2 ss (Fail rest err) = Nil (Just err) (BL.fromChunks (rest:ss))
    -- Out of chunks: feed an empty chunk, as in 'go' above.
    go2 [] (Many xs k)     = foldr Cons (go2 [] (k B.empty)) xs
    -- More input wanted: feed the next chunk.
    go2 (s:ss) (Many xs k) = foldr Cons (go2 ss (k s)) xs