From 427776569dce0aed6e3b1cdfe045688a19932d3d Mon Sep 17 00:00:00 2001 From: Victor Miraldo Date: Fri, 12 Jun 2020 17:06:12 +0200 Subject: [PATCH 1/4] WIP: First sketch of parametrizing the AST; the library builds alright! :) --- HaTeX.cabal | 1 + Text/LaTeX/Base/Class.hs | 2 +- Text/LaTeX/Base/Math.hs | 7 +- Text/LaTeX/Base/Render.hs | 56 ++-- Text/LaTeX/Base/Syntax.hs | 415 +++++++---------------------- Text/LaTeX/Base/Syntax/WithParm.hs | 347 ++++++++++++++++++++++++ Text/LaTeX/Base/Texy.hs | 1 + Text/LaTeX/Base/Types.hs | 6 +- Text/LaTeX/Packages/Acronym.hs | 4 +- Text/LaTeX/Packages/Bigstrut.hs | 2 +- Text/LaTeX/Packages/LTableX.hs | 2 +- Text/LaTeX/Packages/LongTable.hs | 2 +- Text/LaTeX/Packages/Lscape.hs | 2 +- Text/LaTeX/Packages/Multirow.hs | 2 +- Text/LaTeX/Packages/TabularX.hs | 2 +- 15 files changed, 493 insertions(+), 358 deletions(-) create mode 100644 Text/LaTeX/Base/Syntax/WithParm.hs diff --git a/HaTeX.cabal b/HaTeX.cabal index 834df27..962e9ed 100644 --- a/HaTeX.cabal +++ b/HaTeX.cabal @@ -88,6 +88,7 @@ Library Text.LaTeX.Base.Pretty Text.LaTeX.Base.Render Text.LaTeX.Base.Syntax + Text.LaTeX.Base.Syntax.WithParm Text.LaTeX.Base.Texy Text.LaTeX.Base.Types Text.LaTeX.Base.Writer diff --git a/Text/LaTeX/Base/Class.hs b/Text/LaTeX/Base/Class.hs index e11d5e0..a2d95fb 100644 --- a/Text/LaTeX/Base/Class.hs +++ b/Text/LaTeX/Base/Class.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE CPP #-} +{-# LANGUAGE CPP , TypeSynonymInstances , FlexibleInstances #-} {-# OPTIONS_GHC -fno-warn-name-shadowing #-} -- | Definition of the 'LaTeXC' class, used to combine the classic applicative and diff --git a/Text/LaTeX/Base/Math.hs b/Text/LaTeX/Base/Math.hs index 01a47e3..05849ba 100644 --- a/Text/LaTeX/Base/Math.hs +++ b/Text/LaTeX/Base/Math.hs @@ -1,6 +1,7 @@ - -{-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE CPP #-} +{-# LANGUAGE TypeSynonymInstances #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE CPP #-} -- | This module contains the maths-specific part of "Text.LaTeX.Base.Commands", -- i.e. of the commands that are available in LaTeX out of the box without diff --git a/Text/LaTeX/Base/Render.hs b/Text/LaTeX/Base/Render.hs index 51705d3..abc1bca 100644 --- a/Text/LaTeX/Base/Render.hs +++ b/Text/LaTeX/Base/Render.hs @@ -22,7 +22,7 @@ module Text.LaTeX.Base.Render , showFloat ) where -import Text.LaTeX.Base.Syntax +import Text.LaTeX.Base.Syntax.WithParm import Text.LaTeX.Base.Class import Data.String import Data.List (intersperse) @@ -105,12 +105,12 @@ readFileTex = fmap decodeUtf8 . B.readFile -- -- /Warning: /'rendertex'/ does not escape LaTeX reserved characters./ -- /Use /'protectText'/ to escape them./ -rendertex :: (Render a,LaTeXC l) => a -> l -rendertex = fromLaTeX . TeXRaw . render +rendertex :: (Render a, LaTeXC l) => a -> l +rendertex = fromLaTeX . TeXRaw () . render -- Render instances -instance Render Measure where +instance (Show a) => Render (Measure a) where render (Pt x) = render x <> "pt" render (Mm x) = render x <> "mm" render (Cm x) = render x <> "cm" @@ -121,18 +121,18 @@ instance Render Measure where -- LaTeX instances -instance Render LaTeX where +instance (Show a) => Render (LaTeX a) where - renderBuilder (TeXRaw t) = Builder.fromText t + renderBuilder (TeXRaw _ t) = Builder.fromText t - renderBuilder (TeXComm name []) = "\\" <> fromString name <> "{}" - renderBuilder (TeXComm name args) = + renderBuilder (TeXComm _ name []) = "\\" <> fromString name <> "{}" + renderBuilder (TeXComm _ name args) = "\\" <> fromString name <> renderAppendBuilder args - renderBuilder (TeXCommS name) = "\\" <> fromString name + renderBuilder (TeXCommS _ name) = "\\" <> fromString name - renderBuilder (TeXEnv name args c) = + renderBuilder (TeXEnv _ _ name args c) = "\\begin{" <> fromString name <> "}" @@ -142,16 +142,16 @@ instance Render LaTeX where <> fromString name <> "}" - renderBuilder (TeXMath Dollar l) = "$" <> renderBuilder l <> "$" - renderBuilder (TeXMath DoubleDollar l) = "$$" <> renderBuilder l <> "$$" - renderBuilder (TeXMath Square l) = "\\[" <> renderBuilder l <> "\\]" - renderBuilder (TeXMath Parentheses l) = "\\(" <> renderBuilder l <> "\\)" + renderBuilder (TeXMath _ Dollar l) = "$" <> renderBuilder l <> "$" + renderBuilder (TeXMath _ DoubleDollar l) = "$$" <> renderBuilder l <> "$$" + renderBuilder (TeXMath _ Square l) = "\\[" <> renderBuilder l <> "\\]" + renderBuilder (TeXMath _ Parentheses l) = "\\(" <> renderBuilder l <> "\\)" - renderBuilder (TeXLineBreak m b) = "\\\\" <> maybe mempty (\x -> "[" <> renderBuilder x <> "]") m <> ( if b then "*" else mempty ) + renderBuilder (TeXLineBreak _ m b) = "\\\\" <> maybe mempty (\x -> "[" <> renderBuilder x <> "]") m <> ( if b then "*" else mempty ) - renderBuilder (TeXBraces l) = "{" <> renderBuilder l <> "}" + renderBuilder (TeXBraces _ l) = "{" <> renderBuilder l <> "}" - renderBuilder (TeXComment c) = + renderBuilder (TeXComment _ c) = let xs = Data.Text.lines c in if null xs then "%\n" else Builder.fromText $ Data.Text.unlines $ fmap ("%" <>) xs @@ -161,17 +161,17 @@ instance Render LaTeX where render = renderDefault -instance Render TeXArg where - renderBuilder (FixArg l) = "{" <> renderBuilder l <> "}" - renderBuilder (OptArg l) = "[" <> renderBuilder l <> "]" - renderBuilder (MOptArg []) = mempty - renderBuilder (MOptArg ls) = "[" <> renderCommasBuilder ls <> "]" - renderBuilder (SymArg l) = "<" <> renderBuilder l <> ">" - renderBuilder (MSymArg []) = mempty - renderBuilder (MSymArg ls) = "<" <> renderCommasBuilder ls <> ">" - renderBuilder (ParArg l) = "(" <> renderBuilder l <> ")" - renderBuilder (MParArg []) = mempty - renderBuilder (MParArg ls) = "(" <> renderCommasBuilder ls <> ")" +instance (Show a) => Render (TeXArg a) where + renderBuilder (FixArg _ l) = "{" <> renderBuilder l <> "}" + renderBuilder (OptArg _ l) = "[" <> renderBuilder l <> "]" + renderBuilder (MOptArg _ []) = mempty + renderBuilder (MOptArg _ ls) = "[" <> renderCommasBuilder ls <> "]" + renderBuilder (SymArg _ l) = "<" <> renderBuilder l <> ">" + renderBuilder (MSymArg _ []) = mempty + renderBuilder (MSymArg _ ls) = "<" <> renderCommasBuilder ls <> ">" + renderBuilder (ParArg _ l) = "(" <> renderBuilder l <> ")" + renderBuilder (MParArg _ []) = mempty + renderBuilder (MParArg _ ls) = "(" <> renderCommasBuilder ls <> ")" render = renderDefault -- Other instances diff --git a/Text/LaTeX/Base/Syntax.hs b/Text/LaTeX/Base/Syntax.hs index 29b5ef8..daabe2f 100644 --- a/Text/LaTeX/Base/Syntax.hs +++ b/Text/LaTeX/Base/Syntax.hs @@ -1,347 +1,132 @@ - -{-# LANGUAGE CPP, DeriveDataTypeable, DeriveGeneric #-} - --- | LaTeX syntax description in the definition of the 'LaTeX' datatype. --- If you want to add new commands or environments not defined in --- the library, import this module and use 'LaTeX' data constructors. -module Text.LaTeX.Base.Syntax +{-# LANGUAGE PatternSynonyms #-} +module Text.LaTeX.Base.Syntax ( -- * @LaTeX@ datatype - Measure (..) - , MathType (..) - , LaTeX (..) - , TeXArg (..) - , (<>), between + S.MathType (..) + , Measure + , pattern Pt + , pattern Mm + , pattern Cm + , pattern In + , pattern Ex + , pattern Em + , pattern CustomMeasure + + , LaTeX + , pattern TeXRaw + , pattern TeXComm + , pattern TeXCommS + , pattern TeXEnv + , pattern TeXMath + , pattern TeXLineBreak + , pattern TeXBraces + , pattern TeXComment + , pattern TeXSeq + , pattern TeXEmpty + + , TeXArg + , pattern FixArg + , pattern OptArg + , pattern MOptArg + , pattern SymArg + , pattern MSymArg + , pattern ParArg + , pattern MParArg + , (<>), S.between -- * Escaping reserved characters - , protectString - , protectText + , S.protectString + , S.protectText -- * Syntax analysis - , matchCommand - , lookForCommand - , matchEnv - , lookForEnv - , texmap - , texmapM + , S.matchCommand + , S.lookForCommand + , S.matchEnv + , S.lookForEnv + , S.texmap + , S.texmapM -- ** Utils - , getBody - , getPreamble + , S.getBody + , S.getPreamble ) where -import Data.Text (Text,pack) -import qualified Data.Text -import qualified Data.Semigroup as Semigroup -import Data.String -import Control.Applicative -import Control.Monad (replicateM) -import Data.Functor.Identity (runIdentity) -import Data.Data (Data) -import Data.Typeable -import Test.QuickCheck -import Data.Hashable -import GHC.Generics (Generic) -#if !MIN_VERSION_base(4,11,0) -import Data.Monoid -#endif - --- | Measure units defined in LaTeX. Use 'CustomMeasure' to use commands like 'textwidth'. --- For instance: --- --- > rule Nothing (CustomMeasure linewidth) (Pt 2) --- --- This will create a black box (see 'rule') as wide as the text and two points tall. --- -data Measure = - Pt Double -- ^ A point is 1/72.27 inch, that means about 0.0138 inch or 0.3515 mm. - | Mm Double -- ^ Millimeter. - | Cm Double -- ^ Centimeter. - | In Double -- ^ Inch. - | Ex Double -- ^ The height of an \"x\" in the current font. - | Em Double -- ^ The width of an \"M\" in the current font. - | CustomMeasure LaTeX -- ^ You can introduce a 'LaTeX' expression as a measure. - deriving (Data, Eq, Generic, Show, Typeable) - --- | Different types of syntax for mathematical expressions. -data MathType = Parentheses | Square | Dollar | DoubleDollar - deriving (Data, Eq, Generic, Show, Typeable) - --- | Type of @LaTeX@ blocks. -data LaTeX = - TeXRaw Text -- ^ Raw text. - | TeXComm String [TeXArg] -- ^ Constructor for commands. - -- First argument is the name of the command. - -- Second, its arguments. - | TeXCommS String -- ^ Constructor for commands with no arguments. - -- When rendering, no space or @{}@ will be added at - -- the end. - | TeXEnv String [TeXArg] LaTeX -- ^ Constructor for environments. - -- First argument is the name of the environment. - -- Second, its arguments. - -- Third, its content. - | TeXMath MathType LaTeX -- ^ Mathematical expressions. - | TeXLineBreak (Maybe Measure) Bool -- ^ Line break command. - | TeXBraces LaTeX -- ^ A expression between braces. - | TeXComment Text -- ^ Comments. - | TeXSeq LaTeX LaTeX -- ^ Sequencing of 'LaTeX' expressions. - -- Use '<>' preferably. - | TeXEmpty -- ^ An empty block. - -- /Neutral element/ of '<>'. - deriving (Data, Eq, Generic, Show, Typeable) - --- | An argument for a 'LaTeX' command or environment. -data TeXArg = - FixArg LaTeX -- ^ Fixed argument. - | OptArg LaTeX -- ^ Optional argument. - | MOptArg [LaTeX] -- ^ Multiple optional argument. - | SymArg LaTeX -- ^ An argument enclosed between @\<@ and @\>@. - | MSymArg [LaTeX] -- ^ Version of 'SymArg' with multiple options. - | ParArg LaTeX -- ^ An argument enclosed between @(@ and @)@. - | MParArg [LaTeX] -- ^ Version of 'ParArg' with multiple options. - deriving (Data, Eq, Generic, Show, Typeable) - --- Monoid instance for 'LaTeX'. - --- | Method 'mappend' is strict in both arguments (except in the case when the first argument is 'TeXEmpty'). -instance Monoid LaTeX where - mempty = TeXEmpty - mappend TeXEmpty x = x - mappend x TeXEmpty = x - -- This equation is to make 'mappend' associative. - mappend (TeXSeq x y) z = TeXSeq x $ mappend y z - -- - mappend x y = TeXSeq x y - -instance Semigroup.Semigroup LaTeX where - (<>) = mappend +import Data.Text(Text) --- | Calling 'between' @c l1 l2@ puts @c@ between @l1@ and @l2@ and --- appends them. --- --- > between c l1 l2 = l1 <> c <> l2 -between :: Monoid m => m -> m -> m -> m -between c l1 l2 = l1 <> c <> l2 +import qualified Text.LaTeX.Base.Syntax.WithParm as S --- | Method 'fromString' escapes LaTeX reserved characters using 'protectString'. -instance IsString LaTeX where - fromString = TeXRaw . fromString . protectString +type Measure = S.Measure () +type LaTeX = S.LaTeX () +type TeXArg = S.TeXArg () --- | Escape LaTeX reserved characters in a 'String'. -protectString :: String -> String -protectString = mconcat . fmap protectChar +{-# COMPLETE TeXRaw, TeXComm , TeXCommS , TeXEnv , TeXMath , TeXLineBreak + , TeXBraces , TeXSeq , TeXEmpty #-} +pattern TeXRaw :: Text -> LaTeX +pattern TeXRaw t = S.TeXRaw () t --- | Escape LaTeX reserved characters in a 'Text'. -protectText :: Text -> Text -protectText = Data.Text.concatMap (fromString . protectChar) +pattern TeXComm :: String -> [TeXArg] -> LaTeX +pattern TeXComm s l = S.TeXComm () s l -protectChar :: Char -> String -protectChar '#' = "\\#" -protectChar '$' = "\\$" -protectChar '%' = "\\%" -protectChar '^' = "\\^{}" -protectChar '&' = "\\&" -protectChar '{' = "\\{" -protectChar '}' = "\\}" -protectChar '~' = "\\~{}" -protectChar '\\' = "\\textbackslash{}" -protectChar '_' = "\\_{}" -protectChar x = [x] +pattern TeXCommS :: String -> LaTeX +pattern TeXCommS s = S.TeXCommS () s --- Syntax analysis +pattern TeXEnv :: String -> [TeXArg] -> LaTeX -> LaTeX +pattern TeXEnv s l e = S.TeXEnv () () s l e --- | Look into a 'LaTeX' syntax tree to find any call to the command with --- the given name. It returns a list of arguments with which this command --- is called. --- --- > lookForCommand = (fmap snd .) . matchCommand . (==) --- --- If the returned list is empty, the command was not found. However, --- if the list contains empty lists, those are callings to the command --- with no arguments. --- --- For example --- --- > lookForCommand "author" l --- --- would look for the argument passed to the @\\author@ command in @l@. -lookForCommand :: String -- ^ Name of the command. - -> LaTeX -- ^ LaTeX syntax tree. - -> [[TeXArg]] -- ^ List of arguments passed to the command. -lookForCommand = (fmap snd .) . matchCommand . (==) +pattern TeXMath :: S.MathType -> LaTeX -> LaTeX +pattern TeXMath m e = S.TeXMath () m e --- | Traverse a 'LaTeX' syntax tree and returns the commands (see 'TeXComm' and --- 'TeXCommS') that matches the condition and their arguments in each call. -matchCommand :: (String -> Bool) -> LaTeX -> [(String,[TeXArg])] -matchCommand f (TeXComm str as) = - let xs = concatMap (matchCommandArg f) as - in if f str then (str,as) : xs else xs -matchCommand f (TeXCommS str) = [(str, []) | f str] -matchCommand f (TeXEnv _ as l) = - let xs = concatMap (matchCommandArg f) as - in xs ++ matchCommand f l -matchCommand f (TeXMath _ l) = matchCommand f l -matchCommand f (TeXBraces l) = matchCommand f l -matchCommand f (TeXSeq l1 l2) = matchCommand f l1 ++ matchCommand f l2 -matchCommand _ _ = [] +pattern TeXLineBreak :: Maybe (Measure) -> Bool -> LaTeX +pattern TeXLineBreak m e = S.TeXLineBreak () m e -matchCommandArg :: (String -> Bool) -> TeXArg -> [(String,[TeXArg])] -matchCommandArg f (OptArg l ) = matchCommand f l -matchCommandArg f (FixArg l ) = matchCommand f l -matchCommandArg f (MOptArg ls) = concatMap (matchCommand f) ls -matchCommandArg f (SymArg l ) = matchCommand f l -matchCommandArg f (MSymArg ls) = concatMap (matchCommand f) ls -matchCommandArg f (ParArg l ) = matchCommand f l -matchCommandArg f (MParArg ls) = concatMap (matchCommand f) ls +pattern TeXComment :: Text -> LaTeX +pattern TeXComment t = S.TeXComment () t --- | Similar to 'lookForCommand', but applied to environments. --- It returns a list with arguments passed and content of the --- environment in each call. --- --- > lookForEnv = (fmap (\(_,as,l) -> (as,l)) .) . matchEnv . (==) --- -lookForEnv :: String -> LaTeX -> [([TeXArg],LaTeX)] -lookForEnv = (fmap (\(_,as,l) -> (as,l)) .) . matchEnv . (==) +pattern TeXBraces :: LaTeX -> LaTeX +pattern TeXBraces t = S.TeXBraces () t --- | Traverse a 'LaTeX' syntax tree and returns the environments (see --- 'TeXEnv') that matches the condition, their arguments and their content --- in each call. -matchEnv :: (String -> Bool) -> LaTeX -> [(String,[TeXArg],LaTeX)] -matchEnv f (TeXComm _ as) = concatMap (matchEnvArg f) as -matchEnv f (TeXEnv str as l) = - let xs = concatMap (matchEnvArg f) as - ys = matchEnv f l - zs = xs ++ ys - in if f str then (str,as,l) : zs else zs -matchEnv f (TeXMath _ l) = matchEnv f l -matchEnv f (TeXBraces l) = matchEnv f l -matchEnv f (TeXSeq l1 l2) = matchEnv f l1 ++ matchEnv f l2 -matchEnv _ _ = [] +pattern TeXSeq :: LaTeX -> LaTeX -> LaTeX +pattern TeXSeq m n = S.TeXSeq m n -matchEnvArg :: (String -> Bool) -> TeXArg -> [(String,[TeXArg],LaTeX)] -matchEnvArg f (OptArg l ) = matchEnv f l -matchEnvArg f (FixArg l ) = matchEnv f l -matchEnvArg f (MOptArg ls) = concatMap (matchEnv f) ls -matchEnvArg f (SymArg l ) = matchEnv f l -matchEnvArg f (MSymArg ls) = concatMap (matchEnv f) ls -matchEnvArg f (ParArg l ) = matchEnv f l -matchEnvArg f (MParArg ls) = concatMap (matchEnv f) ls +pattern TeXEmpty :: LaTeX +pattern TeXEmpty = S.TeXEmpty --- | The function 'texmap' looks for subexpressions that match a given --- condition and applies a function to them. --- --- > texmap c f = runIdentity . texmapM c (pure . f) -texmap :: (LaTeX -> Bool) -- ^ Condition. - -> (LaTeX -> LaTeX) -- ^ Function to apply when the condition matches. - -> LaTeX -> LaTeX -texmap c f = runIdentity . texmapM c (pure . f) +{-# COMPLETE FixArg , OptArg , MOptArg , SymArg , MSymArg , ParArg , MParArg #-} +pattern FixArg :: LaTeX -> TeXArg +pattern FixArg m = S.FixArg () m --- | Version of 'texmap' where the function returns values in a 'Monad'. -texmapM :: (Applicative m, Monad m) - => (LaTeX -> Bool) -- ^ Condition. - -> (LaTeX -> m LaTeX) -- ^ Function to apply when the condition matches. - -> LaTeX -> m LaTeX -texmapM c f = go - where - go l@(TeXComm str as) = if c l then f l else TeXComm str <$> mapM go' as - go l@(TeXEnv str as b) = if c l then f l else TeXEnv str <$> mapM go' as <*> go b - go l@(TeXMath t b) = if c l then f l else TeXMath t <$> go b - go l@(TeXBraces b) = if c l then f l else TeXBraces <$> go b - go l@(TeXSeq l1 l2) = if c l then f l else liftA2 TeXSeq (go l1) (go l2) - go l = if c l then f l else pure l - -- - go' (FixArg l ) = FixArg <$> go l - go' (OptArg l ) = OptArg <$> go l - go' (MOptArg ls) = MOptArg <$> mapM go ls - go' (SymArg l ) = SymArg <$> go l - go' (MSymArg ls) = MSymArg <$> mapM go ls - go' (ParArg l ) = ParArg <$> go l - go' (MParArg ls) = MParArg <$> mapM go ls +pattern OptArg :: LaTeX -> TeXArg +pattern OptArg x = S.OptArg () x --- | Extract the content of the 'document' environment, if present. -getBody :: LaTeX -> Maybe LaTeX -getBody l = - case lookForEnv "document" l of - ((_,b):_) -> Just b - _ -> Nothing +pattern MOptArg :: [LaTeX] -> TeXArg +pattern MOptArg x = S.MOptArg () x --- | Extract the preamble of a 'LaTeX' document (everything before the 'document' --- environment). It could be empty. -getPreamble :: LaTeX -> LaTeX -getPreamble (TeXEnv "document" _ _) = mempty -getPreamble (TeXSeq l1 l2) = getPreamble l1 <> getPreamble l2 -getPreamble l = l +pattern SymArg :: LaTeX -> TeXArg +pattern SymArg x = S.SymArg () x ---------------------------------------- --- LaTeX Arbitrary instance +pattern MSymArg :: [LaTeX] -> TeXArg +pattern MSymArg x = S.MSymArg () x -arbitraryChar :: Gen Char -arbitraryChar = elements $ - ['A'..'Z'] - ++ ['a'..'z'] - ++ "\n-+*/!\"().,:;'@<>? " +pattern ParArg :: LaTeX -> TeXArg +pattern ParArg x = S.ParArg () x --- | Utility for the instance of 'LaTeX' to 'Arbitrary'. --- We generate a short sequence of characters and --- escape reserved characters with 'protectText'. -arbitraryRaw :: Gen Text -arbitraryRaw = do - n <- choose (1,20) - protectText . pack <$> replicateM n arbitraryChar +pattern MParArg :: [LaTeX] -> TeXArg +pattern MParArg x = S.MParArg () x --- | Generator for names of command and environments. --- We use only alphabetical characters. -arbitraryName :: Gen String -arbitraryName = do - n <- choose (1,10) - replicateM n $ elements $ ['a' .. 'z'] ++ ['A' .. 'Z'] +{-# COMPLETE Pt , Mm , Cm , In , Ex , Em , CustomMeasure #-} +pattern Pt :: Double -> Measure +pattern Pt x = S.Pt x -instance Arbitrary Measure where - arbitrary = do - n <- choose (0,5) - let f = [Pt,Mm,Cm,In,Ex,Em] !! n - f <$> arbitrary +pattern Mm :: Double -> Measure +pattern Mm x = S.Mm x -instance Arbitrary LaTeX where - arbitrary = arbitraryLaTeX False +pattern Cm :: Double -> Measure +pattern Cm x = S.Cm x -arbitraryLaTeX :: Bool -> Gen LaTeX -arbitraryLaTeX inDollar = do - -- We give more chances to 'TeXRaw'. - -- This results in arbitrary 'LaTeX' values - -- not getting too large. - n <- choose (0,16 :: Int) - case n of - 0 -> if inDollar then arbitraryLaTeX True else pure TeXEmpty - 1 -> do m <- choose (0,5) - TeXComm <$> arbitraryName <*> vectorOf m arbitrary - 2 -> TeXCommS <$> arbitraryName - 3 -> do m <- choose (0,5) - TeXEnv <$> arbitraryName <*> vectorOf m arbitrary <*> arbitrary - 4 -> if inDollar - then arbitraryLaTeX True - else do m <- choose (0,3) - let t = [Parentheses,Square,Dollar,DoubleDollar] !! m - TeXMath <$> pure t <*> arbitraryLaTeX (t == Dollar || t == DoubleDollar) - 5 -> TeXLineBreak <$> arbitrary <*> arbitrary - 6 -> TeXBraces <$> arbitrary - 7 -> TeXComment <$> arbitraryRaw - 8 -> TeXSeq <$> (if inDollar then arbitraryLaTeX True else arbitrary) <*> arbitrary - _ -> TeXRaw <$> arbitraryRaw +pattern In :: Double -> Measure +pattern In x = S.In x -instance Arbitrary TeXArg where - arbitrary = do - n <- choose (0,6 :: Int) - case n of - 0 -> OptArg <$> arbitrary - 1 -> do m <- choose (1,5) - MOptArg <$> vectorOf m arbitrary - 2 -> SymArg <$> arbitrary - 3 -> do m <- choose (1,5) - MSymArg <$> vectorOf m arbitrary - 4 -> ParArg <$> arbitrary - 5 -> do m <- choose (1,5) - MParArg <$> vectorOf m arbitrary - _ -> FixArg <$> arbitrary +pattern Ex :: Double -> Measure +pattern Ex x = S.Ex x +pattern Em :: Double -> Measure +pattern Em x = S.Em x -instance Hashable Measure -instance Hashable MathType -instance Hashable TeXArg -instance Hashable LaTeX +pattern CustomMeasure :: LaTeX -> Measure +pattern CustomMeasure x = S.CustomMeasure x diff --git a/Text/LaTeX/Base/Syntax/WithParm.hs b/Text/LaTeX/Base/Syntax/WithParm.hs new file mode 100644 index 0000000..7c59acf --- /dev/null +++ b/Text/LaTeX/Base/Syntax/WithParm.hs @@ -0,0 +1,347 @@ +{-# LANGUAGE FlexibleInstances, CPP, DeriveDataTypeable, DeriveGeneric, DeriveFunctor #-} + +-- | LaTeX syntax description in the definition of the 'LaTeX' datatype. +-- If you want to add new commands or environments not defined in +-- the library, import this module and use 'LaTeX' data constructors. +module Text.LaTeX.Base.Syntax.WithParm + ( -- * @LaTeX@ datatype + Measure (..) + , MathType (..) + , LaTeX (..) + , TeXArg (..) + , (<>), between + -- * Escaping reserved characters + , protectString + , protectText + -- * Syntax analysis + , matchCommand + , lookForCommand + , matchEnv + , lookForEnv + , texmap + , texmapM + -- ** Utils + , getBody + , getPreamble + ) where + +import Data.Text (Text,pack) +import qualified Data.Text +import qualified Data.Semigroup as Semigroup +import Data.String +import Control.Applicative +import Control.Monad (replicateM) +import Data.Functor.Identity (runIdentity) +import Data.Data (Data) +import Data.Typeable +import Test.QuickCheck +import Data.Hashable +import GHC.Generics (Generic) +#if !MIN_VERSION_base(4,11,0) +import Data.Monoid +#endif + +-- | Measure units defined in LaTeX. Use 'CustomMeasure' to use commands like 'textwidth'. +-- For instance: +-- +-- > rule Nothing (CustomMeasure linewidth) (Pt 2) +-- +-- This will create a black box (see 'rule') as wide as the text and two points tall. +-- +data Measure a = + Pt Double -- ^ A point is 1/72.27 inch, that means about 0.0138 inch or 0.3515 mm. + | Mm Double -- ^ Millimeter. + | Cm Double -- ^ Centimeter. + | In Double -- ^ Inch. + | Ex Double -- ^ The height of an \"x\" in the current font. + | Em Double -- ^ The width of an \"M\" in the current font. + | CustomMeasure (LaTeX a) -- ^ You can introduce a 'LaTeX' expression as a measure. + deriving (Data, Eq, Generic, Show, Typeable, Functor) + +-- | Different types of syntax for mathematical expressions. +data MathType = Parentheses | Square | Dollar | DoubleDollar | LHSInline + deriving (Data, Eq, Generic, Show, Typeable) + +-- | Type of @LaTeX@ blocks. The @a@ type variable can be used for +-- a number of things but mainly stores source location. +data LaTeX a = + TeXRaw a Text -- ^ Raw text. + | TeXComm a String [TeXArg a] -- ^ Constructor for commands. + -- First argument is the name of the command. + -- Second, its arguments. + | TeXCommS a String -- ^ Constructor for commands with no arguments. + -- When rendering, no space or @{}@ will be added at + -- the end. + | TeXEnv a a String [TeXArg a] (LaTeX a) -- ^ Constructor for environments. + -- First argument is the name of the environment. + -- Second, its arguments. + -- Third, its content. + | TeXMath a MathType (LaTeX a) -- ^ Mathematical expressions. + | TeXLineBreak a (Maybe (Measure a)) Bool -- ^ Line break command. + | TeXBraces a (LaTeX a) -- ^ A expression between braces. + | TeXComment a Text -- ^ Comments. + | TeXSeq (LaTeX a) (LaTeX a) -- ^ Sequencing of 'LaTeX' expressions. + -- Use '<>' preferably. + | TeXEmpty -- ^ An empty block. + -- /Neutral element/ of '<>'. + deriving (Data, Eq, Generic, Show, Typeable, Functor) + +-- | An argument for a 'LaTeX' command or environment. +data TeXArg a = + FixArg a (LaTeX a) -- ^ Fixed argument. + | OptArg a (LaTeX a) -- ^ Optional argument. + | MOptArg a [LaTeX a] -- ^ Multiple optional argument. + | SymArg a (LaTeX a) -- ^ An argument enclosed between @\<@ and @\>@. + | MSymArg a [LaTeX a] -- ^ Version of 'SymArg' with multiple options. + | ParArg a (LaTeX a) -- ^ An argument enclosed between @(@ and @)@. + | MParArg a [LaTeX a] -- ^ Version of 'ParArg' with multiple options. + deriving (Data, Eq, Generic, Show, Typeable, Functor) + +-- Monoid instance for 'LaTeX'. + +-- | Method 'mappend' is strict in both arguments (except in the case when the first argument is 'TeXEmpty'). +instance Monoid (LaTeX a) where + mempty = TeXEmpty + mappend TeXEmpty x = x + mappend x TeXEmpty = x + -- This equation is to make 'mappend' associative. + mappend (TeXSeq x y) z = TeXSeq x $ mappend y z + -- + mappend x y = TeXSeq x y + +instance Semigroup.Semigroup (LaTeX a) where + (<>) = mappend + +-- | Calling 'between' @c l1 l2@ puts @c@ between @l1@ and @l2@ and +-- appends them. +-- +-- > between c l1 l2 = l1 <> c <> l2 +between :: Monoid m => m -> m -> m -> m +between c l1 l2 = l1 <> c <> l2 + +-- | Method 'fromString' escapes LaTeX reserved characters using 'protectString'. +instance IsString (LaTeX ()) where + fromString = TeXRaw () . fromString . protectString + +-- | Escape LaTeX reserved characters in a 'String'. +protectString :: String -> String +protectString = mconcat . fmap protectChar + +-- | Escape LaTeX reserved characters in a 'Text'. +protectText :: Text -> Text +protectText = Data.Text.concatMap (fromString . protectChar) + +protectChar :: Char -> String +protectChar '#' = "\\#" +protectChar '$' = "\\$" +protectChar '%' = "\\%" +protectChar '^' = "\\^{}" +protectChar '&' = "\\&" +protectChar '{' = "\\{" +protectChar '}' = "\\}" +protectChar '~' = "\\~{}" +protectChar '\\' = "\\textbackslash{}" +protectChar '_' = "\\_{}" +protectChar x = [x] + +-- Syntax analysis + +-- | Look into a 'LaTeX' syntax tree to find any call to the command with +-- the given name. It returns a list of arguments with which this command +-- is called. +-- +-- > lookForCommand = (fmap snd .) . matchCommand . (==) +-- +-- If the returned list is empty, the command was not found. However, +-- if the list contains empty lists, those are callings to the command +-- with no arguments. +-- +-- For example +-- +-- > lookForCommand "author" l +-- +-- would look for the argument passed to the @\\author@ command in @l@. +lookForCommand :: String -- ^ Name of the command. + -> LaTeX a -- ^ LaTeX syntax tree. + -> [[TeXArg a]] -- ^ List of arguments passed to the command. +lookForCommand = (fmap snd .) . matchCommand . (==) + +-- | Traverse a 'LaTeX' syntax tree and returns the commands (see 'TeXComm' and +-- 'TeXCommS') that matches the condition and their arguments in each call. +matchCommand :: (String -> Bool) -> LaTeX a -> [(String,[TeXArg a])] +matchCommand f (TeXComm _ str as) = + let xs = concatMap (matchCommandArg f) as + in if f str then (str,as) : xs else xs +matchCommand f (TeXCommS _ str) = [(str, []) | f str] +matchCommand f (TeXEnv _ _ _ as l) = + let xs = concatMap (matchCommandArg f) as + in xs ++ matchCommand f l +matchCommand f (TeXMath _ _ l) = matchCommand f l +matchCommand f (TeXBraces _ l) = matchCommand f l +matchCommand f (TeXSeq l1 l2) = matchCommand f l1 ++ matchCommand f l2 +matchCommand _ _ = [] + +matchCommandArg :: (String -> Bool) -> TeXArg a -> [(String,[TeXArg a])] +matchCommandArg f (OptArg _ l ) = matchCommand f l +matchCommandArg f (FixArg _ l ) = matchCommand f l +matchCommandArg f (MOptArg _ ls) = concatMap (matchCommand f) ls +matchCommandArg f (SymArg _ l ) = matchCommand f l +matchCommandArg f (MSymArg _ ls) = concatMap (matchCommand f) ls +matchCommandArg f (ParArg _ l ) = matchCommand f l +matchCommandArg f (MParArg _ ls) = concatMap (matchCommand f) ls + +-- | Similar to 'lookForCommand', but applied to environments. +-- It returns a list with arguments passed and content of the +-- environment in each call. +-- +-- > lookForEnv = (fmap (\(_,as,l) -> (as,l)) .) . matchEnv . (==) +-- +lookForEnv :: String -> LaTeX a -> [([TeXArg a],LaTeX a)] +lookForEnv = (fmap (\(_,as,l) -> (as,l)) .) . matchEnv . (==) + +-- | Traverse a 'LaTeX' syntax tree and returns the environments (see +-- 'TeXEnv') that matches the condition, their arguments and their content +-- in each call. +matchEnv :: (String -> Bool) -> LaTeX a -> [(String,[TeXArg a],LaTeX a)] +matchEnv f (TeXComm _ _ as) = concatMap (matchEnvArg f) as +matchEnv f (TeXEnv _ _ str as l) = + let xs = concatMap (matchEnvArg f) as + ys = matchEnv f l + zs = xs ++ ys + in if f str then (str,as,l) : zs else zs +matchEnv f (TeXMath _ _ l) = matchEnv f l +matchEnv f (TeXBraces _ l) = matchEnv f l +matchEnv f (TeXSeq l1 l2) = matchEnv f l1 ++ matchEnv f l2 +matchEnv _ _ = [] + +matchEnvArg :: (String -> Bool) -> TeXArg a -> [(String,[TeXArg a],LaTeX a)] +matchEnvArg f (OptArg _ l ) = matchEnv f l +matchEnvArg f (FixArg _ l ) = matchEnv f l +matchEnvArg f (MOptArg _ ls) = concatMap (matchEnv f) ls +matchEnvArg f (SymArg _ l ) = matchEnv f l +matchEnvArg f (MSymArg _ ls) = concatMap (matchEnv f) ls +matchEnvArg f (ParArg _ l ) = matchEnv f l +matchEnvArg f (MParArg _ ls) = concatMap (matchEnv f) ls + +-- | The function 'texmap' looks for subexpressions that match a given +-- condition and applies a function to them. +-- +-- > texmap c f = runIdentity . texmapM c (pure . f) +texmap :: (LaTeX a -> Bool) -- ^ Condition. + -> (LaTeX a -> LaTeX a) -- ^ Function to apply when the condition matches. + -> LaTeX a -> LaTeX a +texmap c f = runIdentity . texmapM c (pure . f) + +-- | Version of 'texmap' where the function returns values in a 'Monad'. +texmapM :: (Applicative m, Monad m) + => (LaTeX a -> Bool) -- ^ Condition. + -> (LaTeX a -> m (LaTeX a)) -- ^ Function to apply when the condition matches. + -> LaTeX a -> m (LaTeX a) +texmapM c f = go + where + go l@(TeXComm a str as) = if c l then f l else TeXComm a str <$> mapM go' as + go l@(TeXEnv a a' str as b) = if c l then f l else TeXEnv a a' str <$> mapM go' as <*> go b + go l@(TeXMath a t b) = if c l then f l else TeXMath a t <$> go b + go l@(TeXBraces a b) = if c l then f l else TeXBraces a <$> go b + go l@(TeXSeq l1 l2) = if c l then f l else liftA2 TeXSeq (go l1) (go l2) + go l = if c l then f l else pure l + -- + go' (FixArg a l ) = FixArg a <$> go l + go' (OptArg a l ) = OptArg a <$> go l + go' (MOptArg a ls) = MOptArg a <$> mapM go ls + go' (SymArg a l ) = SymArg a <$> go l + go' (MSymArg a ls) = MSymArg a <$> mapM go ls + go' (ParArg a l ) = ParArg a <$> go l + go' (MParArg a ls) = MParArg a <$> mapM go ls + +-- | Extract the content of the 'document' environment, if present. +getBody :: LaTeX a -> Maybe (LaTeX a) +getBody l = + case lookForEnv "document" l of + ((_,b):_) -> Just b + _ -> Nothing + +-- | Extract the preamble of a 'LaTeX' document (everything before the 'document' +-- environment). It could be empty. +getPreamble :: LaTeX a -> LaTeX a +getPreamble (TeXEnv _ _ "document" _ _) = mempty +getPreamble (TeXSeq l1 l2) = getPreamble l1 <> getPreamble l2 +getPreamble l = l + +--------------------------------------- +-- LaTeX Arbitrary instance + +arbitraryChar :: Gen Char +arbitraryChar = elements $ + ['A'..'Z'] + ++ ['a'..'z'] + ++ "\n-+*/!\"().,:;'@<>? " + +-- | Utility for the instance of 'LaTeX' to 'Arbitrary'. +-- We generate a short sequence of characters and +-- escape reserved characters with 'protectText'. +arbitraryRaw :: Gen Text +arbitraryRaw = do + n <- choose (1,20) + protectText . pack <$> replicateM n arbitraryChar + +-- | Generator for names of command and environments. +-- We use only alphabetical characters. +arbitraryName :: Gen String +arbitraryName = do + n <- choose (1,10) + replicateM n $ elements $ ['a' .. 'z'] ++ ['A' .. 'Z'] + +instance Arbitrary (Measure a) where + arbitrary = do + n <- choose (0,5) + let f = [Pt,Mm,Cm,In,Ex,Em] !! n + f <$> arbitrary + +instance Arbitrary a => Arbitrary (LaTeX a) where + arbitrary = arbitraryLaTeX False + +arbitraryLaTeX :: Arbitrary a => Bool -> Gen (LaTeX a) +arbitraryLaTeX inDollar = do + -- We give more chances to 'TeXRaw'. + -- This results in arbitrary 'LaTeX' values + -- not getting too large. + n <- choose (0,16 :: Int) + case n of + 0 -> if inDollar then arbitraryLaTeX True else pure TeXEmpty + 1 -> do m <- choose (0,5) + TeXComm <$> arbitrary <*> arbitraryName <*> vectorOf m arbitrary + 2 -> TeXCommS <$> arbitrary <*> arbitraryName + 3 -> do m <- choose (0,5) + TeXEnv <$> arbitrary <*> arbitrary <*> arbitraryName <*> vectorOf m arbitrary <*> arbitrary + 4 -> if inDollar + then arbitraryLaTeX True + else do m <- choose (0,3) + let t = [Parentheses,Square,Dollar,DoubleDollar] !! m + TeXMath <$> arbitrary <*> pure t <*> arbitraryLaTeX (t == Dollar || t == DoubleDollar) + 5 -> TeXLineBreak <$> arbitrary <*> arbitrary <*> arbitrary + 6 -> TeXBraces <$> arbitrary <*> arbitrary + 7 -> TeXComment <$> arbitrary <*> arbitraryRaw + 8 -> TeXSeq <$> (if inDollar then arbitraryLaTeX True else arbitrary) <*> arbitrary + _ -> TeXRaw <$> arbitrary <*> arbitraryRaw + +instance Arbitrary a => Arbitrary (TeXArg a) where + arbitrary = do + n <- choose (0,6 :: Int) + case n of + 0 -> OptArg <$> arbitrary <*> arbitrary + 1 -> do m <- choose (1,5) + MOptArg <$> arbitrary <*> vectorOf m arbitrary + 2 -> SymArg <$> arbitrary <*> arbitrary + 3 -> do m <- choose (1,5) + MSymArg <$> arbitrary <*> vectorOf m arbitrary + 4 -> ParArg <$> arbitrary <*> arbitrary + 5 -> do m <- choose (1,5) + MParArg <$> arbitrary <*> vectorOf m arbitrary + _ -> FixArg <$> arbitrary <*> arbitrary + + +instance Hashable a => Hashable (Measure a) +instance Hashable MathType +instance Hashable a => Hashable (TeXArg a) +instance Hashable a => Hashable (LaTeX a) diff --git a/Text/LaTeX/Base/Texy.hs b/Text/LaTeX/Base/Texy.hs index ad22f24..f587b29 100644 --- a/Text/LaTeX/Base/Texy.hs +++ b/Text/LaTeX/Base/Texy.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE FlexibleInstances , TypeSynonymInstances #-} -- | 'Texy' class, as proposed in . module Text.LaTeX.Base.Texy ( diff --git a/Text/LaTeX/Base/Types.hs b/Text/LaTeX/Base/Types.hs index ba900e0..6fbbc64 100644 --- a/Text/LaTeX/Base/Types.hs +++ b/Text/LaTeX/Base/Types.hs @@ -1,5 +1,4 @@ - -{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE OverloadedStrings , PatternSynonyms #-} -- | Some types shared along the library. module Text.LaTeX.Base.Types ( @@ -10,7 +9,8 @@ module Text.LaTeX.Base.Types ( , createLabel , labelName , Pos (..) , HPos (..) , TableSpec (..) - , Measure (..) + , Measure , pattern Pt , pattern Mm , pattern Cm , pattern In + , pattern Ex , pattern Em , pattern CustomMeasure ) where import Text.LaTeX.Base.Syntax diff --git a/Text/LaTeX/Packages/Acronym.hs b/Text/LaTeX/Packages/Acronym.hs index f6b1026..84a5f77 100644 --- a/Text/LaTeX/Packages/Acronym.hs +++ b/Text/LaTeX/Packages/Acronym.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE OverloadedStrings, CPP #-} +{-# LANGUAGE OverloadedStrings, CPP , PatternSynonyms #-} -- | Add acronyms to your documents using this module. -- @@ -24,7 +24,7 @@ module Text.LaTeX.Packages.Acronym import Data.String(IsString(fromString)) import Text.LaTeX.Base.Class(LaTeXC, comm0, comm1, comm2, liftL, liftL2) -import Text.LaTeX.Base.Syntax(LaTeX(TeXComm, TeXEnv), TeXArg(FixArg, OptArg)) +import Text.LaTeX.Base.Syntax import Text.LaTeX.Base.Types(PackageName) import Text.LaTeX.Base.Writer(LaTeXT) diff --git a/Text/LaTeX/Packages/Bigstrut.hs b/Text/LaTeX/Packages/Bigstrut.hs index 81bc582..5cdf78d 100644 --- a/Text/LaTeX/Packages/Bigstrut.hs +++ b/Text/LaTeX/Packages/Bigstrut.hs @@ -11,7 +11,7 @@ module Text.LaTeX.Packages.Bigstrut , bigstrutBottom ) where -import Text.LaTeX.Base.Syntax (LaTeX(TeXComm), TeXArg(OptArg)) +import Text.LaTeX.Base.Syntax import Text.LaTeX.Base.Class (LaTeXC, fromLaTeX) import Text.LaTeX.Base.Types (PackageName) diff --git a/Text/LaTeX/Packages/LTableX.hs b/Text/LaTeX/Packages/LTableX.hs index 7ffa273..13bedc2 100644 --- a/Text/LaTeX/Packages/LTableX.hs +++ b/Text/LaTeX/Packages/LTableX.hs @@ -11,7 +11,7 @@ module Text.LaTeX.Packages.LTableX , module Text.LaTeX.Packages.LongTable ) where -import Text.LaTeX.Base.Syntax (LaTeX(TeXComm)) +import Text.LaTeX.Base.Syntax import Text.LaTeX.Base.Class (LaTeXC, fromLaTeX) import Text.LaTeX.Base.Types (PackageName) import Text.LaTeX.Packages.TabularX (tabularx) diff --git a/Text/LaTeX/Packages/LongTable.hs b/Text/LaTeX/Packages/LongTable.hs index 13c5d18..d9a0984 100644 --- a/Text/LaTeX/Packages/LongTable.hs +++ b/Text/LaTeX/Packages/LongTable.hs @@ -13,7 +13,7 @@ module Text.LaTeX.Packages.LongTable -- * Package Options ) where -import Text.LaTeX.Base.Syntax (LaTeX(TeXEnv, TeXRaw, TeXComm), TeXArg(FixArg, OptArg)) +import Text.LaTeX.Base.Syntax import Text.LaTeX.Base.Class (LaTeXC, fromLaTeX, liftL) import Text.LaTeX.Base.Render (render, renderAppend) import Text.LaTeX.Base.Types (PackageName, Pos, TableSpec) diff --git a/Text/LaTeX/Packages/Lscape.hs b/Text/LaTeX/Packages/Lscape.hs index 0c0c126..8c3b5f7 100644 --- a/Text/LaTeX/Packages/Lscape.hs +++ b/Text/LaTeX/Packages/Lscape.hs @@ -9,7 +9,7 @@ module Text.LaTeX.Packages.Lscape , pdftex ) where -import Text.LaTeX.Base.Syntax (LaTeX(TeXEnv)) +import Text.LaTeX.Base.Syntax import Text.LaTeX.Base.Class (LaTeXC, liftL) import Text.LaTeX.Base.Types (PackageName) diff --git a/Text/LaTeX/Packages/Multirow.hs b/Text/LaTeX/Packages/Multirow.hs index 568a3b1..aa64f98 100644 --- a/Text/LaTeX/Packages/Multirow.hs +++ b/Text/LaTeX/Packages/Multirow.hs @@ -12,7 +12,7 @@ module Text.LaTeX.Packages.Multirow import qualified Data.Semigroup as SG ((<>)) import Data.Maybe (catMaybes) -import Text.LaTeX.Base.Syntax (LaTeX(TeXComm), TeXArg(FixArg, OptArg)) +import Text.LaTeX.Base.Syntax import Text.LaTeX.Base.Class (LaTeXC, liftL) import Text.LaTeX.Base.Types (PackageName, Pos, Measure) import Text.LaTeX.Base.Render (Render, render, rendertex) diff --git a/Text/LaTeX/Packages/TabularX.hs b/Text/LaTeX/Packages/TabularX.hs index 59d2b2a..21c3709 100644 --- a/Text/LaTeX/Packages/TabularX.hs +++ b/Text/LaTeX/Packages/TabularX.hs @@ -8,7 +8,7 @@ module Text.LaTeX.Packages.TabularX , tabularx ) where -import Text.LaTeX.Base.Syntax (LaTeX(TeXEnv, TeXRaw), TeXArg(FixArg, OptArg)) +import Text.LaTeX.Base.Syntax import Text.LaTeX.Base.Class (LaTeXC, liftL) import Text.LaTeX.Base.Render (render, renderAppend) import Text.LaTeX.Base.Types (PackageName, Pos, TableSpec, Measure) From ef858e5c9dfddc0bc355700bbda1f086e3a07dff Mon Sep 17 00:00:00 2001 From: Victor Miraldo Date: Mon, 15 Jun 2020 14:14:25 +0200 Subject: [PATCH 2/4] Removed srcloc from most constructors --- Text/LaTeX/Base/Syntax.hs | 447 ++++++++++++++++++++++------- Text/LaTeX/Base/Syntax/WithParm.hs | 347 ---------------------- 2 files changed, 351 insertions(+), 443 deletions(-) delete mode 100644 Text/LaTeX/Base/Syntax/WithParm.hs diff --git a/Text/LaTeX/Base/Syntax.hs b/Text/LaTeX/Base/Syntax.hs index daabe2f..0b7742d 100644 --- a/Text/LaTeX/Base/Syntax.hs +++ b/Text/LaTeX/Base/Syntax.hs @@ -1,132 +1,387 @@ -{-# LANGUAGE PatternSynonyms #-} -module Text.LaTeX.Base.Syntax +{-# LANGUAGE FlexibleInstances, CPP, DeriveDataTypeable, DeriveGeneric + , DeriveFunctor , PatternSynonyms #-} + +-- | LaTeX syntax description in the definition of the 'LaTeX' datatype. +-- If you want to add new commands or environments not defined in +-- the library, import this module and use 'LaTeX' data constructors. +module Text.LaTeX.Base.Syntax.WithParm ( -- * @LaTeX@ datatype - S.MathType (..) - , Measure - , pattern Pt - , pattern Mm - , pattern Cm - , pattern In - , pattern Ex - , pattern Em - , pattern CustomMeasure - - , LaTeX - , pattern TeXRaw - , pattern TeXComm - , pattern TeXCommS - , pattern TeXEnv - , pattern TeXMath - , pattern TeXLineBreak - , pattern TeXBraces - , pattern TeXComment - , pattern TeXSeq - , pattern TeXEmpty - - , TeXArg - , pattern FixArg - , pattern OptArg - , pattern MOptArg - , pattern SymArg - , pattern MSymArg - , pattern ParArg - , pattern MParArg - , (<>), S.between + MeasureL (..) , Measure + , MathType (..) + , LaTeXL (..) + , LaTeX , pattern TeXRaw , pattern TeXComm , pattern TeXCommS + , pattern TeXEnv , pattern TeXMath + , TeXArgL (..) , TeXArg + , (<>), between -- * Escaping reserved characters - , S.protectString - , S.protectText + , protectString + , protectText -- * Syntax analysis - , S.matchCommand - , S.lookForCommand - , S.matchEnv - , S.lookForEnv - , S.texmap - , S.texmapM + , matchCommand + , lookForCommand + , matchEnv + , lookForEnv + , texmap + , texmapM -- ** Utils - , S.getBody - , S.getPreamble + , getBody + , getPreamble ) where -import Data.Text(Text) +import Data.Text (Text,pack) +import qualified Data.Text +import qualified Data.Semigroup as Semigroup +import Data.String +import Control.Applicative +import Control.Monad (replicateM) +import Data.Functor.Identity (runIdentity) +import Data.Data (Data) +import Data.Typeable +import Test.QuickCheck +import Data.Hashable +import GHC.Generics (Generic) +#if !MIN_VERSION_base(4,11,0) +import Data.Monoid +#endif + +-- | Measure units defined in LaTeX. Use 'CustomMeasure' to use commands like 'textwidth'. +-- For instance: +-- +-- > rule Nothing (CustomMeasure linewidth) (Pt 2) +-- +-- This will create a black box (see 'rule') as wide as the text and two points tall. +-- +data MeasureL a = + Pt Double -- ^ A point is 1/72.27 inch, that means about 0.0138 inch or 0.3515 mm. + | Mm Double -- ^ Millimeter. + | Cm Double -- ^ Centimeter. + | In Double -- ^ Inch. + | Ex Double -- ^ The height of an \"x\" in the current font. + | Em Double -- ^ The width of an \"M\" in the current font. + | CustomMeasure (LaTeXL a) -- ^ You can introduce a 'LaTeX' expression as a measure. + deriving (Data, Eq, Generic, Show, Typeable, Functor) + +-- | Different types of syntax for mathematical expressions. +data MathType = Parentheses | Square | Dollar | DoubleDollar | LHSInline + deriving (Data, Eq, Generic, Show, Typeable) + +-- | Type of @LaTeX@ blocks with additional information of type @a@ annotated +-- through the tree. This is used, for example, to track source location +-- on the parser. If you wish to use the AST with no annotations, see 'LaTeX'. +data LaTeXL a = + TeXRawL a Text -- ^ Raw text, first argument is the location in the source file. + | TeXCommL a String [TeXArgL a] -- ^ Constructor for commands. + -- First is the location in the source + -- Second argument is the name of the command. + -- Third, its arguments. + | TeXCommSL a String -- ^ Constructor for commands with no arguments. + -- When rendering, no space or @{}@ will be added at + -- the end. + | TeXEnvL a a String [TeXArgL a] (LaTeXL a) -- ^ Constructor for environments. + -- First two arguments are the locations of + -- its \begin and \end; then the name of the environment. + -- Fourth, its arguments. + -- Fifth, its content. + | TeXMathL a MathType (LaTeXL a) -- ^ Mathematical expressions. + | TeXLineBreak (Maybe (MeasureL a)) Bool -- ^ Line break command. + | TeXBraces (LaTeXL a) -- ^ A expression between braces. + | TeXComment Text -- ^ Comments. + | TeXSeq (LaTeXL a) (LaTeXL a) -- ^ Sequencing of 'LaTeXL' expressions. + -- Use '<>' preferably. + | TeXEmpty -- ^ An empty block. + -- /Neutral element/ of '<>'. + deriving (Data, Eq, Generic, Show, Typeable, Functor) + +-- | Type of @LaTeX@ blocks without source locations. +type LaTeX = LaTeXL () -import qualified Text.LaTeX.Base.Syntax.WithParm as S +-- | Type of @LaTeX@ arguments without source locations. +type TeXArg = TeXArgL () -type Measure = S.Measure () -type LaTeX = S.LaTeX () -type TeXArg = S.TeXArg () +-- | Type of @LaTeX@ measures without source locations. +type Measure = MeasureL () -{-# COMPLETE TeXRaw, TeXComm , TeXCommS , TeXEnv , TeXMath , TeXLineBreak - , TeXBraces , TeXSeq , TeXEmpty #-} + +{-# COMPLETE TeXRaw, TeXComm , TeXCommS , TeXEnv , TeXMath , TeXLineBreak , TeXBraces , TeXSeq , TeXEmpty #-} + +-- | Same as 'TeXRawL' but defaults @a@ to @()@ pattern TeXRaw :: Text -> LaTeX -pattern TeXRaw t = S.TeXRaw () t +pattern TeXRaw t = TeXRawL () t +-- | Same as 'TeXCommL' but defaults @a@ to @()@ pattern TeXComm :: String -> [TeXArg] -> LaTeX -pattern TeXComm s l = S.TeXComm () s l +pattern TeXComm s l = TeXCommL () s l +-- | Same as 'TeXCommSL' but defaults @a@ to @()@ pattern TeXCommS :: String -> LaTeX -pattern TeXCommS s = S.TeXCommS () s +pattern TeXCommS s = TeXCommSL () s +-- | Same as 'TeXEnv' but defaults @a@ to @()@ pattern TeXEnv :: String -> [TeXArg] -> LaTeX -> LaTeX -pattern TeXEnv s l e = S.TeXEnv () () s l e +pattern TeXEnv s l e = TeXEnvL () () s l e + +-- | Same as 'TeXMath' but defaults @a@ to @()@ +pattern TeXMath :: MathType -> LaTeX -> LaTeX +pattern TeXMath m e = TeXMathL () m e + +-- | An argument for a 'LaTeXL' command or environment that can carry additional +-- information through the parameter @a@. See 'TeXArg' if you wish to ignore the +-- parameter @a@ entirely. +data TeXArgL a = + FixArg (LaTeXL a) -- ^ Fixed argument. + | OptArg (LaTeXL a) -- ^ Optional argument. + | MOptArg [LaTeXL a] -- ^ Multiple optional argument. + | SymArg (LaTeXL a) -- ^ An argument enclosed between @\<@ and @\>@. + | MSymArg [LaTeXL a] -- ^ Version of 'SymArg' with multiple options. + | ParArg (LaTeXL a) -- ^ An argument enclosed between @(@ and @)@. + | MParArg [LaTeXL a] -- ^ Version of 'ParArg' with multiple options. + deriving (Data, Eq, Generic, Show, Typeable, Functor) + +-- Monoid instance for 'LaTeXL'. + +-- | Method 'mappend' is strict in both arguments (except in the case when the first argument is 'TeXEmpty'). +instance Monoid (LaTeXL a) where + mempty = TeXEmpty + mappend TeXEmpty x = x + mappend x TeXEmpty = x + -- This equation is to make 'mappend' associative. + mappend (TeXSeq x y) z = TeXSeq x $ mappend y z + -- + mappend x y = TeXSeq x y + +instance Semigroup.Semigroup (LaTeXL a) where + (<>) = mappend + +-- | Calling 'between' @c l1 l2@ puts @c@ between @l1@ and @l2@ and +-- appends them. +-- +-- > between c l1 l2 = l1 <> c <> l2 +between :: Monoid m => m -> m -> m -> m +between c l1 l2 = l1 <> c <> l2 + +-- | Method 'fromString' escapes LaTeX reserved characters using 'protectString'. +instance IsString (LaTeXL ()) where + fromString = TeXRawL () . fromString . protectString + +-- | Escape LaTeX reserved characters in a 'String'. +protectString :: String -> String +protectString = mconcat . fmap protectChar + +-- | Escape LaTeX reserved characters in a 'Text'. +protectText :: Text -> Text +protectText = Data.Text.concatMap (fromString . protectChar) + +protectChar :: Char -> String +protectChar '#' = "\\#" +protectChar '$' = "\\$" +protectChar '%' = "\\%" +protectChar '^' = "\\^{}" +protectChar '&' = "\\&" +protectChar '{' = "\\{" +protectChar '}' = "\\}" +protectChar '~' = "\\~{}" +protectChar '\\' = "\\textbackslash{}" +protectChar '_' = "\\_{}" +protectChar x = [x] + +-- Syntax analysis -pattern TeXMath :: S.MathType -> LaTeX -> LaTeX -pattern TeXMath m e = S.TeXMath () m e +-- | Look into a 'LaTeXL' syntax tree to find any call to the command with +-- the given name. It returns a list of arguments with which this command +-- is called. +-- +-- > lookForCommand = (fmap snd .) . matchCommand . (==) +-- +-- If the returned list is empty, the command was not found. However, +-- if the list contains empty lists, those are callings to the command +-- with no arguments. +-- +-- For example +-- +-- > lookForCommand "author" l +-- +-- would look for the argument passed to the @\\author@ command in @l@. +lookForCommand :: String -- ^ Name of the command. + -> LaTeXL a -- ^ LaTeX syntax tree. + -> [[TeXArgL a]] -- ^ List of arguments passed to the command. +lookForCommand = (fmap snd .) . matchCommand . (==) -pattern TeXLineBreak :: Maybe (Measure) -> Bool -> LaTeX -pattern TeXLineBreak m e = S.TeXLineBreak () m e +-- | Traverse a 'LaTeXL' syntax tree and returns the commands (see 'TeXComm' and +-- 'TeXCommS') that matches the condition and their arguments in each call. +matchCommand :: (String -> Bool) -> LaTeXL a -> [(String,[TeXArgL a])] +matchCommand f (TeXCommL _ str as) = + let xs = concatMap (matchCommandArg f) as + in if f str then (str,as) : xs else xs +matchCommand f (TeXCommSL _ str) = [(str, []) | f str] +matchCommand f (TeXEnvL _ _ _ as l) = + let xs = concatMap (matchCommandArg f) as + in xs ++ matchCommand f l +matchCommand f (TeXMathL _ _ l) = matchCommand f l +matchCommand f (TeXBraces l) = matchCommand f l +matchCommand f (TeXSeq l1 l2) = matchCommand f l1 ++ matchCommand f l2 +matchCommand _ _ = [] -pattern TeXComment :: Text -> LaTeX -pattern TeXComment t = S.TeXComment () t +matchCommandArg :: (String -> Bool) -> TeXArgL a -> [(String,[TeXArgL a])] +matchCommandArg f (OptArg l ) = matchCommand f l +matchCommandArg f (FixArg l ) = matchCommand f l +matchCommandArg f (MOptArg ls) = concatMap (matchCommand f) ls +matchCommandArg f (SymArg l ) = matchCommand f l +matchCommandArg f (MSymArg ls) = concatMap (matchCommand f) ls +matchCommandArg f (ParArg l ) = matchCommand f l +matchCommandArg f (MParArg ls) = concatMap (matchCommand f) ls -pattern TeXBraces :: LaTeX -> LaTeX -pattern TeXBraces t = S.TeXBraces () t +-- | Similar to 'lookForCommand', but applied to environments. +-- It returns a list with arguments passed and content of the +-- environment in each call. +-- +-- > lookForEnv = (fmap (\(_,as,l) -> (as,l)) .) . matchEnv . (==) +-- +lookForEnv :: String -> LaTeXL a -> [([TeXArgL a],LaTeXL a)] +lookForEnv = (fmap (\(_,as,l) -> (as,l)) .) . matchEnv . (==) -pattern TeXSeq :: LaTeX -> LaTeX -> LaTeX -pattern TeXSeq m n = S.TeXSeq m n +-- | Traverse a 'LaTeXL' syntax tree and returns the environments (see +-- 'TeXEnv') that matches the condition, their arguments and their content +-- in each call. +matchEnv :: (String -> Bool) -> LaTeXL a -> [(String,[TeXArgL a],LaTeXL a)] +matchEnv f (TeXCommL _ _ as) = concatMap (matchEnvArg f) as +matchEnv f (TeXEnvL _ _ str as l) = + let xs = concatMap (matchEnvArg f) as + ys = matchEnv f l + zs = xs ++ ys + in if f str then (str,as,l) : zs else zs +matchEnv f (TeXMathL _ _ l) = matchEnv f l +matchEnv f (TeXBraces l) = matchEnv f l +matchEnv f (TeXSeq l1 l2) = matchEnv f l1 ++ matchEnv f l2 +matchEnv _ _ = [] -pattern TeXEmpty :: LaTeX -pattern TeXEmpty = S.TeXEmpty +matchEnvArg :: (String -> Bool) -> TeXArgL a -> [(String,[TeXArgL a],LaTeXL a)] +matchEnvArg f (OptArg l ) = matchEnv f l +matchEnvArg f (FixArg l ) = matchEnv f l +matchEnvArg f (MOptArg ls) = concatMap (matchEnv f) ls +matchEnvArg f (SymArg l ) = matchEnv f l +matchEnvArg f (MSymArg ls) = concatMap (matchEnv f) ls +matchEnvArg f (ParArg l ) = matchEnv f l +matchEnvArg f (MParArg ls) = concatMap (matchEnv f) ls -{-# COMPLETE FixArg , OptArg , MOptArg , SymArg , MSymArg , ParArg , MParArg #-} -pattern FixArg :: LaTeX -> TeXArg -pattern FixArg m = S.FixArg () m +-- | The function 'texmap' looks for subexpressions that match a given +-- condition and applies a function to them. +-- +-- > texmap c f = runIdentity . texmapM c (pure . f) +texmap :: (LaTeXL a -> Bool) -- ^ Condition. + -> (LaTeXL a -> LaTeXL a) -- ^ Function to apply when the condition matches. + -> LaTeXL a -> LaTeXL a +texmap c f = runIdentity . texmapM c (pure . f) -pattern OptArg :: LaTeX -> TeXArg -pattern OptArg x = S.OptArg () x +-- | Version of 'texmap' where the function returns values in a 'Monad'. +texmapM :: (Applicative m, Monad m) + => (LaTeXL a -> Bool) -- ^ Condition. + -> (LaTeXL a -> m (LaTeXL a)) -- ^ Function to apply when the condition matches. + -> LaTeXL a -> m (LaTeXL a) +texmapM c f = go + where + go l@(TeXCommL a str as) = if c l then f l else TeXCommL a str <$> mapM go' as + go l@(TeXEnvL a a' str as b) = if c l then f l else TeXEnvL a a' str <$> mapM go' as <*> go b + go l@(TeXMathL a t b) = if c l then f l else TeXMathL a t <$> go b + go l@(TeXBraces b) = if c l then f l else TeXBraces <$> go b + go l@(TeXSeq l1 l2) = if c l then f l else liftA2 TeXSeq (go l1) (go l2) + go l = if c l then f l else pure l + -- + go' (FixArg l ) = FixArg <$> go l + go' (OptArg l ) = OptArg <$> go l + go' (MOptArg ls) = MOptArg <$> mapM go ls + go' (SymArg l ) = SymArg <$> go l + go' (MSymArg ls) = MSymArg <$> mapM go ls + go' (ParArg l ) = ParArg <$> go l + go' (MParArg ls) = MParArg <$> mapM go ls -pattern MOptArg :: [LaTeX] -> TeXArg -pattern MOptArg x = S.MOptArg () x +-- | Extract the content of the 'document' environment, if present. +getBody :: LaTeXL a -> Maybe (LaTeXL a) +getBody l = + case lookForEnv "document" l of + ((_,b):_) -> Just b + _ -> Nothing -pattern SymArg :: LaTeX -> TeXArg -pattern SymArg x = S.SymArg () x +-- | Extract the preamble of a 'LaTeXL' document (everything before the 'document' +-- environment). It could be empty. +getPreamble :: LaTeXL a -> LaTeXL a +getPreamble (TeXEnvL _ _ "document" _ _) = mempty +getPreamble (TeXSeq l1 l2) = getPreamble l1 <> getPreamble l2 +getPreamble l = l -pattern MSymArg :: [LaTeX] -> TeXArg -pattern MSymArg x = S.MSymArg () x +--------------------------------------- +-- LaTeXL Arbitrary instance -pattern ParArg :: LaTeX -> TeXArg -pattern ParArg x = S.ParArg () x +arbitraryChar :: Gen Char +arbitraryChar = elements $ + ['A'..'Z'] + ++ ['a'..'z'] + ++ "\n-+*/!\"().,:;'@<>? " -pattern MParArg :: [LaTeX] -> TeXArg -pattern MParArg x = S.MParArg () x +-- | Utility for the instance of 'LaTeXL' to 'Arbitrary'. +-- We generate a short sequence of characters and +-- escape reserved characters with 'protectText'. +arbitraryRaw :: Gen Text +arbitraryRaw = do + n <- choose (1,20) + protectText . pack <$> replicateM n arbitraryChar -{-# COMPLETE Pt , Mm , Cm , In , Ex , Em , CustomMeasure #-} -pattern Pt :: Double -> Measure -pattern Pt x = S.Pt x +-- | Generator for names of command and environments. +-- We use only alphabetical characters. +arbitraryName :: Gen String +arbitraryName = do + n <- choose (1,10) + replicateM n $ elements $ ['a' .. 'z'] ++ ['A' .. 'Z'] -pattern Mm :: Double -> Measure -pattern Mm x = S.Mm x +instance Arbitrary (MeasureL a) where + arbitrary = do + n <- choose (0,5) + let f = [Pt,Mm,Cm,In,Ex,Em] !! n + f <$> arbitrary -pattern Cm :: Double -> Measure -pattern Cm x = S.Cm x +instance Arbitrary a => Arbitrary (LaTeXL a) where + arbitrary = arbitraryLaTeXL False -pattern In :: Double -> Measure -pattern In x = S.In x +arbitraryLaTeXL :: Arbitrary a => Bool -> Gen (LaTeXL a) +arbitraryLaTeXL inDollar = do + -- We give more chances to 'TeXRaw'. + -- This results in arbitrary 'LaTeXL' values + -- not getting too large. + n <- choose (0,16 :: Int) + case n of + 0 -> if inDollar then arbitraryLaTeXL True else pure TeXEmpty + 1 -> do m <- choose (0,5) + TeXCommL <$> arbitrary <*> arbitraryName <*> vectorOf m arbitrary + 2 -> TeXCommSL <$> arbitrary <*> arbitraryName + 3 -> do m <- choose (0,5) + TeXEnvL <$> arbitrary <*> arbitrary <*> arbitraryName <*> vectorOf m arbitrary <*> arbitrary + 4 -> if inDollar + then arbitraryLaTeXL True + else do m <- choose (0,3) + let t = [Parentheses,Square,Dollar,DoubleDollar] !! m + TeXMathL <$> arbitrary <*> pure t <*> arbitraryLaTeXL (t == Dollar || t == DoubleDollar) + 5 -> TeXLineBreak <$> arbitrary <*> arbitrary + 6 -> TeXBraces <$> arbitrary + 7 -> TeXComment <$> arbitraryRaw + 8 -> TeXSeq <$> (if inDollar then arbitraryLaTeXL True else arbitrary) <*> arbitrary + _ -> TeXRawL <$> arbitrary <*> arbitraryRaw -pattern Ex :: Double -> Measure -pattern Ex x = S.Ex x +instance Arbitrary a => Arbitrary (TeXArgL a) where + arbitrary = do + n <- choose (0,6 :: Int) + case n of + 0 -> OptArg <$> arbitrary + 1 -> do m <- choose (1,5) + MOptArg <$> vectorOf m arbitrary + 2 -> SymArg <$> arbitrary + 3 -> do m <- choose (1,5) + MSymArg <$> vectorOf m arbitrary + 4 -> ParArg <$> arbitrary + 5 -> do m <- choose (1,5) + MParArg <$> vectorOf m arbitrary + _ -> FixArg <$> arbitrary -pattern Em :: Double -> Measure -pattern Em x = S.Em x -pattern CustomMeasure :: LaTeX -> Measure -pattern CustomMeasure x = S.CustomMeasure x +instance Hashable a => Hashable (MeasureL a) +instance Hashable MathType +instance Hashable a => Hashable (TeXArgL a) +instance Hashable a => Hashable (LaTeXL a) diff --git a/Text/LaTeX/Base/Syntax/WithParm.hs b/Text/LaTeX/Base/Syntax/WithParm.hs deleted file mode 100644 index 7c59acf..0000000 --- a/Text/LaTeX/Base/Syntax/WithParm.hs +++ /dev/null @@ -1,347 +0,0 @@ -{-# LANGUAGE FlexibleInstances, CPP, DeriveDataTypeable, DeriveGeneric, DeriveFunctor #-} - --- | LaTeX syntax description in the definition of the 'LaTeX' datatype. --- If you want to add new commands or environments not defined in --- the library, import this module and use 'LaTeX' data constructors. -module Text.LaTeX.Base.Syntax.WithParm - ( -- * @LaTeX@ datatype - Measure (..) - , MathType (..) - , LaTeX (..) - , TeXArg (..) - , (<>), between - -- * Escaping reserved characters - , protectString - , protectText - -- * Syntax analysis - , matchCommand - , lookForCommand - , matchEnv - , lookForEnv - , texmap - , texmapM - -- ** Utils - , getBody - , getPreamble - ) where - -import Data.Text (Text,pack) -import qualified Data.Text -import qualified Data.Semigroup as Semigroup -import Data.String -import Control.Applicative -import Control.Monad (replicateM) -import Data.Functor.Identity (runIdentity) -import Data.Data (Data) -import Data.Typeable -import Test.QuickCheck -import Data.Hashable -import GHC.Generics (Generic) -#if !MIN_VERSION_base(4,11,0) -import Data.Monoid -#endif - --- | Measure units defined in LaTeX. Use 'CustomMeasure' to use commands like 'textwidth'. --- For instance: --- --- > rule Nothing (CustomMeasure linewidth) (Pt 2) --- --- This will create a black box (see 'rule') as wide as the text and two points tall. --- -data Measure a = - Pt Double -- ^ A point is 1/72.27 inch, that means about 0.0138 inch or 0.3515 mm. - | Mm Double -- ^ Millimeter. - | Cm Double -- ^ Centimeter. - | In Double -- ^ Inch. - | Ex Double -- ^ The height of an \"x\" in the current font. - | Em Double -- ^ The width of an \"M\" in the current font. - | CustomMeasure (LaTeX a) -- ^ You can introduce a 'LaTeX' expression as a measure. - deriving (Data, Eq, Generic, Show, Typeable, Functor) - --- | Different types of syntax for mathematical expressions. -data MathType = Parentheses | Square | Dollar | DoubleDollar | LHSInline - deriving (Data, Eq, Generic, Show, Typeable) - --- | Type of @LaTeX@ blocks. The @a@ type variable can be used for --- a number of things but mainly stores source location. -data LaTeX a = - TeXRaw a Text -- ^ Raw text. - | TeXComm a String [TeXArg a] -- ^ Constructor for commands. - -- First argument is the name of the command. - -- Second, its arguments. - | TeXCommS a String -- ^ Constructor for commands with no arguments. - -- When rendering, no space or @{}@ will be added at - -- the end. - | TeXEnv a a String [TeXArg a] (LaTeX a) -- ^ Constructor for environments. - -- First argument is the name of the environment. - -- Second, its arguments. - -- Third, its content. - | TeXMath a MathType (LaTeX a) -- ^ Mathematical expressions. - | TeXLineBreak a (Maybe (Measure a)) Bool -- ^ Line break command. - | TeXBraces a (LaTeX a) -- ^ A expression between braces. - | TeXComment a Text -- ^ Comments. - | TeXSeq (LaTeX a) (LaTeX a) -- ^ Sequencing of 'LaTeX' expressions. - -- Use '<>' preferably. - | TeXEmpty -- ^ An empty block. - -- /Neutral element/ of '<>'. - deriving (Data, Eq, Generic, Show, Typeable, Functor) - --- | An argument for a 'LaTeX' command or environment. -data TeXArg a = - FixArg a (LaTeX a) -- ^ Fixed argument. - | OptArg a (LaTeX a) -- ^ Optional argument. - | MOptArg a [LaTeX a] -- ^ Multiple optional argument. - | SymArg a (LaTeX a) -- ^ An argument enclosed between @\<@ and @\>@. - | MSymArg a [LaTeX a] -- ^ Version of 'SymArg' with multiple options. - | ParArg a (LaTeX a) -- ^ An argument enclosed between @(@ and @)@. - | MParArg a [LaTeX a] -- ^ Version of 'ParArg' with multiple options. - deriving (Data, Eq, Generic, Show, Typeable, Functor) - --- Monoid instance for 'LaTeX'. - --- | Method 'mappend' is strict in both arguments (except in the case when the first argument is 'TeXEmpty'). -instance Monoid (LaTeX a) where - mempty = TeXEmpty - mappend TeXEmpty x = x - mappend x TeXEmpty = x - -- This equation is to make 'mappend' associative. - mappend (TeXSeq x y) z = TeXSeq x $ mappend y z - -- - mappend x y = TeXSeq x y - -instance Semigroup.Semigroup (LaTeX a) where - (<>) = mappend - --- | Calling 'between' @c l1 l2@ puts @c@ between @l1@ and @l2@ and --- appends them. --- --- > between c l1 l2 = l1 <> c <> l2 -between :: Monoid m => m -> m -> m -> m -between c l1 l2 = l1 <> c <> l2 - --- | Method 'fromString' escapes LaTeX reserved characters using 'protectString'. -instance IsString (LaTeX ()) where - fromString = TeXRaw () . fromString . protectString - --- | Escape LaTeX reserved characters in a 'String'. -protectString :: String -> String -protectString = mconcat . fmap protectChar - --- | Escape LaTeX reserved characters in a 'Text'. -protectText :: Text -> Text -protectText = Data.Text.concatMap (fromString . protectChar) - -protectChar :: Char -> String -protectChar '#' = "\\#" -protectChar '$' = "\\$" -protectChar '%' = "\\%" -protectChar '^' = "\\^{}" -protectChar '&' = "\\&" -protectChar '{' = "\\{" -protectChar '}' = "\\}" -protectChar '~' = "\\~{}" -protectChar '\\' = "\\textbackslash{}" -protectChar '_' = "\\_{}" -protectChar x = [x] - --- Syntax analysis - --- | Look into a 'LaTeX' syntax tree to find any call to the command with --- the given name. It returns a list of arguments with which this command --- is called. --- --- > lookForCommand = (fmap snd .) . matchCommand . (==) --- --- If the returned list is empty, the command was not found. However, --- if the list contains empty lists, those are callings to the command --- with no arguments. --- --- For example --- --- > lookForCommand "author" l --- --- would look for the argument passed to the @\\author@ command in @l@. -lookForCommand :: String -- ^ Name of the command. - -> LaTeX a -- ^ LaTeX syntax tree. - -> [[TeXArg a]] -- ^ List of arguments passed to the command. -lookForCommand = (fmap snd .) . matchCommand . (==) - --- | Traverse a 'LaTeX' syntax tree and returns the commands (see 'TeXComm' and --- 'TeXCommS') that matches the condition and their arguments in each call. -matchCommand :: (String -> Bool) -> LaTeX a -> [(String,[TeXArg a])] -matchCommand f (TeXComm _ str as) = - let xs = concatMap (matchCommandArg f) as - in if f str then (str,as) : xs else xs -matchCommand f (TeXCommS _ str) = [(str, []) | f str] -matchCommand f (TeXEnv _ _ _ as l) = - let xs = concatMap (matchCommandArg f) as - in xs ++ matchCommand f l -matchCommand f (TeXMath _ _ l) = matchCommand f l -matchCommand f (TeXBraces _ l) = matchCommand f l -matchCommand f (TeXSeq l1 l2) = matchCommand f l1 ++ matchCommand f l2 -matchCommand _ _ = [] - -matchCommandArg :: (String -> Bool) -> TeXArg a -> [(String,[TeXArg a])] -matchCommandArg f (OptArg _ l ) = matchCommand f l -matchCommandArg f (FixArg _ l ) = matchCommand f l -matchCommandArg f (MOptArg _ ls) = concatMap (matchCommand f) ls -matchCommandArg f (SymArg _ l ) = matchCommand f l -matchCommandArg f (MSymArg _ ls) = concatMap (matchCommand f) ls -matchCommandArg f (ParArg _ l ) = matchCommand f l -matchCommandArg f (MParArg _ ls) = concatMap (matchCommand f) ls - --- | Similar to 'lookForCommand', but applied to environments. --- It returns a list with arguments passed and content of the --- environment in each call. --- --- > lookForEnv = (fmap (\(_,as,l) -> (as,l)) .) . matchEnv . (==) --- -lookForEnv :: String -> LaTeX a -> [([TeXArg a],LaTeX a)] -lookForEnv = (fmap (\(_,as,l) -> (as,l)) .) . matchEnv . (==) - --- | Traverse a 'LaTeX' syntax tree and returns the environments (see --- 'TeXEnv') that matches the condition, their arguments and their content --- in each call. -matchEnv :: (String -> Bool) -> LaTeX a -> [(String,[TeXArg a],LaTeX a)] -matchEnv f (TeXComm _ _ as) = concatMap (matchEnvArg f) as -matchEnv f (TeXEnv _ _ str as l) = - let xs = concatMap (matchEnvArg f) as - ys = matchEnv f l - zs = xs ++ ys - in if f str then (str,as,l) : zs else zs -matchEnv f (TeXMath _ _ l) = matchEnv f l -matchEnv f (TeXBraces _ l) = matchEnv f l -matchEnv f (TeXSeq l1 l2) = matchEnv f l1 ++ matchEnv f l2 -matchEnv _ _ = [] - -matchEnvArg :: (String -> Bool) -> TeXArg a -> [(String,[TeXArg a],LaTeX a)] -matchEnvArg f (OptArg _ l ) = matchEnv f l -matchEnvArg f (FixArg _ l ) = matchEnv f l -matchEnvArg f (MOptArg _ ls) = concatMap (matchEnv f) ls -matchEnvArg f (SymArg _ l ) = matchEnv f l -matchEnvArg f (MSymArg _ ls) = concatMap (matchEnv f) ls -matchEnvArg f (ParArg _ l ) = matchEnv f l -matchEnvArg f (MParArg _ ls) = concatMap (matchEnv f) ls - --- | The function 'texmap' looks for subexpressions that match a given --- condition and applies a function to them. --- --- > texmap c f = runIdentity . texmapM c (pure . f) -texmap :: (LaTeX a -> Bool) -- ^ Condition. - -> (LaTeX a -> LaTeX a) -- ^ Function to apply when the condition matches. - -> LaTeX a -> LaTeX a -texmap c f = runIdentity . texmapM c (pure . f) - --- | Version of 'texmap' where the function returns values in a 'Monad'. -texmapM :: (Applicative m, Monad m) - => (LaTeX a -> Bool) -- ^ Condition. - -> (LaTeX a -> m (LaTeX a)) -- ^ Function to apply when the condition matches. - -> LaTeX a -> m (LaTeX a) -texmapM c f = go - where - go l@(TeXComm a str as) = if c l then f l else TeXComm a str <$> mapM go' as - go l@(TeXEnv a a' str as b) = if c l then f l else TeXEnv a a' str <$> mapM go' as <*> go b - go l@(TeXMath a t b) = if c l then f l else TeXMath a t <$> go b - go l@(TeXBraces a b) = if c l then f l else TeXBraces a <$> go b - go l@(TeXSeq l1 l2) = if c l then f l else liftA2 TeXSeq (go l1) (go l2) - go l = if c l then f l else pure l - -- - go' (FixArg a l ) = FixArg a <$> go l - go' (OptArg a l ) = OptArg a <$> go l - go' (MOptArg a ls) = MOptArg a <$> mapM go ls - go' (SymArg a l ) = SymArg a <$> go l - go' (MSymArg a ls) = MSymArg a <$> mapM go ls - go' (ParArg a l ) = ParArg a <$> go l - go' (MParArg a ls) = MParArg a <$> mapM go ls - --- | Extract the content of the 'document' environment, if present. -getBody :: LaTeX a -> Maybe (LaTeX a) -getBody l = - case lookForEnv "document" l of - ((_,b):_) -> Just b - _ -> Nothing - --- | Extract the preamble of a 'LaTeX' document (everything before the 'document' --- environment). It could be empty. -getPreamble :: LaTeX a -> LaTeX a -getPreamble (TeXEnv _ _ "document" _ _) = mempty -getPreamble (TeXSeq l1 l2) = getPreamble l1 <> getPreamble l2 -getPreamble l = l - ---------------------------------------- --- LaTeX Arbitrary instance - -arbitraryChar :: Gen Char -arbitraryChar = elements $ - ['A'..'Z'] - ++ ['a'..'z'] - ++ "\n-+*/!\"().,:;'@<>? " - --- | Utility for the instance of 'LaTeX' to 'Arbitrary'. --- We generate a short sequence of characters and --- escape reserved characters with 'protectText'. -arbitraryRaw :: Gen Text -arbitraryRaw = do - n <- choose (1,20) - protectText . pack <$> replicateM n arbitraryChar - --- | Generator for names of command and environments. --- We use only alphabetical characters. -arbitraryName :: Gen String -arbitraryName = do - n <- choose (1,10) - replicateM n $ elements $ ['a' .. 'z'] ++ ['A' .. 'Z'] - -instance Arbitrary (Measure a) where - arbitrary = do - n <- choose (0,5) - let f = [Pt,Mm,Cm,In,Ex,Em] !! n - f <$> arbitrary - -instance Arbitrary a => Arbitrary (LaTeX a) where - arbitrary = arbitraryLaTeX False - -arbitraryLaTeX :: Arbitrary a => Bool -> Gen (LaTeX a) -arbitraryLaTeX inDollar = do - -- We give more chances to 'TeXRaw'. - -- This results in arbitrary 'LaTeX' values - -- not getting too large. - n <- choose (0,16 :: Int) - case n of - 0 -> if inDollar then arbitraryLaTeX True else pure TeXEmpty - 1 -> do m <- choose (0,5) - TeXComm <$> arbitrary <*> arbitraryName <*> vectorOf m arbitrary - 2 -> TeXCommS <$> arbitrary <*> arbitraryName - 3 -> do m <- choose (0,5) - TeXEnv <$> arbitrary <*> arbitrary <*> arbitraryName <*> vectorOf m arbitrary <*> arbitrary - 4 -> if inDollar - then arbitraryLaTeX True - else do m <- choose (0,3) - let t = [Parentheses,Square,Dollar,DoubleDollar] !! m - TeXMath <$> arbitrary <*> pure t <*> arbitraryLaTeX (t == Dollar || t == DoubleDollar) - 5 -> TeXLineBreak <$> arbitrary <*> arbitrary <*> arbitrary - 6 -> TeXBraces <$> arbitrary <*> arbitrary - 7 -> TeXComment <$> arbitrary <*> arbitraryRaw - 8 -> TeXSeq <$> (if inDollar then arbitraryLaTeX True else arbitrary) <*> arbitrary - _ -> TeXRaw <$> arbitrary <*> arbitraryRaw - -instance Arbitrary a => Arbitrary (TeXArg a) where - arbitrary = do - n <- choose (0,6 :: Int) - case n of - 0 -> OptArg <$> arbitrary <*> arbitrary - 1 -> do m <- choose (1,5) - MOptArg <$> arbitrary <*> vectorOf m arbitrary - 2 -> SymArg <$> arbitrary <*> arbitrary - 3 -> do m <- choose (1,5) - MSymArg <$> arbitrary <*> vectorOf m arbitrary - 4 -> ParArg <$> arbitrary <*> arbitrary - 5 -> do m <- choose (1,5) - MParArg <$> arbitrary <*> vectorOf m arbitrary - _ -> FixArg <$> arbitrary <*> arbitrary - - -instance Hashable a => Hashable (Measure a) -instance Hashable MathType -instance Hashable a => Hashable (TeXArg a) -instance Hashable a => Hashable (LaTeX a) From 50dbf2681c7d503f8df06506b693794207078e33 Mon Sep 17 00:00:00 2001 From: Victor Miraldo Date: Mon, 15 Jun 2020 14:19:13 +0200 Subject: [PATCH 3/4] Propagated changes; tests pass. Will work on the parser now --- HaTeX.cabal | 1 - Text/LaTeX/Base/Render.hs | 54 +++++++++++++++++++-------------------- Text/LaTeX/Base/Syntax.hs | 10 +++++--- 3 files changed, 34 insertions(+), 31 deletions(-) diff --git a/HaTeX.cabal b/HaTeX.cabal index 962e9ed..834df27 100644 --- a/HaTeX.cabal +++ b/HaTeX.cabal @@ -88,7 +88,6 @@ Library Text.LaTeX.Base.Pretty Text.LaTeX.Base.Render Text.LaTeX.Base.Syntax - Text.LaTeX.Base.Syntax.WithParm Text.LaTeX.Base.Texy Text.LaTeX.Base.Types Text.LaTeX.Base.Writer diff --git a/Text/LaTeX/Base/Render.hs b/Text/LaTeX/Base/Render.hs index abc1bca..cba2e24 100644 --- a/Text/LaTeX/Base/Render.hs +++ b/Text/LaTeX/Base/Render.hs @@ -22,7 +22,7 @@ module Text.LaTeX.Base.Render , showFloat ) where -import Text.LaTeX.Base.Syntax.WithParm +import Text.LaTeX.Base.Syntax import Text.LaTeX.Base.Class import Data.String import Data.List (intersperse) @@ -106,11 +106,11 @@ readFileTex = fmap decodeUtf8 . B.readFile -- /Warning: /'rendertex'/ does not escape LaTeX reserved characters./ -- /Use /'protectText'/ to escape them./ rendertex :: (Render a, LaTeXC l) => a -> l -rendertex = fromLaTeX . TeXRaw () . render +rendertex = fromLaTeX . TeXRawL () . render -- Render instances -instance (Show a) => Render (Measure a) where +instance (Show a) => Render (MeasureL a) where render (Pt x) = render x <> "pt" render (Mm x) = render x <> "mm" render (Cm x) = render x <> "cm" @@ -121,18 +121,18 @@ instance (Show a) => Render (Measure a) where -- LaTeX instances -instance (Show a) => Render (LaTeX a) where +instance (Show a) => Render (LaTeXL a) where - renderBuilder (TeXRaw _ t) = Builder.fromText t + renderBuilder (TeXRawL _ t) = Builder.fromText t - renderBuilder (TeXComm _ name []) = "\\" <> fromString name <> "{}" - renderBuilder (TeXComm _ name args) = + renderBuilder (TeXCommL _ name []) = "\\" <> fromString name <> "{}" + renderBuilder (TeXCommL _ name args) = "\\" <> fromString name <> renderAppendBuilder args - renderBuilder (TeXCommS _ name) = "\\" <> fromString name + renderBuilder (TeXCommSL _ name) = "\\" <> fromString name - renderBuilder (TeXEnv _ _ name args c) = + renderBuilder (TeXEnvL _ _ name args c) = "\\begin{" <> fromString name <> "}" @@ -142,16 +142,16 @@ instance (Show a) => Render (LaTeX a) where <> fromString name <> "}" - renderBuilder (TeXMath _ Dollar l) = "$" <> renderBuilder l <> "$" - renderBuilder (TeXMath _ DoubleDollar l) = "$$" <> renderBuilder l <> "$$" - renderBuilder (TeXMath _ Square l) = "\\[" <> renderBuilder l <> "\\]" - renderBuilder (TeXMath _ Parentheses l) = "\\(" <> renderBuilder l <> "\\)" + renderBuilder (TeXMathL _ Dollar l) = "$" <> renderBuilder l <> "$" + renderBuilder (TeXMathL _ DoubleDollar l) = "$$" <> renderBuilder l <> "$$" + renderBuilder (TeXMathL _ Square l) = "\\[" <> renderBuilder l <> "\\]" + renderBuilder (TeXMathL _ Parentheses l) = "\\(" <> renderBuilder l <> "\\)" - renderBuilder (TeXLineBreak _ m b) = "\\\\" <> maybe mempty (\x -> "[" <> renderBuilder x <> "]") m <> ( if b then "*" else mempty ) + renderBuilder (TeXLineBreak m b) = "\\\\" <> maybe mempty (\x -> "[" <> renderBuilder x <> "]") m <> ( if b then "*" else mempty ) - renderBuilder (TeXBraces _ l) = "{" <> renderBuilder l <> "}" + renderBuilder (TeXBraces l) = "{" <> renderBuilder l <> "}" - renderBuilder (TeXComment _ c) = + renderBuilder (TeXComment c) = let xs = Data.Text.lines c in if null xs then "%\n" else Builder.fromText $ Data.Text.unlines $ fmap ("%" <>) xs @@ -161,17 +161,17 @@ instance (Show a) => Render (LaTeX a) where render = renderDefault -instance (Show a) => Render (TeXArg a) where - renderBuilder (FixArg _ l) = "{" <> renderBuilder l <> "}" - renderBuilder (OptArg _ l) = "[" <> renderBuilder l <> "]" - renderBuilder (MOptArg _ []) = mempty - renderBuilder (MOptArg _ ls) = "[" <> renderCommasBuilder ls <> "]" - renderBuilder (SymArg _ l) = "<" <> renderBuilder l <> ">" - renderBuilder (MSymArg _ []) = mempty - renderBuilder (MSymArg _ ls) = "<" <> renderCommasBuilder ls <> ">" - renderBuilder (ParArg _ l) = "(" <> renderBuilder l <> ")" - renderBuilder (MParArg _ []) = mempty - renderBuilder (MParArg _ ls) = "(" <> renderCommasBuilder ls <> ")" +instance (Show a) => Render (TeXArgL a) where + renderBuilder (FixArg l) = "{" <> renderBuilder l <> "}" + renderBuilder (OptArg l) = "[" <> renderBuilder l <> "]" + renderBuilder (MOptArg []) = mempty + renderBuilder (MOptArg ls) = "[" <> renderCommasBuilder ls <> "]" + renderBuilder (SymArg l) = "<" <> renderBuilder l <> ">" + renderBuilder (MSymArg []) = mempty + renderBuilder (MSymArg ls) = "<" <> renderCommasBuilder ls <> ">" + renderBuilder (ParArg l) = "(" <> renderBuilder l <> ")" + renderBuilder (MParArg []) = mempty + renderBuilder (MParArg ls) = "(" <> renderCommasBuilder ls <> ")" render = renderDefault -- Other instances diff --git a/Text/LaTeX/Base/Syntax.hs b/Text/LaTeX/Base/Syntax.hs index 0b7742d..41b68cd 100644 --- a/Text/LaTeX/Base/Syntax.hs +++ b/Text/LaTeX/Base/Syntax.hs @@ -3,8 +3,12 @@ -- | LaTeX syntax description in the definition of the 'LaTeX' datatype. -- If you want to add new commands or environments not defined in --- the library, import this module and use 'LaTeX' data constructors. -module Text.LaTeX.Base.Syntax.WithParm +-- the library, import this module and use 'LaTeXL' data constructors. +-- +-- The 'LaTeX' type is defined as @LaTeXL ()@ and ignores the +-- additional information (such as source location) carried by 'LaTeXL'. +-- The constructors with an @L@ suffix carry said location data. +module Text.LaTeX.Base.Syntax ( -- * @LaTeX@ datatype MeasureL (..) , Measure , MathType (..) @@ -62,7 +66,7 @@ data MeasureL a = deriving (Data, Eq, Generic, Show, Typeable, Functor) -- | Different types of syntax for mathematical expressions. -data MathType = Parentheses | Square | Dollar | DoubleDollar | LHSInline +data MathType = Parentheses | Square | Dollar | DoubleDollar deriving (Data, Eq, Generic, Show, Typeable) -- | Type of @LaTeX@ blocks with additional information of type @a@ annotated From bead07af0a23dcad9a398fe75f92e37ad0d269e1 Mon Sep 17 00:00:00 2001 From: Victor Miraldo Date: Mon, 15 Jun 2020 14:41:50 +0200 Subject: [PATCH 4/4] Parser seems to be working alright! --- Text/LaTeX/Base/Parser.hs | 110 ++++++++++++++++++++++++-------------- 1 file changed, 69 insertions(+), 41 deletions(-) diff --git a/Text/LaTeX/Base/Parser.hs b/Text/LaTeX/Base/Parser.hs index ff14597..d40b646 100644 --- a/Text/LaTeX/Base/Parser.hs +++ b/Text/LaTeX/Base/Parser.hs @@ -25,6 +25,8 @@ module Text.LaTeX.Base.Parser ( -- * The parser parseLaTeX , parseLaTeXFile + , parseLaTeXPos + , parseLaTeXPosFile -- * Parsing errors , ParseError , errorPos @@ -42,6 +44,8 @@ module Text.LaTeX.Base.Parser ( , defaultParserConf , parseLaTeXWith , parseLaTeXFileWith + , parseLaTeXPosWith + , parseLaTeXPosFileWith -- * Parser combinators , Parser , latexParser @@ -97,26 +101,41 @@ type Parser = Parsec Text ParserConf -- | Parse a 'Text' sequence as a 'LaTeX' block. If it fails, it returns -- an error string. parseLaTeX :: Text -> Either ParseError LaTeX -parseLaTeX = parseLaTeXWith defaultParserConf +parseLaTeX = fmap (fmap (const ())) . parseLaTeXPos + +-- | Parse a 'Text' sequence as a 'LaTeXL' block. If it fails, it returns +-- an error string. +parseLaTeXPos :: Text -> Either ParseError (LaTeXL SourcePos) +parseLaTeXPos = parseLaTeXPosWith defaultParserConf parseLaTeXWith :: ParserConf -> Text -> Either ParseError LaTeX -parseLaTeXWith conf t +parseLaTeXWith conf = fmap (fmap (const ())) . parseLaTeXPosWith conf + +parseLaTeXPosWith :: ParserConf -> Text -> Either ParseError (LaTeXL SourcePos) +parseLaTeXPosWith conf t | T.null t = return TeXEmpty | otherwise = runParser latexParser conf "parseLaTeX input" t +-- | Read a file and parse it as 'LaTeX'. +parseLaTeXPosFile :: FilePath -> IO (Either ParseError (LaTeXL SourcePos)) +parseLaTeXPosFile = parseLaTeXPosFileWith defaultParserConf + -- | Read a file and parse it as 'LaTeX'. parseLaTeXFile :: FilePath -> IO (Either ParseError LaTeX) -parseLaTeXFile = parseLaTeXFileWith defaultParserConf +parseLaTeXFile = fmap (fmap $ fmap $ const ()) . parseLaTeXPosFileWith defaultParserConf parseLaTeXFileWith :: ParserConf -> FilePath -> IO (Either ParseError LaTeX) -parseLaTeXFileWith conf fp = runParser latexParser conf fp <$> readFileTex fp +parseLaTeXFileWith conf = fmap (fmap $ fmap $ const ()) . parseLaTeXFileWith conf + +parseLaTeXPosFileWith :: ParserConf -> FilePath -> IO (Either ParseError (LaTeXL SourcePos)) +parseLaTeXPosFileWith conf fp = runParser latexParser conf fp <$> readFileTex fp -- | The 'LaTeX' parser. -latexParser :: Parser LaTeX +latexParser :: Parser (LaTeXL SourcePos) latexParser = mconcat <$> latexBlockParser `manyTill` eof -- | Parser of a single 'LaTeX' constructor, no appending blocks. -latexBlockParser :: Parser LaTeX +latexBlockParser :: Parser (LaTeXL SourcePos) latexBlockParser = foldr1 (<|>) [ text "text" , dolMath "inline math ($)" @@ -134,48 +153,54 @@ latexBlockParser = foldr1 (<|>) nottext :: Set Char nottext = fromList "$%\\{]}" -text :: Parser LaTeX +text :: Parser (LaTeXL SourcePos) text = do + pos <- getPosition mbC <- peekChar case mbC of Nothing -> fail "text: Empty input." Just c | c `member` nottext -> fail "not text" - | otherwise -> TeXRaw <$> takeTill (`member` nottext) + | otherwise -> TeXRawL pos <$> takeTill (`member` nottext) ------------------------------------------------------------------------ -- Text without stopping on ']' ------------------------------------------------------------------------ -text2 :: Parser LaTeX +text2 :: Parser (LaTeXL SourcePos) text2 = do _ <- char ']' - t <- try (text <|> return (TeXRaw T.empty)) - return $ TeXRaw (T.pack "]") <> t + p <- getPosition + t <- try (text <|> return (TeXRawL p T.empty)) + return $ TeXRawL p (T.pack "]") <> t ------------------------------------------------------------------------ -- Environment ------------------------------------------------------------------------ -environment :: Parser LaTeX +environment :: Parser (LaTeXL SourcePos) environment = anonym <|> env -anonym :: Parser LaTeX +anonym :: Parser (LaTeXL SourcePos) anonym = do _ <- char '{' l <- TeXBraces . mconcat <$> many latexBlockParser _ <- char '}' return l -env :: Parser LaTeX +env :: Parser (LaTeXL SourcePos) env = do - n <- char '\\' *> envName "begin" + p0 <- getPosition + n <- char '\\' *> envName "begin" sps <- many $ char ' ' - let lsps = if null sps then mempty else TeXRaw $ T.pack sps + let lsps = if null sps then mempty else TeXRawL p0 $ T.pack sps as <- cmdArgs verbatims <- verbatimEnvironments <$> getState if n `elem` verbatims then let endenv = try $ string "\\end" >> spaces >> string ("{" <> n <> "}") - in TeXEnv n (fromMaybe [] as) . TeXRaw . T.pack <$> manyTill anyChar endenv - else do b <- envBody n - return $ TeXEnv n (fromMaybe [] as) $ + in do cont <- manyTill anyChar endenv + p1 <- getPosition + return $ TeXEnvL p0 p1 n (fromMaybe [] as) $ TeXRawL p0 (T.pack cont) + else do b <- envBody n + p1 <- getPosition + return $ TeXEnvL p0 p1 n (fromMaybe [] as) $ case as of Just [] -> lsps <> TeXBraces mempty <> b Nothing -> lsps <> b @@ -189,11 +214,11 @@ envName k = do _ <- char '}' return $ T.unpack n -envBody :: String -> Parser LaTeX +envBody :: String -> Parser (LaTeXL SourcePos) envBody n = mconcat <$> bodyBlock n `manyTill` endenv where endenv = try $ string "\\end" >> spaces >> string ("{" <> n <> "}") -bodyBlock :: String -> Parser LaTeX +bodyBlock :: String -> Parser (LaTeXL SourcePos) bodyBlock n = do c <- peekChar case c of @@ -203,7 +228,7 @@ bodyBlock n = do ------------------------------------------------------------------------ -- Command ------------------------------------------------------------------------ -command :: Parser LaTeX +command :: Parser (LaTeXL SourcePos) command = do _ <- char '\\' mbX <- peekChar @@ -212,18 +237,19 @@ command = do Just x -> if isSpecial x then special else do + p0 <- getPosition c <- takeTill endCmd - maybe (TeXCommS $ T.unpack c) (TeXComm $ T.unpack c) <$> cmdArgs + maybe (TeXCommSL p0 $ T.unpack c) (TeXCommL p0 $ T.unpack c) <$> cmdArgs ------------------------------------------------------------------------ -- Command Arguments ------------------------------------------------------------------------ -cmdArgs :: Parser (Maybe [TeXArg]) +cmdArgs :: Parser (Maybe [TeXArgL SourcePos]) cmdArgs = try (string "{}" >> return (Just [])) <|> fmap Just (try $ many1 cmdArg) <|> return Nothing -cmdArg :: Parser TeXArg +cmdArg :: Parser (TeXArgL SourcePos) cmdArg = do c <- char '[' <|> char '{' let e = case c of @@ -239,7 +265,7 @@ cmdArg = do ------------------------------------------------------------------------ -- Special commands (consisting of one char) ------------------------------------------------------------------------ -special :: Parser LaTeX +special :: Parser (LaTeXL SourcePos) special = do x <- anyChar case x of @@ -258,7 +284,7 @@ isSpecial = (`elem` specials) -- Line break ------------------------------------------------------------------------ -lbreak :: Parser LaTeX +lbreak :: Parser (LaTeXL SourcePos) lbreak = do y <- try (char '[' <|> char '*' <|> return ' ') case y of @@ -269,16 +295,16 @@ lbreak = do _ -> return (TeXLineBreak Nothing True) _ -> return (TeXLineBreak Nothing False) -linebreak :: Bool -> Parser LaTeX +linebreak :: Bool -> Parser (LaTeXL SourcePos) linebreak t = do m <- measure "measure" _ <- char ']' s <- try (char '*' <|> return ' ') return $ TeXLineBreak (Just m) (t || s == '*') -measure :: Parser Measure +measure :: Parser (MeasureL SourcePos) measure = try (floating >>= unit) <|> (CustomMeasure . mconcat) <$> manyTill latexBlockParser (lookAhead $ char ']') -unit :: Double -> Parser Measure +unit :: Double -> Parser (MeasureL SourcePos) unit f = do u1 <- anyChar u2 <- anyChar @@ -294,40 +320,42 @@ unit f = do ------------------------------------------------------------------------ -- Right or left brace or vertical ------------------------------------------------------------------------ -rbrace, lbrace,vert :: Parser LaTeX +rbrace, lbrace,vert :: Parser (LaTeXL SourcePos) lbrace = brace "{" rbrace = brace "}" vert = brace "|" -brace :: String -> Parser LaTeX -brace = return . TeXCommS -- The same as commS? +brace :: String -> Parser (LaTeXL SourcePos) +brace s = flip TeXCommSL s <$> getPosition -- The same as commS? -commS :: String -> Parser LaTeX -commS = return . TeXCommS +commS :: String -> Parser (LaTeXL SourcePos) +commS s = flip TeXCommSL s <$> getPosition ------------------------------------------------------------------------ -- Math ------------------------------------------------------------------------ -dolMath :: Parser LaTeX +dolMath :: Parser (LaTeXL SourcePos) dolMath = do + p <- getPosition _ <- char '$' choice [ do _ <- char '$' b <- mconcat <$> latexBlockParser `manyTill` try (string "$$") - return $ TeXMath DoubleDollar b + return $ TeXMathL p DoubleDollar b , do b <- mconcat <$> latexBlockParser `manyTill` char '$' - return $ TeXMath Dollar b + return $ TeXMathL p Dollar b ] -math :: MathType -> String -> Parser LaTeX +math :: MathType -> String -> Parser (LaTeXL SourcePos) math t eMath = do + p <- getPosition b <- mconcat <$> latexBlockParser `manyTill` try (string eMath) - return $ TeXMath t b + return $ TeXMathL p t b ------------------------------------------------------------------------ -- Comment ------------------------------------------------------------------------ -comment :: Parser LaTeX +comment :: Parser (LaTeXL SourcePos) comment = do _ <- char '%' c <- takeTill (== '\n')