From 295dd897297722d07ec2ce5fb82e323fe495c775 Mon Sep 17 00:00:00 2001 From: Lars-Dominik Braun Date: Mon, 24 Aug 2020 17:09:34 +0200 Subject: GHC 8.8, cabal 3, pandoc 2.10 compatibility Move files around to separate Pesto (the library) and Pesto (the executables). Fixes for pandoc API changes. --- src/Codec/Pesto/Parse.lhs | 403 ---------------------------------------------- 1 file changed, 403 deletions(-) delete mode 100644 src/Codec/Pesto/Parse.lhs (limited to 'src/Codec/Pesto/Parse.lhs') diff --git a/src/Codec/Pesto/Parse.lhs b/src/Codec/Pesto/Parse.lhs deleted file mode 100644 index 518b866..0000000 --- a/src/Codec/Pesto/Parse.lhs +++ /dev/null @@ -1,403 +0,0 @@ -.. _language-syntax: - -Language syntax ---------------- - -.. class:: nodoc - -> module Codec.Pesto.Parse ( -> parse -> , test -> , Instruction(..) -> , Quantity(..) -> , Unit -> , Object -> , Approximately(..) -> , Amount(..) -> , isResult -> , isReference -> , isAlternative -> , isAnnotation -> , isAction -> , isDirective -> , isUnknown -> , spaces1 -> , notspace -> ) where -> import Data.Char (isSpace) -> import Data.Ratio ((%)) -> import Text.Parsec hiding (parse) -> import Text.ParserCombinators.Parsec.Pos (newPos) -> import Text.ParserCombinators.Parsec.Error (newErrorUnknown) -> import Test.HUnit hiding (test) -> -> import Codec.Pesto.Serialize (serialize) - -Pesto parses UTF-8_ encoded input data consisting of space-delimited -instructions. Every character within the Unicode whitespace class is -considered a space. - -.. _UTF-8: https://tools.ietf.org/html/rfc3629 -.. _spaces1: - -> stream = ((,) <$> getPosition <*> instruction) `sepEndBy` spaces1 -> "stream" -> spaces1 = many1 space - -The following instructions are supported: - -> data Instruction = -> Annotation String -> | Ingredient Quantity -> | Tool Quantity -> | Action String -> | Reference Quantity -> | Result Quantity -> | Alternative Quantity -> | Directive String -> | Unknown String -> deriving (Show, Eq) -> -> instruction = -> try annotation -> <|> try ingredient -> <|> try tool -> <|> try action -> <|> try result -> <|> try alternative -> <|> try reference -> <|> try directive -> <|> try unknown -> "instruction" - -The pesto grammar has two instruction types: The first one begins with a -start symbol (``start``) and consumes any character up to and including a -terminating symbol (``end``), which can be escaped with a backslash (``\``). - -> betweenEscaped :: Char -> Char -> Parsec String () String -> betweenEscaped start end = -> char start -> *> many (try (char '\\' *> char end) <|> satisfy (/= end)) -> <* char end - -Annotations and actions both are of this kind: - -> annotation = Annotation <$> betweenEscaped '(' ')' -> action = Action <$> betweenEscaped '[' ']' - -Here are examples for both: - -> testOpterm = [cmpInstruction "(skinless\nboneless)" (Right (Annotation "skinless\nboneless")) -> , cmpInstruction "[stir together]" (Right (Action "stir together")) -> , cmpInstruction "[stir\\]together]" (Right (Action "stir]together"))] - -The second one starts with one identifying character, ignores the following -whitespace characters and then consumes an object or a quantity. - -> oparg :: Char -> Parsec String () Instruction -> Parsec String () Instruction -> oparg ident cont = char ident *> spaces *> cont -> ingredient = oparg '+' (Ingredient <$> quantity) -> tool = oparg '&' (Tool <$> quantity) -> result = oparg '>' (Result <$> quantity) -> alternative = oparg '|' (Alternative <$> quantity) -> reference = oparg '*' (Reference <$> quantity) - -Additionally there are two special instructions. Directives are similar to the -previous instructions, but consume a qstr. - -> directive = oparg '%' (Directive <$> qstr) - -Unknown instructions are the fallthrough-case and accept anything. They must -not be discarded at this point. The point of accepting anything is to fail as -late as possible while processing input. This gives the parser a chance to -print helpful mesages that provide additional aid to the user who can then fix -the problem. - -> unknown = Unknown <$> many1 notspace - -Below are examples for these instructions: - -> testOparg = [ -> cmpInstruction "+100 g flour" -> (Right (Ingredient (Quantity (Exact (AmountRatio (100%1))) "g" "flour"))) -> , cmpInstruction "&oven" -> (Right (Tool (strQuantity "oven"))) -> , cmpInstruction ">dough" (Right (Result (strQuantity "dough"))) -> , cmpInstruction "|trimmings" (Right (Alternative (strQuantity "trimmings"))) -> , cmpInstruction "*fish" -> (Right (Reference (strQuantity "fish"))) -> , cmpInstruction3 "% invalid" (Right (Directive "invalid")) "%invalid" -> , cmpInstruction3 "* \t\n 1 _ cheese" -> (Right (Reference (Quantity (Exact (AmountRatio (1%1))) "" "cheese"))) -> "*1 _ cheese" -> ] - -Qstr -++++ - -Before introducing quantities we need to have a look at qstr, which is used by -them. A qstr, short for quoted string, can be – you guessed it already – a -string enclosed in double quotes, a single word or the underscore character -that represents the empty string. - -> qstr = try (betweenEscaped '"' '"') -> <|> word -> <|> char '_' *> return "" - -A word always starts with a letter, followed by any number of non-space -characters. - -> word = (:) <$> letter <*> many notspace -> notspace = satisfy (not . isSpace) - -The empty string can be represented by two double quotes or the underscore, but -not the empty string itself. - -> testQstr = [ -> cmpQstr3 "\"\"" (Right "") "_" -> , cmpQstr "_" (Right "") -> , cmpQstr "" parseError - -Any Unicode character with a General_Category major class L (i.e. a letter, see -`Unicode standard section 4.5 -`_ for example) is -accected as first character of a word. That includes german umlauts as well as -greek or arabic script. Numbers, separators, punctuation and others are not -permitted. - -> , cmpQstr "water" (Right "water") -> , cmpQstr "Äpfel" (Right "Äpfel") -> , cmpQstr "τυρί" (Right "τυρί") -> , cmpQstr "جبن" (Right "جبن") -> , cmpQstr "1sugar" parseError -> , cmpQstr "+milk" parseError -> , cmpQstr "∀onion" parseError - -The remaining letters of a word can be any character, including symbols, -numbers, … - -> , cmpQstr "rump-roast" (Right "rump-roast") -> , cmpQstr "v1negar" (Right "v1negar") -> , cmpQstr "mush\"rooms" (Right "mush\"rooms") - -…but not spaces. - -> , cmpQstr " tomatoes" parseError -> , cmpQstr "tomatoes " parseError -> , cmpQstr "lemon juice" parseError -> , cmpQstr "sour\tcream" parseError -> , cmpQstr "white\nwine" parseError - -If a string contains spaces or starts with a special character it must be -enclosed in double quotes. - -> , cmpQstr3 "\"salt\"" (Right "salt") "salt" -> , cmpQstr "\"+milk\"" (Right "+milk") -> , cmpQstr "\"soy sauce\"" (Right "soy sauce") -> , cmpQstr "\"1sugar\"" (Right "1sugar") -> , cmpQstr "\"chicken\tbreast\nmeat\"" (Right "chicken\tbreast\nmeat") - -Double quotes within a string can be quoted by prepending a backslash. However -the usual escape codes like \\n, \\t, … will *not* be expanded. - -> , cmpQstr "\"vine\"gar\"" parseError -> , cmpQstr3 "\"vine\\\"gar\"" (Right "vine\"gar") "vine\"gar" -> , cmpQstr "\"oli\\ve oil\"" (Right "oli\\ve oil") -> , cmpQstr "\"oli\\\\\"ve oil\"" (Right "oli\\\"ve oil") -> , cmpQstr3 "\"sal\\tmon\"" (Right "sal\\tmon") "sal\\tmon" -> ] - -Quantity -++++++++ - -The instructions Ingredient, Tool and Reference accept a *quantity*, that is a -triple of Approximately, Unit and Object as parameter. - -> data Quantity = Quantity Approximately Unit Object deriving (Show, Eq) - -The syntactic construct is overloaded and accepts one to three arguments. If -just one is given it is assumed to be the Object and Approximately and Unit are -empty. Two arguments set Approximately and Unit, which is convenient when the -unit implies the object (minutes usually refer to the object time, for -example). - -> quantity = try quantityA <|> quantityB - -> quantityA = Quantity -> <$> approximately -> <* spaces1 -> <*> unit -> <*> (try (spaces1 *> object) <|> return "") - -> quantityB = Quantity -> <$> return (Exact (AmountStr "")) -> <*> return "" -> <*> object - -> testQuantityOverloaded = [ -> cmpQuantity "oven" (exactQuantity (AmountStr "") "" "oven") -> , cmpQuantity "10 min" (exactQuantity (AmountRatio (10%1)) "min" "") -> , cmpQuantity "100 g flour" (exactQuantity (AmountRatio (100%1)) "g" "flour") - -The first two are equivalent to - -> , cmpQuantity3 "_ _ oven" (exactQuantity (AmountStr "") "" "oven") "oven" -> , cmpQuantity3 "10 min _" (exactQuantity (AmountRatio (10%1)) "min" "") "10 min" - -Missing units must not be ommited. The version with underscore should be prefered. - -> , cmpQuantity3 "1 \"\" meal" (exactQuantity (AmountRatio (1%1)) "" "meal") "1 _ meal" -> , cmpQuantity "1 _ meal" (exactQuantity (AmountRatio (1%1)) "" "meal") -> ] - -Units and objects are just strings. However units should be limited to -`well-known metric units `_ and `some guidelines -`_ apply to Objects as well. - -> type Unit = String -> unit = qstr -> -> type Object = String -> object = qstr - -Approximately is a wrapper for ranges, that is two amounts separated by a dash, -approximate amounts, prepended with a tilde and exact amounts without modifier. - -> data Approximately = -> Range Amount Amount -> | Approx Amount -> | Exact Amount -> deriving (Show, Eq) -> -> approximately = try range <|> try approx <|> exact -> range = Range <$> amount <*> (char '-' *> amount) -> approx = Approx <$> (char '~' *> amount) -> exact = Exact <$> amount - -> testQuantityApprox = [ -> cmpQuantity "1-2 _ bananas" (Right (Quantity (Range (AmountRatio (1%1)) (AmountRatio (2%1))) "" "bananas")) -> , cmpQuantity "1 - 2 _ bananas" parseError -> , cmpQuantity "1- 2 _ bananas" parseError -> , cmpQuantity "1 -2 _ bananas" parseError -> , cmpQuantity "~2 _ bananas" (Right (Quantity (Approx (AmountRatio (2%1))) "" "bananas")) -> , cmpQuantity "~ 2 _ bananas" parseError - -> ] - -Amounts are limited to rational numbers and strings. There are no real numbers -by design and implementations should avoid representing rational numbers as -IEEE float. They are not required and introduce ugly corner cases when -rounding while converting units for example. - -> data Amount = -> AmountRatio Rational -> | AmountStr String -> deriving (Show, Eq) -> -> amount = try ratio <|> AmountStr <$> qstr - -> testQuantityAmount = [ -> cmpQuantity "some _ pepper" (exactQuantity (AmountStr "some") "" "pepper") -> , cmpQuantity3 "\"some\"-\"a few\" _ bananas" (Right (Quantity (Range (AmountStr "some") (AmountStr "a few")) "" "bananas")) "some-\"a few\" _ bananas" -> , cmpQuantity "~\"the stars in your eyes\" _ bananas" (Right (Quantity (Approx (AmountStr "the stars in your eyes")) "" "bananas")) -> ] - -Rational numbers can be an integral, numerator and denominator, each separated -by a forward slash, just the numerator and denominator, again separated by a -forward slash or just a numerator with the default denominator 1 (i.e. ordinary -integral number). - -> ratio = let toRatio i num denom = AmountRatio ((i*denom+num)%denom) in -> try (toRatio <$> int <*> (char '/' *> int) <*> (char '/' *> int)) -> <|> try (toRatio <$> return 0 <*> int <*> (char '/' *> int)) -> <|> try (toRatio <$> return 0 <*> int <*> return 1) - -These are all equal. - -> testQuantityRatio = [ -> cmpQuantity "3 _ bananas" (exactQuantity (AmountRatio (3%1)) "" "bananas") -> , cmpQuantity3 "3/1 _ bananas" (exactQuantity (AmountRatio (3%1)) "" "bananas") "3 _ bananas" -> , cmpQuantity3 "3/0/1 _ bananas" (exactQuantity (AmountRatio (3%1)) "" "bananas") "3 _ bananas" - -XXtwo is num and denom - -> , cmpQuantity "3/5 _ bananas" (exactQuantity (AmountRatio (3%5)) "" "bananas") - -three is int, num and denom - -> , cmpQuantity "3/5/7 _ bananas" (exactQuantity (AmountRatio ((3*7+5)%7)) "" "bananas") - -> , cmpQuantity3 "10/3 _ bananas" (exactQuantity (AmountRatio (10%3)) "" "bananas") "3/1/3 _ bananas" - -Can be used with ranges and approximate too. and mixed with strings - -> , cmpQuantity "1-\"a few\" _ bananas" (Right (Quantity (Range (AmountRatio (1%1)) (AmountStr "a few")) "" "bananas")) -> , cmpQuantity "1/1/2-2 _ bananas" (Right (Quantity (Range (AmountRatio (3%2)) (AmountRatio (4%2))) "" "bananas")) -> , cmpQuantity "~1/1/2 _ bananas" (Right (Quantity (Approx (AmountRatio (3%2))) "" "bananas")) - -> ] - -Appendix -++++++++ - -> int = read <$> many1 digit -> parse = runParser stream () "" - -Test helpers: - -> isLeft (Left _) = True -> isLeft _ = False - -A generic parser error: - -> parseError = Left (newErrorUnknown (newPos "" 0 0)) - -Compare output of parser ``f`` for string ``str`` with ``expected``. The -expected result can be a parser error, which matches any actual parse error -(first case). - -> cmpParser f str (Left _) = TestCase $ assertBool str $ isLeft $ runParser (f <* eof) () "" str -> cmpParser f str expected = str ~: runParser (f <* eof) () "" str ~?= expected - -> cmpParseSerialize f str expectp@(Left _) _ = [cmpParser f str expectp] -> cmpParseSerialize f str expectp@(Right expectpval) expects = [ -> cmpParser f str expectp -> , serialize expectpval ~?= expects] - -Wrap qstr test in AmountStr to aid serialization test - -> cmpQstr input expectp = cmpQstr3 input expectp input -> cmpQstr3 input (Left expect) _ = [cmpParser (AmountStr <$> qstr) input (Left expect)] -> cmpQstr3 input (Right expect) expects = cmpParseSerialize (AmountStr <$> qstr) input (Right (AmountStr expect)) expects - -> cmpQuantity a b = cmpQuantity3 a b a -> cmpQuantity3 = cmpParseSerialize quantity - -> cmpInstruction a b = cmpInstruction3 a b a -> cmpInstruction3 = cmpParseSerialize instruction - -> exactQuantity a b c = Right (Quantity (Exact a) b c) -> strQuantity = Quantity (Exact (AmountStr "")) "" - -> test = [ -> "quantity" ~: testQuantityOverloaded ++ testQuantityApprox ++ testQuantityAmount ++ testQuantityRatio -> , "qstr" ~: testQstr -> , "oparg" ~: testOparg -> , "opterm" ~: testOpterm -> ] - -> isResult (Result _) = True -> isResult _ = False -> isReference (Reference _) = True -> isReference _ = False -> isAlternative (Alternative _) = True -> isAlternative _ = False -> isAnnotation (Annotation _) = True -> isAnnotation _ = False -> isAction (Action _) = True -> isAction _ = False -> isDirective (Directive _) = True -> isDirective _ = False -> isUnknown (Unknown _) = True -> isUnknown _ = False - -- cgit v1.2.3