path: root/src/Codec/Pesto
diff options
authorLars-Dominik Braun <>2015-06-28 16:42:19 +0200
committerLars-Dominik Braun <>2015-06-28 16:42:19 +0200
commit4c6c1bcf877017df98d4a5492a665eca12b8ba98 (patch)
treef5743e9965aac4d71ad3bd54c9ddb2ea8cd3495d /src/Codec/Pesto
parent87cccdd0d3159c8ac6730cb18dccb166653c58c5 (diff)
Replace file magic with directives
Adds the %pesto and %bonappetit directives, switches to stream-based operation.
Diffstat (limited to 'src/Codec/Pesto')
5 files changed, 104 insertions, 50 deletions
diff --git a/src/Codec/Pesto/Graph.lhs b/src/Codec/Pesto/Graph.lhs
index 4ea2886..7376c5f 100644
--- a/src/Codec/Pesto/Graph.lhs
+++ b/src/Codec/Pesto/Graph.lhs
@@ -11,18 +11,56 @@ Language semantics
> , firstNodeId
> , resolveReferences
> , test
+> , extract
> ) where
> import Data.Char (isSpace, toLower, isLetter)
> import Data.List (sort, nub)
> import Test.HUnit hiding (test)
+> import Control.Applicative ((<$>))
> import Codec.Pesto.Parse hiding (test)
-Pesto’s syntax drives a stack-based machine that transforms the linear stream
-of operations generated by the parser into a directed graph. Think of the stack
-as your kitchen’s workspace that is used to prepare the food’s
-components. You can add new ingredients, perform actions on them, put them
-aside and add them again.
+The parser’s output, a stream of operations, may contain multiple recipes. A
+recipe must start with the directive “pesto” and may end with “bonappetit”.
+This function extracts all recipes from the stream and removes both directives.
+This makes it easy to embed recipes into other documents.
+> -- | Split a stream of operations into recipes. A recipe consists of the
+> -- operations following a "pesto" directive, up to (but excluding) the next
+> -- "pesto" or "bonappetit" directive or the end of the stream. Operations
+> -- outside of a recipe are dropped.
+> extract :: [Operation] -> [[Operation]]
+> extract [] = []
+> extract (Directive "pesto":stream) = between:extract next
+>   where
+>     -- Both directives end the current recipe. The terminator itself stays
+>     -- at the head of next, so a "pesto" there starts the following recipe
+>     -- while a "bonappetit" is skipped by the last clause.
+>     isEnd (Directive x) | x `elem` ["bonappetit", "pesto"] = True
+>     isEnd _ = False
+>     (between, next) = break isEnd stream
+> -- Anything that is not a start directive is skipped.
+> extract (_:xs) = extract xs
+Start and end directive are removed from the extracted operations. The
+directive “bonappetit” is optional at the end of a stream.
+> testExtract = [
+> extract [Directive "pesto", Directive "bonappetit"] ~?= [[]]
+> , extract [Directive "pesto", Action "foobar", Directive "bonappetit"] ~?= [[Action "foobar"]]
+> , extract [Directive "pesto"] ~?= [[]]
+> , extract [Directive "pesto", Directive "foobar"] ~?= [[Directive "foobar"]]
+Operations surrounding the start and end directive are removed.
+> , extract [Unknown "Something", Directive "pesto"] ~?= [[]]
+> , extract [Unknown "Something", Action "pour", Directive "pesto"] ~?= [[]]
+> , extract [Directive "pesto", Directive "bonappetit", Annotation "something"] ~?= [[]]
+The stream may contain multiple recipes. The start directive also ends the
+previous recipe and starts a new one.
+> , extract [Directive "pesto", Action "pour", Directive "bonappetit", Action "foobar", Directive "pesto", Annotation "something"] ~?= [[Action "pour"], [Annotation "something"]]
+> , extract [Directive "pesto", Action "heat", Directive "pesto", Annotation "something"] ~?= [[Action "heat"], [Annotation "something"]]
+> , extract [Directive "pesto", Annotation "foobar", Directive "pesto", Directive "bonappetit"] ~?= [[Annotation "foobar"], []]
+> ]
+Each recipe’s stream of operations drives a stack-based machine that transforms
+it into a directed graph. Think of the stack as your kitchen’s workspace that
+is used to prepare the food’s components. You can add new ingredients, perform
+actions on them, put them aside and add them again.
This function processes a list of nodes, that is operations uniquely identified
by an integer and returns the edges of the directed graph as a list of tuples.
@@ -74,6 +112,12 @@ used to provide more information about ingredients (so “hot water” becomes
> f ctx@(Nothing, s, edges) (_, Annotation _) = ctx
> f (Just prev, s, edges) (i, Annotation _) = (Just prev, s, (i, prev):edges)
+Unused directives or unknown operations are dangling nodes with no connection to
+other nodes.
+> f ctx (_, Directive _) = ctx
+> f ctx (_, Unknown _) = ctx
These are helper functions:
> addToStack (_, stack:sx, edges) i = (Just i, (i:stack):sx, edges)
@@ -126,6 +170,11 @@ to the same node.
> , cmpGraph "+foobar >barbaz (C)" [(0, 1), (2, 1)]
> , cmpGraph "+foobar |barbaz (C)" [(0, 1), (2, 1)]
> , cmpGraph "*foobar (C)" [(1, 0)]
+Unknown directives or operations are never connected to other nodes.
+> , cmpGraph "%invalid" []
+> , cmpGraph "invalid" []
> ]
@@ -183,8 +232,8 @@ Appendix
> runGraphWith f doc expect = sort edges ~?= sort expect
> where
-> (Right op) = parse ("%pesto-1 " ++ doc)
-> nodes = (zip [firstNodeId..] . map snd . operations) op
+> (Right op) = (head . extract . snd . unzip) <$> parse ("%pesto " ++ doc)
+> nodes = zip [firstNodeId..] op
> edges = f nodes
> cmpGraph = runGraphWith toGraph
> cmpGraphRef = runGraphWith resolveReferences
@@ -202,5 +251,5 @@ Get all nodes with edges pointing towards nodeid
> outgoing edges (nodeid, _) = filter ((==) nodeid . fst) edges
-> test = ["graph" ~: testGraph, "ref" ~: testRef]
+> test = ["graph" ~: testGraph, "ref" ~: testRef, "extract" ~: testExtract]
diff --git a/src/Codec/Pesto/Lint.lhs b/src/Codec/Pesto/Lint.lhs
index b96c9de..e398c09 100644
--- a/src/Codec/Pesto/Lint.lhs
+++ b/src/Codec/Pesto/Lint.lhs
@@ -17,7 +17,7 @@ Not every graph generated in the previous section is a useful recipe, since
some combinations of operations just do not make sense. The linting test in
this section can detect common errors. Failing any of these tests does not
render a recipe invalid, but *useless*. Thus implementations must not create
-such recipes. They may be accepted as input from the user.
+such recipes. They may be accepted from the user though.
Every lint test checks a single aspect of the graph.
@@ -32,13 +32,14 @@ Metadata
The graph must have exactly one root node (i.e. a node with incoming edges
only) and it must be a result. The result’s object name is the recipe’s title.
This also requires all results and alternatives to be referenced somewhere.
+Directives are consumed either when parsing, generating a graph or linting.
+Otherwise they are dangling as well. Unknown operations are always dangling.
> rootIsResult nodes edges = case walkRoot nodes edges of
> [] -> [LintResult NoRootNode []]
> (i, x):[] -> if isResult x then [] else [LintResult NonResultRootNode [i]]
> xs -> [LintResult MoreThanOneRootNode (map fst xs)]
Empty recipes or circular references have no root node:
> testLintMetadata = [
@@ -49,6 +50,10 @@ Empty recipes or circular references have no root node:
This recipe’s title is “Pesto”.
> , cmpLint "+foobar >Pesto" []
+Directives and unknown operations are dangling and thus root nodes.
+> , cmpLint "invalid %invalid +foo >bar" [LintResult MoreThanOneRootNode [0,1,3]]
> ]
Additional key-value metadata for the whole recipe can be provided by adding
@@ -291,6 +296,7 @@ Appendix
> | UnitNotWellKnown
> | UnknownMetadataKey
> | InvalidMetadata
+> | InvalidNode
> deriving (Show, Eq, Ord)
> lintTests = [
@@ -306,8 +312,8 @@ Appendix
> cmpLint doc expect = doc ~: sort (lint nodes edges) ~?= sort expect
> where
-> (Right op) = parse ("%pesto-1 " ++ doc)
-> nodes = (zip [firstNodeId..] . map snd . operations) op
+> (Right op) = (head . extract . snd . unzip) <$> parse ("%pesto " ++ doc)
+> nodes = zip [firstNodeId..] op
> edges = toGraph nodes ++ resolveReferences nodes
> test = [
diff --git a/src/Codec/Pesto/Parse.lhs b/src/Codec/Pesto/Parse.lhs
index 745d339..7deb511 100644
--- a/src/Codec/Pesto/Parse.lhs
+++ b/src/Codec/Pesto/Parse.lhs
@@ -12,12 +12,13 @@ Language syntax
> , Object(..)
> , Approximately(..)
> , Amount(..)
-> , Recipe(..)
> , isResult
> , isReference
> , isAlternative
> , isAnnotation
> , isAction
+> , isDirective
+> , isUnknown
> , spaces1
> , notspace
> ) where
@@ -33,50 +34,21 @@ Language syntax
> import Codec.Pesto.Serialize (serialize)
-XXX: magic should be an operation
-XXX: this parser should accept invalid operations
+Pesto parses UTF-8_ encoded input files into a sequence of operations.
-From the XXXsyntactic point of view a Pesto recipe is just a list of
-space-delimited operations. It is encoded with UTF-8_ and starts with a magic
-identifier (``%pesto-1``) followed by one or more spaces (spaces1_). Every
+The input is a UTF-8 encoded stream of operations, which are separated by one
+or more spaces (spaces1_). Every
character within the Unicode whitespace class is considered a space.
.. _UTF-8:
.. _spaces1:
-.. _Recipe:
-> data Recipe = Recipe {
-> version :: Integer
-> , operations :: [(SourcePos, Operation)]
-> } deriving Show
-> recipe = Recipe
-> <$> magic <* spaces1
-> <*> ((,) <$> getPosition <*> operation) `sepEndBy` spaces1
-> <* eof
-> <?> "recipe"
+> stream = ((,) <$> getPosition <*> operation) `sepEndBy` spaces1
+> <?> "stream"
> spaces1 = many1 space
-The file identifier consists of the string ``%pesto-`` followed by an integral
-number and arbitrary non-space characters. They are reserved for future use and
-must be ignored by parsers implementing this version of pesto. A byte order
-mark (BOM) must not be used.
-> magic = string "%pesto-" *> int <* skipMany notspace <?> "magic"
-> notspace = satisfy (not . isSpace)
-.. _Operation:
-.. _Ingredient:
-.. _Tool:
-.. _Result:
-.. _Alternative:
-.. _Reference:
-.. _Annotation:
-.. _Action:
-The following *operations* are supported:
+The following operations are supported:
> data Operation =
> Annotation String
@@ -86,6 +58,8 @@ The following *operations* are supported:
> | Reference Quantity
> | Result Object
> | Alternative Object
+> | Directive String
+> | Unknown String
> deriving (Show, Eq)
> operation =
@@ -96,6 +70,8 @@ The following *operations* are supported:
> <|> try result
> <|> try alternative
> <|> try reference
+> <|> try directive
+> <|> try unknown
> <?> "operation"
The pesto grammar has two kinds of operations: The first one begins with a
@@ -129,6 +105,19 @@ whitespace characters and then consumes an object or a quantity.
> alternative = oparg '|' (Alternative <$> object)
> reference = oparg '*' (Reference <$> quantity)
+Additionally there are two special operations. Directives are similar to the
+previous operations, but consume a qstr.
+> directive = oparg '%' (Directive <$> qstr)
+Unknown operations are the fallthrough-case and accept anything. They must not
+be discarded at this point. The point of accepting anything is to fail as late
+as possible while processing Pesto documents. This gives us a chance to print
+helpful messages that provide additional aid to the user who can then fix the
+problem.
+> unknown = Unknown <$> many1 notspace
> testOparg = [
> cmpOperation "+100 g flour" (Right (Ingredient (Quantity (Exact (AmountRatio (100%1))) "g" "flour")))
@@ -136,6 +125,7 @@ whitespace characters and then consumes an object or a quantity.
> , cmpOperation ">dough" (Right (Result "dough"))
> , cmpOperation "|trimmings" (Right (Alternative "trimmings"))
> , cmpOperation "*fish" (Right (Reference (Quantity (Exact (AmountStr "")) "" "fish")))
+> , cmpOperation3 "% invalid" (Right (Directive "invalid")) "%invalid"
> , cmpOperation3 "* \t\n 1 _ cheese" (Right (Reference (Quantity (Exact (AmountRatio (1%1))) "" "cheese"))) "*1 _ cheese"
> ]
@@ -155,6 +145,7 @@ A word always starts with a letter, followed by any number of non-space
> word = (:) <$> letter <*> many notspace
+> notspace = satisfy (not . isSpace)
The empty string can be represented by two double quotes or the underscore, but
not the empty string itself.
@@ -347,7 +338,7 @@ Appendix
> int = read <$> many1 digit
-> parse = runParser recipe () ""
+> parse = runParser stream () ""
Test helpers:
@@ -394,4 +385,8 @@ Wrap qstr test in AmountStr to aid serialization test
> isAnnotation _ = False
> isAction (Action _) = True
> isAction _ = False
+> isDirective (Directive _) = True
+> isDirective _ = False
+> isUnknown (Unknown _) = True
+> isUnknown _ = False
diff --git a/src/Codec/Pesto/Parse.lhs-boot b/src/Codec/Pesto/Parse.lhs-boot
index 6a6dee9..dab073c 100644
--- a/src/Codec/Pesto/Parse.lhs-boot
+++ b/src/Codec/Pesto/Parse.lhs-boot
@@ -8,6 +8,8 @@
> | Reference Quantity
> | Result Object
> | Alternative Object
+> | Directive String
+> | Unknown String
> data Quantity = Quantity Approximately Unit Object
> type Unit = String
> type Object = String
diff --git a/src/Codec/Pesto/Serialize.lhs b/src/Codec/Pesto/Serialize.lhs
index 5b3007e..b3cce7c 100644
--- a/src/Codec/Pesto/Serialize.lhs
+++ b/src/Codec/Pesto/Serialize.lhs
@@ -31,6 +31,8 @@ Finally transform linear stream of operations into a string again:
> serialize (Reference q) = '*':serialize q
> serialize (Result s) = '>':serializeQstr s
> serialize (Alternative s) = '|':serializeQstr s
+> serialize (Directive s) = '%':serializeQstr s
+> serialize (Unknown s) = s
> instance Serializeable Quantity where
> serialize (Quantity a b "") = serialize a ++ " " ++ serializeQstr b