6 files changed, 109 insertions, 54 deletions
diff --git a/src/Codec/Pesto/Graph.lhs b/src/Codec/Pesto/Graph.lhs
index 4ea2886..7376c5f 100644
--- a/src/Codec/Pesto/Graph.lhs
+++ b/src/Codec/Pesto/Graph.lhs
@@ -11,18 +11,56 @@ Language semantics
 > 	, firstNodeId
 > 	, resolveReferences
 > 	, test
+> 	, extract
 > 	) where
 > import Data.Char (isSpace, toLower, isLetter)
 > import Data.List (sort, nub)
 > import Test.HUnit hiding (test)
+> import Control.Applicative ((<$>))
 >
 > import Codec.Pesto.Parse hiding (test)
 
-Pesto’s syntax drives a stack-based machine that transforms the linear stream
-of operations generated by the parser into a directed graph. Think of the stack
-as your kitchen’s workspace that is used to prepare the food’s
-components. You can add new ingredients, perform actions on them, put them
-aside and add them again.
+The parser’s output, a stream of operations, may contain multiple recipes. A
+recipe must start with the directive “pesto” and may end with “bonappetit”.
+This function extracts all recipes from the stream and removes both directives.
+
+- easily embed recipes into other documents
+
+> extract [] = []
+> extract (Directive "pesto":stream) = between:extract next
+> 	where
+> 		isEnd (Directive x) | x `elem` ["bonappetit", "pesto"] = True
+> 		isEnd _ = False
+> 		(between, next) = break isEnd stream
+> extract (x:xs) = extract xs
+
+Start and end directive are removed from the extracted operations.  The
+directive “bonappetit” is optional at the end of a stream.
+
+> testExtract = [
+> 	  extract [Directive "pesto", Directive "bonappetit"] ~?= [[]]
+> 	, extract [Directive "pesto", Action "foobar", Directive "bonappetit"] ~?= [[Action "foobar"]]
+> 	, extract [Directive "pesto"] ~?= [[]]
+> 	, extract [Directive "pesto", Directive "foobar"] ~?= [[Directive "foobar"]]
+
+Operations surrounding the start and end directive are removed.
+
+> 	, extract [Unknown "Something", Directive "pesto"] ~?= [[]]
+> 	, extract [Unknown "Something", Action "pour", Directive "pesto"] ~?= [[]]
+> 	, extract [Directive "pesto", Directive "bonappetit", Annotation "something"] ~?= [[]]
+
+The stream may contain multiple recipes. The start directive also ends the
+previous recipe and starts a new one.
+
+> 	, extract [Directive "pesto", Action "pour", Directive "bonappetit", Action "foobar", Directive "pesto", Annotation "something"] ~?= [[Action "pour"], [Annotation "something"]]
+> 	, extract [Directive "pesto", Action "heat", Directive "pesto", Annotation "something"] ~?= [[Action "heat"], [Annotation "something"]]
+> 	, extract [Directive "pesto", Annotation "foobar", Directive "pesto", Directive "bonappetit"] ~?= [[Annotation "foobar"], []]
+> 	]
+
+Each recipe’s stream of operations drives a stack-based machine that transforms
+it into a directed graph. Think of the stack as your kitchen’s workspace that
+is used to prepare the food’s components. You can add new ingredients, perform
+actions on them, put them aside and add them again.
 
 This function processes a list of nodes, that is operations uniquely identified
 by an integer and returns the edges of the directed graph as a list of tuples.
@@ -74,6 +112,12 @@ used to provide more information about ingredients (so “hot water” becomes
 > 		f ctx@(Nothing, s, edges) (_, Annotation _) = ctx
 > 		f (Just prev, s, edges) (i, Annotation _) = (Just prev, s, (i, prev):edges)
 
+Unused directives or unknown operations are danging nodes with no connection to
+other nodes.
+
+> 		f ctx (_, Directive _) = ctx
+> 		f ctx (_, Unknown _) = ctx
+
 These are helper functions:
 
 > 		addToStack (_, stack:sx, edges) i = (Just i, (i:stack):sx, edges)
@@ -126,6 +170,11 @@ to the same node.
 > 	, cmpGraph "+foobar >barbaz (C)" [(0, 1), (2, 1)]
 > 	, cmpGraph "+foobar |barbaz (C)" [(0, 1), (2, 1)]
 > 	, cmpGraph "*foobar (C)" [(1, 0)]
+
+Unknown directives or operations are never connected to other nodes.
+
+> 	, cmpGraph "%invalid" []
+> 	, cmpGraph "invalid" []
 > 	]
 
 References
@@ -183,8 +232,8 @@ Appendix
 
 > runGraphWith f doc expect = sort edges ~?= sort expect
 > 	where
-> 		(Right op) = parse ("%pesto-1 " ++ doc)
-> 		nodes = (zip [firstNodeId..] . map snd . operations) op
+> 		(Right op) = (head . extract . snd . unzip) <$> parse ("%pesto " ++ doc)
+> 		nodes = zip [firstNodeId..] op
 > 		edges = f nodes
 > cmpGraph = runGraphWith toGraph
 > cmpGraphRef = runGraphWith resolveReferences
@@ -202,5 +251,5 @@ Get all nodes with edges pointing towards nodeid
 
 > outgoing edges (nodeid, _) = filter ((==) nodeid . fst) edges
 
-> test = ["graph" ~: testGraph, "ref" ~: testRef]
+> test = ["graph" ~: testGraph, "ref" ~: testRef, "extract" ~: testExtract]
 
diff --git a/src/Codec/Pesto/Lint.lhs b/src/Codec/Pesto/Lint.lhs
index b96c9de..e398c09 100644
--- a/src/Codec/Pesto/Lint.lhs
+++ b/src/Codec/Pesto/Lint.lhs
@@ -17,7 +17,7 @@ Not every graph generated in the previous section is a useful recipe, since
 some combinations of operations just do not make sense. The linting test in
 this section can detect common errors. Failing any of these tests does not
 render a recipe invalid, but *useless*. Thus implementations must not create
-such recipes. They may be accepted as input from the user.
+such recipes. They may be accepted the user though.
 
 Every lint test checks a single aspect of the graph.
 
@@ -32,13 +32,14 @@ Metadata
 The graph must have exactly one root node (i.e. a node with incoming edges
 only) and it must be a result. The result’s object name is the recipe’s title.
 This also requires all results and alternatives to be referenced somewhere.
+Directives are either consumed when parsing, generating a graph and linting.
+Otherwise they are dangling as well. Unknown operations are always dangling.
 
 > rootIsResult nodes edges = case walkRoot nodes edges of
 > 	[] -> [LintResult NoRootNode []]
 > 	(i, x):[] -> if isResult x then [] else [LintResult NonResultRootNode [i]]
 > 	xs -> [LintResult MoreThanOneRootNode (map fst xs)]
 
-
 Empty recipes or circular references have no root node:
 
 > testLintMetadata = [
@@ -49,6 +50,10 @@ Empty recipes or circular references have no root node:
 This recipe’s title is “Pesto”.
 
 > 	, cmpLint "+foobar >Pesto" []
+
+Directives and unknown operations are dangling and thus root nodes.
+
+> 	, cmpLint "invalid %invalid +foo >bar" [LintResult MoreThanOneRootNode [0,1,3]]
 >	]
 
 Additional key-value metadata for the whole recipe can be provided by adding
@@ -291,6 +296,7 @@ Appendix
 >	| UnitNotWellKnown
 > 	| UnknownMetadataKey
 > 	| InvalidMetadata
+> 	| InvalidNode
 > 	deriving (Show, Eq, Ord)
 
 > lintTests = [
@@ -306,8 +312,8 @@ Appendix
 
 > cmpLint doc expect = doc ~: sort (lint nodes edges) ~?= sort expect
 > 	where
-> 		(Right op) = parse ("%pesto-1 " ++ doc)
-> 		nodes = (zip [firstNodeId..] . map snd . operations) op
+> 		(Right op) = (head . extract . snd . unzip) <$> parse ("%pesto " ++ doc)
+> 		nodes = zip [firstNodeId..] op
 > 		edges = toGraph nodes ++ resolveReferences nodes
 
 > test = [
diff --git a/src/Codec/Pesto/Parse.lhs b/src/Codec/Pesto/Parse.lhs
index 745d339..7deb511 100644
--- a/src/Codec/Pesto/Parse.lhs
+++ b/src/Codec/Pesto/Parse.lhs
@@ -12,12 +12,13 @@ Language syntax
 > 	, Object(..)
 > 	, Approximately(..)
 > 	, Amount(..)
-> 	, Recipe(..)
 > 	, isResult
 > 	, isReference
 > 	, isAlternative
 > 	, isAnnotation
 > 	, isAction
+> 	, isDirective
+> 	, isUnknown
 > 	, spaces1
 > 	, notspace
 > 	) where
@@ -33,50 +34,21 @@ Language syntax
 >
 > import Codec.Pesto.Serialize (serialize)
 
-XXX: magic should be an operation
-XXX: this parser should accept invalid operations
+Pesto parses UTF-8_ encoded input files into a sequence of operations.
 
-From the XXXsyntactic point of view a Pesto recipe is just a list of
-space-delimited operations. It is encoded with UTF-8_ and starts with a magic
-identifier (``%pesto-1``) followed by one or more spaces (spaces1_). Every
+- stream of operations
+- utf8 encoded
+Every
 character within the Unicode whitespace class is considered a space.
 
 .. _UTF-8: https://tools.ietf.org/html/rfc3629
-
 .. _spaces1:
-.. _Recipe:
 
-> data Recipe = Recipe {
-> 	  version :: Integer
-> 	, operations :: [(SourcePos, Operation)]
-> 	} deriving Show
-> 
-> recipe = Recipe
-> 	<$> magic <* spaces1
-> 	<*> ((,) <$> getPosition <*> operation) `sepEndBy` spaces1
-> 	<*  eof
-> 	<?> "recipe"
-> 
+> stream = ((,) <$> getPosition <*> operation) `sepEndBy` spaces1
+> 	<?> "stream"
 > spaces1 = many1 space
 
-The file identifier consists of the string ``%pesto-`` followed by an integral
-number and arbitrary non-space characters. They are reserved for future use and
-must be ignored by parsers implementing this version of pesto. A byte order
-mark (BOM) must not be used.
-
-> magic = string "%pesto-" *> int <* skipMany notspace <?> "magic"
-> notspace = satisfy (not . isSpace)
-
-.. _Operation:
-.. _Ingredient:
-.. _Tool:
-.. _Result:
-.. _Alternative:
-.. _Reference:
-.. _Annotation:
-.. _Action:
-
-The following *operations* are supported:
+The following operations are supported:
 
 > data Operation =
 > 	  Annotation String
@@ -86,6 +58,8 @@ The following *operations* are supported:
 > 	| Reference Quantity
 > 	| Result Object
 > 	| Alternative Object
+> 	| Directive String
+> 	| Unknown String
 > 	deriving (Show, Eq)
 >
 > operation =
@@ -96,6 +70,8 @@ The following *operations* are supported:
 > 	<|> try result
 > 	<|> try alternative
 > 	<|> try reference
+> 	<|> try directive
+> 	<|> try unknown
 > 	<?> "operation"
 
 The pesto grammar has two kinds of operations: The first one begins with a
@@ -129,6 +105,19 @@ whitespace characters and then consumes an object or a quantity.
 > alternative = oparg '|' (Alternative <$> object)
 > reference = oparg '*' (Reference <$> quantity)
 
+Additionally there are two special operations. Directives are similar to the
+previous operations, but consume a qstr.
+
+> directive = oparg '%' (Directive <$> qstr)
+
+Unknown operations are the fallthrough-case and accept anything. They must not
+be discarded at this point. The point of accepting anything is to fail as late
+as possible while processing Pesto documents. This gives us a chance to print
+helpful mesages that provide additional aid to the user who can then fix the
+problem.
+
+> unknown = Unknown <$> many1 notspace
+
 > testOparg = [
 > 	  cmpOperation "+100 g flour" (Right (Ingredient (Quantity (Exact (AmountRatio (100%1))) "g" "flour")))
 
@@ -136,6 +125,7 @@ whitespace characters and then consumes an object or a quantity.
 > 	, cmpOperation ">dough" (Right (Result "dough"))
 > 	, cmpOperation "|trimmings" (Right (Alternative "trimmings"))
 > 	, cmpOperation "*fish" (Right (Reference (Quantity (Exact (AmountStr "")) "" "fish")))
+> 	, cmpOperation3 "% invalid" (Right (Directive "invalid")) "%invalid"
 > 	, cmpOperation3 "* \t\n 1 _ cheese" (Right (Reference (Quantity (Exact (AmountRatio (1%1))) "" "cheese"))) "*1 _ cheese"
 > 	]
 
@@ -155,6 +145,7 @@ A word always starts with a letter, followed by any number of non-space
 characters.
 
 > word = (:) <$> letter <*> many notspace
+> notspace = satisfy (not . isSpace)
 
 The empty string can be represented by two double quotes or the underscore, but
 not the empty string itself.
@@ -347,7 +338,7 @@ Appendix
 ++++++++
 
 > int = read <$> many1 digit
-> parse = runParser recipe () ""
+> parse = runParser stream () ""
 
 Test helpers:
 
@@ -394,4 +385,8 @@ Wrap qstr test in AmountStr to aid serialization test
 > isAnnotation _ = False
 > isAction (Action _) = True
 > isAction _ = False
+> isDirective (Directive _) = True
+> isDirective _ = False
+> isUnknown (Unknown _) = True
+> isUnknown _ = False
 
diff --git a/src/Codec/Pesto/Parse.lhs-boot b/src/Codec/Pesto/Parse.lhs-boot
index 6a6dee9..dab073c 100644
--- a/src/Codec/Pesto/Parse.lhs-boot
+++ b/src/Codec/Pesto/Parse.lhs-boot
@@ -8,6 +8,8 @@
 > 	| Reference Quantity
 > 	| Result Object
 > 	| Alternative Object
+> 	| Directive String
+> 	| Unknown String
 > data Quantity = Quantity Approximately Unit Object
 > type Unit = String
 > type Object = String
diff --git a/src/Codec/Pesto/Serialize.lhs b/src/Codec/Pesto/Serialize.lhs
index 5b3007e..b3cce7c 100644
--- a/src/Codec/Pesto/Serialize.lhs
+++ b/src/Codec/Pesto/Serialize.lhs
@@ -31,6 +31,8 @@ Finally transform linear stream of operations into a string again:
 > 	serialize (Reference q) = '*':serialize q
 > 	serialize (Result s) = '>':serializeQstr s
 > 	serialize (Alternative s) = '|':serializeQstr s
+> 	serialize (Directive s) = '%':serializeQstr s
+> 	serialize (Unknown s) = s
 
 > instance Serializeable Quantity where
 > 	serialize (Quantity a b "") = serialize a ++ " " ++ serializeQstr b
diff --git a/src/Main.lhs b/src/Main.lhs
index 5b37b8e..00123d5 100644
--- a/src/Main.lhs
+++ b/src/Main.lhs
@@ -4,8 +4,8 @@ User interface
 .. class:: nodoc
 
 > module Main (main) where
-> import Codec.Pesto.Parse (parse, Recipe(..))
-> import Codec.Pesto.Graph (toGraph, firstNodeId, resolveReferences)
+> import Codec.Pesto.Parse (parse)
+> import Codec.Pesto.Graph (extract, toGraph, firstNodeId, resolveReferences)
 > import Codec.Pesto.Lint (lint)
 > import Codec.Pesto.Dot (toDot)
 
@@ -23,9 +23,10 @@ add linting information to graph
 
 > main = do
 > 	s <- getContents
->	(flip . either) malformedRecipe (parse s) $ \doc -> do
+>	(flip . either) malformedRecipe (parse s) $ \stream -> do
 > 		let
-> 			nodes = (zip [firstNodeId..] . snd . unzip . operations) doc
+> 			doc = (head . extract . snd . unzip) stream
+> 			nodes = zip [firstNodeId..] doc
 > 			edges = toGraph nodes ++ resolveReferences nodes
 > 		--print $ lint nodes edges
 > 		putStrLn $ toDot nodes edges