diff options
| -rw-r--r-- | src/Codec/Pesto/Graph.lhs | 65 | ||||
| -rw-r--r-- | src/Codec/Pesto/Lint.lhs | 14 | ||||
| -rw-r--r-- | src/Codec/Pesto/Parse.lhs | 71 | ||||
| -rw-r--r-- | src/Codec/Pesto/Parse.lhs-boot | 2 | ||||
| -rw-r--r-- | src/Codec/Pesto/Serialize.lhs | 2 | ||||
| -rw-r--r-- | src/Main.lhs | 9 | 
6 files changed, 109 insertions, 54 deletions
| diff --git a/src/Codec/Pesto/Graph.lhs b/src/Codec/Pesto/Graph.lhs index 4ea2886..7376c5f 100644 --- a/src/Codec/Pesto/Graph.lhs +++ b/src/Codec/Pesto/Graph.lhs @@ -11,18 +11,56 @@ Language semantics  > 	, firstNodeId  > 	, resolveReferences  > 	, test +> 	, extract  > 	) where  > import Data.Char (isSpace, toLower, isLetter)  > import Data.List (sort, nub)  > import Test.HUnit hiding (test) +> import Control.Applicative ((<$>))  >  > import Codec.Pesto.Parse hiding (test) -Pesto’s syntax drives a stack-based machine that transforms the linear stream -of operations generated by the parser into a directed graph. Think of the stack -as your kitchen’s workspace that is used to prepare the food’s -components. You can add new ingredients, perform actions on them, put them -aside and add them again. +The parser’s output, a stream of operations, may contain multiple recipes. A +recipe must start with the directive “pesto” and may end with “bonappetit”. +This function extracts all recipes from the stream and removes both directives. + +- easily embed recipes into other documents + +> extract [] = [] +> extract (Directive "pesto":stream) = between:extract next +> 	where +> 		isEnd (Directive x) | x `elem` ["bonappetit", "pesto"] = True +> 		isEnd _ = False +> 		(between, next) = break isEnd stream +> extract (x:xs) = extract xs + +Start and end directive are removed from the extracted operations.  The +directive “bonappetit” is optional at the end of a stream. + +> testExtract = [ +> 	  extract [Directive "pesto", Directive "bonappetit"] ~?= [[]] +> 	, extract [Directive "pesto", Action "foobar", Directive "bonappetit"] ~?= [[Action "foobar"]] +> 	, extract [Directive "pesto"] ~?= [[]] +> 	, extract [Directive "pesto", Directive "foobar"] ~?= [[Directive "foobar"]] + +Operations surrounding the start and end directive are removed. + +> 	, extract [Unknown "Something", Directive "pesto"] ~?= [[]] +> 	, extract [Unknown "Something", Action "pour", Directive "pesto"] ~?= [[]] +> 	, extract [Directive "pesto", Directive "bonappetit", Annotation "something"] ~?= [[]] + +The stream may contain multiple recipes. The start directive also ends the +previous recipe and starts a new one. + +> 	, extract [Directive "pesto", Action "pour", Directive "bonappetit", Action "foobar", Directive "pesto", Annotation "something"] ~?= [[Action "pour"], [Annotation "something"]] +> 	, extract [Directive "pesto", Action "heat", Directive "pesto", Annotation "something"] ~?= [[Action "heat"], [Annotation "something"]] +> 	, extract [Directive "pesto", Annotation "foobar", Directive "pesto", Directive "bonappetit"] ~?= [[Annotation "foobar"], []] +> 	] + +Each recipe’s stream of operations drives a stack-based machine that transforms +it into a directed graph. Think of the stack as your kitchen’s workspace that +is used to prepare the food’s components. You can add new ingredients, perform +actions on them, put them aside and add them again.  This function processes a list of nodes, that is operations uniquely identified  by an integer and returns the edges of the directed graph as a list of tuples. @@ -74,6 +112,12 @@ used to provide more information about ingredients (so “hot water” becomes  > 		f ctx@(Nothing, s, edges) (_, Annotation _) = ctx  > 		f (Just prev, s, edges) (i, Annotation _) = (Just prev, s, (i, prev):edges) +Unused directives or unknown operations are danging nodes with no connection to +other nodes. + +> 		f ctx (_, Directive _) = ctx +> 		f ctx (_, Unknown _) = ctx +  These are helper functions:  > 		addToStack (_, stack:sx, edges) i = (Just i, (i:stack):sx, edges) @@ -126,6 +170,11 @@ to the same node.  > 	, cmpGraph "+foobar >barbaz (C)" [(0, 1), (2, 1)]  > 	, cmpGraph "+foobar |barbaz (C)" [(0, 1), (2, 1)]  > 	, cmpGraph "*foobar (C)" [(1, 0)] + +Unknown directives or operations are never connected to other nodes. + +> 	, cmpGraph "%invalid" [] +> 	, cmpGraph "invalid" []  > 	]  References @@ -183,8 +232,8 @@ Appendix  > runGraphWith f doc expect = sort edges ~?= sort expect  > 	where -> 		(Right op) = parse ("%pesto-1 " ++ doc) -> 		nodes = (zip [firstNodeId..] . map snd . operations) op +> 		(Right op) = (head . extract . snd . unzip) <$> parse ("%pesto " ++ doc) +> 		nodes = zip [firstNodeId..] op  > 		edges = f nodes  > cmpGraph = runGraphWith toGraph  > cmpGraphRef = runGraphWith resolveReferences @@ -202,5 +251,5 @@ Get all nodes with edges pointing towards nodeid  > outgoing edges (nodeid, _) = filter ((==) nodeid . fst) edges -> test = ["graph" ~: testGraph, "ref" ~: testRef] +> test = ["graph" ~: testGraph, "ref" ~: testRef, "extract" ~: testExtract] diff --git a/src/Codec/Pesto/Lint.lhs b/src/Codec/Pesto/Lint.lhs index b96c9de..e398c09 100644 --- a/src/Codec/Pesto/Lint.lhs +++ b/src/Codec/Pesto/Lint.lhs @@ -17,7 +17,7 @@ Not every graph generated in the previous section is a useful recipe, since  some combinations of operations just do not make sense. The linting test in  this section can detect common errors. Failing any of these tests does not  render a recipe invalid, but *useless*. Thus implementations must not create -such recipes. They may be accepted as input from the user. +such recipes. They may be accepted the user though.  Every lint test checks a single aspect of the graph. @@ -32,13 +32,14 @@ Metadata  The graph must have exactly one root node (i.e. a node with incoming edges  only) and it must be a result. The result’s object name is the recipe’s title.  This also requires all results and alternatives to be referenced somewhere. +Directives are either consumed when parsing, generating a graph and linting. +Otherwise they are dangling as well. Unknown operations are always dangling.  > rootIsResult nodes edges = case walkRoot nodes edges of  > 	[] -> [LintResult NoRootNode []]  > 	(i, x):[] -> if isResult x then [] else [LintResult NonResultRootNode [i]]  > 	xs -> [LintResult MoreThanOneRootNode (map fst xs)] -  Empty recipes or circular references have no root node:  > testLintMetadata = [ @@ -49,6 +50,10 @@ Empty recipes or circular references have no root node:  This recipe’s title is “Pesto”.  > 	, cmpLint "+foobar >Pesto" [] + +Directives and unknown operations are dangling and thus root nodes. + +> 	, cmpLint "invalid %invalid +foo >bar" [LintResult MoreThanOneRootNode [0,1,3]]  >	]  Additional key-value metadata for the whole recipe can be provided by adding @@ -291,6 +296,7 @@ Appendix  >	| UnitNotWellKnown  > 	| UnknownMetadataKey  > 	| InvalidMetadata +> 	| InvalidNode  > 	deriving (Show, Eq, Ord)  > lintTests = [ @@ -306,8 +312,8 @@ Appendix  > cmpLint doc expect = doc ~: sort (lint nodes edges) ~?= sort expect  > 	where -> 		(Right op) = parse ("%pesto-1 " ++ doc) -> 		nodes = (zip [firstNodeId..] . map snd . operations) op +> 		(Right op) = (head . extract . snd . unzip) <$> parse ("%pesto " ++ doc) +> 		nodes = zip [firstNodeId..] op  > 		edges = toGraph nodes ++ resolveReferences nodes  > test = [ diff --git a/src/Codec/Pesto/Parse.lhs b/src/Codec/Pesto/Parse.lhs index 745d339..7deb511 100644 --- a/src/Codec/Pesto/Parse.lhs +++ b/src/Codec/Pesto/Parse.lhs @@ -12,12 +12,13 @@ Language syntax  > 	, Object(..)  > 	, Approximately(..)  > 	, Amount(..) -> 	, Recipe(..)  > 	, isResult  > 	, isReference  > 	, isAlternative  > 	, isAnnotation  > 	, isAction +> 	, isDirective +> 	, isUnknown  > 	, spaces1  > 	, notspace  > 	) where @@ -33,50 +34,21 @@ Language syntax  >  > import Codec.Pesto.Serialize (serialize) -XXX: magic should be an operation -XXX: this parser should accept invalid operations +Pesto parses UTF-8_ encoded input files into a sequence of operations. -From the XXXsyntactic point of view a Pesto recipe is just a list of -space-delimited operations. It is encoded with UTF-8_ and starts with a magic -identifier (``%pesto-1``) followed by one or more spaces (spaces1_). Every +- stream of operations +- utf8 encoded +Every  character within the Unicode whitespace class is considered a space.  .. _UTF-8: https://tools.ietf.org/html/rfc3629 -  .. _spaces1: -.. _Recipe: -> data Recipe = Recipe { -> 	  version :: Integer -> 	, operations :: [(SourcePos, Operation)] -> 	} deriving Show ->  -> recipe = Recipe -> 	<$> magic <* spaces1 -> 	<*> ((,) <$> getPosition <*> operation) `sepEndBy` spaces1 -> 	<*  eof -> 	<?> "recipe" ->  +> stream = ((,) <$> getPosition <*> operation) `sepEndBy` spaces1 +> 	<?> "stream"  > spaces1 = many1 space -The file identifier consists of the string ``%pesto-`` followed by an integral -number and arbitrary non-space characters. They are reserved for future use and -must be ignored by parsers implementing this version of pesto. A byte order -mark (BOM) must not be used. - -> magic = string "%pesto-" *> int <* skipMany notspace <?> "magic" -> notspace = satisfy (not . isSpace) - -.. _Operation: -.. _Ingredient: -.. _Tool: -.. _Result: -.. _Alternative: -.. _Reference: -.. _Annotation: -.. _Action: - -The following *operations* are supported: +The following operations are supported:  > data Operation =  > 	  Annotation String @@ -86,6 +58,8 @@ The following *operations* are supported:  > 	| Reference Quantity  > 	| Result Object  > 	| Alternative Object +> 	| Directive String +> 	| Unknown String  > 	deriving (Show, Eq)  >  > operation = @@ -96,6 +70,8 @@ The following *operations* are supported:  > 	<|> try result  > 	<|> try alternative  > 	<|> try reference +> 	<|> try directive +> 	<|> try unknown  > 	<?> "operation"  The pesto grammar has two kinds of operations: The first one begins with a @@ -129,6 +105,19 @@ whitespace characters and then consumes an object or a quantity.  > alternative = oparg '|' (Alternative <$> object)  > reference = oparg '*' (Reference <$> quantity) +Additionally there are two special operations. Directives are similar to the +previous operations, but consume a qstr. + +> directive = oparg '%' (Directive <$> qstr) + +Unknown operations are the fallthrough-case and accept anything. They must not +be discarded at this point. The point of accepting anything is to fail as late +as possible while processing Pesto documents. This gives us a chance to print +helpful mesages that provide additional aid to the user who can then fix the +problem. + +> unknown = Unknown <$> many1 notspace +  > testOparg = [  > 	  cmpOperation "+100 g flour" (Right (Ingredient (Quantity (Exact (AmountRatio (100%1))) "g" "flour"))) @@ -136,6 +125,7 @@ whitespace characters and then consumes an object or a quantity.  > 	, cmpOperation ">dough" (Right (Result "dough"))  > 	, cmpOperation "|trimmings" (Right (Alternative "trimmings"))  > 	, cmpOperation "*fish" (Right (Reference (Quantity (Exact (AmountStr "")) "" "fish"))) +> 	, cmpOperation3 "% invalid" (Right (Directive "invalid")) "%invalid"  > 	, cmpOperation3 "* \t\n 1 _ cheese" (Right (Reference (Quantity (Exact (AmountRatio (1%1))) "" "cheese"))) "*1 _ cheese"  > 	] @@ -155,6 +145,7 @@ A word always starts with a letter, followed by any number of non-space  characters.  > word = (:) <$> letter <*> many notspace +> notspace = satisfy (not . isSpace)  The empty string can be represented by two double quotes or the underscore, but  not the empty string itself. @@ -347,7 +338,7 @@ Appendix  ++++++++  > int = read <$> many1 digit -> parse = runParser recipe () "" +> parse = runParser stream () ""  Test helpers: @@ -394,4 +385,8 @@ Wrap qstr test in AmountStr to aid serialization test  > isAnnotation _ = False  > isAction (Action _) = True  > isAction _ = False +> isDirective (Directive _) = True +> isDirective _ = False +> isUnknown (Unknown _) = True +> isUnknown _ = False diff --git a/src/Codec/Pesto/Parse.lhs-boot b/src/Codec/Pesto/Parse.lhs-boot index 6a6dee9..dab073c 100644 --- a/src/Codec/Pesto/Parse.lhs-boot +++ b/src/Codec/Pesto/Parse.lhs-boot @@ -8,6 +8,8 @@  > 	| Reference Quantity  > 	| Result Object  > 	| Alternative Object +> 	| Directive String +> 	| Unknown String  > data Quantity = Quantity Approximately Unit Object  > type Unit = String  > type Object = String diff --git a/src/Codec/Pesto/Serialize.lhs b/src/Codec/Pesto/Serialize.lhs index 5b3007e..b3cce7c 100644 --- a/src/Codec/Pesto/Serialize.lhs +++ b/src/Codec/Pesto/Serialize.lhs @@ -31,6 +31,8 @@ Finally transform linear stream of operations into a string again:  > 	serialize (Reference q) = '*':serialize q  > 	serialize (Result s) = '>':serializeQstr s  > 	serialize (Alternative s) = '|':serializeQstr s +> 	serialize (Directive s) = '%':serializeQstr s +> 	serialize (Unknown s) = s  > instance Serializeable Quantity where  > 	serialize (Quantity a b "") = serialize a ++ " " ++ serializeQstr b diff --git a/src/Main.lhs b/src/Main.lhs index 5b37b8e..00123d5 100644 --- a/src/Main.lhs +++ b/src/Main.lhs @@ -4,8 +4,8 @@ User interface  .. class:: nodoc  > module Main (main) where -> import Codec.Pesto.Parse (parse, Recipe(..)) -> import Codec.Pesto.Graph (toGraph, firstNodeId, resolveReferences) +> import Codec.Pesto.Parse (parse) +> import Codec.Pesto.Graph (extract, toGraph, firstNodeId, resolveReferences)  > import Codec.Pesto.Lint (lint)  > import Codec.Pesto.Dot (toDot) @@ -23,9 +23,10 @@ add linting information to graph  > main = do  > 	s <- getContents ->	(flip . either) malformedRecipe (parse s) $ \doc -> do +>	(flip . either) malformedRecipe (parse s) $ \stream -> do  > 		let -> 			nodes = (zip [firstNodeId..] . snd . unzip . operations) doc +> 			doc = (head . extract . snd . unzip) stream +> 			nodes = zip [firstNodeId..] doc  > 			edges = toGraph nodes ++ resolveReferences nodes  > 		--print $ lint nodes edges  > 		putStrLn $ toDot nodes edges | 
