From 4c6c1bcf877017df98d4a5492a665eca12b8ba98 Mon Sep 17 00:00:00 2001 From: Lars-Dominik Braun Date: Sun, 28 Jun 2015 16:42:19 +0200 Subject: Replace file magic with directives Adds the %pesto and %bonappetit directives, switches to stream-based operation. --- src/Codec/Pesto/Graph.lhs | 65 +++++++++++++++++++++++++++++++++----- src/Codec/Pesto/Lint.lhs | 14 ++++++--- src/Codec/Pesto/Parse.lhs | 71 ++++++++++++++++++++---------------------- src/Codec/Pesto/Parse.lhs-boot | 2 ++ src/Codec/Pesto/Serialize.lhs | 2 ++ src/Main.lhs | 9 +++--- 6 files changed, 109 insertions(+), 54 deletions(-) diff --git a/src/Codec/Pesto/Graph.lhs b/src/Codec/Pesto/Graph.lhs index 4ea2886..7376c5f 100644 --- a/src/Codec/Pesto/Graph.lhs +++ b/src/Codec/Pesto/Graph.lhs @@ -11,18 +11,56 @@ Language semantics > , firstNodeId > , resolveReferences > , test +> , extract > ) where > import Data.Char (isSpace, toLower, isLetter) > import Data.List (sort, nub) > import Test.HUnit hiding (test) +> import Control.Applicative ((<$>)) > > import Codec.Pesto.Parse hiding (test) -Pesto’s syntax drives a stack-based machine that transforms the linear stream -of operations generated by the parser into a directed graph. Think of the stack -as your kitchen’s workspace that is used to prepare the food’s -components. You can add new ingredients, perform actions on them, put them -aside and add them again. +The parser’s output, a stream of operations, may contain multiple recipes. A +recipe must start with the directive “pesto” and may end with “bonappetit”. +This function extracts all recipes from the stream and removes both directives. + +- easily embed recipes into other documents + +> extract [] = [] +> extract (Directive "pesto":stream) = between:extract next +> where +> isEnd (Directive x) | x `elem` ["bonappetit", "pesto"] = True +> isEnd _ = False +> (between, next) = break isEnd stream +> extract (x:xs) = extract xs + +Start and end directive are removed from the extracted operations. The +directive “bonappetit” is optional at the end of a stream. + +> testExtract = [ +> extract [Directive "pesto", Directive "bonappetit"] ~?= [[]] +> , extract [Directive "pesto", Action "foobar", Directive "bonappetit"] ~?= [[Action "foobar"]] +> , extract [Directive "pesto"] ~?= [[]] +> , extract [Directive "pesto", Directive "foobar"] ~?= [[Directive "foobar"]] + +Operations surrounding the start and end directive are removed. + +> , extract [Unknown "Something", Directive "pesto"] ~?= [[]] +> , extract [Unknown "Something", Action "pour", Directive "pesto"] ~?= [[]] +> , extract [Directive "pesto", Directive "bonappetit", Annotation "something"] ~?= [[]] + +The stream may contain multiple recipes. The start directive also ends the +previous recipe and starts a new one. + +> , extract [Directive "pesto", Action "pour", Directive "bonappetit", Action "foobar", Directive "pesto", Annotation "something"] ~?= [[Action "pour"], [Annotation "something"]] +> , extract [Directive "pesto", Action "heat", Directive "pesto", Annotation "something"] ~?= [[Action "heat"], [Annotation "something"]] +> , extract [Directive "pesto", Annotation "foobar", Directive "pesto", Directive "bonappetit"] ~?= [[Annotation "foobar"], []] +> ] + +Each recipe’s stream of operations drives a stack-based machine that transforms +it into a directed graph. Think of the stack as your kitchen’s workspace that +is used to prepare the food’s components. You can add new ingredients, perform +actions on them, put them aside and add them again. This function processes a list of nodes, that is operations uniquely identified by an integer and returns the edges of the directed graph as a list of tuples. @@ -74,6 +112,12 @@ used to provide more information about ingredients (so “hot water” becomes > f ctx@(Nothing, s, edges) (_, Annotation _) = ctx > f (Just prev, s, edges) (i, Annotation _) = (Just prev, s, (i, prev):edges) +Unused directives or unknown operations are danging nodes with no connection to +other nodes. + +> f ctx (_, Directive _) = ctx +> f ctx (_, Unknown _) = ctx + These are helper functions: > addToStack (_, stack:sx, edges) i = (Just i, (i:stack):sx, edges) @@ -126,6 +170,11 @@ to the same node. > , cmpGraph "+foobar >barbaz (C)" [(0, 1), (2, 1)] > , cmpGraph "+foobar |barbaz (C)" [(0, 1), (2, 1)] > , cmpGraph "*foobar (C)" [(1, 0)] + +Unknown directives or operations are never connected to other nodes. + +> , cmpGraph "%invalid" [] +> , cmpGraph "invalid" [] > ] References @@ -183,8 +232,8 @@ Appendix > runGraphWith f doc expect = sort edges ~?= sort expect > where -> (Right op) = parse ("%pesto-1 " ++ doc) -> nodes = (zip [firstNodeId..] . map snd . operations) op +> (Right op) = (head . extract . snd . unzip) <$> parse ("%pesto " ++ doc) +> nodes = zip [firstNodeId..] op > edges = f nodes > cmpGraph = runGraphWith toGraph > cmpGraphRef = runGraphWith resolveReferences @@ -202,5 +251,5 @@ Get all nodes with edges pointing towards nodeid > outgoing edges (nodeid, _) = filter ((==) nodeid . fst) edges -> test = ["graph" ~: testGraph, "ref" ~: testRef] +> test = ["graph" ~: testGraph, "ref" ~: testRef, "extract" ~: testExtract] diff --git a/src/Codec/Pesto/Lint.lhs b/src/Codec/Pesto/Lint.lhs index b96c9de..e398c09 100644 --- a/src/Codec/Pesto/Lint.lhs +++ b/src/Codec/Pesto/Lint.lhs @@ -17,7 +17,7 @@ Not every graph generated in the previous section is a useful recipe, since some combinations of operations just do not make sense. The linting test in this section can detect common errors. Failing any of these tests does not render a recipe invalid, but *useless*. Thus implementations must not create -such recipes. They may be accepted as input from the user. +such recipes. They may be accepted the user though. Every lint test checks a single aspect of the graph. @@ -32,13 +32,14 @@ Metadata The graph must have exactly one root node (i.e. a node with incoming edges only) and it must be a result. The result’s object name is the recipe’s title. This also requires all results and alternatives to be referenced somewhere. +Directives are either consumed when parsing, generating a graph and linting. +Otherwise they are dangling as well. Unknown operations are always dangling. > rootIsResult nodes edges = case walkRoot nodes edges of > [] -> [LintResult NoRootNode []] > (i, x):[] -> if isResult x then [] else [LintResult NonResultRootNode [i]] > xs -> [LintResult MoreThanOneRootNode (map fst xs)] - Empty recipes or circular references have no root node: > testLintMetadata = [ @@ -49,6 +50,10 @@ Empty recipes or circular references have no root node: This recipe’s title is “Pesto”. > , cmpLint "+foobar >Pesto" [] + +Directives and unknown operations are dangling and thus root nodes. + +> , cmpLint "invalid %invalid +foo >bar" [LintResult MoreThanOneRootNode [0,1,3]] > ] Additional key-value metadata for the whole recipe can be provided by adding @@ -291,6 +296,7 @@ Appendix > | UnitNotWellKnown > | UnknownMetadataKey > | InvalidMetadata +> | InvalidNode > deriving (Show, Eq, Ord) > lintTests = [ @@ -306,8 +312,8 @@ Appendix > cmpLint doc expect = doc ~: sort (lint nodes edges) ~?= sort expect > where -> (Right op) = parse ("%pesto-1 " ++ doc) -> nodes = (zip [firstNodeId..] . map snd . operations) op +> (Right op) = (head . extract . snd . unzip) <$> parse ("%pesto " ++ doc) +> nodes = zip [firstNodeId..] op > edges = toGraph nodes ++ resolveReferences nodes > test = [ diff --git a/src/Codec/Pesto/Parse.lhs b/src/Codec/Pesto/Parse.lhs index 745d339..7deb511 100644 --- a/src/Codec/Pesto/Parse.lhs +++ b/src/Codec/Pesto/Parse.lhs @@ -12,12 +12,13 @@ Language syntax > , Object(..) > , Approximately(..) > , Amount(..) -> , Recipe(..) > , isResult > , isReference > , isAlternative > , isAnnotation > , isAction +> , isDirective +> , isUnknown > , spaces1 > , notspace > ) where @@ -33,50 +34,21 @@ Language syntax > > import Codec.Pesto.Serialize (serialize) -XXX: magic should be an operation -XXX: this parser should accept invalid operations +Pesto parses UTF-8_ encoded input files into a sequence of operations. -From the XXXsyntactic point of view a Pesto recipe is just a list of -space-delimited operations. It is encoded with UTF-8_ and starts with a magic -identifier (``%pesto-1``) followed by one or more spaces (spaces1_). Every +- stream of operations +- utf8 encoded +Every character within the Unicode whitespace class is considered a space. .. _UTF-8: https://tools.ietf.org/html/rfc3629 - .. _spaces1: -.. _Recipe: -> data Recipe = Recipe { -> version :: Integer -> , operations :: [(SourcePos, Operation)] -> } deriving Show -> -> recipe = Recipe -> <$> magic <* spaces1 -> <*> ((,) <$> getPosition <*> operation) `sepEndBy` spaces1 -> <* eof -> "recipe" -> +> stream = ((,) <$> getPosition <*> operation) `sepEndBy` spaces1 +> "stream" > spaces1 = many1 space -The file identifier consists of the string ``%pesto-`` followed by an integral -number and arbitrary non-space characters. They are reserved for future use and -must be ignored by parsers implementing this version of pesto. A byte order -mark (BOM) must not be used. - -> magic = string "%pesto-" *> int <* skipMany notspace "magic" -> notspace = satisfy (not . isSpace) - -.. _Operation: -.. _Ingredient: -.. _Tool: -.. _Result: -.. _Alternative: -.. _Reference: -.. _Annotation: -.. _Action: - -The following *operations* are supported: +The following operations are supported: > data Operation = > Annotation String @@ -86,6 +58,8 @@ The following *operations* are supported: > | Reference Quantity > | Result Object > | Alternative Object +> | Directive String +> | Unknown String > deriving (Show, Eq) > > operation = @@ -96,6 +70,8 @@ The following *operations* are supported: > <|> try result > <|> try alternative > <|> try reference +> <|> try directive +> <|> try unknown > "operation" The pesto grammar has two kinds of operations: The first one begins with a @@ -129,6 +105,19 @@ whitespace characters and then consumes an object or a quantity. > alternative = oparg '|' (Alternative <$> object) > reference = oparg '*' (Reference <$> quantity) +Additionally there are two special operations. Directives are similar to the +previous operations, but consume a qstr. + +> directive = oparg '%' (Directive <$> qstr) + +Unknown operations are the fallthrough-case and accept anything. They must not +be discarded at this point. The point of accepting anything is to fail as late +as possible while processing Pesto documents. This gives us a chance to print +helpful mesages that provide additional aid to the user who can then fix the +problem. + +> unknown = Unknown <$> many1 notspace + > testOparg = [ > cmpOperation "+100 g flour" (Right (Ingredient (Quantity (Exact (AmountRatio (100%1))) "g" "flour"))) @@ -136,6 +125,7 @@ whitespace characters and then consumes an object or a quantity. > , cmpOperation ">dough" (Right (Result "dough")) > , cmpOperation "|trimmings" (Right (Alternative "trimmings")) > , cmpOperation "*fish" (Right (Reference (Quantity (Exact (AmountStr "")) "" "fish"))) +> , cmpOperation3 "% invalid" (Right (Directive "invalid")) "%invalid" > , cmpOperation3 "* \t\n 1 _ cheese" (Right (Reference (Quantity (Exact (AmountRatio (1%1))) "" "cheese"))) "*1 _ cheese" > ] @@ -155,6 +145,7 @@ A word always starts with a letter, followed by any number of non-space characters. > word = (:) <$> letter <*> many notspace +> notspace = satisfy (not . isSpace) The empty string can be represented by two double quotes or the underscore, but not the empty string itself. @@ -347,7 +338,7 @@ Appendix ++++++++ > int = read <$> many1 digit -> parse = runParser recipe () "" +> parse = runParser stream () "" Test helpers: @@ -394,4 +385,8 @@ Wrap qstr test in AmountStr to aid serialization test > isAnnotation _ = False > isAction (Action _) = True > isAction _ = False +> isDirective (Directive _) = True +> isDirective _ = False +> isUnknown (Unknown _) = True +> isUnknown _ = False diff --git a/src/Codec/Pesto/Parse.lhs-boot b/src/Codec/Pesto/Parse.lhs-boot index 6a6dee9..dab073c 100644 --- a/src/Codec/Pesto/Parse.lhs-boot +++ b/src/Codec/Pesto/Parse.lhs-boot @@ -8,6 +8,8 @@ > | Reference Quantity > | Result Object > | Alternative Object +> | Directive String +> | Unknown String > data Quantity = Quantity Approximately Unit Object > type Unit = String > type Object = String diff --git a/src/Codec/Pesto/Serialize.lhs b/src/Codec/Pesto/Serialize.lhs index 5b3007e..b3cce7c 100644 --- a/src/Codec/Pesto/Serialize.lhs +++ b/src/Codec/Pesto/Serialize.lhs @@ -31,6 +31,8 @@ Finally transform linear stream of operations into a string again: > serialize (Reference q) = '*':serialize q > serialize (Result s) = '>':serializeQstr s > serialize (Alternative s) = '|':serializeQstr s +> serialize (Directive s) = '%':serializeQstr s +> serialize (Unknown s) = s > instance Serializeable Quantity where > serialize (Quantity a b "") = serialize a ++ " " ++ serializeQstr b diff --git a/src/Main.lhs b/src/Main.lhs index 5b37b8e..00123d5 100644 --- a/src/Main.lhs +++ b/src/Main.lhs @@ -4,8 +4,8 @@ User interface .. class:: nodoc > module Main (main) where -> import Codec.Pesto.Parse (parse, Recipe(..)) -> import Codec.Pesto.Graph (toGraph, firstNodeId, resolveReferences) +> import Codec.Pesto.Parse (parse) +> import Codec.Pesto.Graph (extract, toGraph, firstNodeId, resolveReferences) > import Codec.Pesto.Lint (lint) > import Codec.Pesto.Dot (toDot) @@ -23,9 +23,10 @@ add linting information to graph > main = do > s <- getContents -> (flip . either) malformedRecipe (parse s) $ \doc -> do +> (flip . either) malformedRecipe (parse s) $ \stream -> do > let -> nodes = (zip [firstNodeId..] . snd . unzip . operations) doc +> doc = (head . extract . snd . unzip) stream +> nodes = zip [firstNodeId..] doc > edges = toGraph nodes ++ resolveReferences nodes > --print $ lint nodes edges > putStrLn $ toDot nodes edges -- cgit v1.2.3