From b282af35ad4b0bb8d90e517f4b9ff03c22234090 Mon Sep 17 00:00:00 2001 From: Lars-Dominik Braun Date: Wed, 7 Sep 2022 15:07:04 +0200 Subject: Copy-edit specification --- src/lib/Codec/Pesto.lhs | 163 ++++++++++++++++++-------------------- src/lib/Codec/Pesto/Graph.lhs | 96 +++++++++++++--------- src/lib/Codec/Pesto/Lint.lhs | 104 ++++++++++++------------ src/lib/Codec/Pesto/Parse.lhs | 115 +++++++++++++++------------ src/lib/Codec/Pesto/Serialize.lhs | 7 +- 5 files changed, 257 insertions(+), 228 deletions(-) (limited to 'src/lib') diff --git a/src/lib/Codec/Pesto.lhs b/src/lib/Codec/Pesto.lhs index 7a2b615..ba8e332 100644 --- a/src/lib/Codec/Pesto.lhs +++ b/src/lib/Codec/Pesto.lhs @@ -1,15 +1,10 @@ -========================= -Pesto specification draft -========================= +============================ +Pesto language specification +============================ -Pesto is a text-based human-editable and machine-transformable cooking recipe +Pesto is a text-based, human-editable, and machine-transformable cooking recipe interchange format. -.. warning:: - - This specification is work-in-progress and thus neither stable, consistent or - complete. - .. class:: nodoc > module Codec.Pesto where @@ -19,8 +14,8 @@ About this document This section contains various information about this document. The `second section`__ motivates why inventing another file format is necessary, followed -by the goals__ of Pesto. After a short Pesto primer__ intended for the casual -user the language’s syntax__ and semantics__ are presented. The `linting +by the goals__ of Pesto. After a short Pesto primer__ for the casual +user, the language’s syntax__ and semantics__ are presented. The `linting section`__ limits the language to useful cooking recipes. Examples for user presentation of recipes and serialization follow. @@ -31,10 +26,10 @@ __ #language-syntax __ #language-semantics __ #linting -Being a literate program this document is specification and reference -implementation at the same time. The code is written in Haskell_ and uses the +Being a literate program, this document is specification and reference +implementation simultaneously. The code is written in Haskell_ and uses the parsec_ parser combinator library, as well as HUnit_ for unit tests. Even -without knowing Haskell’s syntax you should be able to understand this +without knowing Haskell’s syntax, you should be able to understand this specification. There’s a description above every code snippet explaining what is going on. @@ -52,8 +47,6 @@ interpreted as described in `RFC 2119`_. :License: CC0_ :Website: https://6xq.net/pesto/ :Source code: https://codeberg.org/ldb/pesto -:Contributors: - - `Lars-Dominik Braun `_ .. _CC0: https://creativecommons.org/publicdomain/zero/1.0/ @@ -62,13 +55,12 @@ interpreted as described in `RFC 2119`_. Motivation ---------- -The landscape of recipe interchange formats is quite fragmented. First of all -there’s HTML microdata. `Google rich snippets`_, which are equivalent to the +The landscape of recipe interchange formats is quite fragmented. First, +there’s HTML microdata: `Google rich snippets`_, equivalent to the schema.org_ microdata vocabulary, are widely used by commercial recipe sites. Although the main objective of microdata is to make content machine-readable -most sites will probably use it, because it is considered a search-engine -optimization (SEO). Additionally parsing HTML pulled from the web is a -nightmare and thus not a real option for sharing recipes. h-recipe_ provides a +most sites will probably use it because it is considered a search-engine +optimization (SEO) and not a method for sharing recipes. h-recipe_ provides a second vocabulary that has not been adopted widely yet. .. _Google rich snippets: https://developers.google.com/structured-data/rich-snippets/recipes @@ -77,13 +69,12 @@ second vocabulary that has not been adopted widely yet. .. _formats-by-software: -Most cooking-related software comes with its own recipe file format. Some of -them, due to their age, can be imported by other programs. - -Meal-Master_ is one of these widely supported formats. A huge trove of recipe files -is `available in this format `_. There does -not seem to be any official documentation for the format, but inofficial -`ABNF grammar`_ and `format description `_ +Most cooking-related software comes with custom recipe file formats. Some +of them can be imported by other programs. Meal-Master_ is one of these +widely supported formats. A vast trove of recipe files is `available in +this format `_. There does not seem +to be any official documentation for the format, but inofficial `ABNF +grammar`_ and `format description `_ exist. A Meal-Master recipe template might look like this: .. _MasterCook: http://mastercook.com/ @@ -109,41 +100,41 @@ exist. A Meal-Master recipe template might look like this: ----- Rezkonv_ aims to improve the Mealmaster format by lifting some of its character -limits, adding new syntax and translating it to german. However the +limits, adding new syntax, and translating it to german. However, the specification is available on request only. -A second format some programs can import is MasterCook_’s MXP_ file format, as -well as its XML-based successor MX2. And then there’s a whole bunch of -more-or-less proprietary formats: +A second format some programs can import is MasterCook_’s MXP_ file +format and its XML-based successor, MX2. Beyond that there exist numerous +application-specific, proprietary formats: `Living Cookbook`_ Uses a XML-based format called fdx version 1.1. There’s no specification to be found, but a few `examples `_ - are available and those are dated 2006. + are available, and those are dated 2006. `My CookBook`_ Uses the file extension .mcb. A specification `is available `_. KRecipes_ - Uses its own export format. However there is no documentation whatsoever. + Uses its own export format. However, there is no documentation whatsoever. Gourmet_ The program’s export format suffers from the same problem. The only document available is the `DTD `_. CookML_ Last updated in 2006 (version 1.0.4) for the german-language shareware - program Kalorio has a custom and restrictive licence that requires + program Kalorio has a custom and restrictive license that requires attribution and forbids derivate works. Paprika_ - Cross-platform application, supports its own “emailed recipe format” and a + Cross-platform application which supports its own “emailed recipe format” and a simple YAML-based format. .. _Paprika: https://paprikaapp.com/help/android/#importrecipes .. _xml-formats: -Between 2002 and 2005 a bunch of XML-based exchange formats were created. They -are not tied to a specific software, so none of them seems to be actively used +Between 2002 and 2005, a bunch of XML-based exchange formats were created. They +are not tied to specific software, so none of them seems to be actively used nowadays: RecipeML_ @@ -155,30 +146,30 @@ eatdrinkfeelgood_ anymore. The DTD is licensed under the `CC by-sa`_ license. REML_ Released in 2005 (version 0.5), aims to improve support for commercial uses - (restaurant menus and cookbooks). The XSD’s license permits free use and + (restaurant menus and cookbooks). The XSD’s license allows free use and redistribution, but the reference implementation has no licensing information. `RecipeBook XML`_ - Released 2005 as well and shared unter the terms of `CC by-sa`_ is not - available on the web any more. + Released 2005 as well and shared under the terms of `CC by-sa`_ is not + available on the web anymore. .. _CC by-sa: https://creativecommons.org/licenses/by-sa/2.5/ .. _obscure-formats: Finally, a few non-XML or obscure exchange formats have been created in the past: -YumML_ is an approach similar to those listed above, but based on YAML instead +YumML_ is an approach similar to those listed above but based on YAML instead of XML. The specification has been removed from the web and is available through the Web Archive only. `Cordon Bleu`_ (1999) encodes recipes as programs for a cooking machine and -defines a Pascal-like language. Being so close to real programming languages -Cordon Bleu is barely useable by anyone except programmers. Additionally the -language is poorly-designed, since its syntax is inconsistent and the user is +defines a Pascal-like language. Being so close to real programming languages, +Cordon Bleu is barely usable by anyone except programmers. Additionally, the +language is poorly-designed since its syntax is inconsistent, and the user is limited to a set of predefined functions. -Finally there is RxOL_, created in 1985. It constructs a graph from recipes -written down with a few operators and postfix notation. It does not separate +Finally, there is RxOL_, created in 1985. It constructs a graph from recipes +written down with a few operators and postfix notation, and does not separate ingredients and cooking instructions like every other syntax introduced before. Although Pesto is not a direct descendant of RxOL both share many ideas. @@ -211,31 +202,33 @@ microformats.org_ has a similar list of recipe interchange formats. Goals ----- -First of all recipes are written *by* humans *for* humans. Thus a -human-readable recipe interchange format is not enough. The recipes need to be -human-editable without guidance like a GUI or assistant. That’s why, for -instance, XML is not suitable and the interchange formats listed `above -`_ have largely failed to gain traction. XML, even though simple -itself, is still too complicated for the ordinary user. Instead a format needs -to be as simple as possible, with as little markup as possible. A human editor -must be able to remember the entire syntax. This works best if the file -contents “make sense”. A good example for this is Markdown_. +First of all, recipes are written *by* humans *for* humans. Thus a +human-readable recipe interchange format is not enough. The recipes need +to be human-editable without guidance like a GUI or assistant. That’s +why, for instance, XML is unsuitable, and the interchange formats listed +`above `_ have largely failed to gain traction. Even though +simple, XML is still too complicated for the ordinary user. Instead, +a format needs to be as simple as possible, with as little markup as +possible. A human editor must be able to remember the entire syntax. This +works best if the file contents “make sense.” An excellent example +of this is Markdown_. .. _Markdown: https://daringfireball.net/projects/markdown/syntax -We also have to acknowledge that machines play an important role in our daily -life. They can help us, the users, accomplish our goals if they are able to -understand the recipes as well. Thus they too need to be able to read and write -recipes. Again, designing a machine-readable format is not enough. Recipes must -be machine-transformable. A computer program should be able to create a new -recipe from two existing ones, look up the ingredients and tell us how many -joules one piece of that cake will have. And so on. +We also must acknowledge that machines play an important role in our +daily lives. They can help us, the users, accomplish our goals if they +can also understand the recipes. Thus they, too, need to be able to +read and write recipes. Again, designing a machine-readable format is +not enough. Recipes must be machine-transformable. A computer program +should be able to create a new recipe from two existing ones, look up +the ingredients and tell us how many joules one piece of that cake will +have. And so on. That being said, Pesto does not aim to carry additional information about -ingredients or recipes itself. Nutrition data for each ingredient should be -maintained in a separate database. Due to its minimal syntax Pesto is also not -suitable for extensive guides on cooking or the usual chitchat found in cooking -books. +ingredients or recipes themselves. Nutrition data for each ingredient +should be maintained in a separate database. Due to its minimal syntax, +Pesto is also not suitable for extensive guides on cooking or the usual +chitchat found in cooking books. .. _introduction-by-example: @@ -244,33 +237,33 @@ Introduction by example .. code:: - So let’s start by introducing Pesto by example. This text does not belong - to the recipe and is ignored by any software. The following line starts the - recipe: + So let’s start by introducing Pesto by example. This text does not belong + to the recipe and is ignored by any software. The following line starts the + recipe: - %pesto + %pesto - &pot - +1 l water - +salt - [boil] + &pot + +1 l water + +salt + [boil] - +100 g penne - &10 min - [cook] + +100 g penne + &10 min + [cook] - >1 serving pasta - (language: en) + >1 serving pasta + (language: en) And that’s how you make pasta: Boil one liter of water in a pot with a little -bit of salt. Then add 100 g penne, cook them for ten minutes and you get one +bit of salt. Then add 100 g penne, cook them for ten minutes, and you get one serving pasta. That’s all. There’s more syntax available to express alternatives (either penne or -tagliatelle), ranges (1–2 l water or approximately 1 liter water) and metadata. -But now you can have a first peek at `my own recipe collection`_. +tagliatelle), ranges (1–2 l water or approximately 1 liter water), and metadata. +But now you can have a first peek at `my recipe collection`_. -.. _my own recipe collection: https://codeberg.org/ldb/rezepte +.. _my recipe collection: https://codeberg.org/ldb/rezepte .. include:: Pesto/Parse.lhs .. include:: Pesto/Graph.lhs @@ -280,7 +273,7 @@ But now you can have a first peek at `my own recipe collection`_. Using this project ------------------ -This project uses cabal. It provides the Codec.Pesto library that implements +This project uses cabal. It provides the ``Codec.Pesto`` library that implements the Pesto language as described in the previous sections. It also comes with three binaries. diff --git a/src/lib/Codec/Pesto/Graph.lhs b/src/lib/Codec/Pesto/Graph.lhs index a9d42a7..43142b6 100644 --- a/src/lib/Codec/Pesto/Graph.lhs +++ b/src/lib/Codec/Pesto/Graph.lhs @@ -45,7 +45,7 @@ both directives. > (between, next) = break isEnd stream > extract (_:xs) = extract xs -Start and end directive are removed from the extracted instructions. The +Start and end directives are removed from the extracted instructions. The directive “buonappetito” is optional at the end of a stream. > testExtract = [ @@ -63,62 +63,80 @@ Instructions surrounding the start and end directive are removed. The stream may contain multiple recipes. The start directive also ends the previous recipe and starts a new one. -> , extract [startDirective, Action "pour", endDirective, Action "foobar", startDirective, Annotation "something"] ~?= [[Action "pour"], [Annotation "something"]] -> , extract [startDirective, Action "heat", startDirective, Annotation "something"] ~?= [[Action "heat"], [Annotation "something"]] -> , extract [startDirective, Annotation "foobar", startDirective, endDirective] ~?= [[Annotation "foobar"], []] +> , extract [ +> startDirective +> , Action "pour" +> , endDirective +> , Action "foobar" +> , startDirective +> , Annotation "something"] +> ~?= [[Action "pour"], [Annotation "something"]] +> , extract [ +> startDirective +> , Action "heat" +> , startDirective +> , Annotation "something"] +> ~?= [[Action "heat"], [Annotation "something"]] +> , extract [ +> startDirective +> , Annotation "foobar" +> , startDirective +> , endDirective] +> ~?= [[Annotation "foobar"], []] > ] Each recipe’s stream of instructions drives a stack-based machine that transforms it into a directed graph. Think of the stack as your kitchen’s workspace that is used to prepare the food’s components. You can add new ingredients, perform -actions on them, put them aside and add them again. +actions on them, put them aside, and add them again. -This function processes a list of nodes, that is instructions uniquely identified -by an integer and returns the edges of the directed graph as a list of tuples. +This function processes a list of nodes, that is, instructions uniquely identified +by an integer, and returns the edges of the directed graph as a list of tuples. +> toGraph :: Nodes Instruction -> Edges > toGraph nodes = third $ foldl f (Nothing, [[]], []) nodes > where -Ingredients are simply added to the current workspace. They should for example +Ingredients are simply added to the current workspace. They should, for example, appear on the shopping list. > f ctx (i, Ingredient _) = addToStack ctx i -The same happens for for tools. However they are not part of the final product, -but used in the process of making it. For instance they do not appear on the -shopping list. `Time is a tool <#time-is-a-tool>`_. +The same happens for tools. However, they are not part of the final product +but are used in the process of making it. Thus, they do not appear on the +shopping list. `Time is considered a tool <#time-is-a-tool>`_. > f ctx (i, Tool _) = addToStack ctx i Actions take all ingredients and tools currently on the workspace, perform some -action with them and put the product back onto the workspace. +action with them, and put the product back onto the workspace. > f (_, stack:sx, edges) (i, Action _) = (Just i, [i]:stack:sx, edgesTo i stack ++ edges) > f (_, [], _) (_, Action _) = undefined -- never reached Results add a label to the current workspace’s contents and move them out of -the way. It should be a meaningful name, not just A and B obviously. -Consecutive Results add different labels to the same workspace. That’s useful +the way. It should be a meaningful name, not just A and B. +Consecutive results add different labels to the same workspace. That’s useful when an action yields multiple results at once that are processed in different ways. > f ctx (i, Result _) = consumeStack ctx i -Alternatives too add a label to the current workspace’s content, but they pick -one of things on the workspace and throw everything else away. This allows -adding optional or equivalent ingredients to a recipe (i.e. margarine or butter). +Alternatives, too, add a label to the current workspace’s content, but they pick +one of the things on the workspace and throw everything else away. This allows +adding optional or equivalent ingredients to a recipe (i.e., margarine or butter). > f ctx (i, Alternative _) = consumeStack ctx i References are similar to ingredients. They are used to add items from a -workspace labeled with Result or Alternative. More on that `in the next section +workspace labeled with ``Result`` or ``Alternative``. More on that `in the next section <#references>`_. > f ctx (i, Reference _) = addToStack ctx i Annotations add a description to any of the previous instructions. They can be used to provide more information about ingredients (so “hot water” becomes -``+water (hot)``, tools (``&oven (200 °C)``) or actions (``[cook] +``+water (hot)``, tools (``&oven (200 °C)``), or actions (``[cook] (until brown)``). > f ctx@(Nothing, _, _) (_, Annotation _) = ctx @@ -141,14 +159,14 @@ These are helper functions: > in (Just i, []:top:sx, edgesTo i top ++ edges) > edgesTo i = map (\x -> (x, i)) -Here are a few example of how this stack-machine works. Each edge is a tuple of +Here are a few examples of how this stack-machine works. Each edge is a tuple of two integer numbers. These are the nodes it connects, starting with zero. -Ingredient, Tool and Reference itself do not create any edges: +``Ingredient``, ``Tool``, and ``Reference`` itself do not create any edges: > testGraph = [ > cmpGraph "+ketchup &spoon *foobar" [] -But Action, Alternative and Result do in combination with them: +But ``Action``, ``Alternative`` and ``Result`` do in combination with them: > , cmpGraph "+foobar [barbaz]" [(0, 1)] > , cmpGraph "+foobar |barbaz" [(0, 1)] @@ -158,11 +176,12 @@ But Action, Alternative and Result do in combination with them: > , cmpGraph "+foobar [barbaz] +foobar >barbaz" [(0, 1), (1, 3), (2, 3)] > , cmpGraph "&foobar [barbaz] [C] >D" [(0, 1), (1, 2), (2, 3)] -If the stack is empty, i.e. it was cleared by a Result or Alternative +If the stack is empty, i.e. it was cleared by a ``Result`` or ``Alternative`` instruction, consecutive results or alternatives operate on the *previous*, non-empty stack. > , cmpGraph "+foobar >barbaz >C" [(0, 1), (0, 2)] +> , cmpGraph "+foobar >barbaz >C >D" [(0, 1), (0, 2), (0, 3)] > , cmpGraph "+foobar |barbaz |C" [(0, 1), (0, 2)] > , cmpGraph "+foobar >barbaz |C" [(0, 1), (0, 2)] @@ -173,7 +192,7 @@ Unless that stack too is empty. Then they do nothing: > , cmpGraph "(foobar) (foobar)" [] > , cmpGraph "[foobar]" [] -The Annotation instruction always creates an edge to the most-recently processed +The ``Annotation`` instruction always creates an edge to the most-recently processed node that was not an annotation. Thus two consecutive annotations create edges to the same node. @@ -195,30 +214,33 @@ Unknown directives or instructions are never connected to other nodes. References ++++++++++ -Results and alternatives can be referenced with the Reference instruction. -Resolving these references does not happen while buiding the graph, but -afterwards. This allows referencing an a result or alternative before its -definition with regard to the their processing order. +Results and alternatives can be referenced with the ``Reference`` instruction. +Resolving these references does not happen while building the graph but +afterward. This allows referencing a result or alternative before its +definition with regard to their processing order. -Resolving references is fairly simple: For every reference its object name a -case-insensitive looked is performed in a table containing all results and -alternatives. If it succeeds an edge from every result and alternative returned +Resolving references is fairly simple: For every reference’s object name, a +case-insensitive lookup is performed in a table containing all results and +alternatives. If it succeeds, an edge from every result and alternative returned to the reference in question is created. +> resolveReferences :: Nodes Instruction -> Edges > resolveReferences nodes = foldl f [] nodes > where -> f edges (i, ref@(Reference _)) = map (\x -> (x, i)) (findTarget nodes ref) ++ edges +> f edges (i, ref@(Reference _)) = map (\x -> (x, i)) (findTargets nodes ref) ++ edges > f edges _ = edges -> findTarget nodes (Reference (Quantity _ _ a)) = map fst $ filter (isTarget a) nodes +> findTargets :: Nodes Instruction -> Instruction -> [NodeId] +> findTargets nodes (Reference (Quantity _ _ a)) = map fst $ filter (isTarget a) nodes > where > lc = map toLower > isTarget dest (_, Result (Quantity _ _ x)) = lc x == lc dest -> isTarget dest (_, Alternative (Quantity _ _ x)) = lc x == lc dest +> isTarget dest (_, Alternative (Quantity _ _ x)) = lc x == lc dest > isTarget _ _ = False -> findTarget _ _ = [] +> findTargets _ _ = [] -References works before or after the result instruction. +References are position-independent and can be used before or after the +result instruction they are referencing. > testRef = [ > cmpGraphRef ">foobar *foobar" [(0, 1)] @@ -263,7 +285,7 @@ Appendix Find graph’s root node(s), that is a node without outgoing edges: > walkRoot nodes edges = let out = nub $ map fst edges -> in filter (\(x, _) -> notElem x out) nodes +> in filter (\(x, _) -> notElem x out) nodes Get all nodes with edges pointing towards nodeid diff --git a/src/lib/Codec/Pesto/Lint.lhs b/src/lib/Codec/Pesto/Lint.lhs index 58f9ab0..3ecdfa1 100644 --- a/src/lib/Codec/Pesto/Lint.lhs +++ b/src/lib/Codec/Pesto/Lint.lhs @@ -25,11 +25,11 @@ Linting Not every graph generated in the previous section is a useful recipe. Some instruction sequences just do not make sense. The tests in this section can detect those. Failing any of them does not render a stream of instructions or -graph invalid. They just does not describe a *useful* recipe. Thus -implementations must not generate or export such documents. However they should +graph invalid. They just do not describe a *useful* recipe. Thus +implementations must not generate or export such documents. However, they should accept input that fails any of the tests and warn the user about the failure. -Additionally this section provides guidance on how to use the instructions +Additionally, this section provides guidance on how to use the instructions provided by the Pesto language properly. Graph properties @@ -37,9 +37,9 @@ Graph properties .. _resultsused: -The graph must have exactly one root node (i.e. a node with incoming edges +The graph must have exactly one root node (i.e., a node with incoming edges only). This also requires all results and alternatives to be referenced -somewhere. Directives are either consumed when parsing, generating a graph and +somewhere. Directives are either consumed when parsing, generating a graph, and linting. Otherwise they are dangling as well. Unknown instructions are always dangling. @@ -54,15 +54,15 @@ Empty recipes or circular references have no root node: > testConnectivity = [ > cmpLint "" [LintResult NoRootNode [], LintResult NoMetadata []] > , cmpLint "*foobar >foobar" -> [LintResult NoRootNode [], LintResult CircularLoop [0, 1], LintResult NoMetadata []] +> [LintResult NoRootNode [], LintResult CircularLoop [0, 1], LintResult NoMetadata []] > , cmpLint "+foobar" -> [LintResult NonResultRootNode [0], LintResult NoMetadata []] +> [LintResult NonResultRootNode [0], LintResult NoMetadata []] Directives and unknown instructions are dangling and thus root nodes. > , cmpLint "invalid %invalid +foo >bar" > [LintResult MoreThanOneRootNode [0,1,3], LintResult NoMetadata []] -> ] +> ] Metadata ++++++++ @@ -75,11 +75,11 @@ title (object) of the recipe. > Just $ (i, ("title", MetaStr title)) > :(i, ("yield", MetaQty q)) > :foldl f [] (incomingNodes nodes edges i) -> _ -> Nothing +> _ -> Nothing > where Additional key-value metadata for the whole recipe can be added as annotations -to the root node. If multiple annotations with the same key exist the key maps +to the root node. If multiple annotations with the same key exist, the key maps to a list of those values. Annotations that are unparseable key-value pairs are added as recipe description instead. @@ -103,8 +103,8 @@ colon char. A value may be empty. > checkKey xs (_, (k, _)) | isKeyKnown k = xs > checkKey xs (i, _) = LintResult UnknownMetadataKey [i]:xs -Valid metadata keys are listed below. Additionally applications may add keys by -prefixing them with “x-myapp-”, thus an application called “basil” adding +Valid metadata keys are listed below. Additionally, applications may add keys by +prefixing them with “x-myapp-”. Thus an application called “basil” adding “some-key” would use the full key “x-basil-some-key”. > isKeyKnown k = k `elem` knownKeys || "x-" `isPrefixOf` k @@ -115,7 +115,7 @@ The following metadata keys are permitted: The title, description and yield are implicit. -> "title" +> "title" > , "description" > , "yield" @@ -137,11 +137,11 @@ An image can be a relative file reference or URI For instance a german language recipe for one person would look like this: > testMetadata = [ -> cmpLintMeta "+foo >1 ml foobar (language: de) (x-app-key: value)" -> [] -> (Just [(1, ("title", MetaStr "foobar")) -> , (1, ("yield", MetaQty (Quantity (Exact (AmountRatio (1%1))) "ml" "foobar"))) -> , (2, ("language", MetaStr "de")) +> cmpLintMeta "+foo >1 _ foobar (language: de) (x-app-key: value)" +> [] +> (Just [(1, ("title", MetaStr "foobar")) +> , (1, ("yield", MetaQty (Quantity (Exact (AmountRatio (1%1))) "" "foobar"))) +> , (2, ("language", MetaStr "de")) > , (3, ("x-app-key", MetaStr "value"))]) Unparseable annotations or unknown keys are linting errors: @@ -149,7 +149,7 @@ Unparseable annotations or unknown keys are linting errors: > , cmpLintMeta "+foo >foobar (unknown-key: value)" > [LintResult UnknownMetadataKey [2]] > (Just [(1, ("title", MetaStr "foobar")) -> , (1, ("yield", MetaQty (strQuantity "foobar"))) +> , (1, ("yield", MetaQty (strQuantity "foobar"))) > , (2, ("unknown-key", MetaStr "value"))]) Root node annotations not containing a parseable key-value pair are assigned @@ -169,7 +169,7 @@ the key “description”. Time is a tool ++++++++++++++ -By definition time is a tool and not an ingredient. +By definition, time is a tool and not an ingredient. > timeUnits = ["s", "min", "h", "d"] > @@ -179,7 +179,7 @@ By definition time is a tool and not an ingredient. > timeIsATool nodes _ = foldl f [] nodes > where > f xs (nodeid, Ingredient q) | isTime q = LintResult TimeIsATool [nodeid]:xs -> f xs _ = xs +> f xs _ = xs > testLintQuantity = [ > cmpLint "+10 min >foo" [LintResult TimeIsATool [0]] @@ -189,9 +189,9 @@ By definition time is a tool and not an ingredient. > , cmpLint "&10 min [bar] >foo" [] > ] -Only actions can be annotated with a time. It can be used to indicate how long -a certain action is *expected* to take (i.e. peeling potatoes takes two -minutes) or how long the action is supposed to be executed (i.e. cook five +Only actions can be annotated like this. It can be used to indicate how long +a particular action is *expected* to take (i.e., peeling potatoes takes two +minutes) or how long the action is supposed to be executed (i.e. cook for five minutes). More time annotations improve the software’s scheduling capabilities. > timeAnnotatesAction nodes edges = foldl f [] nodes @@ -201,7 +201,7 @@ minutes). More time annotations improve the software’s scheduling capabilities > toNodelist = (!!) nodes . snd > allActions = all (isAction . snd . toNodelist) -For example “cook 10 minutes” can be expressed with +For example, “cook 10 minutes” can be expressed with: > testTimeAnnotatesAction = [ > cmpLint "&10 min [cook] >soup" [] @@ -216,10 +216,10 @@ For example “cook 10 minutes” can be expressed with Well-known units ++++++++++++++++ -Units can be an arbitrary strings, but implementations should recognize the -common metric units g (gram), l (litre) and m (metre). One of these prefixes -may be used with each of them: m (milli-), c (centi-), d (dezi-) and k (kilo-). -Additionally time in s (second), min (minute), h (hour), d (day) should be +Units can be arbitrary strings, but implementations should recognize the +standard metric units g (gram), l (liter), and m (meter). One of these prefixes +may be used with each of them: m (milli-), c (centi-), d (dezi-), and k (kilo-). +Additionally, time in s (second), min (minute), h (hour), and d (day) should be accepted. > wellKnownUnit nodes _ = foldl f [] nodes @@ -232,7 +232,7 @@ accepted. > extractQty _ = Nothing > f xs (nodeid, instr) | fromMaybe False (extractQty instr >>= (return . not . known)) = > LintResult UnitNotWellKnown [nodeid]:xs -> f xs _ = xs +> f xs _ = xs > known (Quantity _ unit _) = unit `elem` knownUnits > knownUnits = [ > "" @@ -241,9 +241,9 @@ accepted. > , "cm", "dm", "m" > ] ++ timeUnits -Usage of imperial units (inch, pound, …) as well as non-standard -units like “teaspoon”, “cup” or similar is discouraged because -the former is used by just three countries in the world right now and +Usage of imperial units (inch, pound, …), non-standard +units like “teaspoon,” “cup,” or similar is discouraged because +the former is used by just three countries in the world right now, and the latter is language- and country-dependent. The implementation may provide the user with a conversion utility. @@ -268,14 +268,14 @@ The unit is case-sensitive, thus References ++++++++++ -All references must be resolved. An `earlier check <#resultsused>`_ makes sure +All references must be resolved. An `earlier check <#resultsused>`_ ensures all results and alternatives are referenced at some point. > referencesResolved nodes edges = foldl f [] nodes > where > f xs (nodeid, Reference _) | null (incomingEdges edges nodeid) = > LintResult UndefinedReference [nodeid]:xs -> f xs _ = xs +> f xs _ = xs > testLintRefs = [ > cmpLint "*foobar >foobar >barbaz" [LintResult CircularLoop [0, 1]] @@ -283,8 +283,8 @@ all results and alternatives are referenced at some point. > ] Results and alternatives must not have duplicate names, so collect -their lower-case object names into map and flag those, which reference -multiple nodes. +their lower-case object names into a ``Map`` and flag those which +reference multiple nodes. > uniqueNames nodes _ = M.foldl f [] nameMap > where @@ -312,7 +312,7 @@ only occur at the beginning of a recipe. > where > f xs (nodeid, Result _) | null (incomingEdges edges nodeid) = > LintResult TooFewChildren [nodeid]:xs -> f xs _ = xs +> f xs _ = xs > testLintResultNonempty = [ > cmpLint ">bar *bar >baz" [LintResult TooFewChildren [0]] @@ -325,9 +325,9 @@ make the alternative pointless. > twoAlternatives nodes edges = foldl f [] nodes > where -> f xs (nodeid, Alternative _) | length (incomingEdges edges nodeid) < 2 = +> f xs (nodeid, Alternative _) | length (incomingEdges edges nodeid) < 2 = > LintResult TooFewChildren [nodeid]:xs -> f xs _ = xs +> f xs _ = xs > testLintTwoAlternatives = [ > cmpLint "+A |foo *foo >bar" [LintResult TooFewChildren [1]] @@ -335,9 +335,11 @@ make the alternative pointless. > , cmpLint "+A &B |foo *foo >bar" [] > ] -References cannot loop, because, well, you cannot cook something and +.. _reject-loops: + +References cannot loop because, well, you cannot cook something and use an ingredient you have not made yet. It is possible to branch out -and merge again though if an ingredient is split into multiple parts +and merge again if an ingredient is split into multiple parts and added to different outputs. > circularLoops nodes edges = map (LintResult CircularLoop) circles @@ -376,11 +378,11 @@ This limitation is not enforced for ranges containing strings. > rangeFromLargerThanTo nodes _ = foldl f [] nodes > where -> f xs (nodeid, Ingredient q) | not $ rangeOk q = +> f xs (nodeid, Ingredient q) | not $ rangeOk q = > LintResult RangeFromLargerThanTo [nodeid]:xs -> f xs (nodeid, Reference q) | not $ rangeOk q = +> f xs (nodeid, Reference q) | not $ rangeOk q = > LintResult RangeFromLargerThanTo [nodeid]:xs -> f xs _ = xs +> f xs _ = xs > rangeOk (Quantity (Range (AmountRatio a) (AmountRatio b)) _ _) = a < b > rangeOk _ = True @@ -404,9 +406,9 @@ Appendix > | DuplicateReferenceName > | CircularLoop > | TooFewChildren -> | TimeIsATool +> | TimeIsATool > | TimeAnnotatesAction -> | UnitNotWellKnown +> | UnitNotWellKnown > | InvalidNode > | RangeFromLargerThanTo > | NoMetadata @@ -419,11 +421,11 @@ Every lint test checks a single aspect of the graph. > lintTests = [ > rootIsResult -> , referencesResolved +> , referencesResolved > , uniqueNames > , circularLoops -> , resultNonempty -> , twoAlternatives +> , resultNonempty +> , twoAlternatives > , timeIsATool > , timeAnnotatesAction > , wellKnownUnit @@ -432,7 +434,7 @@ Every lint test checks a single aspect of the graph. > ] > withGraph doc f = f nodes edges -> where +> where > (Right op) = (head . extract . snd . unzip) <$> parse ("%pesto " ++ doc) > nodes = zip [firstNodeId..] op > edges = toGraph nodes ++ resolveReferences nodes diff --git a/src/lib/Codec/Pesto/Parse.lhs b/src/lib/Codec/Pesto/Parse.lhs index ef9a908..762fff4 100644 --- a/src/lib/Codec/Pesto/Parse.lhs +++ b/src/lib/Codec/Pesto/Parse.lhs @@ -34,7 +34,7 @@ Language syntax > import Codec.Pesto.Serialize (serialize) Pesto parses `UTF-8 `_ encoded input data -consisting of space-delimited instructions. Every character within the Unicode +consisting of space-delimited token. Every character within the Unicode whitespace class is considered a space. .. _spaces1: @@ -88,28 +88,30 @@ Here are examples for both: > testOpterm = [cmpInstruction "(skinless\nboneless)" (Right (Annotation "skinless\nboneless")) > , cmpInstruction "[stir together]" (Right (Action "stir together")) -> , cmpInstruction "[stir\\]together]" (Right (Action "stir]together"))] +> , cmpInstruction "[stir\\]together]" (Right (Action "stir]together")) +> , cmpInstruction "[stir [together]" (Right (Action "stir [together"))] The second one starts with one identifying character, ignores the following -whitespace characters and then consumes an object or a quantity. +whitespace characters, and then consumes a ``Quantity``. > oparg :: Char -> Parsec String () Instruction -> Parsec String () Instruction > oparg ident cont = char ident *> spaces *> cont +> > ingredient = oparg '+' (Ingredient <$> quantity) > tool = oparg '&' (Tool <$> quantity) > result = oparg '>' (Result <$> quantity) > alternative = oparg '|' (Alternative <$> quantity) > reference = oparg '*' (Reference <$> quantity) -Additionally there are two special instructions. Directives are similar to the -previous instructions, but consume a qstr. +Additionally, there are two special instructions. Directives are similar to the +previous instructions but consume a quoted string (``qstr``). > directive = oparg '%' (Directive <$> qstr) Unknown instructions are the fallthrough-case and accept anything. They must not be discarded at this point. The point of accepting anything is to fail as late as possible while processing input. This gives the parser a chance to -print helpful mesages that provide additional aid to the user who can then fix +print helpful messages that provide additional aid to the user, who can then fix the problem. > unknown = Unknown <$> many1 notspace @@ -129,15 +131,16 @@ Below are examples for these instructions: > , cmpInstruction3 "* \t\n 1 _ cheese" > (Right (Reference (Quantity (Exact (AmountRatio (1%1))) "" "cheese"))) > "*1 _ cheese" +> , cmpInstruction3 "!invalid" (Right (Unknown "!invalid")) "!invalid" > ] Qstr ++++ Before introducing quantities we need to have a look at qstr, which is used by -them. A qstr, short for quoted string, can be – you guessed it already – a -string enclosed in double quotes, a single word or the underscore character -that represents the empty string. +them. A qstr, short for quoted string, can be a string enclosed in double +quotes, a single word or the underscore character that represents the +empty string. > qstr = try (betweenEscaped '"' '"') > <|> word @@ -157,11 +160,11 @@ not the empty string itself. > , cmpQstr "_" (Right "") > , cmpQstr "" parseError -Any Unicode character with a General_Category major class L (i.e. a letter, see +Any Unicode character with a General_Category major class L (i.e., a letter, see `Unicode standard section 4.5 `_ for example) is -accected as first character of a word. That includes german umlauts as well as -greek or arabic script. Numbers, separators, punctuation and others are not +accepted as first character of a word. That includes german umlauts as well as +greek or arabic script. Numbers, separators, punctuation, and others are not permitted. > , cmpQstr "water" (Right "water") @@ -187,7 +190,7 @@ numbers, … > , cmpQstr "sour\tcream" parseError > , cmpQstr "white\nwine" parseError -If a string contains spaces or starts with a special character it must be +If a string contains spaces or starts with a special character, it must be enclosed in double quotes. > , cmpQstr3 "\"salt\"" (Right "salt") "salt" @@ -196,7 +199,7 @@ enclosed in double quotes. > , cmpQstr "\"1sugar\"" (Right "1sugar") > , cmpQstr "\"chicken\tbreast\nmeat\"" (Right "chicken\tbreast\nmeat") -Double quotes within a string can be quoted by prepending a backslash. However +Doublequotes within a string can be quoted by prepending a backslash. However, the usual escape codes like \\n, \\t, … will *not* be expanded. > , cmpQstr "\"vine\"gar\"" parseError @@ -204,21 +207,20 @@ the usual escape codes like \\n, \\t, … will *not* be expanded. > , cmpQstr "\"oli\\ve oil\"" (Right "oli\\ve oil") > , cmpQstr "\"oli\\\\\"ve oil\"" (Right "oli\\\"ve oil") > , cmpQstr3 "\"sal\\tmon\"" (Right "sal\\tmon") "sal\\tmon" -> ] +> ] Quantity ++++++++ -The instructions Ingredient, Tool and Reference accept a *quantity*, that is a -triple of Approximately, Unit and Object as parameter. +A ``Quantity`` is a triple of ``Approximately``, ``Unit`` and ``Object`` as parameter. > data Quantity = Quantity Approximately Unit Object deriving (Show, Eq) -The syntactic construct is overloaded and accepts one to three arguments. If -just one is given it is assumed to be the Object and Approximately and Unit are -empty. Two arguments set Approximately and Unit, which is convenient when the -unit implies the object (minutes usually refer to the object time, for -example). +The syntactic construct is overloaded and accepts one to three +arguments. If just one is given, it is assumed to be the ``Object`` +and ``Approximately`` and ``Unit`` are empty. Two arguments set +``Approximately`` and ``Unit``, which is convenient when the unit implies +the object (minutes usually refer to the object time, for example). > quantity = try quantityA <|> quantityB @@ -243,13 +245,13 @@ The first two are equivalent to > , cmpQuantity3 "_ _ oven" (exactQuantity (AmountStr "") "" "oven") "oven" > , cmpQuantity3 "10 min _" (exactQuantity (AmountRatio (10%1)) "min" "") "10 min" -Missing units must not be ommited. The version with underscore should be prefered. +Missing units must not be omitted. The version with underscore should be preferred. -> , cmpQuantity3 "1 \"\" meal" (exactQuantity (AmountRatio (1%1)) "" "meal") "1 _ meal" -> , cmpQuantity "1 _ meal" (exactQuantity (AmountRatio (1%1)) "" "meal") -> ] +> , cmpQuantity3 "1 \"\" meal" (exactQuantity (AmountRatio (1%1)) "" "meal") "1 _ meal" +> , cmpQuantity "1 _ meal" (exactQuantity (AmountRatio (1%1)) "" "meal") +> ] -Units and objects are just strings. However units should be limited to +Units and objects are just strings. However, units should be limited to `well-known metric units <#well-known-units>`_. > type Unit = String @@ -258,8 +260,8 @@ Units and objects are just strings. However units should be limited to > type Object = String > object = qstr -Approximately is a wrapper for ranges, that is two amounts separated by a dash, -approximate amounts, prepended with a tilde and exact amounts without modifier. +``Approximately`` is a wrapper for ranges, that is, two amounts separated by a dash, +approximate amounts, prepended with a tilde, and exact amounts without a modifier. > data Approximately = > Range Amount Amount @@ -279,13 +281,12 @@ approximate amounts, prepended with a tilde and exact amounts without modifier. > , cmpQuantity "1 -2 _ bananas" parseError > , cmpQuantity "~2 _ bananas" (Right (Quantity (Approx (AmountRatio (2%1))) "" "bananas")) > , cmpQuantity "~ 2 _ bananas" parseError - -> ] +> ] Amounts are limited to rational numbers and strings. There are no real numbers -by design and implementations should avoid representing rational numbers as -IEEE float. They are not required and introduce ugly corner cases when -rounding while converting units for example. +by design, and implementations should avoid representing rational numbers as +floating point numbers. They are not required and introduce ugly corner cases when +rounding while converting units, for example. > data Amount = > AmountRatio Rational @@ -300,9 +301,9 @@ rounding while converting units for example. > , cmpQuantity "~\"the stars in your eyes\" _ bananas" (Right (Quantity (Approx (AmountStr "the stars in your eyes")) "" "bananas")) > ] -Rational numbers can be an integral, numerator and denominator, each separated +Rational numbers can be an integral, numerator, and denominator, each separated by a forward slash, just the numerator and denominator, again separated by a -forward slash or just a numerator with the default denominator 1 (i.e. ordinary +forward slash, or just a numerator with the default denominator 1 (i.e., ordinary integral number). > ratio = let toRatio i num denom = AmountRatio ((i*denom+num)%denom) in @@ -310,36 +311,43 @@ integral number). > <|> try (toRatio <$> return 0 <*> int <*> (char '/' *> int)) > <|> try (toRatio <$> return 0 <*> int <*> return 1) -These are all equal. +The following representations are all equal with the first one being +the preferred one: > testQuantityRatio = [ > cmpQuantity "3 _ bananas" (exactQuantity (AmountRatio (3%1)) "" "bananas") -> , cmpQuantity3 "3/1 _ bananas" (exactQuantity (AmountRatio (3%1)) "" "bananas") "3 _ bananas" -> , cmpQuantity3 "3/0/1 _ bananas" (exactQuantity (AmountRatio (3%1)) "" "bananas") "3 _ bananas" +> , cmpQuantity3 "3/1 _ bananas" (exactQuantity (AmountRatio (3%1)) "" "bananas") +> "3 _ bananas" +> , cmpQuantity3 "3/0/1 _ bananas" (exactQuantity (AmountRatio (3%1)) "" "bananas") +> "3 _ bananas" -XXtwo is num and denom +Two numbers are numerator and denominator: > , cmpQuantity "3/5 _ bananas" (exactQuantity (AmountRatio (3%5)) "" "bananas") -three is int, num and denom +Three numbers add an integral part: > , cmpQuantity "3/5/7 _ bananas" (exactQuantity (AmountRatio ((3*7+5)%7)) "" "bananas") +> , cmpQuantity3 "10/3 _ bananas" (exactQuantity (AmountRatio (10%3)) "" "bananas") +> "3/1/3 _ bananas" -> , cmpQuantity3 "10/3 _ bananas" (exactQuantity (AmountRatio (10%3)) "" "bananas") "3/1/3 _ bananas" - -Can be used with ranges and approximate too. and mixed with strings - -> , cmpQuantity "1-\"a few\" _ bananas" (Right (Quantity (Range (AmountRatio (1%1)) (AmountStr "a few")) "" "bananas")) -> , cmpQuantity "1/1/2-2 _ bananas" (Right (Quantity (Range (AmountRatio (3%2)) (AmountRatio (4%2))) "" "bananas")) -> , cmpQuantity "~1/1/2 _ bananas" (Right (Quantity (Approx (AmountRatio (3%2))) "" "bananas")) +Rational numbers can be used in ranges and mixed with strings too. +> , cmpQuantity "1-\"a few\" _ bananas" (Right (Quantity +> (Range (AmountRatio (1%1)) (AmountStr "a few")) "" "bananas")) +> , cmpQuantity "1/1/2-2 _ bananas" (Right (Quantity +> (Range (AmountRatio (3%2)) (AmountRatio (4%2))) "" "bananas")) +> , cmpQuantity "~1/1/2 _ bananas" (Right (Quantity +> (Approx (AmountRatio (3%2))) "" "bananas")) > ] Appendix ++++++++ -> int = read <$> many1 digit +Parser main entry point. + > parse = runParser stream () "" +> int = read <$> many1 digit Test helpers: @@ -378,9 +386,12 @@ Wrap qstr test in AmountStr to aid serialization test > strQuantity = Quantity (Exact (AmountStr "")) "" > test = [ -> "quantity" ~: testQuantityOverloaded ++ testQuantityApprox ++ testQuantityAmount ++ testQuantityRatio -> , "qstr" ~: testQstr -> , "oparg" ~: testOparg +> "quantity" ~: testQuantityOverloaded +> ++ testQuantityApprox +> ++ testQuantityAmount +> ++ testQuantityRatio +> , "qstr" ~: testQstr +> , "oparg" ~: testOparg > , "opterm" ~: testOpterm > ] diff --git a/src/lib/Codec/Pesto/Serialize.lhs b/src/lib/Codec/Pesto/Serialize.lhs index 4a74b88..17a0fcc 100644 --- a/src/lib/Codec/Pesto/Serialize.lhs +++ b/src/lib/Codec/Pesto/Serialize.lhs @@ -9,11 +9,12 @@ Serializing > > import {-# SOURCE #-} Codec.Pesto.Parse +Serialization turns a linear list of instructions back into a human +representation. + > class Serializeable a where > serialize :: a -> String - -Finally transform linear stream of instructions into a string again: - +> > instance Serializeable a => Serializeable [a] where > serialize ops = unlines $ map serialize ops -- cgit v1.2.3