From 295dd897297722d07ec2ce5fb82e323fe495c775 Mon Sep 17 00:00:00 2001 From: Lars-Dominik Braun Date: Mon, 24 Aug 2020 17:09:34 +0200 Subject: GHC 8.8, cabal 3, pandoc 2.10 compatibility Move files around to separate Pesto (the library) and Pesto (the executables). Fixes for pandoc API changes. --- src/Codec/Pesto.lhs | 286 ------------------------ src/Codec/Pesto/Graph.lhs | 276 ----------------------- src/Codec/Pesto/Lint.lhs | 433 ------------------------------------- src/Codec/Pesto/Parse.lhs | 403 ---------------------------------- src/Codec/Pesto/Parse.lhs-boot | 22 -- src/Codec/Pesto/Serialize.lhs | 70 ------ src/Doc.lhs | 61 ------ src/Main.lhs | 107 --------- src/Test.lhs | 22 -- src/exe/Doc.lhs | 66 ++++++ src/exe/Main.lhs | 106 +++++++++ src/exe/Test.lhs | 22 ++ src/lib/Codec/Pesto.lhs | 286 ++++++++++++++++++++++++ src/lib/Codec/Pesto/Graph.lhs | 276 +++++++++++++++++++++++ src/lib/Codec/Pesto/Lint.lhs | 432 ++++++++++++++++++++++++++++++++++++ src/lib/Codec/Pesto/Parse.lhs | 403 ++++++++++++++++++++++++++++++++++ src/lib/Codec/Pesto/Parse.lhs-boot | 22 ++ src/lib/Codec/Pesto/Serialize.lhs | 70 ++++++ 18 files changed, 1683 insertions(+), 1680 deletions(-) delete mode 100644 src/Codec/Pesto.lhs delete mode 100644 src/Codec/Pesto/Graph.lhs delete mode 100644 src/Codec/Pesto/Lint.lhs delete mode 100644 src/Codec/Pesto/Parse.lhs delete mode 100644 src/Codec/Pesto/Parse.lhs-boot delete mode 100644 src/Codec/Pesto/Serialize.lhs delete mode 100644 src/Doc.lhs delete mode 100644 src/Main.lhs delete mode 100644 src/Test.lhs create mode 100644 src/exe/Doc.lhs create mode 100644 src/exe/Main.lhs create mode 100644 src/exe/Test.lhs create mode 100644 src/lib/Codec/Pesto.lhs create mode 100644 src/lib/Codec/Pesto/Graph.lhs create mode 100644 src/lib/Codec/Pesto/Lint.lhs create mode 100644 src/lib/Codec/Pesto/Parse.lhs create mode 100644 src/lib/Codec/Pesto/Parse.lhs-boot create mode 100644 src/lib/Codec/Pesto/Serialize.lhs (limited to 'src') diff --git a/src/Codec/Pesto.lhs b/src/Codec/Pesto.lhs deleted file mode 100644 index 098f9cb..0000000 --- a/src/Codec/Pesto.lhs +++ /dev/null @@ -1,286 +0,0 @@ -========================= -Pesto specification draft -========================= - -Pesto is a text-based human-editable and machine-transformable cooking recipe -interchange format. - -.. warning:: - - This specification is work-in-progress and thus neither stable, consistent or - complete. - -.. class:: nodoc - -> module Codec.Pesto where - -About this document -------------------- - -This section contains various information about this document. The `second -section`__ motivates why inventing another file format is necessary, followed -by the goals_ of Pesto. After a short Pesto primer__ intended for the casual -user the language’s syntax__ and semantics__ are presented. The `linting -section`__ limits the language to useful cooking recipes. Examples for user -presentation of recipes and serialization follow. - -__ motivation_ -__ introduction-by-example_ -__ language-syntax_ -__ language-semantics_ -__ linting_ - -Being a literate program this document is specification and reference -implementation at the same time. The code is written in Haskell_ and uses the -parsec_ parser combinator library, as well as HUnit_ for unit tests. Even -without knowing Haskell’s syntax you should be able to understand this -specification. There’s a description above every code snippet explaining what -is going on. - -.. _Haskell: http://learnyouahaskell.com/ -.. _HUnit: http://hackage.haskell.org/package/HUnit -.. _parsec: http://hackage.haskell.org/package/parsec - -The key words “MUST”, “MUST NOT”, “REQUIRED”, “SHALL”, “SHALL NOT”, “SHOULD”, -“SHOULD NOT”, “RECOMMENDED”, “MAY”, and “OPTIONAL” in this document are to be -interpreted as described in `RFC 2119`_. - -.. _RFC 2119: http://tools.ietf.org/html/rfc2119 - -:Version: 1-draft -:License: CC0_ -:Website: https://6xq.net/pesto/ -:Discussion: https://github.com/PromyLOPh/pesto -:Contributors: - - `Lars-Dominik Braun `_ - -.. _CC0: https://creativecommons.org/publicdomain/zero/1.0/ - -.. _motivation: - -Motivation ----------- - -The landscape of recipe interchange formats is quite fragmented. First of all -there’s HTML microdata. `Google rich snippets`_, which are equivalent to the -schema.org_ microdata vocabulary, are widely used by commercial recipe sites. -Although the main objective of microdata is to make content machine-readable -most sites will probably use it, because it is considered a search-engine -optimization (SEO). Additionally parsing HTML pulled from the web is a -nightmare and thus not a real option for sharing recipes. h-recipe_ provides a -second vocabulary that has not been adopted widely yet. - -.. _Google rich snippets: https://developers.google.com/structured-data/rich-snippets/recipes -.. _schema.org: http://schema.org/Recipe -.. _h-recipe: http://microformats.org/wiki/h-recipe - -.. _formats-by-software: - -Most cooking-related software comes with its own recipe file format. Some of -them, due to their age, can be imported by other programs. - -Meal-Master_ is one of these widely supported formats. A huge trove of recipe files -is `available in this format `_. There does -not seem to be any official documentation for the format, but inofficial -`ABNF grammar`_ and `format description `_ -exist. A Meal-Master recipe template might look like this: - -.. _MasterCook: http://mastercook.com/ -.. _MXP: http://www.oocities.org/heartland/woods/2073/Appendix.htm -.. _ABNF grammar: http://web.archive.org/web/20161002135718/http://www.wedesoft.de/anymeal-api/mealmaster.html - -.. code:: mealmaster - - ---------- Recipe via Meal-Master (tm) - - Title: - Categories: <Categories> - Yield: <N servings> - - <N> <unit> <ingredient> - … - - -------------------------------<Section name>----------------------------- - <More ingredients> - - <Instructions> - - ----- - -Rezkonv_ aims to improve the Mealmaster format by lifting some of its character -limits, adding new syntax and translating it to german. However the -specification is available on request only. - -A second format some programs can import is MasterCook_’s MXP_ file format, as -well as its XML-based successor MX2. And then there’s a whole bunch of -more-or-less proprietary formats: - -`Living Cookbook`_ - Uses a XML-based format called fdx version 1.1. There’s no specification to - be found, but a few examples__ are available and those are dated 2006. -`My CookBook`_ - Uses the file extension .mcb. A specification `is available - <http://mycookbook-android.com/site/my-cookbook-xml-schema/>`_. -KRecipes_ - Uses its own export format. However there is no documentation whatsoever. -Gourmet_ - The program’s export format suffers from the same problem. The only - document available is the `DTD - <https://github.com/thinkle/gourmet/blob/7715c6ef87ee8c106f0a021972cd70d61d83cadb/data/recipe.dtd>`_. -CookML_ - Last updated in 2006 (version 1.0.4) for the german-language shareware - program Kalorio has a custom and restrictive licence that requires - attribution and forbids derivate works. -Paprika_ - Cross-platform application, supports its own “emailed recipe format” and a - simple YAML-based format. - -__ http://livingcookbook.com/Resource/DownloadableRecipes -.. _Paprika: https://paprikaapp.com/help/android/#importrecipes - -.. _xml-formats: - -Between 2002 and 2005 a bunch of XML-based exchange formats were created. They -are not tied to a specific software, so none of them seems to be actively used -nowadays: - -RecipeML_ - Formerly known as DESSERT and released in 2002 (version 0.5). The - license requires attribution and – at the same time – forbids using the name - RecipeML for promotion without written permission. -eatdrinkfeelgood_ - Version 1.1 was released in 2002 as well, but the site is not online - anymore. The DTD is licensed under the `CC by-sa`_ license. -REML_ - Released in 2005 (version 0.5), aims to improve support for commercial uses - (restaurant menus and cookbooks). The XSD’s license permits free use and - redistribution, but the reference implementation has no licensing - information. -`RecipeBook XML`_ - Released 2005 as well and shared unter the terms of `CC by-sa`_ is not - available on the web any more. - -.. _CC by-sa: https://creativecommons.org/licenses/by-sa/2.5/ - -.. _obscure-formats: - -Finally, a few non-XML or obscure exchange formats have been created in the past: -YumML_ is an approach similar to those listed above, but based on YAML instead -of XML. The specification has been removed from the web and is available -through the Web Archive only. - -`Cordon Bleu`_ (1999) encodes recipes as programs for a cooking machine and -defines a Pascal-like language. Being so close to real programming languages -Cordon Bleu is barely useable by anyone except programmers. Additionally the -language is poorly-designed, since its syntax is inconsistent and the user is -limited to a set of predefined functions. - -Finally there is RxOL_, created in 1985. It constructs a graph from recipes -written down with a few operators and postfix notation. It does not separate -ingredients and cooking instructions like every other syntax introduced before. -Although Pesto is not a direct descendant of RxOL both share many ideas. - -microformats.org_ has a similar list of recipe interchange formats. - -.. _REML: http://reml.sourceforge.net/ -.. _eatdrinkfeelgood: https://web.archive.org/web/20070109085643/http://eatdrinkfeelgood.org/1.1/ -.. _RecipeML: http://www.formatdata.com/recipeml/index.html -.. _CookML: http://www.kalorio.de/index.php?Mod=Ac&Cap=CE&SCa=../cml/CookML_EN -.. _Meal-Master: http://web.archive.org/web/20151029032924/http://episoft.home.comcast.net:80/~episoft/ -.. _RecipeBook XML: http://web.archive.org/web/20141101132332/http://www.happy-monkey.net/recipebook/ -.. _YumML: http://web.archive.org/web/20140703234140/http://vikingco.de/yumml.html -.. _Rezkonv: http://www.rezkonv.de/software/rksuite/rkformat.html -.. _RxOL: http://www.dodomagnifico.com/641/Recipes/CompCook.html -.. _Gourmet: http://thinkle.github.io/gourmet/ -.. _KRecipes: http://krecipes.sourceforge.net/ -.. _Cordon Bleu: http://www.inf.unideb.hu/~bognar/ps_ek/cb_lang.ps -.. _microformats.org: http://microformats.org/wiki/recipe-formats -.. _Living Cookbook: http://livingcookbook.com/ -.. _My CookBook: http://mycookbook-android.com/ - -.. There is a copy at http://diyhpl.us/~bryan/papers2/CompCook.html as well - -.. More interesting stuff: -.. - http://blog.moertel.com/posts/2010-01-08-a-formal-language-for-recipes-brain-dump.html -.. - http://www.dangermouse.net/esoteric/chef.html - -Goals ------ - -First of all recipes are written *by* humans *for* humans. Thus a -human-readable recipe interchange format is not enough. The recipes need to be -human-editable without guidance like a GUI or assistant. That’s why, for -instance, XML is not suitable and the interchange formats listed `above -<xml-formats_>`_ have largely failed to gain traction. XML, even though simple -itself, is still too complicated for the ordinary user. Instead a format needs -to be as simple as possible, with as little markup as possible. A human editor -must be able to remember the entire syntax. This works best if the file -contents “make sense”. A good example for this is Markdown_. - -.. _Markdown: https://daringfireball.net/projects/markdown/syntax - -We also have to acknowledge that machines play an important role in our daily -life. They can help us, the users, accomplish our goals if they are able to -understand the recipes as well. Thus they too need to be able to read and write -recipes. Again, designing a machine-readable format is not enough. Recipes must -be machine-transformable. A computer program should be able to create a new -recipe from two existing ones, look up the ingredients and tell us how many -joules one piece of that cake will have. And so on. - -That being said, Pesto does not aim to carry additional information about -ingredients or recipes itself. Nutrition data for each ingredient should be -maintained in a separate database. Due to its minimal syntax Pesto is also not -suitable for extensive guides on cooking or the usual chitchat found in cooking -books. - -.. _introduction-by-example: - -Introduction by example ------------------------ - -.. code:: - - So let’s start by introducing Pesto by example. This text does not belong - to the recipe and is ignored by any software. The following line starts the - recipe: - - %pesto - - &pot - +1 l water - +salt - [boil] - - +100 g penne - &10 min - [cook] - - >1 serving pasta - (language: en) - -And that’s how you make pasta: Boil one liter of water in a pot with a little -bit of salt. Then add 100 g penne, cook them for ten minutes and you get one -serving pasta. That’s all. - -There’s more syntax available to express alternatives (either penne or -tagliatelle), ranges (1–2 l water or approximately 1 liter water) and metadata. -But now you can have a first peek at `my own recipe collection`_. - -.. _my own recipe collection: https://github.com/PromyLOPh/rezepte - -.. include:: Pesto/Parse.lhs -.. include:: Pesto/Graph.lhs -.. include:: Pesto/Lint.lhs -.. include:: Pesto/Serialize.lhs - -Using this project ------------------- - -This project uses cabal. It provides the Codec.Pesto library that implements -the Pesto language as described in the previous sections. It also comes with -three binaries. - -.. include:: ../Main.lhs -.. include:: ../Test.lhs -.. include:: ../Doc.lhs - diff --git a/src/Codec/Pesto/Graph.lhs b/src/Codec/Pesto/Graph.lhs deleted file mode 100644 index 511adca..0000000 --- a/src/Codec/Pesto/Graph.lhs +++ /dev/null @@ -1,276 +0,0 @@ -.. _language-semantics: - -Language semantics ------------------- - -.. class:: nodoc - -> module Codec.Pesto.Graph ( -> toGraph -> , walkRoot -> , outgoingEdges -> , outgoingNodes -> , incomingEdges -> , incomingNodes -> , firstNodeId -> , resolveReferences -> , test -> , extract -> , NodeId -> , Node -> , Nodes -> , Edge -> , Edges -> ) where -> import Data.Char (toLower) -> import Data.List (sort, nub) -> import Test.HUnit hiding (test, Node) -> -> import Codec.Pesto.Parse hiding (test) - -The parser’s output, a stream of instructions, may contain multiple recipes. A -recipe must start with the directive “pesto” and may end with “buonappetito”. -This function extracts all recipes from the stream and removes both directives. - -- easily embed recipes into other documents - -> startDirective = Directive "pesto" -> endDirective = Directive "buonappetito" - -> extract [] = [] -> extract (s:stream) | s == startDirective = between:extract next -> where -> isEnd x | x `elem` [startDirective, endDirective] = True -> isEnd _ = False -> (between, next) = break isEnd stream -> extract (_:xs) = extract xs - -Start and end directive are removed from the extracted instructions. The -directive “buonappetito” is optional at the end of a stream. - -> testExtract = [ -> extract [startDirective, endDirective] ~?= [[]] -> , extract [startDirective, Action "foobar", endDirective] ~?= [[Action "foobar"]] -> , extract [startDirective] ~?= [[]] -> , extract [startDirective, Directive "foobar"] ~?= [[Directive "foobar"]] - -Instructions surrounding the start and end directive are removed. - -> , extract [Unknown "Something", startDirective] ~?= [[]] -> , extract [Unknown "Something", Action "pour", startDirective] ~?= [[]] -> , extract [startDirective, endDirective, Annotation "something"] ~?= [[]] - -The stream may contain multiple recipes. The start directive also ends the -previous recipe and starts a new one. - -> , extract [startDirective, Action "pour", endDirective, Action "foobar", startDirective, Annotation "something"] ~?= [[Action "pour"], [Annotation "something"]] -> , extract [startDirective, Action "heat", startDirective, Annotation "something"] ~?= [[Action "heat"], [Annotation "something"]] -> , extract [startDirective, Annotation "foobar", startDirective, endDirective] ~?= [[Annotation "foobar"], []] -> ] - -Each recipe’s stream of instructions drives a stack-based machine that transforms -it into a directed graph. Think of the stack as your kitchen’s workspace that -is used to prepare the food’s components. You can add new ingredients, perform -actions on them, put them aside and add them again. - -This function processes a list of nodes, that is instructions uniquely identified -by an integer and returns the edges of the directed graph as a list of tuples. - -> toGraph nodes = third $ foldl f (Nothing, [[]], []) nodes -> where - -Ingredients are simply added to the current workspace. They should for example -appear on the shopping list. - -> f ctx (i, Ingredient _) = addToStack ctx i - -The same happens for for tools. However they are not part of the final product, -but used in the process of making it. For instance they do not appear on the -shopping list. `Time is a tool <time-is-a-tool_>`_. - -> f ctx (i, Tool _) = addToStack ctx i - -Actions take all ingredients and tools currently on the workspace, perform some -action with them and put the product back onto the workspace. - -> f (_, stack:sx, edges) (i, Action _) = (Just i, [i]:stack:sx, edgesTo i stack ++ edges) -> f (_, [], _) (_, Action _) = undefined -- never reached - -Results add a label to the current workspace’s contents and move them out of -the way. It should be a meaningful name, not just A and B obviously. -Consecutive Results add different labels to the same workspace. That’s useful -when an action yields multiple results at once that are processed in different -ways. - -> f ctx (i, Result _) = consumeStack ctx i - -Alternatives too add a label to the current workspace’s content, but they pick -one of things on the workspace and throw everything else away. This allows -adding optional or equivalent ingredients to a recipe (i.e. margarine or butter). - -> f ctx (i, Alternative _) = consumeStack ctx i - -References are similar to ingredients. They are used to add items from a -workspace labeled with Result or Alternative. More on that `in the next section -<references_>`_. - -> f ctx (i, Reference _) = addToStack ctx i - -Annotations add a description to any of the previous instructions. They can be -used to provide more information about ingredients (so “hot water” becomes -“+water (hot)”, tools (“&oven (200 °C)”) or actions (“[cook] (XXX)”). - -> f ctx@(Nothing, _, _) (_, Annotation _) = ctx -> f (Just prev, s, edges) (i, Annotation _) = (Just prev, s, (i, prev):edges) - -Unused directives or unknown instructions are danging nodes with no connection to -other nodes. - -> f ctx (_, Directive _) = ctx -> f ctx (_, Unknown _) = ctx - -These are helper functions: - -> addToStack (_, stack:sx, edges) i = (Just i, (i:stack):sx, edges) -> addToStack (_, [], _) _ = undefined -- never reached -> consumeStack (_, s, edges) i = -> let -> stack = dropWhile null s -> (top:sx) = if null stack then [[]] else stack -> in (Just i, []:top:sx, edgesTo i top ++ edges) -> edgesTo i = map (\x -> (x, i)) - -Here are a few example of how this stack-machine works. Each edge is a tuple of -two integer numbers. These are the nodes it connects, starting with zero. -Ingredient, Tool and Reference itself do not create any edges: - -> testGraph = [ -> cmpGraph "+ketchup &spoon *foobar" [] - -But Action, Alternative and Result do in combination with them: - -> , cmpGraph "+foobar [barbaz]" [(0, 1)] -> , cmpGraph "+foobar |barbaz" [(0, 1)] -> , cmpGraph "+foobar >barbaz" [(0, 1)] -> , cmpGraph "+foobar +B >barbaz" [(0, 2), (1, 2)] -> , cmpGraph "+foobar >barbaz +foobar >barbaz" [(0, 1), (2, 3)] -> , cmpGraph "+foobar [barbaz] +foobar >barbaz" [(0, 1), (1, 3), (2, 3)] -> , cmpGraph "&foobar [barbaz] [C] >D" [(0, 1), (1, 2), (2, 3)] - -If the stack is empty, i.e. it was cleared by a Result or Alternative -instruction, consecutive results or alternatives operate on the *previous*, -non-empty stack. - -> , cmpGraph "+foobar >barbaz >C" [(0, 1), (0, 2)] -> , cmpGraph "+foobar |barbaz |C" [(0, 1), (0, 2)] -> , cmpGraph "+foobar >barbaz |C" [(0, 1), (0, 2)] - -Unless that stack too is empty. Then they do nothing: - -> , cmpGraph ">foobar >foobar" [] -> , cmpGraph "|foobar |foobar" [] -> , cmpGraph "(foobar) (foobar)" [] -> , cmpGraph "[foobar]" [] - -The Annotation instruction always creates an edge to the most-recently processed -node that was not an annotation. Thus two consecutive annotations create edges -to the same node. - -> , cmpGraph "+foobar (barbaz)" [(1, 0)] -> , cmpGraph "+foobar (barbaz) (C)" [(1, 0), (2, 0)] -> , cmpGraph "+foobar (barbaz) >barbaz" [(1, 0), (0, 2)] -> , cmpGraph "+foobar >barbaz (C)" [(0, 1), (2, 1)] -> , cmpGraph "+foobar |barbaz (C)" [(0, 1), (2, 1)] -> , cmpGraph "*foobar (C)" [(1, 0)] - -Unknown directives or instructions are never connected to other nodes. - -> , cmpGraph "%invalid" [] -> , cmpGraph "invalid" [] -> ] - -References -++++++++++ - -Results and alternatives can be referenced with the Reference instruction. -Resolving these references does not happen while buiding the graph, but -afterwards. This allows referencing an a result or alternative before its -definition with regard to the their processing order. - -Resolving references is fairly simple: For every reference its object name a -case-insensitive looked is performed in a table containing all results and -alternatives. If it succeeds an edge from every result and alternative returned -to the reference in question is created. - -> resolveReferences nodes = foldl f [] nodes -> where -> f edges (i, ref@(Reference _)) = map (\x -> (x, i)) (findTarget nodes ref) ++ edges -> f edges _ = edges - -> findTarget nodes (Reference (Quantity _ _ a)) = map fst $ filter (isTarget a) nodes -> where -> lc = map toLower -> isTarget dest (_, Result (Quantity _ _ x)) = lc x == lc dest -> isTarget dest (_, Alternative (Quantity _ _ x)) = lc x == lc dest -> isTarget _ _ = False -> findTarget _ _ = [] - -References works before or after the result instruction. - -> testRef = [ -> cmpGraphRef ">foobar *foobar" [(0, 1)] -> , cmpGraphRef ">foobar |foobar *foobar" [(0, 2), (1, 2)] -> , cmpGraphRef "+A >foobar +B >barbaz *foobar *barbaz" [(1, 4), (3, 5)] -> , cmpGraphRef "*foobar >foobar" [(1, 0)] - -Nonexistent references do not create an edge. - -> , cmpGraphRef ">foobar *barbaz" [] - -References can use amounts and units. - -> , cmpGraphRef ">foobar *1 _ foobar *2 _ foobar" [(0, 1), (0, 2)] - -There are a few cases that do not make sense here (like loops or multiple -results with the same name). They are permitted at this stage, but rejected -`later <reject-loops_>`_. - -> , cmpGraphRef "*foobar |foobar >foobar" [(1, 0), (2, 0)] -> , cmpGraphRef "|foobar *foobar >foobar *foobar" [(0, 1), (0, 3), (2, 1), (2, 3)] -> ] - -Appendix -++++++++ - -> runGraphWith f doc expect = sort edges ~?= sort expect -> where -> (Right op) = (head . extract . snd . unzip) <$> parse ("%pesto " ++ doc) -> nodes = zip [firstNodeId..] op -> edges = f nodes -> cmpGraph = runGraphWith toGraph -> cmpGraphRef = runGraphWith resolveReferences - -> type NodeId = Int -> type Node a = (NodeId, a) -> type Nodes a = [Node a] -> type Edge = (NodeId, NodeId) -> type Edges = [Edge] -> firstNodeId = 0 :: NodeId - -Find graph’s root node(s), that is a node without outgoing edges: - -> walkRoot nodes edges = let out = nub $ map fst edges -> in filter (\(x, _) -> notElem x out) nodes - -Get all nodes with edges pointing towards nodeid - -> incomingEdges edges (nodeid, _) = filter ((==) nodeid . snd) edges -> incomingNodes nodes edges n = map ((!!) nodes . fst) $ incomingEdges edges n - -> outgoingEdges edges (nodeid, _) = filter ((==) nodeid . fst) edges -> outgoingNodes nodes edges n = map ((!!) nodes . snd) $ outgoingEdges edges n - -> test = ["graph" ~: testGraph, "ref" ~: testRef, "extract" ~: testExtract] - -> third (_, _, x) = x - diff --git a/src/Codec/Pesto/Lint.lhs b/src/Codec/Pesto/Lint.lhs deleted file mode 100644 index 81cb5d6..0000000 --- a/src/Codec/Pesto/Lint.lhs +++ /dev/null @@ -1,433 +0,0 @@ -Linting -------- - -.. class:: nodoc - -> module Codec.Pesto.Lint (lint -> , test -> , parseMetadata -> , extractMetadata -> , Metadata(..) -> , LintResult(..)) where -> import Test.HUnit hiding (test, Node) -> import Data.List (sort, isPrefixOf) -> import Text.Parsec hiding (parse) -> import Data.Char (isSpace) -> import Data.Ratio ((%)) -> import Data.Maybe (fromMaybe) -> -> import Codec.Pesto.Graph hiding (test) -> import Codec.Pesto.Parse hiding (test) - -Not every graph generated in the previous section is a useful recipe. Some -instruction sequences just do not make sense. The tests in this section can -detect those. Failing any of them does not render a stream of instructions or -graph invalid. They just does not describe a *useful* recipe. Thus -implementations must not generate or export such documents. However they should -accept input that fails any of the tests and warn the user about the failure. - -Additionally this section provides guidance on how to use the instructions -provided by the Pesto language properly. - -Graph properties -++++++++++++++++ - -- weakly connected, no dangling nodes/subgraphs -- acyclic - -The graph must have exactly one root node (i.e. a node with incoming edges -only). This also requires all results and alternatives to be referenced -somewhere. Directives are either consumed when parsing, generating a graph and -linting. Otherwise they are dangling as well. Unknown instructions are always -dangling. - -> rootIsResult nodes edges = case walkRoot nodes edges of -> [] -> [LintResult NoRootNode []] -> (_, Result _):[] -> [] -> (i, _):[] -> [LintResult NonResultRootNode [i]] -> xs -> [LintResult MoreThanOneRootNode (map fst xs)] - -Empty recipes or circular references have no root node: - -> testConnectivity = [ -> cmpLint "" [LintResult NoRootNode [], LintResult NoMetadata []] -> , cmpLint "*foobar >foobar" -> [LintResult NoRootNode [], LintResult NoMetadata []] -> , cmpLint "+foobar" -> [LintResult NonResultRootNode [0], LintResult NoMetadata []] - -Directives and unknown instructions are dangling and thus root nodes. - -> , cmpLint "invalid %invalid +foo >bar" -> [LintResult MoreThanOneRootNode [0,1,3], LintResult NoMetadata []] -> ] - -Metadata -++++++++ - -.. _resultsused: - -.. class:: todo - -root node can be alternative too? - -The graph’s root node must be a result. It contains yield (amount and unit) and -title (object) of the recipe. - -> extractMetadata nodes edges = case walkRoot nodes edges of -> [n@(i, Result q@(Quantity _ _ title))] -> -> Just $ (i, ("title", MetaStr title)) -> :(i, ("yield", MetaQty q)) -> :foldl f [] (incomingNodes nodes edges n) -> _ -> Nothing -> where - -Additional key-value metadata for the whole recipe can be added as annotations -to the root node. If multiple annotations with the same key exist the key maps -to a list of those values. Annotations that are unparseable key-value pairs are -added as recipe description instead. - -> f xs (i, Annotation s) = case parseMetadata s of -> Left _ -> (i, ("description", MetaStr s)):xs -> Right (k, v) -> (i, (k, MetaStr v)):xs -> f xs _ = xs - -Key and value are separated by a colon. Keys must not contain whitespace or the -colon char. A value may be empty. - -> parseMetadata = runParser metadata () "" -> metadata = let keychars = satisfy (\x -> not (isSpace x) && x /= ':') in (,) -> <$> many1 keychars -> <*> (char ':' *> spaces *> many anyChar) - -> lintMetadata nodes edges = case extractMetadata nodes edges of -> Just result -> foldl checkKey [] result -> Nothing -> [LintResult NoMetadata []] -> where -> checkKey xs (_, (k, _)) | isKeyKnown k = xs -> checkKey xs (i, _) = LintResult UnknownMetadataKey [i]:xs - -Valid metadata keys are listed below. Additionally applications may add keys by -prefixing them with “x-myapp-”, thus an application called “basil” adding -“some-key” would use the full key “x-basil-some-key”. - -> isKeyKnown k = k `elem` knownKeys || "x-" `isPrefixOf` k - -The following metadata keys are permitted: - -> knownKeys = [ - -Both, title and description, are implicit. - -> "title" -> , "description" - -The recipe’s language, as 2 character code (`ISO 639-1`__). - -__ http://www.loc.gov/standards/iso639-2/php/English_list.php - -> , "language" - -Yield and time both must be a quantity. - -> , "yield" -> , "time" - -An image can be a relative file reference or URI - -> , "image" -> , "author" -> ] - -.. class:: todo - -Check the metadata’s value format. I.e. yield/time must be quantity - -For instance a german language recipe for one person would look like this: - -> testMetadata = [ -> cmpLintMeta "+foo >1 ml foobar (language: de) (x-app-key: value)" -> [] -> (Just [(1, ("title", MetaStr "foobar")) -> , (1, ("yield", MetaQty (Quantity (Exact (AmountRatio (1%1))) "ml" "foobar"))) -> , (2, ("language", MetaStr "de")) -> , (3, ("x-app-key", MetaStr "value"))]) - -Unparseable annotations or unknown keys are linting errors: - -> , cmpLintMeta "+foo >foobar (unknown-key: value)" -> [LintResult UnknownMetadataKey [2]] -> (Just [(1, ("title", MetaStr "foobar")) -> , (1, ("yield", MetaQty (strQuantity "foobar"))) -> , (2, ("unknown-key", MetaStr "value"))]) - -Root node annotations not containing a parseable key-value pair are assigned -the key “description”. - -> , cmpLintMeta "+foo >foobar ( some description ) (another one: with colon) (another: valid key-value)" -> [LintResult UnknownMetadataKey [4]] -> (Just [(1, ("title", MetaStr "foobar")) -> , (1, ("yield", MetaQty (strQuantity "foobar"))) -> , (2, ("description", MetaStr " some description ")) -> , (3, ("description", MetaStr "another one: with colon")) -> , (4, ("another", MetaStr "valid key-value"))]) -> ] - -.. _time-is-a-tool: - -Time is a tool -++++++++++++++ - -By definition time is a tool and not an ingredient. - -> timeUnits = ["s", "min", "h", "d"] -> -> isTime (Quantity _ unit "") | unit `elem` timeUnits = True -> isTime _ = False - -> timeIsATool nodes _ = foldl f [] nodes -> where -> f xs (nodeid, Ingredient q) | isTime q = LintResult TimeIsATool [nodeid]:xs -> f xs _ = xs - -> testLintQuantity = [ -> cmpLint "+10 min >foo" [LintResult TimeIsATool [0]] -> , cmpLint "+10-12 h >foo" [LintResult TimeIsATool [0]] -> , cmpLint "+90/120 s >foo" [LintResult TimeIsATool [0]] -> , cmpLint "+~12 s >foo" [LintResult TimeIsATool [0]] -> , cmpLint "&10 min [bar] >foo" [] -> ] - -Only actions can be annotated with a time. It can be used to indicate how long -a certain action is *expected* to take (i.e. peeling potatoes takes two -minutes) or how long the action is supposed to be executed (i.e. cook five -minutes). More time annotations improve the software’s scheduling capabilities. - -> timeAnnotatesAction nodes edges = foldl f [] nodes -> where -> f xs n@(nodeid, Tool q) | isTime q && (not . allActions) (outgoingEdges edges n) = LintResult TimeAnnotatesAction [nodeid]:xs -> f xs _ = xs -> toNodelist = (!!) nodes . snd -> allActions = all (isAction . snd . toNodelist) - -For example “cook 10 minutes” can be expressed with - -> testTimeAnnotatesAction = [ -> cmpLint "&10 min [cook] >soup" [] -> , cmpLint "&10 min [cook] &5-6 h [cook again] >soup" [] -> , cmpLint "&10 min >soup" [LintResult TimeAnnotatesAction [0]] -> , cmpLint "&10 min &15 min |time *time [cook] >soup" -> [LintResult TimeAnnotatesAction [0], LintResult TimeAnnotatesAction [1]] -> ] - -.. _well-known-units: - -Well-known units -++++++++++++++++ - -Units can be an arbitrary strings, but implementations should recognize the -common metric units g (gram), l (litre) and m (metre). One of these prefixes -may be used with each of them: m (milli-), c (centi-), d (dezi-) and k (kilo-). -Additionally time in s (second), min (minute), h (hour), d (day) should be -accepted. - -> wellKnownUnit nodes _ = foldl f [] nodes -> where -> extractQty (Ingredient q) = Just q -> extractQty (Tool q) = Just q -> extractQty (Result q) = Just q -> extractQty (Alternative q) = Just q -> extractQty (Reference q) = Just q -> extractQty _ = Nothing -> f xs (nodeid, instr) | fromMaybe False (extractQty instr >>= (return . not . known)) = -> LintResult UnitNotWellKnown [nodeid]:xs -> f xs _ = xs -> known (Quantity _ unit _) = unit `elem` knownUnits -> knownUnits = [ -> "" -> , "mg", "g", "kg" -> , "ml", "cl", "dl", "l" -> , "cm", "dm", "m" -> ] ++ timeUnits - -Usage of imperial units (inch, pound, …) as well as non-XXX units like -“teaspoon”, “cup”, … is discouraged because the former is used by just three -countries in the world right now and the latter is language- and -country-dependent. The implementation may provide the user with a conversion -utility. - -.. class:: todo - -- example: 1 oz ~= 28.349523125 g, can only be approximated by rational number, for instance 29767/1050 g -- 15 oz would are :math:`\frac{29767}{70} \mathrm{g} = 425+\frac{17}{70} \mathrm{g}`, since nobody sells 17/70 g the implementation would round down to ~425 g (although <1g is not really enough to justify adding approx) - -> testLintWellKnownUnit = [ -> cmpLint "+1 in foobar >bar" [LintResult UnitNotWellKnown [0]] -> , cmpLint "+2 teaspoons foobar >bar" [LintResult UnitNotWellKnown [0]] -> , cmpLint "+3 cups foobar >bar" [LintResult UnitNotWellKnown [0]] -> , cmpLint "+1 ml foobar >bar" [] -> , cmpLint "+1 cl foobar >bar" [] -> , cmpLint "+1 dl foobar >bar" [] -> , cmpLint "+1 l foobar >bar" [] -> , cmpLint "+2 _ something >bar" [] -> , cmpLint "&1 min [foo] >bar" [] - -The unit is case-sensitive, thus - -.. class:: todo - -Should we allow case-insensitive units? References are case-insensitive as -well… - -> , cmpLint "+1 Mg foobar >bar" [LintResult UnitNotWellKnown [0]] -> , cmpLint "+1 kG foobar >bar" [LintResult UnitNotWellKnown [0]] -> , cmpLint "&1 MIN [foo] >bar" [LintResult UnitNotWellKnown [0]] -> ] - -References -++++++++++ - -All references must be resolved. An `earlier check <resultsused_>`_ makes sure -all results and alternatives are referenced at some point. - -> referencesResolved nodes edges = foldl f [] nodes -> where -> f xs n@(nodeid, Reference _) | null (incomingEdges edges n) = -> LintResult UndefinedReference [nodeid]:xs -> f xs _ = xs - -> testLintRefs = [ -> cmpLint "*foobar >foobar >barbaz" [] -> , cmpLint "*foobar >foo" [LintResult UndefinedReference [0]] -> ] - -A result must have at least one incoming edge. This is a special case and can -only occur at the beginning of a recipe. - -> resultNonempty nodes edges = foldl f [] nodes -> where -> f xs n@(nodeid, Result _) | null (incomingEdges edges n) = -> LintResult TooFewChildren [nodeid]:xs -> f xs _ = xs - -> testLintResultNonempty = [ -> cmpLint ">bar *bar >baz" [LintResult TooFewChildren [0]] -> , cmpLint "+A >bar *bar >baz" [] -> , cmpLint "+A >bar >foo *bar *foo >baz" [] -> ] - -Alternatives must have at least two incoming edges since a smaller amount would -make the alternative pointless. - -> twoAlternatives nodes edges = foldl f [] nodes -> where -> f xs n@(nodeid, Alternative _) | length (incomingEdges edges n) < 2 = -> LintResult TooFewChildren [nodeid]:xs -> f xs _ = xs - -> testLintTwoAlternatives = [ -> cmpLint "+A |foo *foo >bar" [LintResult TooFewChildren [1]] -> , cmpLint "+A +B |foo *foo >bar" [] - -.. class:: todo - -should we allow this? it does not make sense imo - -> , cmpLint "+A &B |foo *foo >bar" [] -> ] - -.. _reject-loops: - -.. class:: todo - -- reject loops -- reject multiple results/alternatives with the same name - -Ranges -++++++ - -The first amount of a range ratio must be strictly smaller than the second. -This limitation is not enforced for ranges containing strings. - -> rangeFromLargerThanTo nodes _ = foldl f [] nodes -> where -> f xs (nodeid, Ingredient q) | not $ rangeOk q = -> LintResult RangeFromLargerThanTo [nodeid]:xs -> f xs (nodeid, Reference q) | not $ rangeOk q = -> LintResult RangeFromLargerThanTo [nodeid]:xs -> f xs _ = xs -> rangeOk (Quantity (Range (AmountRatio a) (AmountRatio b)) _ _) = a < b -> rangeOk _ = True - -> testRangeFromLargerThanTo = [ -> cmpLint "+2-3 l water >bar" [] -> , cmpLint "+3-2 l water >bar" [LintResult RangeFromLargerThanTo [0]] -> , cmpLint "+2/3-1/3 l water >bar" [LintResult RangeFromLargerThanTo [0]] -> , cmpLint "+some-many _ eggs >bar" [] -> , cmpLint "+1-\"a few\" _ eggs >bar" [] -> ] - -Appendix -++++++++ - -> data LintResult = LintResult LintStatus [NodeId] deriving (Show, Eq, Ord) -> data LintStatus = -> NoRootNode -> | NonResultRootNode -> | MoreThanOneRootNode -> | UndefinedReference -> | TooFewChildren -> | TimeIsATool -> | TimeAnnotatesAction -> | UnitNotWellKnown -> | InvalidNode -> | RangeFromLargerThanTo -> | NoMetadata -> | UnknownMetadataKey -> deriving (Show, Eq, Ord) - -Every lint test checks a single aspect of the graph. - -> lint nodes edges = concatMap (\f -> f nodes edges) lintTests - -> lintTests = [ -> rootIsResult -> , referencesResolved -> , resultNonempty -> , twoAlternatives -> , timeIsATool -> , timeAnnotatesAction -> , wellKnownUnit -> , lintMetadata -> , rangeFromLargerThanTo -> ] - -> withGraph doc f = f nodes edges -> where -> (Right op) = (head . extract . snd . unzip) <$> parse ("%pesto " ++ doc) -> nodes = zip [firstNodeId..] op -> edges = toGraph nodes ++ resolveReferences nodes - -> cmpLint doc expect = withGraph doc (\nodes edges -> doc ~: sort (lint nodes edges) ~?= sort expect) - - -> data Metadata = MetaQty Quantity | MetaStr String deriving (Show, Eq) - -> cmpLintMeta doc expectLint expectMeta = withGraph doc (\nodes edges -> doc ~: [ -> sort (lint nodes edges) ~?= sort expectLint -> , extractMetadata nodes edges ~?= expectMeta -> ]) -> strQuantity = Quantity (Exact (AmountStr "")) "" - -> test = [ -> testConnectivity -> , testMetadata -> , testLintRefs -> , testLintQuantity -> , testLintWellKnownUnit -> , testTimeAnnotatesAction -> , testLintTwoAlternatives -> , testLintResultNonempty -> , testRangeFromLargerThanTo -> ] - diff --git a/src/Codec/Pesto/Parse.lhs b/src/Codec/Pesto/Parse.lhs deleted file mode 100644 index 518b866..0000000 --- a/src/Codec/Pesto/Parse.lhs +++ /dev/null @@ -1,403 +0,0 @@ -.. _language-syntax: - -Language syntax ---------------- - -.. class:: nodoc - -> module Codec.Pesto.Parse ( -> parse -> , test -> , Instruction(..) -> , Quantity(..) -> , Unit -> , Object -> , Approximately(..) -> , Amount(..) -> , isResult -> , isReference -> , isAlternative -> , isAnnotation -> , isAction -> , isDirective -> , isUnknown -> , spaces1 -> , notspace -> ) where -> import Data.Char (isSpace) -> import Data.Ratio ((%)) -> import Text.Parsec hiding (parse) -> import Text.ParserCombinators.Parsec.Pos (newPos) -> import Text.ParserCombinators.Parsec.Error (newErrorUnknown) -> import Test.HUnit hiding (test) -> -> import Codec.Pesto.Serialize (serialize) - -Pesto parses UTF-8_ encoded input data consisting of space-delimited -instructions. Every character within the Unicode whitespace class is -considered a space. - -.. _UTF-8: https://tools.ietf.org/html/rfc3629 -.. _spaces1: - -> stream = ((,) <$> getPosition <*> instruction) `sepEndBy` spaces1 -> <?> "stream" -> spaces1 = many1 space - -The following instructions are supported: - -> data Instruction = -> Annotation String -> | Ingredient Quantity -> | Tool Quantity -> | Action String -> | Reference Quantity -> | Result Quantity -> | Alternative Quantity -> | Directive String -> | Unknown String -> deriving (Show, Eq) -> -> instruction = -> try annotation -> <|> try ingredient -> <|> try tool -> <|> try action -> <|> try result -> <|> try alternative -> <|> try reference -> <|> try directive -> <|> try unknown -> <?> "instruction" - -The pesto grammar has two instruction types: The first one begins with a -start symbol (``start``) and consumes any character up to and including a -terminating symbol (``end``), which can be escaped with a backslash (``\``). - -> betweenEscaped :: Char -> Char -> Parsec String () String -> betweenEscaped start end = -> char start -> *> many (try (char '\\' *> char end) <|> satisfy (/= end)) -> <* char end - -Annotations and actions both are of this kind: - -> annotation = Annotation <$> betweenEscaped '(' ')' -> action = Action <$> betweenEscaped '[' ']' - -Here are examples for both: - -> testOpterm = [cmpInstruction "(skinless\nboneless)" (Right (Annotation "skinless\nboneless")) -> , cmpInstruction "[stir together]" (Right (Action "stir together")) -> , cmpInstruction "[stir\\]together]" (Right (Action "stir]together"))] - -The second one starts with one identifying character, ignores the following -whitespace characters and then consumes an object or a quantity. - -> oparg :: Char -> Parsec String () Instruction -> Parsec String () Instruction -> oparg ident cont = char ident *> spaces *> cont -> ingredient = oparg '+' (Ingredient <$> quantity) -> tool = oparg '&' (Tool <$> quantity) -> result = oparg '>' (Result <$> quantity) -> alternative = oparg '|' (Alternative <$> quantity) -> reference = oparg '*' (Reference <$> quantity) - -Additionally there are two special instructions. Directives are similar to the -previous instructions, but consume a qstr. - -> directive = oparg '%' (Directive <$> qstr) - -Unknown instructions are the fallthrough-case and accept anything. They must -not be discarded at this point. The point of accepting anything is to fail as -late as possible while processing input. This gives the parser a chance to -print helpful mesages that provide additional aid to the user who can then fix -the problem. - -> unknown = Unknown <$> many1 notspace - -Below are examples for these instructions: - -> testOparg = [ -> cmpInstruction "+100 g flour" -> (Right (Ingredient (Quantity (Exact (AmountRatio (100%1))) "g" "flour"))) -> , cmpInstruction "&oven" -> (Right (Tool (strQuantity "oven"))) -> , cmpInstruction ">dough" (Right (Result (strQuantity "dough"))) -> , cmpInstruction "|trimmings" (Right (Alternative (strQuantity "trimmings"))) -> , cmpInstruction "*fish" -> (Right (Reference (strQuantity "fish"))) -> , cmpInstruction3 "% invalid" (Right (Directive "invalid")) "%invalid" -> , cmpInstruction3 "* \t\n 1 _ cheese" -> (Right (Reference (Quantity (Exact (AmountRatio (1%1))) "" "cheese"))) -> "*1 _ cheese" -> ] - -Qstr -++++ - -Before introducing quantities we need to have a look at qstr, which is used by -them. A qstr, short for quoted string, can be – you guessed it already – a -string enclosed in double quotes, a single word or the underscore character -that represents the empty string. - -> qstr = try (betweenEscaped '"' '"') -> <|> word -> <|> char '_' *> return "" - -A word always starts with a letter, followed by any number of non-space -characters. - -> word = (:) <$> letter <*> many notspace -> notspace = satisfy (not . isSpace) - -The empty string can be represented by two double quotes or the underscore, but -not the empty string itself. - -> testQstr = [ -> cmpQstr3 "\"\"" (Right "") "_" -> , cmpQstr "_" (Right "") -> , cmpQstr "" parseError - -Any Unicode character with a General_Category major class L (i.e. a letter, see -`Unicode standard section 4.5 -<http://www.unicode.org/versions/Unicode7.0.0/ch04.pdf>`_ for example) is -accected as first character of a word. That includes german umlauts as well as -greek or arabic script. Numbers, separators, punctuation and others are not -permitted. - -> , cmpQstr "water" (Right "water") -> , cmpQstr "Äpfel" (Right "Äpfel") -> , cmpQstr "τυρί" (Right "τυρί") -> , cmpQstr "جبن" (Right "جبن") -> , cmpQstr "1sugar" parseError -> , cmpQstr "+milk" parseError -> , cmpQstr "∀onion" parseError - -The remaining letters of a word can be any character, including symbols, -numbers, … - -> , cmpQstr "rump-roast" (Right "rump-roast") -> , cmpQstr "v1negar" (Right "v1negar") -> , cmpQstr "mush\"rooms" (Right "mush\"rooms") - -…but not spaces. - -> , cmpQstr " tomatoes" parseError -> , cmpQstr "tomatoes " parseError -> , cmpQstr "lemon juice" parseError -> , cmpQstr "sour\tcream" parseError -> , cmpQstr "white\nwine" parseError - -If a string contains spaces or starts with a special character it must be -enclosed in double quotes. - -> , cmpQstr3 "\"salt\"" (Right "salt") "salt" -> , cmpQstr "\"+milk\"" (Right "+milk") -> , cmpQstr "\"soy sauce\"" (Right "soy sauce") -> , cmpQstr "\"1sugar\"" (Right "1sugar") -> , cmpQstr "\"chicken\tbreast\nmeat\"" (Right "chicken\tbreast\nmeat") - -Double quotes within a string can be quoted by prepending a backslash. However -the usual escape codes like \\n, \\t, … will *not* be expanded. - -> , cmpQstr "\"vine\"gar\"" parseError -> , cmpQstr3 "\"vine\\\"gar\"" (Right "vine\"gar") "vine\"gar" -> , cmpQstr "\"oli\\ve oil\"" (Right "oli\\ve oil") -> , cmpQstr "\"oli\\\\\"ve oil\"" (Right "oli\\\"ve oil") -> , cmpQstr3 "\"sal\\tmon\"" (Right "sal\\tmon") "sal\\tmon" -> ] - -Quantity -++++++++ - -The instructions Ingredient, Tool and Reference accept a *quantity*, that is a -triple of Approximately, Unit and Object as parameter. - -> data Quantity = Quantity Approximately Unit Object deriving (Show, Eq) - -The syntactic construct is overloaded and accepts one to three arguments. If -just one is given it is assumed to be the Object and Approximately and Unit are -empty. Two arguments set Approximately and Unit, which is convenient when the -unit implies the object (minutes usually refer to the object time, for -example). - -> quantity = try quantityA <|> quantityB - -> quantityA = Quantity -> <$> approximately -> <* spaces1 -> <*> unit -> <*> (try (spaces1 *> object) <|> return "") - -> quantityB = Quantity -> <$> return (Exact (AmountStr "")) -> <*> return "" -> <*> object - -> testQuantityOverloaded = [ -> cmpQuantity "oven" (exactQuantity (AmountStr "") "" "oven") -> , cmpQuantity "10 min" (exactQuantity (AmountRatio (10%1)) "min" "") -> , cmpQuantity "100 g flour" (exactQuantity (AmountRatio (100%1)) "g" "flour") - -The first two are equivalent to - -> , cmpQuantity3 "_ _ oven" (exactQuantity (AmountStr "") "" "oven") "oven" -> , cmpQuantity3 "10 min _" (exactQuantity (AmountRatio (10%1)) "min" "") "10 min" - -Missing units must not be ommited. The version with underscore should be prefered. - -> , cmpQuantity3 "1 \"\" meal" (exactQuantity (AmountRatio (1%1)) "" "meal") "1 _ meal" -> , cmpQuantity "1 _ meal" (exactQuantity (AmountRatio (1%1)) "" "meal") -> ] - -Units and objects are just strings. However units should be limited to -`well-known metric units <well-known-units_>`_ and `some guidelines -<objects-and-annotations_>`_ apply to Objects as well. - -> type Unit = String -> unit = qstr -> -> type Object = String -> object = qstr - -Approximately is a wrapper for ranges, that is two amounts separated by a dash, -approximate amounts, prepended with a tilde and exact amounts without modifier. - -> data Approximately = -> Range Amount Amount -> | Approx Amount -> | Exact Amount -> deriving (Show, Eq) -> -> approximately = try range <|> try approx <|> exact -> range = Range <$> amount <*> (char '-' *> amount) -> approx = Approx <$> (char '~' *> amount) -> exact = Exact <$> amount - -> testQuantityApprox = [ -> cmpQuantity "1-2 _ bananas" (Right (Quantity (Range (AmountRatio (1%1)) (AmountRatio (2%1))) "" "bananas")) -> , cmpQuantity "1 - 2 _ bananas" parseError -> , cmpQuantity "1- 2 _ bananas" parseError -> , cmpQuantity "1 -2 _ bananas" parseError -> , cmpQuantity "~2 _ bananas" (Right (Quantity (Approx (AmountRatio (2%1))) "" "bananas")) -> , cmpQuantity "~ 2 _ bananas" parseError - -> ] - -Amounts are limited to rational numbers and strings. There are no real numbers -by design and implementations should avoid representing rational numbers as -IEEE float. They are not required and introduce ugly corner cases when -rounding while converting units for example. - -> data Amount = -> AmountRatio Rational -> | AmountStr String -> deriving (Show, Eq) -> -> amount = try ratio <|> AmountStr <$> qstr - -> testQuantityAmount = [ -> cmpQuantity "some _ pepper" (exactQuantity (AmountStr "some") "" "pepper") -> , cmpQuantity3 "\"some\"-\"a few\" _ bananas" (Right (Quantity (Range (AmountStr "some") (AmountStr "a few")) "" "bananas")) "some-\"a few\" _ bananas" -> , cmpQuantity "~\"the stars in your eyes\" _ bananas" (Right (Quantity (Approx (AmountStr "the stars in your eyes")) "" "bananas")) -> ] - -Rational numbers can be an integral, numerator and denominator, each separated -by a forward slash, just the numerator and denominator, again separated by a -forward slash or just a numerator with the default denominator 1 (i.e. ordinary -integral number). - -> ratio = let toRatio i num denom = AmountRatio ((i*denom+num)%denom) in -> try (toRatio <$> int <*> (char '/' *> int) <*> (char '/' *> int)) -> <|> try (toRatio <$> return 0 <*> int <*> (char '/' *> int)) -> <|> try (toRatio <$> return 0 <*> int <*> return 1) - -These are all equal. - -> testQuantityRatio = [ -> cmpQuantity "3 _ bananas" (exactQuantity (AmountRatio (3%1)) "" "bananas") -> , cmpQuantity3 "3/1 _ bananas" (exactQuantity (AmountRatio (3%1)) "" "bananas") "3 _ bananas" -> , cmpQuantity3 "3/0/1 _ bananas" (exactQuantity (AmountRatio (3%1)) "" "bananas") "3 _ bananas" - -XXtwo is num and denom - -> , cmpQuantity "3/5 _ bananas" (exactQuantity (AmountRatio (3%5)) "" "bananas") - -three is int, num and denom - -> , cmpQuantity "3/5/7 _ bananas" (exactQuantity (AmountRatio ((3*7+5)%7)) "" "bananas") - -> , cmpQuantity3 "10/3 _ bananas" (exactQuantity (AmountRatio (10%3)) "" "bananas") "3/1/3 _ bananas" - -Can be used with ranges and approximate too. and mixed with strings - -> , cmpQuantity "1-\"a few\" _ bananas" (Right (Quantity (Range (AmountRatio (1%1)) (AmountStr "a few")) "" "bananas")) -> , cmpQuantity "1/1/2-2 _ bananas" (Right (Quantity (Range (AmountRatio (3%2)) (AmountRatio (4%2))) "" "bananas")) -> , cmpQuantity "~1/1/2 _ bananas" (Right (Quantity (Approx (AmountRatio (3%2))) "" "bananas")) - -> ] - -Appendix -++++++++ - -> int = read <$> many1 digit -> parse = runParser stream () "" - -Test helpers: - -> isLeft (Left _) = True -> isLeft _ = False - -A generic parser error: - -> parseError = Left (newErrorUnknown (newPos "" 0 0)) - -Compare output of parser ``f`` for string ``str`` with ``expected``. The -expected result can be a parser error, which matches any actual parse error -(first case). - -> cmpParser f str (Left _) = TestCase $ assertBool str $ isLeft $ runParser (f <* eof) () "" str -> cmpParser f str expected = str ~: runParser (f <* eof) () "" str ~?= expected - -> cmpParseSerialize f str expectp@(Left _) _ = [cmpParser f str expectp] -> cmpParseSerialize f str expectp@(Right expectpval) expects = [ -> cmpParser f str expectp -> , serialize expectpval ~?= expects] - -Wrap qstr test in AmountStr to aid serialization test - -> cmpQstr input expectp = cmpQstr3 input expectp input -> cmpQstr3 input (Left expect) _ = [cmpParser (AmountStr <$> qstr) input (Left expect)] -> cmpQstr3 input (Right expect) expects = cmpParseSerialize (AmountStr <$> qstr) input (Right (AmountStr expect)) expects - -> cmpQuantity a b = cmpQuantity3 a b a -> cmpQuantity3 = cmpParseSerialize quantity - -> cmpInstruction a b = cmpInstruction3 a b a -> cmpInstruction3 = cmpParseSerialize instruction - -> exactQuantity a b c = Right (Quantity (Exact a) b c) -> strQuantity = Quantity (Exact (AmountStr "")) "" - -> test = [ -> "quantity" ~: testQuantityOverloaded ++ testQuantityApprox ++ testQuantityAmount ++ testQuantityRatio -> , "qstr" ~: testQstr -> , "oparg" ~: testOparg -> , "opterm" ~: testOpterm -> ] - -> isResult (Result _) = True -> isResult _ = False -> isReference (Reference _) = True -> isReference _ = False -> isAlternative (Alternative _) = True -> isAlternative _ = False -> isAnnotation (Annotation _) = True -> isAnnotation _ = False -> isAction (Action _) = True -> isAction _ = False -> isDirective (Directive _) = True -> isDirective _ = False -> isUnknown (Unknown _) = True -> isUnknown _ = False - diff --git a/src/Codec/Pesto/Parse.lhs-boot b/src/Codec/Pesto/Parse.lhs-boot deleted file mode 100644 index 9096ad7..0000000 --- a/src/Codec/Pesto/Parse.lhs-boot +++ /dev/null @@ -1,22 +0,0 @@ -> module Codec.Pesto.Parse where - -> data Instruction = -> Annotation String -> | Ingredient Quantity -> | Tool Quantity -> | Action String -> | Reference Quantity -> | Result Quantity -> | Alternative Quantity -> | Directive String -> | Unknown String -> data Quantity = Quantity Approximately Unit Object -> type Unit = String -> type Object = String -> data Approximately = -> Range Amount Amount -> | Approx Amount -> | Exact Amount -> data Amount = -> AmountRatio Rational -> | AmountStr String diff --git a/src/Codec/Pesto/Serialize.lhs b/src/Codec/Pesto/Serialize.lhs deleted file mode 100644 index f07e871..0000000 --- a/src/Codec/Pesto/Serialize.lhs +++ /dev/null @@ -1,70 +0,0 @@ -Serializing ------------ - -.. class:: nodoc - -> module Codec.Pesto.Serialize (serialize) where -> import Data.Char (isSpace, isLetter) -> import Data.Ratio (numerator, denominator) -> -> import {-# SOURCE #-} Codec.Pesto.Parse - -> class Serializeable a where -> serialize :: a -> String - -.. class:: todo - -- Add instance for graph -- use :math:`\mathcal{O}(1)` string builder - -Finally transform linear stream of instructions into a string again: - -> instance Serializeable a => Serializeable [a] where -> serialize ops = unlines $ map serialize ops - -> instance Serializeable Instruction where -> serialize (Annotation s) = quote '(' ')' s -> serialize (Ingredient q) = '+':serialize q -> serialize (Tool q) = '&':serialize q -> serialize (Action s) = quote '[' ']' s -> serialize (Reference q) = '*':serialize q -> serialize (Result q) = '>':serialize q -> serialize (Alternative q) = '|':serialize q -> serialize (Directive s) = '%':serializeQstr s -> serialize (Unknown s) = s - -> instance Serializeable Quantity where -> serialize (Quantity a b "") = serialize a ++ " " ++ serializeQstr b -> serialize (Quantity (Exact (AmountStr "")) "" c) = serializeQstr c -> serialize (Quantity a "" c) = serialize a ++ " _ " ++ serializeQstr c -> serialize (Quantity a b c) = serialize a ++ " " ++ serializeQstr b ++ " " ++ serializeQstr c - -> instance Serializeable Approximately where -> serialize (Range a b) = serialize a ++ "-" ++ serialize b -> serialize (Approx a) = '~':serialize a -> serialize (Exact a) = serialize a - -There are two special cases here, both for aesthetic reasons: - -1) If the denominator is one we can just skip printing it, because - :math:`\frac{2}{1} = 2` and -2) if the numerator is larger than the denominator use mixed fraction notation, - because :math:`\frac{7}{2} = 3+\frac{1}{2}` - -> instance Serializeable Amount where -> serialize (AmountRatio a) | denominator a == 1 = show (numerator a) -> serialize (AmountRatio a) | numerator a > denominator a = -> show full ++ "/" ++ show num ++ "/" ++ show denom -> where -> full = numerator a `div` denom -> num = numerator a - full * denom -> denom = denominator a -> serialize (AmountRatio a) = show (numerator a) ++ "/" ++ show (denominator a) -> serialize (AmountStr s) = serializeQstr s - -> serializeQstr "" = "_" -> serializeQstr s | (not . isLetter . head) s || hasSpaces s = quote '"' '"' s -> serializeQstr s = s -> hasSpaces = any isSpace -> quote start end s = [start] ++ concatMap (\c -> if c == end then ['\\', end] else [c]) s ++ [end] - diff --git a/src/Doc.lhs b/src/Doc.lhs deleted file mode 100644 index a0d5121..0000000 --- a/src/Doc.lhs +++ /dev/null @@ -1,61 +0,0 @@ -Building documentation -++++++++++++++++++++++ - -.. class:: nodoc - -> {-# LANGUAGE OverloadedStrings #-} -> import Text.Pandoc -> import Text.Pandoc.Error (handleError) -> import Text.Pandoc.Extensions (extensionsFromList) -> import Text.Pandoc.Highlighting (tango) -> import qualified Data.Text.IO as TIO -> import System.Directory (setCurrentDirectory) - -The documentation can be generated running ``cabal run pesto-doc``. It is -exclusively based on the restructuredText inside this packages’ literal Haskell -source code. - -.. _restructuredText: http://docutils.sourceforge.net/rst.html - -> readDoc = readRST def { -> readerExtensions = extensionsFromList [ -> Ext_literate_haskell -> , Ext_implicit_header_references -> ] -> , readerStandalone = True } - -.. _Pandoc: http://www.pandoc.org/ - -Pandoc_ outputs a single HTML5 page with syntax highlighting and MathJax for -formulas. - -> writeDoc tpl = writeHtml5String def { -> writerTemplate = Just tpl -> , writerHighlightStyle = Just tango -> , writerNumberSections = True -> , writerSectionDivs = True -> , writerTabStop = 4 -> , writerHTMLMathMethod = MathJax "https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML" -> , writerVariables = [("css", "pesto.css"), ("lang", "en")] -> } - -A slightly customized template is used. - -> main = do -> tpl <- readFile "template.html" - -The module Codec.Pesto serves as starting point and it includes every other -module in a sensible order. For the relative includes to work, we need to -change our current working directory. - -> setCurrentDirectory "src/Codec" -> doc <- TIO.readFile "Pesto.lhs" -> result <- runIO $ readDoc doc >>= writeDoc tpl -> setCurrentDirectory "../../" -> html <- handleError result - -Output is written to the directory ``_build``, which contains the corresponding -stylesheet. - -> TIO.writeFile "_build/index.html" html - diff --git a/src/Main.lhs b/src/Main.lhs deleted file mode 100644 index 61ed180..0000000 --- a/src/Main.lhs +++ /dev/null @@ -1,107 +0,0 @@ -User interface -++++++++++++++ - -.. class:: nodoc - -> module Main (main) where -> import System.Environment (getArgs) -> import Data.List (intercalate) -> import Data.Monoid ((<>)) -> -> import Codec.Pesto.Parse (parse, Instruction (Ingredient), Quantity (..)) -> import Codec.Pesto.Graph (extract, toGraph, firstNodeId, resolveReferences) -> import Codec.Pesto.Lint (lint, extractMetadata, Metadata(..), LintResult (LintResult)) -> import Codec.Pesto.Serialize (serialize) - -The user-interface has different modes of operation. All of them read a single -recipe from the standard input. - -> main = do -> (op:_) <- getArgs -> s <- getContents -> either malformedRecipe (run op) (parse s) - -> run "dot" = runDot -> run "metadata" = runMeta -> run "ingredients" = runIngredients -> run _ = const (putStrLn "unknown operation") - -> malformedRecipe = print - -> streamToGraph stream = (nodes, edges) -> where -> doc = (head . extract . snd . unzip) stream -> nodes = zip [firstNodeId..] doc -> edges = toGraph nodes ++ resolveReferences nodes - -dot -^^^ - -Since each recipe is just a directed graph (digraph), GraphViz’ dot language -can represent recipes as well. Example: - -.. code:: bash - - cabal run --verbose=0 pesto dot < spaghetti.pesto | dot -Tpng > spaghetti.png - -> runDot stream = putStrLn $ toDot dotNodes dotEdges -> where -> (nodes, edges) = streamToGraph stream -> maxId = (maximum $ map fst nodes) + 1 -> (lintNodes, lintEdges) = unzip $ map (uncurry lintToNodesEdges) -> $ zip [maxId..] (lint nodes edges) -> dotNodes = concat [ -> [("node", [("fontname", "Roboto Semi-Light")])] -> , map (\(a, label) -> (show a, [("label", serialize label)])) nodes -> , lintNodes -> ] -> dotEdges = concat [ -> map (both show) edges -> , concat lintEdges -> ] - -> lintToNodesEdges nodeid (LintResult t nodes) = let -> n = (show nodeid, [("label", show t), ("color", "red")]) -> e = map (\i -> both show (nodeid, i)) nodes -> in (n, e) - -> both f (a, b) = (f a, f b) - -> toDot nodes edges = "digraph a {" -> <> mconcat (map nodeToDot nodes) -> <> mconcat (map edgeToDot edges) -> <> "}" -> where -> edgeToDot (a, b) = a <> " -> " <> b <> ";" -> nodeToDot (a, b) = a <> " [" <> mconcat (mapToDot b) <> "];" - -> mapToDot = map kvToDot -> kvToDot (k, v) = k <> "=\"" <> quoteString v <> "\"" -> quoteString s = mconcat $ map quoteChar s -> quoteChar '\n' = "\\n" -> quoteChar '"' = "\\\"" -> quoteChar x = [x] - -metadata -^^^^^^^^ - -Print metadata as key-value pairs, separated by ``=``. - -> runMeta stream = maybe (return ()) (mapM_ printMeta) $ uncurry extractMetadata $ streamToGraph stream - -ingredients -^^^^^^^^^^^ - -Extract ingredients and print them in CSV format. This does not take -alternatives into account yet. - -> runIngredients stream = mapM_ (putStrLn . csvQty) $ reverse $ foldl getIngredient [] stream -> where -> getIngredient xs (_, Ingredient q) = q:xs -> getIngredient xs _ = xs - -> printMeta (_, (key, MetaStr value)) = putStrLn $ key ++ "=" ++ value -> printMeta (_, (key, MetaQty q)) = putStrLn $ key ++ "=" ++ csvQty q - -> csvQty (Quantity a b c) = intercalate "," [serialize a, b, c] - diff --git a/src/Test.lhs b/src/Test.lhs deleted file mode 100644 index 400192e..0000000 --- a/src/Test.lhs +++ /dev/null @@ -1,22 +0,0 @@ -Running tests -+++++++++++++ - -.. class:: nodoc - -> import Test.HUnit -> import System.Exit (exitFailure, exitSuccess) -> import Codec.Pesto.Parse (test) -> import Codec.Pesto.Lint (test) -> import Codec.Pesto.Graph (test) - -The testcases can be run with ``cabal test``. This runs *all* testcases from -all modules and prints a summary. - -> main = runTestTT tests >>= \c -> if errors c + failures c > 0 then exitFailure else exitSuccess - -> tests = TestList [ -> "parse" ~: Codec.Pesto.Parse.test -> , "graph" ~: Codec.Pesto.Graph.test -> , "lint" ~: Codec.Pesto.Lint.test -> ] - diff --git a/src/exe/Doc.lhs b/src/exe/Doc.lhs new file mode 100644 index 0000000..63e9847 --- /dev/null +++ b/src/exe/Doc.lhs @@ -0,0 +1,66 @@ +Building documentation +++++++++++++++++++++++ + +.. class:: nodoc + +> {-# LANGUAGE OverloadedStrings #-} +> import Text.Pandoc +> import Text.Pandoc.Highlighting (tango) +> import qualified Data.Text.IO as TIO +> import System.Directory (setCurrentDirectory) +> import Data.Map as M +> import Text.DocTemplates (ToContext(toVal), Context(..)) +> import Data.Text (pack) +> import Data.Either.Combinators (rightToMaybe) + +The documentation can be generated running ``cabal run pesto-doc``. It is +exclusively based on the restructuredText inside this packages’ literal Haskell +source code. + +.. _restructuredText: http://docutils.sourceforge.net/rst.html + +> readDoc = readRST def { +> readerExtensions = extensionsFromList [ +> Ext_literate_haskell +> , Ext_implicit_header_references +> ] +> , readerStandalone = True } + +.. _Pandoc: http://www.pandoc.org/ + +Pandoc_ outputs a single HTML5 page with syntax highlighting and MathJax for +formulas. + +> writeDoc tpl = writeHtml5String def { +> writerTemplate = tpl +> , writerHighlightStyle = Just tango +> , writerNumberSections = True +> , writerSectionDivs = True +> , writerTabStop = 4 +> , writerHTMLMathMethod = MathJax "https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML" +> , writerVariables = Context $ M.fromList [ +> ("css", toVal $ pack "pesto.css") +> , ("lang", toVal $ pack "en") +> , ("include-before", toVal $ pack "<div class=\"wrapper\">") +> , ("include-after", toVal $ pack "</div>") +> ] +> } +> +> main = do + +The module Codec.Pesto serves as starting point and it includes every other +module in a sensible order. For the relative includes to work, we need to +change our current working directory. + +> tpl <- runIO $ compileDefaultTemplate "html5" +> setCurrentDirectory "src/lib/Codec" +> doc <- TIO.readFile "Pesto.lhs" +> result <- runIO $ readDoc doc >>= writeDoc (rightToMaybe tpl) +> setCurrentDirectory "../../../" +> html <- handleError result + +Output is written to the directory ``_build``, which contains the corresponding +stylesheet. + +> TIO.writeFile "_build/index.html" html + diff --git a/src/exe/Main.lhs b/src/exe/Main.lhs new file mode 100644 index 0000000..2f67ffd --- /dev/null +++ b/src/exe/Main.lhs @@ -0,0 +1,106 @@ +User interface +++++++++++++++ + +.. class:: nodoc + +> module Main (main) where +> import System.Environment (getArgs) +> import Data.List (intercalate) +> +> import Codec.Pesto.Parse (parse, Instruction (Ingredient), Quantity (..)) +> import Codec.Pesto.Graph (extract, toGraph, firstNodeId, resolveReferences) +> import Codec.Pesto.Lint (lint, extractMetadata, Metadata(..), LintResult (LintResult)) +> import Codec.Pesto.Serialize (serialize) + +The user-interface has different modes of operation. All of them read a single +recipe from the standard input. + +> main = do +> (op:_) <- getArgs +> s <- getContents +> either malformedRecipe (run op) (parse s) + +> run "dot" = runDot +> run "metadata" = runMeta +> run "ingredients" = runIngredients +> run _ = const (putStrLn "unknown operation") + +> malformedRecipe = print + +> streamToGraph stream = (nodes, edges) +> where +> doc = (head . extract . snd . unzip) stream +> nodes = zip [firstNodeId..] doc +> edges = toGraph nodes ++ resolveReferences nodes + +dot +^^^ + +Since each recipe is just a directed graph (digraph), GraphViz’ dot language +can represent recipes as well. Example: + +.. code:: bash + + cabal run --verbose=0 pesto dot < spaghetti.pesto | dot -Tpng > spaghetti.png + +> runDot stream = putStrLn $ toDot dotNodes dotEdges +> where +> (nodes, edges) = streamToGraph stream +> maxId = (maximum $ map fst nodes) + 1 +> (lintNodes, lintEdges) = unzip $ map (uncurry lintToNodesEdges) +> $ zip [maxId..] (lint nodes edges) +> dotNodes = concat [ +> [("node", [("fontname", "Roboto Semi-Light")])] +> , map (\(a, label) -> (show a, [("label", serialize label)])) nodes +> , lintNodes +> ] +> dotEdges = concat [ +> map (both show) edges +> , concat lintEdges +> ] + +> lintToNodesEdges nodeid (LintResult t nodes) = let +> n = (show nodeid, [("label", show t), ("color", "red")]) +> e = map (\i -> both show (nodeid, i)) nodes +> in (n, e) + +> both f (a, b) = (f a, f b) + +> toDot nodes edges = "digraph a {" +> <> mconcat (map nodeToDot nodes) +> <> mconcat (map edgeToDot edges) +> <> "}" +> where +> edgeToDot (a, b) = a <> " -> " <> b <> ";" +> nodeToDot (a, b) = a <> " [" <> mconcat (mapToDot b) <> "];" + +> mapToDot = map kvToDot +> kvToDot (k, v) = k <> "=\"" <> quoteString v <> "\"" +> quoteString s = mconcat $ map quoteChar s +> quoteChar '\n' = "\\n" +> quoteChar '"' = "\\\"" +> quoteChar x = [x] + +metadata +^^^^^^^^ + +Print metadata as key-value pairs, separated by ``=``. + +> runMeta stream = maybe (return ()) (mapM_ printMeta) $ uncurry extractMetadata $ streamToGraph stream + +ingredients +^^^^^^^^^^^ + +Extract ingredients and print them in CSV format. This does not take +alternatives into account yet. + +> runIngredients stream = mapM_ (putStrLn . csvQty) $ reverse $ foldl getIngredient [] stream +> where +> getIngredient xs (_, Ingredient q) = q:xs +> getIngredient xs _ = xs + +> printMeta (_, (key, MetaStr value)) = putStrLn $ key ++ "=" ++ value +> printMeta (_, (key, MetaQty q)) = putStrLn $ key ++ "=" ++ csvQty q + +> csvQty (Quantity a b c) = intercalate "," [serialize a, b, c] + diff --git a/src/exe/Test.lhs b/src/exe/Test.lhs new file mode 100644 index 0000000..400192e --- /dev/null +++ b/src/exe/Test.lhs @@ -0,0 +1,22 @@ +Running tests ++++++++++++++ + +.. class:: nodoc + +> import Test.HUnit +> import System.Exit (exitFailure, exitSuccess) +> import Codec.Pesto.Parse (test) +> import Codec.Pesto.Lint (test) +> import Codec.Pesto.Graph (test) + +The testcases can be run with ``cabal test``. This runs *all* testcases from +all modules and prints a summary. + +> main = runTestTT tests >>= \c -> if errors c + failures c > 0 then exitFailure else exitSuccess + +> tests = TestList [ +> "parse" ~: Codec.Pesto.Parse.test +> , "graph" ~: Codec.Pesto.Graph.test +> , "lint" ~: Codec.Pesto.Lint.test +> ] + diff --git a/src/lib/Codec/Pesto.lhs b/src/lib/Codec/Pesto.lhs new file mode 100644 index 0000000..6940b4c --- /dev/null +++ b/src/lib/Codec/Pesto.lhs @@ -0,0 +1,286 @@ +========================= +Pesto specification draft +========================= + +Pesto is a text-based human-editable and machine-transformable cooking recipe +interchange format. + +.. warning:: + + This specification is work-in-progress and thus neither stable, consistent or + complete. + +.. class:: nodoc + +> module Codec.Pesto where + +About this document +------------------- + +This section contains various information about this document. The `second +section`__ motivates why inventing another file format is necessary, followed +by the goals_ of Pesto. After a short Pesto primer__ intended for the casual +user the language’s syntax__ and semantics__ are presented. The `linting +section`__ limits the language to useful cooking recipes. Examples for user +presentation of recipes and serialization follow. + +__ motivation_ +__ introduction-by-example_ +__ language-syntax_ +__ language-semantics_ +__ linting_ + +Being a literate program this document is specification and reference +implementation at the same time. The code is written in Haskell_ and uses the +parsec_ parser combinator library, as well as HUnit_ for unit tests. Even +without knowing Haskell’s syntax you should be able to understand this +specification. There’s a description above every code snippet explaining what +is going on. + +.. _Haskell: http://learnyouahaskell.com/ +.. _HUnit: http://hackage.haskell.org/package/HUnit +.. _parsec: http://hackage.haskell.org/package/parsec + +The key words “MUST”, “MUST NOT”, “REQUIRED”, “SHALL”, “SHALL NOT”, “SHOULD”, +“SHOULD NOT”, “RECOMMENDED”, “MAY”, and “OPTIONAL” in this document are to be +interpreted as described in `RFC 2119`_. + +.. _RFC 2119: http://tools.ietf.org/html/rfc2119 + +:Version: 1-draft +:License: CC0_ +:Website: https://6xq.net/pesto/ +:Discussion: https://github.com/PromyLOPh/pesto +:Contributors: + - `Lars-Dominik Braun <mailto:lars+pesto@6xq.net>`_ + +.. _CC0: https://creativecommons.org/publicdomain/zero/1.0/ + +.. _motivation: + +Motivation +---------- + +The landscape of recipe interchange formats is quite fragmented. First of all +there’s HTML microdata. `Google rich snippets`_, which are equivalent to the +schema.org_ microdata vocabulary, are widely used by commercial recipe sites. +Although the main objective of microdata is to make content machine-readable +most sites will probably use it, because it is considered a search-engine +optimization (SEO). Additionally parsing HTML pulled from the web is a +nightmare and thus not a real option for sharing recipes. h-recipe_ provides a +second vocabulary that has not been adopted widely yet. + +.. _Google rich snippets: https://developers.google.com/structured-data/rich-snippets/recipes +.. _schema.org: http://schema.org/Recipe +.. _h-recipe: http://microformats.org/wiki/h-recipe + +.. _formats-by-software: + +Most cooking-related software comes with its own recipe file format. Some of +them, due to their age, can be imported by other programs. + +Meal-Master_ is one of these widely supported formats. A huge trove of recipe files +is `available in this format <http://www.ffts.com/recipes.htm>`_. There does +not seem to be any official documentation for the format, but inofficial +`ABNF grammar`_ and `format description <http://www.ffts.com/mmformat.txt>`_ +exist. A Meal-Master recipe template might look like this: + +.. _MasterCook: http://mastercook.com/ +.. _MXP: http://www.oocities.org/heartland/woods/2073/Appendix.htm +.. _ABNF grammar: http://web.archive.org/web/20161002135718/http://www.wedesoft.de/anymeal-api/mealmaster.html + +.. code:: mealmaster + + ---------- Recipe via Meal-Master (tm) + + Title: <Title> + Categories: <Categories> + Yield: <N servings> + + <N> <unit> <ingredient> + … + + -------------------------------<Section name>----------------------------- + <More ingredients> + + <Instructions> + + ----- + +Rezkonv_ aims to improve the Mealmaster format by lifting some of its character +limits, adding new syntax and translating it to german. However the +specification is available on request only. + +A second format some programs can import is MasterCook_’s MXP_ file format, as +well as its XML-based successor MX2. And then there’s a whole bunch of +more-or-less proprietary formats: + +`Living Cookbook`_ + Uses a XML-based format called fdx version 1.1. There’s no specification to + be found, but a few examples__ are available and those are dated 2006. +`My CookBook`_ + Uses the file extension .mcb. A specification `is available + <http://mycookbook-android.com/site/my-cookbook-xml-schema/>`_. +KRecipes_ + Uses its own export format. However there is no documentation whatsoever. +Gourmet_ + The program’s export format suffers from the same problem. The only + document available is the `DTD + <https://github.com/thinkle/gourmet/blob/7715c6ef87ee8c106f0a021972cd70d61d83cadb/data/recipe.dtd>`_. +CookML_ + Last updated in 2006 (version 1.0.4) for the german-language shareware + program Kalorio has a custom and restrictive licence that requires + attribution and forbids derivate works. +Paprika_ + Cross-platform application, supports its own “emailed recipe format” and a + simple YAML-based format. + +__ http://livingcookbook.com/Resource/DownloadableRecipes +.. _Paprika: https://paprikaapp.com/help/android/#importrecipes + +.. _xml-formats: + +Between 2002 and 2005 a bunch of XML-based exchange formats were created. They +are not tied to a specific software, so none of them seems to be actively used +nowadays: + +RecipeML_ + Formerly known as DESSERT and released in 2002 (version 0.5). The + license requires attribution and – at the same time – forbids using the name + RecipeML for promotion without written permission. +eatdrinkfeelgood_ + Version 1.1 was released in 2002 as well, but the site is not online + anymore. The DTD is licensed under the `CC by-sa`_ license. +REML_ + Released in 2005 (version 0.5), aims to improve support for commercial uses + (restaurant menus and cookbooks). The XSD’s license permits free use and + redistribution, but the reference implementation has no licensing + information. +`RecipeBook XML`_ + Released 2005 as well and shared unter the terms of `CC by-sa`_ is not + available on the web any more. + +.. _CC by-sa: https://creativecommons.org/licenses/by-sa/2.5/ + +.. _obscure-formats: + +Finally, a few non-XML or obscure exchange formats have been created in the past: +YumML_ is an approach similar to those listed above, but based on YAML instead +of XML. The specification has been removed from the web and is available +through the Web Archive only. + +`Cordon Bleu`_ (1999) encodes recipes as programs for a cooking machine and +defines a Pascal-like language. Being so close to real programming languages +Cordon Bleu is barely useable by anyone except programmers. Additionally the +language is poorly-designed, since its syntax is inconsistent and the user is +limited to a set of predefined functions. + +Finally there is RxOL_, created in 1985. It constructs a graph from recipes +written down with a few operators and postfix notation. It does not separate +ingredients and cooking instructions like every other syntax introduced before. +Although Pesto is not a direct descendant of RxOL both share many ideas. + +microformats.org_ has a similar list of recipe interchange formats. + +.. _REML: http://reml.sourceforge.net/ +.. _eatdrinkfeelgood: https://web.archive.org/web/20070109085643/http://eatdrinkfeelgood.org/1.1/ +.. _RecipeML: http://www.formatdata.com/recipeml/index.html +.. _CookML: http://www.kalorio.de/index.php?Mod=Ac&Cap=CE&SCa=../cml/CookML_EN +.. _Meal-Master: http://web.archive.org/web/20151029032924/http://episoft.home.comcast.net:80/~episoft/ +.. _RecipeBook XML: http://web.archive.org/web/20141101132332/http://www.happy-monkey.net/recipebook/ +.. _YumML: http://web.archive.org/web/20140703234140/http://vikingco.de/yumml.html +.. _Rezkonv: http://www.rezkonv.de/software/rksuite/rkformat.html +.. _RxOL: http://www.dodomagnifico.com/641/Recipes/CompCook.html +.. _Gourmet: http://thinkle.github.io/gourmet/ +.. _KRecipes: http://krecipes.sourceforge.net/ +.. _Cordon Bleu: http://www.inf.unideb.hu/~bognar/ps_ek/cb_lang.ps +.. _microformats.org: http://microformats.org/wiki/recipe-formats +.. _Living Cookbook: http://livingcookbook.com/ +.. _My CookBook: http://mycookbook-android.com/ + +.. There is a copy at http://diyhpl.us/~bryan/papers2/CompCook.html as well + +.. More interesting stuff: +.. - http://blog.moertel.com/posts/2010-01-08-a-formal-language-for-recipes-brain-dump.html +.. - http://www.dangermouse.net/esoteric/chef.html + +Goals +----- + +First of all recipes are written *by* humans *for* humans. Thus a +human-readable recipe interchange format is not enough. The recipes need to be +human-editable without guidance like a GUI or assistant. That’s why, for +instance, XML is not suitable and the interchange formats listed `above +<xml-formats_>`_ have largely failed to gain traction. XML, even though simple +itself, is still too complicated for the ordinary user. Instead a format needs +to be as simple as possible, with as little markup as possible. A human editor +must be able to remember the entire syntax. This works best if the file +contents “make sense”. A good example for this is Markdown_. + +.. _Markdown: https://daringfireball.net/projects/markdown/syntax + +We also have to acknowledge that machines play an important role in our daily +life. They can help us, the users, accomplish our goals if they are able to +understand the recipes as well. Thus they too need to be able to read and write +recipes. Again, designing a machine-readable format is not enough. Recipes must +be machine-transformable. A computer program should be able to create a new +recipe from two existing ones, look up the ingredients and tell us how many +joules one piece of that cake will have. And so on. + +That being said, Pesto does not aim to carry additional information about +ingredients or recipes itself. Nutrition data for each ingredient should be +maintained in a separate database. Due to its minimal syntax Pesto is also not +suitable for extensive guides on cooking or the usual chitchat found in cooking +books. + +.. _introduction-by-example: + +Introduction by example +----------------------- + +.. code:: + + So let’s start by introducing Pesto by example. This text does not belong + to the recipe and is ignored by any software. The following line starts the + recipe: + + %pesto + + &pot + +1 l water + +salt + [boil] + + +100 g penne + &10 min + [cook] + + >1 serving pasta + (language: en) + +And that’s how you make pasta: Boil one liter of water in a pot with a little +bit of salt. Then add 100 g penne, cook them for ten minutes and you get one +serving pasta. That’s all. + +There’s more syntax available to express alternatives (either penne or +tagliatelle), ranges (1–2 l water or approximately 1 liter water) and metadata. +But now you can have a first peek at `my own recipe collection`_. + +.. _my own recipe collection: https://github.com/PromyLOPh/rezepte + +.. include:: Pesto/Parse.lhs +.. include:: Pesto/Graph.lhs +.. include:: Pesto/Lint.lhs +.. include:: Pesto/Serialize.lhs + +Using this project +------------------ + +This project uses cabal. It provides the Codec.Pesto library that implements +the Pesto language as described in the previous sections. It also comes with +three binaries. + +.. include:: ../../exe/Main.lhs +.. include:: ../../exe/Test.lhs +.. include:: ../../exe/Doc.lhs + diff --git a/src/lib/Codec/Pesto/Graph.lhs b/src/lib/Codec/Pesto/Graph.lhs new file mode 100644 index 0000000..511adca --- /dev/null +++ b/src/lib/Codec/Pesto/Graph.lhs @@ -0,0 +1,276 @@ +.. _language-semantics: + +Language semantics +------------------ + +.. class:: nodoc + +> module Codec.Pesto.Graph ( +> toGraph +> , walkRoot +> , outgoingEdges +> , outgoingNodes +> , incomingEdges +> , incomingNodes +> , firstNodeId +> , resolveReferences +> , test +> , extract +> , NodeId +> , Node +> , Nodes +> , Edge +> , Edges +> ) where +> import Data.Char (toLower) +> import Data.List (sort, nub) +> import Test.HUnit hiding (test, Node) +> +> import Codec.Pesto.Parse hiding (test) + +The parser’s output, a stream of instructions, may contain multiple recipes. A +recipe must start with the directive “pesto” and may end with “buonappetito”. +This function extracts all recipes from the stream and removes both directives. + +- easily embed recipes into other documents + +> startDirective = Directive "pesto" +> endDirective = Directive "buonappetito" + +> extract [] = [] +> extract (s:stream) | s == startDirective = between:extract next +> where +> isEnd x | x `elem` [startDirective, endDirective] = True +> isEnd _ = False +> (between, next) = break isEnd stream +> extract (_:xs) = extract xs + +Start and end directive are removed from the extracted instructions. The +directive “buonappetito” is optional at the end of a stream. + +> testExtract = [ +> extract [startDirective, endDirective] ~?= [[]] +> , extract [startDirective, Action "foobar", endDirective] ~?= [[Action "foobar"]] +> , extract [startDirective] ~?= [[]] +> , extract [startDirective, Directive "foobar"] ~?= [[Directive "foobar"]] + +Instructions surrounding the start and end directive are removed. + +> , extract [Unknown "Something", startDirective] ~?= [[]] +> , extract [Unknown "Something", Action "pour", startDirective] ~?= [[]] +> , extract [startDirective, endDirective, Annotation "something"] ~?= [[]] + +The stream may contain multiple recipes. The start directive also ends the +previous recipe and starts a new one. + +> , extract [startDirective, Action "pour", endDirective, Action "foobar", startDirective, Annotation "something"] ~?= [[Action "pour"], [Annotation "something"]] +> , extract [startDirective, Action "heat", startDirective, Annotation "something"] ~?= [[Action "heat"], [Annotation "something"]] +> , extract [startDirective, Annotation "foobar", startDirective, endDirective] ~?= [[Annotation "foobar"], []] +> ] + +Each recipe’s stream of instructions drives a stack-based machine that transforms +it into a directed graph. Think of the stack as your kitchen’s workspace that +is used to prepare the food’s components. You can add new ingredients, perform +actions on them, put them aside and add them again. + +This function processes a list of nodes, that is instructions uniquely identified +by an integer and returns the edges of the directed graph as a list of tuples. + +> toGraph nodes = third $ foldl f (Nothing, [[]], []) nodes +> where + +Ingredients are simply added to the current workspace. They should for example +appear on the shopping list. + +> f ctx (i, Ingredient _) = addToStack ctx i + +The same happens for for tools. However they are not part of the final product, +but used in the process of making it. For instance they do not appear on the +shopping list. `Time is a tool <time-is-a-tool_>`_. + +> f ctx (i, Tool _) = addToStack ctx i + +Actions take all ingredients and tools currently on the workspace, perform some +action with them and put the product back onto the workspace. + +> f (_, stack:sx, edges) (i, Action _) = (Just i, [i]:stack:sx, edgesTo i stack ++ edges) +> f (_, [], _) (_, Action _) = undefined -- never reached + +Results add a label to the current workspace’s contents and move them out of +the way. It should be a meaningful name, not just A and B obviously. +Consecutive Results add different labels to the same workspace. That’s useful +when an action yields multiple results at once that are processed in different +ways. + +> f ctx (i, Result _) = consumeStack ctx i + +Alternatives too add a label to the current workspace’s content, but they pick +one of things on the workspace and throw everything else away. This allows +adding optional or equivalent ingredients to a recipe (i.e. margarine or butter). + +> f ctx (i, Alternative _) = consumeStack ctx i + +References are similar to ingredients. They are used to add items from a +workspace labeled with Result or Alternative. More on that `in the next section +<references_>`_. + +> f ctx (i, Reference _) = addToStack ctx i + +Annotations add a description to any of the previous instructions. They can be +used to provide more information about ingredients (so “hot water” becomes +“+water (hot)”, tools (“&oven (200 °C)”) or actions (“[cook] (XXX)”). + +> f ctx@(Nothing, _, _) (_, Annotation _) = ctx +> f (Just prev, s, edges) (i, Annotation _) = (Just prev, s, (i, prev):edges) + +Unused directives or unknown instructions are danging nodes with no connection to +other nodes. + +> f ctx (_, Directive _) = ctx +> f ctx (_, Unknown _) = ctx + +These are helper functions: + +> addToStack (_, stack:sx, edges) i = (Just i, (i:stack):sx, edges) +> addToStack (_, [], _) _ = undefined -- never reached +> consumeStack (_, s, edges) i = +> let +> stack = dropWhile null s +> (top:sx) = if null stack then [[]] else stack +> in (Just i, []:top:sx, edgesTo i top ++ edges) +> edgesTo i = map (\x -> (x, i)) + +Here are a few example of how this stack-machine works. Each edge is a tuple of +two integer numbers. These are the nodes it connects, starting with zero. +Ingredient, Tool and Reference itself do not create any edges: + +> testGraph = [ +> cmpGraph "+ketchup &spoon *foobar" [] + +But Action, Alternative and Result do in combination with them: + +> , cmpGraph "+foobar [barbaz]" [(0, 1)] +> , cmpGraph "+foobar |barbaz" [(0, 1)] +> , cmpGraph "+foobar >barbaz" [(0, 1)] +> , cmpGraph "+foobar +B >barbaz" [(0, 2), (1, 2)] +> , cmpGraph "+foobar >barbaz +foobar >barbaz" [(0, 1), (2, 3)] +> , cmpGraph "+foobar [barbaz] +foobar >barbaz" [(0, 1), (1, 3), (2, 3)] +> , cmpGraph "&foobar [barbaz] [C] >D" [(0, 1), (1, 2), (2, 3)] + +If the stack is empty, i.e. it was cleared by a Result or Alternative +instruction, consecutive results or alternatives operate on the *previous*, +non-empty stack. + +> , cmpGraph "+foobar >barbaz >C" [(0, 1), (0, 2)] +> , cmpGraph "+foobar |barbaz |C" [(0, 1), (0, 2)] +> , cmpGraph "+foobar >barbaz |C" [(0, 1), (0, 2)] + +Unless that stack too is empty. Then they do nothing: + +> , cmpGraph ">foobar >foobar" [] +> , cmpGraph "|foobar |foobar" [] +> , cmpGraph "(foobar) (foobar)" [] +> , cmpGraph "[foobar]" [] + +The Annotation instruction always creates an edge to the most-recently processed +node that was not an annotation. Thus two consecutive annotations create edges +to the same node. + +> , cmpGraph "+foobar (barbaz)" [(1, 0)] +> , cmpGraph "+foobar (barbaz) (C)" [(1, 0), (2, 0)] +> , cmpGraph "+foobar (barbaz) >barbaz" [(1, 0), (0, 2)] +> , cmpGraph "+foobar >barbaz (C)" [(0, 1), (2, 1)] +> , cmpGraph "+foobar |barbaz (C)" [(0, 1), (2, 1)] +> , cmpGraph "*foobar (C)" [(1, 0)] + +Unknown directives or instructions are never connected to other nodes. + +> , cmpGraph "%invalid" [] +> , cmpGraph "invalid" [] +> ] + +References +++++++++++ + +Results and alternatives can be referenced with the Reference instruction. +Resolving these references does not happen while buiding the graph, but +afterwards. This allows referencing an a result or alternative before its +definition with regard to the their processing order. + +Resolving references is fairly simple: For every reference its object name a +case-insensitive looked is performed in a table containing all results and +alternatives. If it succeeds an edge from every result and alternative returned +to the reference in question is created. + +> resolveReferences nodes = foldl f [] nodes +> where +> f edges (i, ref@(Reference _)) = map (\x -> (x, i)) (findTarget nodes ref) ++ edges +> f edges _ = edges + +> findTarget nodes (Reference (Quantity _ _ a)) = map fst $ filter (isTarget a) nodes +> where +> lc = map toLower +> isTarget dest (_, Result (Quantity _ _ x)) = lc x == lc dest +> isTarget dest (_, Alternative (Quantity _ _ x)) = lc x == lc dest +> isTarget _ _ = False +> findTarget _ _ = [] + +References works before or after the result instruction. + +> testRef = [ +> cmpGraphRef ">foobar *foobar" [(0, 1)] +> , cmpGraphRef ">foobar |foobar *foobar" [(0, 2), (1, 2)] +> , cmpGraphRef "+A >foobar +B >barbaz *foobar *barbaz" [(1, 4), (3, 5)] +> , cmpGraphRef "*foobar >foobar" [(1, 0)] + +Nonexistent references do not create an edge. + +> , cmpGraphRef ">foobar *barbaz" [] + +References can use amounts and units. + +> , cmpGraphRef ">foobar *1 _ foobar *2 _ foobar" [(0, 1), (0, 2)] + +There are a few cases that do not make sense here (like loops or multiple +results with the same name). They are permitted at this stage, but rejected +`later <reject-loops_>`_. + +> , cmpGraphRef "*foobar |foobar >foobar" [(1, 0), (2, 0)] +> , cmpGraphRef "|foobar *foobar >foobar *foobar" [(0, 1), (0, 3), (2, 1), (2, 3)] +> ] + +Appendix +++++++++ + +> runGraphWith f doc expect = sort edges ~?= sort expect +> where +> (Right op) = (head . extract . snd . unzip) <$> parse ("%pesto " ++ doc) +> nodes = zip [firstNodeId..] op +> edges = f nodes +> cmpGraph = runGraphWith toGraph +> cmpGraphRef = runGraphWith resolveReferences + +> type NodeId = Int +> type Node a = (NodeId, a) +> type Nodes a = [Node a] +> type Edge = (NodeId, NodeId) +> type Edges = [Edge] +> firstNodeId = 0 :: NodeId + +Find graph’s root node(s), that is a node without outgoing edges: + +> walkRoot nodes edges = let out = nub $ map fst edges +> in filter (\(x, _) -> notElem x out) nodes + +Get all nodes with edges pointing towards nodeid + +> incomingEdges edges (nodeid, _) = filter ((==) nodeid . snd) edges +> incomingNodes nodes edges n = map ((!!) nodes . fst) $ incomingEdges edges n + +> outgoingEdges edges (nodeid, _) = filter ((==) nodeid . fst) edges +> outgoingNodes nodes edges n = map ((!!) nodes . snd) $ outgoingEdges edges n + +> test = ["graph" ~: testGraph, "ref" ~: testRef, "extract" ~: testExtract] + +> third (_, _, x) = x + diff --git a/src/lib/Codec/Pesto/Lint.lhs b/src/lib/Codec/Pesto/Lint.lhs new file mode 100644 index 0000000..bc99e14 --- /dev/null +++ b/src/lib/Codec/Pesto/Lint.lhs @@ -0,0 +1,432 @@ +Linting +------- + +.. class:: nodoc + +> module Codec.Pesto.Lint (lint +> , test +> , parseMetadata +> , extractMetadata +> , Metadata(..) +> , LintResult(..)) where +> import Test.HUnit hiding (test, Node) +> import Data.List (sort, isPrefixOf) +> import Text.Parsec hiding (parse) +> import Data.Char (isSpace) +> import Data.Ratio ((%)) +> import Data.Maybe (fromMaybe) +> +> import Codec.Pesto.Graph hiding (test) +> import Codec.Pesto.Parse hiding (test) + +Not every graph generated in the previous section is a useful recipe. Some +instruction sequences just do not make sense. The tests in this section can +detect those. Failing any of them does not render a stream of instructions or +graph invalid. They just does not describe a *useful* recipe. Thus +implementations must not generate or export such documents. However they should +accept input that fails any of the tests and warn the user about the failure. + +Additionally this section provides guidance on how to use the instructions +provided by the Pesto language properly. + +Graph properties +++++++++++++++++ + +- weakly connected, no dangling nodes/subgraphs +- acyclic + +The graph must have exactly one root node (i.e. a node with incoming edges +only). This also requires all results and alternatives to be referenced +somewhere. Directives are either consumed when parsing, generating a graph and +linting. Otherwise they are dangling as well. Unknown instructions are always +dangling. + +> rootIsResult nodes edges = case walkRoot nodes edges of +> [] -> [LintResult NoRootNode []] +> (_, Result _):[] -> [] +> (i, _):[] -> [LintResult NonResultRootNode [i]] +> xs -> [LintResult MoreThanOneRootNode (map fst xs)] + +Empty recipes or circular references have no root node: + +> testConnectivity = [ +> cmpLint "" [LintResult NoRootNode [], LintResult NoMetadata []] +> , cmpLint "*foobar >foobar" +> [LintResult NoRootNode [], LintResult NoMetadata []] +> , cmpLint "+foobar" +> [LintResult NonResultRootNode [0], LintResult NoMetadata []] + +Directives and unknown instructions are dangling and thus root nodes. + +> , cmpLint "invalid %invalid +foo >bar" +> [LintResult MoreThanOneRootNode [0,1,3], LintResult NoMetadata []] +> ] + +Metadata +++++++++ + +.. _resultsused: + +.. class:: todo + +root node can be alternative too? + +The graph’s root node must be a result. It contains yield (amount and unit) and +title (object) of the recipe. + +> extractMetadata nodes edges = case walkRoot nodes edges of +> [n@(i, Result q@(Quantity _ _ title))] -> +> Just $ (i, ("title", MetaStr title)) +> :(i, ("yield", MetaQty q)) +> :foldl f [] (incomingNodes nodes edges n) +> _ -> Nothing +> where + +Additional key-value metadata for the whole recipe can be added as annotations +to the root node. If multiple annotations with the same key exist the key maps +to a list of those values. Annotations that are unparseable key-value pairs are +added as recipe description instead. + +> f xs (i, Annotation s) = case parseMetadata s of +> Left _ -> (i, ("description", MetaStr s)):xs +> Right (k, v) -> (i, (k, MetaStr v)):xs +> f xs _ = xs + +Key and value are separated by a colon. Keys must not contain whitespace or the +colon char. A value may be empty. + +> parseMetadata = runParser metadata () "" +> metadata = let keychars = satisfy (\x -> not (isSpace x) && x /= ':') in (,) +> <$> many1 keychars +> <*> (char ':' *> spaces *> many anyChar) + +> lintMetadata nodes edges = case extractMetadata nodes edges of +> Just result -> foldl checkKey [] result +> Nothing -> [LintResult NoMetadata []] +> where +> checkKey xs (_, (k, _)) | isKeyKnown k = xs +> checkKey xs (i, _) = LintResult UnknownMetadataKey [i]:xs + +Valid metadata keys are listed below. Additionally applications may add keys by +prefixing them with “x-myapp-”, thus an application called “basil” adding +“some-key” would use the full key “x-basil-some-key”. + +> isKeyKnown k = k `elem` knownKeys || "x-" `isPrefixOf` k + +The following metadata keys are permitted: + +> knownKeys = [ + +Both, title and description, are implicit. + +> "title" +> , "description" + +The recipe’s language, as 2 character code (`ISO 639-1 +<http://www.loc.gov/standards/iso639-2/php/English_list.php>`_). + +> , "language" + +Yield and time both must be a quantity. + +> , "yield" +> , "time" + +An image can be a relative file reference or URI + +> , "image" +> , "author" +> ] + +.. class:: todo + +Check the metadata’s value format. I.e. yield/time must be quantity + +For instance a german language recipe for one person would look like this: + +> testMetadata = [ +> cmpLintMeta "+foo >1 ml foobar (language: de) (x-app-key: value)" +> [] +> (Just [(1, ("title", MetaStr "foobar")) +> , (1, ("yield", MetaQty (Quantity (Exact (AmountRatio (1%1))) "ml" "foobar"))) +> , (2, ("language", MetaStr "de")) +> , (3, ("x-app-key", MetaStr "value"))]) + +Unparseable annotations or unknown keys are linting errors: + +> , cmpLintMeta "+foo >foobar (unknown-key: value)" +> [LintResult UnknownMetadataKey [2]] +> (Just [(1, ("title", MetaStr "foobar")) +> , (1, ("yield", MetaQty (strQuantity "foobar"))) +> , (2, ("unknown-key", MetaStr "value"))]) + +Root node annotations not containing a parseable key-value pair are assigned +the key “description”. + +> , cmpLintMeta "+foo >foobar ( some description ) (another one: with colon) (another: valid key-value)" +> [LintResult UnknownMetadataKey [4]] +> (Just [(1, ("title", MetaStr "foobar")) +> , (1, ("yield", MetaQty (strQuantity "foobar"))) +> , (2, ("description", MetaStr " some description ")) +> , (3, ("description", MetaStr "another one: with colon")) +> , (4, ("another", MetaStr "valid key-value"))]) +> ] + +.. _time-is-a-tool: + +Time is a tool +++++++++++++++ + +By definition time is a tool and not an ingredient. + +> timeUnits = ["s", "min", "h", "d"] +> +> isTime (Quantity _ unit "") | unit `elem` timeUnits = True +> isTime _ = False + +> timeIsATool nodes _ = foldl f [] nodes +> where +> f xs (nodeid, Ingredient q) | isTime q = LintResult TimeIsATool [nodeid]:xs +> f xs _ = xs + +> testLintQuantity = [ +> cmpLint "+10 min >foo" [LintResult TimeIsATool [0]] +> , cmpLint "+10-12 h >foo" [LintResult TimeIsATool [0]] +> , cmpLint "+90/120 s >foo" [LintResult TimeIsATool [0]] +> , cmpLint "+~12 s >foo" [LintResult TimeIsATool [0]] +> , cmpLint "&10 min [bar] >foo" [] +> ] + +Only actions can be annotated with a time. It can be used to indicate how long +a certain action is *expected* to take (i.e. peeling potatoes takes two +minutes) or how long the action is supposed to be executed (i.e. cook five +minutes). More time annotations improve the software’s scheduling capabilities. + +> timeAnnotatesAction nodes edges = foldl f [] nodes +> where +> f xs n@(nodeid, Tool q) | isTime q && (not . allActions) (outgoingEdges edges n) = LintResult TimeAnnotatesAction [nodeid]:xs +> f xs _ = xs +> toNodelist = (!!) nodes . snd +> allActions = all (isAction . snd . toNodelist) + +For example “cook 10 minutes” can be expressed with + +> testTimeAnnotatesAction = [ +> cmpLint "&10 min [cook] >soup" [] +> , cmpLint "&10 min [cook] &5-6 h [cook again] >soup" [] +> , cmpLint "&10 min >soup" [LintResult TimeAnnotatesAction [0]] +> , cmpLint "&10 min &15 min |time *time [cook] >soup" +> [LintResult TimeAnnotatesAction [0], LintResult TimeAnnotatesAction [1]] +> ] + +.. _well-known-units: + +Well-known units +++++++++++++++++ + +Units can be an arbitrary strings, but implementations should recognize the +common metric units g (gram), l (litre) and m (metre). One of these prefixes +may be used with each of them: m (milli-), c (centi-), d (dezi-) and k (kilo-). +Additionally time in s (second), min (minute), h (hour), d (day) should be +accepted. + +> wellKnownUnit nodes _ = foldl f [] nodes +> where +> extractQty (Ingredient q) = Just q +> extractQty (Tool q) = Just q +> extractQty (Result q) = Just q +> extractQty (Alternative q) = Just q +> extractQty (Reference q) = Just q +> extractQty _ = Nothing +> f xs (nodeid, instr) | fromMaybe False (extractQty instr >>= (return . not . known)) = +> LintResult UnitNotWellKnown [nodeid]:xs +> f xs _ = xs +> known (Quantity _ unit _) = unit `elem` knownUnits +> knownUnits = [ +> "" +> , "mg", "g", "kg" +> , "ml", "cl", "dl", "l" +> , "cm", "dm", "m" +> ] ++ timeUnits + +Usage of imperial units (inch, pound, …) as well as non-XXX units like +“teaspoon”, “cup”, … is discouraged because the former is used by just three +countries in the world right now and the latter is language- and +country-dependent. The implementation may provide the user with a conversion +utility. + +.. class:: todo + +- example: 1 oz ~= 28.349523125 g, can only be approximated by rational number, for instance 29767/1050 g +- 15 oz would are :math:`\frac{29767}{70} \mathrm{g} = 425+\frac{17}{70} \mathrm{g}`, since nobody sells 17/70 g the implementation would round down to ~425 g (although <1g is not really enough to justify adding approx) + +> testLintWellKnownUnit = [ +> cmpLint "+1 in foobar >bar" [LintResult UnitNotWellKnown [0]] +> , cmpLint "+2 teaspoons foobar >bar" [LintResult UnitNotWellKnown [0]] +> , cmpLint "+3 cups foobar >bar" [LintResult UnitNotWellKnown [0]] +> , cmpLint "+1 ml foobar >bar" [] +> , cmpLint "+1 cl foobar >bar" [] +> , cmpLint "+1 dl foobar >bar" [] +> , cmpLint "+1 l foobar >bar" [] +> , cmpLint "+2 _ something >bar" [] +> , cmpLint "&1 min [foo] >bar" [] + +The unit is case-sensitive, thus + +.. class:: todo + +Should we allow case-insensitive units? References are case-insensitive as +well… + +> , cmpLint "+1 Mg foobar >bar" [LintResult UnitNotWellKnown [0]] +> , cmpLint "+1 kG foobar >bar" [LintResult UnitNotWellKnown [0]] +> , cmpLint "&1 MIN [foo] >bar" [LintResult UnitNotWellKnown [0]] +> ] + +References +++++++++++ + +All references must be resolved. An `earlier check <resultsused_>`_ makes sure +all results and alternatives are referenced at some point. + +> referencesResolved nodes edges = foldl f [] nodes +> where +> f xs n@(nodeid, Reference _) | null (incomingEdges edges n) = +> LintResult UndefinedReference [nodeid]:xs +> f xs _ = xs + +> testLintRefs = [ +> cmpLint "*foobar >foobar >barbaz" [] +> , cmpLint "*foobar >foo" [LintResult UndefinedReference [0]] +> ] + +A result must have at least one incoming edge. This is a special case and can +only occur at the beginning of a recipe. + +> resultNonempty nodes edges = foldl f [] nodes +> where +> f xs n@(nodeid, Result _) | null (incomingEdges edges n) = +> LintResult TooFewChildren [nodeid]:xs +> f xs _ = xs + +> testLintResultNonempty = [ +> cmpLint ">bar *bar >baz" [LintResult TooFewChildren [0]] +> , cmpLint "+A >bar *bar >baz" [] +> , cmpLint "+A >bar >foo *bar *foo >baz" [] +> ] + +Alternatives must have at least two incoming edges since a smaller amount would +make the alternative pointless. + +> twoAlternatives nodes edges = foldl f [] nodes +> where +> f xs n@(nodeid, Alternative _) | length (incomingEdges edges n) < 2 = +> LintResult TooFewChildren [nodeid]:xs +> f xs _ = xs + +> testLintTwoAlternatives = [ +> cmpLint "+A |foo *foo >bar" [LintResult TooFewChildren [1]] +> , cmpLint "+A +B |foo *foo >bar" [] + +.. class:: todo + +should we allow this? it does not make sense imo + +> , cmpLint "+A &B |foo *foo >bar" [] +> ] + +.. _reject-loops: + +.. class:: todo + +- reject loops +- reject multiple results/alternatives with the same name + +Ranges +++++++ + +The first amount of a range ratio must be strictly smaller than the second. +This limitation is not enforced for ranges containing strings. + +> rangeFromLargerThanTo nodes _ = foldl f [] nodes +> where +> f xs (nodeid, Ingredient q) | not $ rangeOk q = +> LintResult RangeFromLargerThanTo [nodeid]:xs +> f xs (nodeid, Reference q) | not $ rangeOk q = +> LintResult RangeFromLargerThanTo [nodeid]:xs +> f xs _ = xs +> rangeOk (Quantity (Range (AmountRatio a) (AmountRatio b)) _ _) = a < b +> rangeOk _ = True + +> testRangeFromLargerThanTo = [ +> cmpLint "+2-3 l water >bar" [] +> , cmpLint "+3-2 l water >bar" [LintResult RangeFromLargerThanTo [0]] +> , cmpLint "+2/3-1/3 l water >bar" [LintResult RangeFromLargerThanTo [0]] +> , cmpLint "+some-many _ eggs >bar" [] +> , cmpLint "+1-\"a few\" _ eggs >bar" [] +> ] + +Appendix +++++++++ + +> data LintResult = LintResult LintStatus [NodeId] deriving (Show, Eq, Ord) +> data LintStatus = +> NoRootNode +> | NonResultRootNode +> | MoreThanOneRootNode +> | UndefinedReference +> | TooFewChildren +> | TimeIsATool +> | TimeAnnotatesAction +> | UnitNotWellKnown +> | InvalidNode +> | RangeFromLargerThanTo +> | NoMetadata +> | UnknownMetadataKey +> deriving (Show, Eq, Ord) + +Every lint test checks a single aspect of the graph. + +> lint nodes edges = concatMap (\f -> f nodes edges) lintTests + +> lintTests = [ +> rootIsResult +> , referencesResolved +> , resultNonempty +> , twoAlternatives +> , timeIsATool +> , timeAnnotatesAction +> , wellKnownUnit +> , lintMetadata +> , rangeFromLargerThanTo +> ] + +> withGraph doc f = f nodes edges +> where +> (Right op) = (head . extract . snd . unzip) <$> parse ("%pesto " ++ doc) +> nodes = zip [firstNodeId..] op +> edges = toGraph nodes ++ resolveReferences nodes + +> cmpLint doc expect = withGraph doc (\nodes edges -> doc ~: sort (lint nodes edges) ~?= sort expect) + + +> data Metadata = MetaQty Quantity | MetaStr String deriving (Show, Eq) + +> cmpLintMeta doc expectLint expectMeta = withGraph doc (\nodes edges -> doc ~: [ +> sort (lint nodes edges) ~?= sort expectLint +> , extractMetadata nodes edges ~?= expectMeta +> ]) +> strQuantity = Quantity (Exact (AmountStr "")) "" + +> test = [ +> testConnectivity +> , testMetadata +> , testLintRefs +> , testLintQuantity +> , testLintWellKnownUnit +> , testTimeAnnotatesAction +> , testLintTwoAlternatives +> , testLintResultNonempty +> , testRangeFromLargerThanTo +> ] + diff --git a/src/lib/Codec/Pesto/Parse.lhs b/src/lib/Codec/Pesto/Parse.lhs new file mode 100644 index 0000000..518b866 --- /dev/null +++ b/src/lib/Codec/Pesto/Parse.lhs @@ -0,0 +1,403 @@ +.. _language-syntax: + +Language syntax +--------------- + +.. class:: nodoc + +> module Codec.Pesto.Parse ( +> parse +> , test +> , Instruction(..) +> , Quantity(..) +> , Unit +> , Object +> , Approximately(..) +> , Amount(..) +> , isResult +> , isReference +> , isAlternative +> , isAnnotation +> , isAction +> , isDirective +> , isUnknown +> , spaces1 +> , notspace +> ) where +> import Data.Char (isSpace) +> import Data.Ratio ((%)) +> import Text.Parsec hiding (parse) +> import Text.ParserCombinators.Parsec.Pos (newPos) +> import Text.ParserCombinators.Parsec.Error (newErrorUnknown) +> import Test.HUnit hiding (test) +> +> import Codec.Pesto.Serialize (serialize) + +Pesto parses UTF-8_ encoded input data consisting of space-delimited +instructions. Every character within the Unicode whitespace class is +considered a space. + +.. _UTF-8: https://tools.ietf.org/html/rfc3629 +.. _spaces1: + +> stream = ((,) <$> getPosition <*> instruction) `sepEndBy` spaces1 +> <?> "stream" +> spaces1 = many1 space + +The following instructions are supported: + +> data Instruction = +> Annotation String +> | Ingredient Quantity +> | Tool Quantity +> | Action String +> | Reference Quantity +> | Result Quantity +> | Alternative Quantity +> | Directive String +> | Unknown String +> deriving (Show, Eq) +> +> instruction = +> try annotation +> <|> try ingredient +> <|> try tool +> <|> try action +> <|> try result +> <|> try alternative +> <|> try reference +> <|> try directive +> <|> try unknown +> <?> "instruction" + +The pesto grammar has two instruction types: The first one begins with a +start symbol (``start``) and consumes any character up to and including a +terminating symbol (``end``), which can be escaped with a backslash (``\``). + +> betweenEscaped :: Char -> Char -> Parsec String () String +> betweenEscaped start end = +> char start +> *> many (try (char '\\' *> char end) <|> satisfy (/= end)) +> <* char end + +Annotations and actions both are of this kind: + +> annotation = Annotation <$> betweenEscaped '(' ')' +> action = Action <$> betweenEscaped '[' ']' + +Here are examples for both: + +> testOpterm = [cmpInstruction "(skinless\nboneless)" (Right (Annotation "skinless\nboneless")) +> , cmpInstruction "[stir together]" (Right (Action "stir together")) +> , cmpInstruction "[stir\\]together]" (Right (Action "stir]together"))] + +The second one starts with one identifying character, ignores the following +whitespace characters and then consumes an object or a quantity. + +> oparg :: Char -> Parsec String () Instruction -> Parsec String () Instruction +> oparg ident cont = char ident *> spaces *> cont +> ingredient = oparg '+' (Ingredient <$> quantity) +> tool = oparg '&' (Tool <$> quantity) +> result = oparg '>' (Result <$> quantity) +> alternative = oparg '|' (Alternative <$> quantity) +> reference = oparg '*' (Reference <$> quantity) + +Additionally there are two special instructions. Directives are similar to the +previous instructions, but consume a qstr. + +> directive = oparg '%' (Directive <$> qstr) + +Unknown instructions are the fallthrough-case and accept anything. They must +not be discarded at this point. The point of accepting anything is to fail as +late as possible while processing input. This gives the parser a chance to +print helpful mesages that provide additional aid to the user who can then fix +the problem. + +> unknown = Unknown <$> many1 notspace + +Below are examples for these instructions: + +> testOparg = [ +> cmpInstruction "+100 g flour" +> (Right (Ingredient (Quantity (Exact (AmountRatio (100%1))) "g" "flour"))) +> , cmpInstruction "&oven" +> (Right (Tool (strQuantity "oven"))) +> , cmpInstruction ">dough" (Right (Result (strQuantity "dough"))) +> , cmpInstruction "|trimmings" (Right (Alternative (strQuantity "trimmings"))) +> , cmpInstruction "*fish" +> (Right (Reference (strQuantity "fish"))) +> , cmpInstruction3 "% invalid" (Right (Directive "invalid")) "%invalid" +> , cmpInstruction3 "* \t\n 1 _ cheese" +> (Right (Reference (Quantity (Exact (AmountRatio (1%1))) "" "cheese"))) +> "*1 _ cheese" +> ] + +Qstr +++++ + +Before introducing quantities we need to have a look at qstr, which is used by +them. A qstr, short for quoted string, can be – you guessed it already – a +string enclosed in double quotes, a single word or the underscore character +that represents the empty string. + +> qstr = try (betweenEscaped '"' '"') +> <|> word +> <|> char '_' *> return "" + +A word always starts with a letter, followed by any number of non-space +characters. + +> word = (:) <$> letter <*> many notspace +> notspace = satisfy (not . isSpace) + +The empty string can be represented by two double quotes or the underscore, but +not the empty string itself. + +> testQstr = [ +> cmpQstr3 "\"\"" (Right "") "_" +> , cmpQstr "_" (Right "") +> , cmpQstr "" parseError + +Any Unicode character with a General_Category major class L (i.e. a letter, see +`Unicode standard section 4.5 +<http://www.unicode.org/versions/Unicode7.0.0/ch04.pdf>`_ for example) is +accected as first character of a word. That includes german umlauts as well as +greek or arabic script. Numbers, separators, punctuation and others are not +permitted. + +> , cmpQstr "water" (Right "water") +> , cmpQstr "Äpfel" (Right "Äpfel") +> , cmpQstr "τυρί" (Right "τυρί") +> , cmpQstr "جبن" (Right "جبن") +> , cmpQstr "1sugar" parseError +> , cmpQstr "+milk" parseError +> , cmpQstr "∀onion" parseError + +The remaining letters of a word can be any character, including symbols, +numbers, … + +> , cmpQstr "rump-roast" (Right "rump-roast") +> , cmpQstr "v1negar" (Right "v1negar") +> , cmpQstr "mush\"rooms" (Right "mush\"rooms") + +…but not spaces. + +> , cmpQstr " tomatoes" parseError +> , cmpQstr "tomatoes " parseError +> , cmpQstr "lemon juice" parseError +> , cmpQstr "sour\tcream" parseError +> , cmpQstr "white\nwine" parseError + +If a string contains spaces or starts with a special character it must be +enclosed in double quotes. + +> , cmpQstr3 "\"salt\"" (Right "salt") "salt" +> , cmpQstr "\"+milk\"" (Right "+milk") +> , cmpQstr "\"soy sauce\"" (Right "soy sauce") +> , cmpQstr "\"1sugar\"" (Right "1sugar") +> , cmpQstr "\"chicken\tbreast\nmeat\"" (Right "chicken\tbreast\nmeat") + +Double quotes within a string can be quoted by prepending a backslash. However +the usual escape codes like \\n, \\t, … will *not* be expanded. + +> , cmpQstr "\"vine\"gar\"" parseError +> , cmpQstr3 "\"vine\\\"gar\"" (Right "vine\"gar") "vine\"gar" +> , cmpQstr "\"oli\\ve oil\"" (Right "oli\\ve oil") +> , cmpQstr "\"oli\\\\\"ve oil\"" (Right "oli\\\"ve oil") +> , cmpQstr3 "\"sal\\tmon\"" (Right "sal\\tmon") "sal\\tmon" +> ] + +Quantity +++++++++ + +The instructions Ingredient, Tool and Reference accept a *quantity*, that is a +triple of Approximately, Unit and Object as parameter. + +> data Quantity = Quantity Approximately Unit Object deriving (Show, Eq) + +The syntactic construct is overloaded and accepts one to three arguments. If +just one is given it is assumed to be the Object and Approximately and Unit are +empty. Two arguments set Approximately and Unit, which is convenient when the +unit implies the object (minutes usually refer to the object time, for +example). + +> quantity = try quantityA <|> quantityB + +> quantityA = Quantity +> <$> approximately +> <* spaces1 +> <*> unit +> <*> (try (spaces1 *> object) <|> return "") + +> quantityB = Quantity +> <$> return (Exact (AmountStr "")) +> <*> return "" +> <*> object + +> testQuantityOverloaded = [ +> cmpQuantity "oven" (exactQuantity (AmountStr "") "" "oven") +> , cmpQuantity "10 min" (exactQuantity (AmountRatio (10%1)) "min" "") +> , cmpQuantity "100 g flour" (exactQuantity (AmountRatio (100%1)) "g" "flour") + +The first two are equivalent to + +> , cmpQuantity3 "_ _ oven" (exactQuantity (AmountStr "") "" "oven") "oven" +> , cmpQuantity3 "10 min _" (exactQuantity (AmountRatio (10%1)) "min" "") "10 min" + +Missing units must not be ommited. The version with underscore should be prefered. + +> , cmpQuantity3 "1 \"\" meal" (exactQuantity (AmountRatio (1%1)) "" "meal") "1 _ meal" +> , cmpQuantity "1 _ meal" (exactQuantity (AmountRatio (1%1)) "" "meal") +> ] + +Units and objects are just strings. However units should be limited to +`well-known metric units <well-known-units_>`_ and `some guidelines +<objects-and-annotations_>`_ apply to Objects as well. + +> type Unit = String +> unit = qstr +> +> type Object = String +> object = qstr + +Approximately is a wrapper for ranges, that is two amounts separated by a dash, +approximate amounts, prepended with a tilde and exact amounts without modifier. + +> data Approximately = +> Range Amount Amount +> | Approx Amount +> | Exact Amount +> deriving (Show, Eq) +> +> approximately = try range <|> try approx <|> exact +> range = Range <$> amount <*> (char '-' *> amount) +> approx = Approx <$> (char '~' *> amount) +> exact = Exact <$> amount + +> testQuantityApprox = [ +> cmpQuantity "1-2 _ bananas" (Right (Quantity (Range (AmountRatio (1%1)) (AmountRatio (2%1))) "" "bananas")) +> , cmpQuantity "1 - 2 _ bananas" parseError +> , cmpQuantity "1- 2 _ bananas" parseError +> , cmpQuantity "1 -2 _ bananas" parseError +> , cmpQuantity "~2 _ bananas" (Right (Quantity (Approx (AmountRatio (2%1))) "" "bananas")) +> , cmpQuantity "~ 2 _ bananas" parseError + +> ] + +Amounts are limited to rational numbers and strings. There are no real numbers +by design and implementations should avoid representing rational numbers as +IEEE float. They are not required and introduce ugly corner cases when +rounding while converting units for example. + +> data Amount = +> AmountRatio Rational +> | AmountStr String +> deriving (Show, Eq) +> +> amount = try ratio <|> AmountStr <$> qstr + +> testQuantityAmount = [ +> cmpQuantity "some _ pepper" (exactQuantity (AmountStr "some") "" "pepper") +> , cmpQuantity3 "\"some\"-\"a few\" _ bananas" (Right (Quantity (Range (AmountStr "some") (AmountStr "a few")) "" "bananas")) "some-\"a few\" _ bananas" +> , cmpQuantity "~\"the stars in your eyes\" _ bananas" (Right (Quantity (Approx (AmountStr "the stars in your eyes")) "" "bananas")) +> ] + +Rational numbers can be an integral, numerator and denominator, each separated +by a forward slash, just the numerator and denominator, again separated by a +forward slash or just a numerator with the default denominator 1 (i.e. ordinary +integral number). + +> ratio = let toRatio i num denom = AmountRatio ((i*denom+num)%denom) in +> try (toRatio <$> int <*> (char '/' *> int) <*> (char '/' *> int)) +> <|> try (toRatio <$> return 0 <*> int <*> (char '/' *> int)) +> <|> try (toRatio <$> return 0 <*> int <*> return 1) + +These are all equal. + +> testQuantityRatio = [ +> cmpQuantity "3 _ bananas" (exactQuantity (AmountRatio (3%1)) "" "bananas") +> , cmpQuantity3 "3/1 _ bananas" (exactQuantity (AmountRatio (3%1)) "" "bananas") "3 _ bananas" +> , cmpQuantity3 "3/0/1 _ bananas" (exactQuantity (AmountRatio (3%1)) "" "bananas") "3 _ bananas" + +XXtwo is num and denom + +> , cmpQuantity "3/5 _ bananas" (exactQuantity (AmountRatio (3%5)) "" "bananas") + +three is int, num and denom + +> , cmpQuantity "3/5/7 _ bananas" (exactQuantity (AmountRatio ((3*7+5)%7)) "" "bananas") + +> , cmpQuantity3 "10/3 _ bananas" (exactQuantity (AmountRatio (10%3)) "" "bananas") "3/1/3 _ bananas" + +Can be used with ranges and approximate too. and mixed with strings + +> , cmpQuantity "1-\"a few\" _ bananas" (Right (Quantity (Range (AmountRatio (1%1)) (AmountStr "a few")) "" "bananas")) +> , cmpQuantity "1/1/2-2 _ bananas" (Right (Quantity (Range (AmountRatio (3%2)) (AmountRatio (4%2))) "" "bananas")) +> , cmpQuantity "~1/1/2 _ bananas" (Right (Quantity (Approx (AmountRatio (3%2))) "" "bananas")) + +> ] + +Appendix +++++++++ + +> int = read <$> many1 digit +> parse = runParser stream () "" + +Test helpers: + +> isLeft (Left _) = True +> isLeft _ = False + +A generic parser error: + +> parseError = Left (newErrorUnknown (newPos "" 0 0)) + +Compare output of parser ``f`` for string ``str`` with ``expected``. The +expected result can be a parser error, which matches any actual parse error +(first case). + +> cmpParser f str (Left _) = TestCase $ assertBool str $ isLeft $ runParser (f <* eof) () "" str +> cmpParser f str expected = str ~: runParser (f <* eof) () "" str ~?= expected + +> cmpParseSerialize f str expectp@(Left _) _ = [cmpParser f str expectp] +> cmpParseSerialize f str expectp@(Right expectpval) expects = [ +> cmpParser f str expectp +> , serialize expectpval ~?= expects] + +Wrap qstr test in AmountStr to aid serialization test + +> cmpQstr input expectp = cmpQstr3 input expectp input +> cmpQstr3 input (Left expect) _ = [cmpParser (AmountStr <$> qstr) input (Left expect)] +> cmpQstr3 input (Right expect) expects = cmpParseSerialize (AmountStr <$> qstr) input (Right (AmountStr expect)) expects + +> cmpQuantity a b = cmpQuantity3 a b a +> cmpQuantity3 = cmpParseSerialize quantity + +> cmpInstruction a b = cmpInstruction3 a b a +> cmpInstruction3 = cmpParseSerialize instruction + +> exactQuantity a b c = Right (Quantity (Exact a) b c) +> strQuantity = Quantity (Exact (AmountStr "")) "" + +> test = [ +> "quantity" ~: testQuantityOverloaded ++ testQuantityApprox ++ testQuantityAmount ++ testQuantityRatio +> , "qstr" ~: testQstr +> , "oparg" ~: testOparg +> , "opterm" ~: testOpterm +> ] + +> isResult (Result _) = True +> isResult _ = False +> isReference (Reference _) = True +> isReference _ = False +> isAlternative (Alternative _) = True +> isAlternative _ = False +> isAnnotation (Annotation _) = True +> isAnnotation _ = False +> isAction (Action _) = True +> isAction _ = False +> isDirective (Directive _) = True +> isDirective _ = False +> isUnknown (Unknown _) = True +> isUnknown _ = False + diff --git a/src/lib/Codec/Pesto/Parse.lhs-boot b/src/lib/Codec/Pesto/Parse.lhs-boot new file mode 100644 index 0000000..9096ad7 --- /dev/null +++ b/src/lib/Codec/Pesto/Parse.lhs-boot @@ -0,0 +1,22 @@ +> module Codec.Pesto.Parse where + +> data Instruction = +> Annotation String +> | Ingredient Quantity +> | Tool Quantity +> | Action String +> | Reference Quantity +> | Result Quantity +> | Alternative Quantity +> | Directive String +> | Unknown String +> data Quantity = Quantity Approximately Unit Object +> type Unit = String +> type Object = String +> data Approximately = +> Range Amount Amount +> | Approx Amount +> | Exact Amount +> data Amount = +> AmountRatio Rational +> | AmountStr String diff --git a/src/lib/Codec/Pesto/Serialize.lhs b/src/lib/Codec/Pesto/Serialize.lhs new file mode 100644 index 0000000..f07e871 --- /dev/null +++ b/src/lib/Codec/Pesto/Serialize.lhs @@ -0,0 +1,70 @@ +Serializing +----------- + +.. class:: nodoc + +> module Codec.Pesto.Serialize (serialize) where +> import Data.Char (isSpace, isLetter) +> import Data.Ratio (numerator, denominator) +> +> import {-# SOURCE #-} Codec.Pesto.Parse + +> class Serializeable a where +> serialize :: a -> String + +.. class:: todo + +- Add instance for graph +- use :math:`\mathcal{O}(1)` string builder + +Finally transform linear stream of instructions into a string again: + +> instance Serializeable a => Serializeable [a] where +> serialize ops = unlines $ map serialize ops + +> instance Serializeable Instruction where +> serialize (Annotation s) = quote '(' ')' s +> serialize (Ingredient q) = '+':serialize q +> serialize (Tool q) = '&':serialize q +> serialize (Action s) = quote '[' ']' s +> serialize (Reference q) = '*':serialize q +> serialize (Result q) = '>':serialize q +> serialize (Alternative q) = '|':serialize q +> serialize (Directive s) = '%':serializeQstr s +> serialize (Unknown s) = s + +> instance Serializeable Quantity where +> serialize (Quantity a b "") = serialize a ++ " " ++ serializeQstr b +> serialize (Quantity (Exact (AmountStr "")) "" c) = serializeQstr c +> serialize (Quantity a "" c) = serialize a ++ " _ " ++ serializeQstr c +> serialize (Quantity a b c) = serialize a ++ " " ++ serializeQstr b ++ " " ++ serializeQstr c + +> instance Serializeable Approximately where +> serialize (Range a b) = serialize a ++ "-" ++ serialize b +> serialize (Approx a) = '~':serialize a +> serialize (Exact a) = serialize a + +There are two special cases here, both for aesthetic reasons: + +1) If the denominator is one we can just skip printing it, because + :math:`\frac{2}{1} = 2` and +2) if the numerator is larger than the denominator use mixed fraction notation, + because :math:`\frac{7}{2} = 3+\frac{1}{2}` + +> instance Serializeable Amount where +> serialize (AmountRatio a) | denominator a == 1 = show (numerator a) +> serialize (AmountRatio a) | numerator a > denominator a = +> show full ++ "/" ++ show num ++ "/" ++ show denom +> where +> full = numerator a `div` denom +> num = numerator a - full * denom +> denom = denominator a +> serialize (AmountRatio a) = show (numerator a) ++ "/" ++ show (denominator a) +> serialize (AmountStr s) = serializeQstr s + +> serializeQstr "" = "_" +> serializeQstr s | (not . isLetter . head) s || hasSpaces s = quote '"' '"' s +> serializeQstr s = s +> hasSpaces = any isSpace +> quote start end s = [start] ++ concatMap (\c -> if c == end then ['\\', end] else [c]) s ++ [end] + -- cgit v1.2.3