summaryrefslogtreecommitdiff
path: root/src/lib/Codec/Pesto/Lint.lhs
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/Codec/Pesto/Lint.lhs')
-rw-r--r--src/lib/Codec/Pesto/Lint.lhs466
1 files changed, 466 insertions, 0 deletions
diff --git a/src/lib/Codec/Pesto/Lint.lhs b/src/lib/Codec/Pesto/Lint.lhs
new file mode 100644
index 0000000..3ecdfa1
--- /dev/null
+++ b/src/lib/Codec/Pesto/Lint.lhs
@@ -0,0 +1,466 @@
+.. _linting:
+
+Linting
+-------
+
+.. class:: nodoc
+
+> module Codec.Pesto.Lint (lint
+> , test
+> , parseMetadata
+> , extractMetadata
+> , Metadata(..)
+> , LintResult(..)) where
+> import Test.HUnit hiding (test, Node)
+> import Data.List (sort, isPrefixOf, insert, intersect)
+> import Text.Parsec hiding (parse)
+> import Data.Char (isSpace, toLower)
+> import Data.Ratio ((%))
+> import Data.Maybe (fromMaybe)
+> import qualified Data.Map.Strict as M
+>
+> import Codec.Pesto.Graph hiding (test)
+> import Codec.Pesto.Parse hiding (test)
+
+Not every graph generated in the previous section is a useful recipe. Some
+instruction sequences just do not make sense. The tests in this section can
+detect those. Failing any of them does not render a stream of instructions or
+graph invalid. They just do not describe a *useful* recipe. Thus
+implementations must not generate or export such documents. However, they should
+accept input that fails any of the tests and warn the user about the failure.
+
+Additionally, this section provides guidance on how to use the instructions
+provided by the Pesto language properly.
+
+Graph properties
+++++++++++++++++
+
+.. _resultsused:
+
+The graph must have exactly one root node (i.e., a node with incoming edges
+only). This also requires all results and alternatives to be referenced
+somewhere. Directives are either consumed when parsing, generating a graph, and
+linting. Otherwise they are dangling as well. Unknown instructions are always
+dangling.
+
+> rootIsResult nodes edges = case walkRoot nodes edges of
+> [] -> [LintResult NoRootNode []]
+> (_, Result _):[] -> []
+> (i, _):[] -> [LintResult NonResultRootNode [i]]
+> xs -> [LintResult MoreThanOneRootNode (map fst xs)]
+
+Empty recipes or circular references have no root node:
+
+> testConnectivity = [
+> cmpLint "" [LintResult NoRootNode [], LintResult NoMetadata []]
+> , cmpLint "*foobar >foobar"
+> [LintResult NoRootNode [], LintResult CircularLoop [0, 1], LintResult NoMetadata []]
+> , cmpLint "+foobar"
+> [LintResult NonResultRootNode [0], LintResult NoMetadata []]
+
+Directives and unknown instructions are dangling and thus root nodes.
+
+> , cmpLint "invalid %invalid +foo >bar"
+> [LintResult MoreThanOneRootNode [0,1,3], LintResult NoMetadata []]
+> ]
+
+Metadata
+++++++++
+
+The graph’s root node must be a result. It contains yield (amount and unit) and
+title (object) of the recipe.
+
+> extractMetadata nodes edges = case walkRoot nodes edges of
+> [(i, Result q@(Quantity _ _ title))] ->
+> Just $ (i, ("title", MetaStr title))
+> :(i, ("yield", MetaQty q))
+> :foldl f [] (incomingNodes nodes edges i)
+> _ -> Nothing
+> where
+
+Additional key-value metadata for the whole recipe can be added as annotations
+to the root node. If multiple annotations with the same key exist, the key maps
+to a list of those values. Annotations that are unparseable key-value pairs are
+added as recipe description instead.
+
+> f xs (i, Annotation s) = case parseMetadata s of
+> Left _ -> (i, ("description", MetaStr s)):xs
+> Right (k, v) -> (i, (k, MetaStr v)):xs
+> f xs _ = xs
+
+Key and value are separated by a colon. Keys must not contain whitespace or the
+colon char. A value may be empty.
+
+> parseMetadata = runParser metadata () ""
+> metadata = let keychars = satisfy (\x -> not (isSpace x) && x /= ':') in (,)
+> <$> many1 keychars
+> <*> (char ':' *> spaces *> many anyChar)
+
+> lintMetadata nodes edges = case extractMetadata nodes edges of
+> Just result -> foldl checkKey [] result
+> Nothing -> [LintResult NoMetadata []]
+> where
+> checkKey xs (_, (k, _)) | isKeyKnown k = xs
+> checkKey xs (i, _) = LintResult UnknownMetadataKey [i]:xs
+
+Valid metadata keys are listed below. Additionally, applications may add keys by
+prefixing them with “x-myapp-”. Thus an application called “basil” adding
+“some-key” would use the full key “x-basil-some-key”.
+
+> isKeyKnown k = k `elem` knownKeys || "x-" `isPrefixOf` k
+
+The following metadata keys are permitted:
+
+> knownKeys = [
+
+The title, description and yield are implicit.
+
+> "title"
+> , "description"
+> , "yield"
+
+The recipe’s language, as 2 character code (`ISO 639-1
+<http://www.loc.gov/standards/iso639-2/php/English_list.php>`_).
+
+> , "language"
+
+Time both must be a time-unit quantity.
+
+> , "time"
+
+An image can be a relative file reference or URI
+
+> , "image"
+> , "author"
+> ]
+
+For instance a german language recipe for one person would look like this:
+
+> testMetadata = [
+> cmpLintMeta "+foo >1 _ foobar (language: de) (x-app-key: value)"
+> []
+> (Just [(1, ("title", MetaStr "foobar"))
+> , (1, ("yield", MetaQty (Quantity (Exact (AmountRatio (1%1))) "" "foobar")))
+> , (2, ("language", MetaStr "de"))
+> , (3, ("x-app-key", MetaStr "value"))])
+
+Unparseable annotations or unknown keys are linting errors:
+
+> , cmpLintMeta "+foo >foobar (unknown-key: value)"
+> [LintResult UnknownMetadataKey [2]]
+> (Just [(1, ("title", MetaStr "foobar"))
+> , (1, ("yield", MetaQty (strQuantity "foobar")))
+> , (2, ("unknown-key", MetaStr "value"))])
+
+Root node annotations not containing a parseable key-value pair are assigned
+the key “description”.
+
+> , cmpLintMeta "+foo >foobar ( some description ) (another one: with colon) (another: valid key-value)"
+> [LintResult UnknownMetadataKey [4]]
+> (Just [(1, ("title", MetaStr "foobar"))
+> , (1, ("yield", MetaQty (strQuantity "foobar")))
+> , (2, ("description", MetaStr " some description "))
+> , (3, ("description", MetaStr "another one: with colon"))
+> , (4, ("another", MetaStr "valid key-value"))])
+> ]
+
+.. _time-is-a-tool:
+
+Time is a tool
+++++++++++++++
+
+By definition, time is a tool and not an ingredient.
+
+> timeUnits = ["s", "min", "h", "d"]
+>
+> isTime (Quantity _ unit "") | unit `elem` timeUnits = True
+> isTime _ = False
+
+> timeIsATool nodes _ = foldl f [] nodes
+> where
+> f xs (nodeid, Ingredient q) | isTime q = LintResult TimeIsATool [nodeid]:xs
+> f xs _ = xs
+
+> testLintQuantity = [
+> cmpLint "+10 min >foo" [LintResult TimeIsATool [0]]
+> , cmpLint "+10-12 h >foo" [LintResult TimeIsATool [0]]
+> , cmpLint "+90/120 s >foo" [LintResult TimeIsATool [0]]
+> , cmpLint "+~12 s >foo" [LintResult TimeIsATool [0]]
+> , cmpLint "&10 min [bar] >foo" []
+> ]
+
+Only actions can be annotated like this. It can be used to indicate how long
+a particular action is *expected* to take (i.e., peeling potatoes takes two
+minutes) or how long the action is supposed to be executed (i.e. cook for five
+minutes). More time annotations improve the software’s scheduling capabilities.
+
+> timeAnnotatesAction nodes edges = foldl f [] nodes
+> where
+> f xs (nodeid, Tool q) | isTime q && (not . allActions) (outgoingEdges edges nodeid) = LintResult TimeAnnotatesAction [nodeid]:xs
+> f xs _ = xs
+> toNodelist = (!!) nodes . snd
+> allActions = all (isAction . snd . toNodelist)
+
+For example, “cook 10 minutes” can be expressed with:
+
+> testTimeAnnotatesAction = [
+> cmpLint "&10 min [cook] >soup" []
+> , cmpLint "&10 min [cook] &5-6 h [cook again] >soup" []
+> , cmpLint "&10 min >soup" [LintResult TimeAnnotatesAction [0]]
+> , cmpLint "&10 min &15 min |time *time [cook] >soup"
+> [LintResult TimeAnnotatesAction [0], LintResult TimeAnnotatesAction [1]]
+> ]
+
+.. _well-known-units:
+
+Well-known units
+++++++++++++++++
+
+Units can be arbitrary strings, but implementations should recognize the
+standard metric units g (gram), l (liter), and m (meter). One of these prefixes
+may be used with each of them: m (milli-), c (centi-), d (dezi-), and k (kilo-).
+Additionally, time in s (second), min (minute), h (hour), and d (day) should be
+accepted.
+
+> wellKnownUnit nodes _ = foldl f [] nodes
+> where
+> extractQty (Ingredient q) = Just q
+> extractQty (Tool q) = Just q
+> extractQty (Result q) = Just q
+> extractQty (Alternative q) = Just q
+> extractQty (Reference q) = Just q
+> extractQty _ = Nothing
+> f xs (nodeid, instr) | fromMaybe False (extractQty instr >>= (return . not . known)) =
+> LintResult UnitNotWellKnown [nodeid]:xs
+> f xs _ = xs
+> known (Quantity _ unit _) = unit `elem` knownUnits
+> knownUnits = [
+> ""
+> , "mg", "g", "kg"
+> , "ml", "cl", "dl", "l"
+> , "cm", "dm", "m"
+> ] ++ timeUnits
+
+Usage of imperial units (inch, pound, …), non-standard
+units like “teaspoon,” “cup,” or similar is discouraged because
+the former is used by just three countries in the world right now, and
+the latter is language- and country-dependent. The implementation may
+provide the user with a conversion utility.
+
+> testLintWellKnownUnit = [
+> cmpLint "+1 in foobar >bar" [LintResult UnitNotWellKnown [0]]
+> , cmpLint "+2 teaspoons foobar >bar" [LintResult UnitNotWellKnown [0]]
+> , cmpLint "+3 cups foobar >bar" [LintResult UnitNotWellKnown [0]]
+> , cmpLint "+1 ml foobar >bar" []
+> , cmpLint "+1 cl foobar >bar" []
+> , cmpLint "+1 dl foobar >bar" []
+> , cmpLint "+1 l foobar >bar" []
+> , cmpLint "+2 _ something >bar" []
+> , cmpLint "&1 min [foo] >bar" []
+
+The unit is case-sensitive, thus
+
+> , cmpLint "+1 Mg foobar >bar" [LintResult UnitNotWellKnown [0]]
+> , cmpLint "+1 kG foobar >bar" [LintResult UnitNotWellKnown [0]]
+> , cmpLint "&1 MIN [foo] >bar" [LintResult UnitNotWellKnown [0]]
+> ]
+
+References
+++++++++++
+
+All references must be resolved. An `earlier check <#resultsused>`_ ensures
+all results and alternatives are referenced at some point.
+
+> referencesResolved nodes edges = foldl f [] nodes
+> where
+> f xs (nodeid, Reference _) | null (incomingEdges edges nodeid) =
+> LintResult UndefinedReference [nodeid]:xs
+> f xs _ = xs
+
+> testLintRefs = [
+> cmpLint "*foobar >foobar >barbaz" [LintResult CircularLoop [0, 1]]
+> , cmpLint "*foobar >foo" [LintResult UndefinedReference [0]]
+> ]
+
+Results and alternatives must not have duplicate names, so collect
+their lower-case object names into a ``Map`` and flag those which
+reference multiple nodes.
+
+> uniqueNames nodes _ = M.foldl f [] nameMap
+> where
+> f xs fnodes | length fnodes > 1 = LintResult DuplicateReferenceName fnodes:xs
+> f xs _ = xs
+> nameMap = foldl buildMap M.empty nodes
+> buildMap m (nodeid, Result qty) = M.insertWith append (getObject qty) [nodeid] m
+> buildMap m (nodeid, Alternative qty) = M.insertWith append (getObject qty) [nodeid] m
+> buildMap m _ = m
+> getObject (Quantity _ _ object) = map toLower object
+> append a b = insert (head a) b
+
+> testUniqueNames = [
+> cmpLint "+a >x +b >y *x *y >foo" []
+> , cmpLint "+a >x +b >x *x >y" [LintResult DuplicateReferenceName [1, 3]]
+> , cmpLint "+a >x +b +c |x *x >y" [LintResult DuplicateReferenceName [1, 4]]
+> , cmpLint "+a >1 _ foo +a >2 _ FOO +a >3 _ foO *Foo >y"
+> [LintResult DuplicateReferenceName [1, 3, 5]]
+> ]
+
+A result must have at least one incoming edge. This is a special case and can
+only occur at the beginning of a recipe.
+
+> resultNonempty nodes edges = foldl f [] nodes
+> where
+> f xs (nodeid, Result _) | null (incomingEdges edges nodeid) =
+> LintResult TooFewChildren [nodeid]:xs
+> f xs _ = xs
+
+> testLintResultNonempty = [
+> cmpLint ">bar *bar >baz" [LintResult TooFewChildren [0]]
+> , cmpLint "+A >bar *bar >baz" []
+> , cmpLint "+A >bar >foo *bar *foo >baz" []
+> ]
+
+Alternatives must have at least two incoming edges since a smaller amount would
+make the alternative pointless.
+
+> twoAlternatives nodes edges = foldl f [] nodes
+> where
+> f xs (nodeid, Alternative _) | length (incomingEdges edges nodeid) < 2 =
+> LintResult TooFewChildren [nodeid]:xs
+> f xs _ = xs
+
+> testLintTwoAlternatives = [
+> cmpLint "+A |foo *foo >bar" [LintResult TooFewChildren [1]]
+> , cmpLint "+A +B |foo *foo >bar" []
+> , cmpLint "+A &B |foo *foo >bar" []
+> ]
+
+.. _reject-loops:
+
+References cannot loop because, well, you cannot cook something and
+use an ingredient you have not made yet. It is possible to branch out
+and merge again if an ingredient is split into multiple parts
+and added to different outputs.
+
+> circularLoops nodes edges = map (LintResult CircularLoop) circles
+> where
+> allReferences = foldl referenceNodes [] nodes
+> referenceNodes xs (nodeid, Reference _) = nodeid:xs
+> referenceNodes xs _ = xs
+> circles = filter (not . null) $ map (visitIncoming [] . singleton) allReferences
+> singleton x = [x]
+> visitIncoming _ [] = []
+> visitIncoming visited next = case length (intersect visited nextNext) of
+> 0 -> visitIncoming nextVisited nextNext
+> _ -> nextVisited
+> where
+> nextVisited = visited ++ next
+> nextNext = map fst $ concat $ map (incomingNodes nodes edges) next
+
+> testLintCircularLoops = [
+> cmpLint "*y >x *x >y >foobar"
+> [LintResult CircularLoop [0, 3, 2, 1] , LintResult CircularLoop [2, 1, 0, 3]]
+> , cmpLint "*z >x *x >y *y >z *z >foobar" [
+> LintResult CircularLoop [0, 5, 4, 3, 2, 1]
+> , LintResult CircularLoop [2, 1, 0, 5, 4, 3]
+> , LintResult CircularLoop [4, 3, 2, 1, 0, 5]
+> , LintResult CircularLoop [6, 5, 4, 3, 2, 1, 0]
+> ]
+> , cmpLint "+a >foobar *1/2 _ foobar >x *1/2 _ foobar >y *x *y >final" []
+> , cmpLint "+a >foobar *1/2 _ foobar >x *x *1/2 _ foobar >final" []
+> ]
+
+Ranges
+++++++
+
+The first amount of a range ratio must be strictly smaller than the second.
+This limitation is not enforced for ranges containing strings.
+
+> rangeFromLargerThanTo nodes _ = foldl f [] nodes
+> where
+> f xs (nodeid, Ingredient q) | not $ rangeOk q =
+> LintResult RangeFromLargerThanTo [nodeid]:xs
+> f xs (nodeid, Reference q) | not $ rangeOk q =
+> LintResult RangeFromLargerThanTo [nodeid]:xs
+> f xs _ = xs
+> rangeOk (Quantity (Range (AmountRatio a) (AmountRatio b)) _ _) = a < b
+> rangeOk _ = True
+
+> testRangeFromLargerThanTo = [
+> cmpLint "+2-3 l water >bar" []
+> , cmpLint "+3-2 l water >bar" [LintResult RangeFromLargerThanTo [0]]
+> , cmpLint "+2/3-1/3 l water >bar" [LintResult RangeFromLargerThanTo [0]]
+> , cmpLint "+some-many _ eggs >bar" []
+> , cmpLint "+1-\"a few\" _ eggs >bar" []
+> ]
+
+Appendix
+++++++++
+
+> data LintResult = LintResult LintStatus [NodeId] deriving (Show, Eq, Ord)
+> data LintStatus =
+> NoRootNode
+> | NonResultRootNode
+> | MoreThanOneRootNode
+> | UndefinedReference
+> | DuplicateReferenceName
+> | CircularLoop
+> | TooFewChildren
+> | TimeIsATool
+> | TimeAnnotatesAction
+> | UnitNotWellKnown
+> | InvalidNode
+> | RangeFromLargerThanTo
+> | NoMetadata
+> | UnknownMetadataKey
+> deriving (Show, Eq, Ord)
+
+Every lint test checks a single aspect of the graph.
+
+> lint nodes edges = concatMap (\f -> f nodes edges) lintTests
+
+> lintTests = [
+> rootIsResult
+> , referencesResolved
+> , uniqueNames
+> , circularLoops
+> , resultNonempty
+> , twoAlternatives
+> , timeIsATool
+> , timeAnnotatesAction
+> , wellKnownUnit
+> , lintMetadata
+> , rangeFromLargerThanTo
+> ]
+
+> withGraph doc f = f nodes edges
+> where
+> (Right op) = (head . extract . snd . unzip) <$> parse ("%pesto " ++ doc)
+> nodes = zip [firstNodeId..] op
+> edges = toGraph nodes ++ resolveReferences nodes
+
+> cmpLint doc expect = withGraph doc (\nodes edges -> doc ~: sort (lint nodes edges) ~?= sort expect)
+
+
+> data Metadata = MetaQty Quantity | MetaStr String deriving (Show, Eq)
+
+> cmpLintMeta doc expectLint expectMeta = withGraph doc (\nodes edges -> doc ~: [
+> sort (lint nodes edges) ~?= sort expectLint
+> , extractMetadata nodes edges ~?= expectMeta
+> ])
+> strQuantity = Quantity (Exact (AmountStr "")) ""
+
+> test = [
+> testConnectivity
+> , testMetadata
+> , testLintRefs
+> , testUniqueNames
+> , testLintCircularLoops
+> , testLintQuantity
+> , testLintWellKnownUnit
+> , testTimeAnnotatesAction
+> , testLintTwoAlternatives
+> , testLintResultNonempty
+> , testRangeFromLargerThanTo
+> ]
+