summaryrefslogtreecommitdiff
path: root/src/lib/Codec/Pesto/Lint.lhs
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/Codec/Pesto/Lint.lhs')
-rw-r--r--src/lib/Codec/Pesto/Lint.lhs212
1 files changed, 122 insertions, 90 deletions
diff --git a/src/lib/Codec/Pesto/Lint.lhs b/src/lib/Codec/Pesto/Lint.lhs
index 919eeef..3ecdfa1 100644
--- a/src/lib/Codec/Pesto/Lint.lhs
+++ b/src/lib/Codec/Pesto/Lint.lhs
@@ -12,11 +12,12 @@ Linting
> , Metadata(..)
> , LintResult(..)) where
> import Test.HUnit hiding (test, Node)
-> import Data.List (sort, isPrefixOf)
+> import Data.List (sort, isPrefixOf, insert, intersect)
> import Text.Parsec hiding (parse)
-> import Data.Char (isSpace)
+> import Data.Char (isSpace, toLower)
> import Data.Ratio ((%))
> import Data.Maybe (fromMaybe)
+> import qualified Data.Map.Strict as M
>
> import Codec.Pesto.Graph hiding (test)
> import Codec.Pesto.Parse hiding (test)
@@ -24,22 +25,21 @@ Linting
Not every graph generated in the previous section is a useful recipe. Some
instruction sequences just do not make sense. The tests in this section can
detect those. Failing any of them does not render a stream of instructions or
-graph invalid. They just does not describe a *useful* recipe. Thus
-implementations must not generate or export such documents. However they should
+graph invalid. They just do not describe a *useful* recipe. Thus
+implementations must not generate or export such documents. However, they should
accept input that fails any of the tests and warn the user about the failure.
-Additionally this section provides guidance on how to use the instructions
+Additionally, this section provides guidance on how to use the instructions
provided by the Pesto language properly.
Graph properties
++++++++++++++++
-- weakly connected, no dangling nodes/subgraphs
-- acyclic
+.. _resultsused:
-The graph must have exactly one root node (i.e. a node with incoming edges
+The graph must have exactly one root node (i.e., a node with incoming edges
only). This also requires all results and alternatives to be referenced
-somewhere. Directives are either consumed when parsing, generating a graph and
+somewhere. Directives are either consumed when parsing, generating a graph, and
linting. Otherwise they are dangling as well. Unknown instructions are always
dangling.
@@ -54,38 +54,32 @@ Empty recipes or circular references have no root node:
> testConnectivity = [
> cmpLint "" [LintResult NoRootNode [], LintResult NoMetadata []]
> , cmpLint "*foobar >foobar"
-> [LintResult NoRootNode [], LintResult NoMetadata []]
+> [LintResult NoRootNode [], LintResult CircularLoop [0, 1], LintResult NoMetadata []]
> , cmpLint "+foobar"
-> [LintResult NonResultRootNode [0], LintResult NoMetadata []]
+> [LintResult NonResultRootNode [0], LintResult NoMetadata []]
Directives and unknown instructions are dangling and thus root nodes.
> , cmpLint "invalid %invalid +foo >bar"
> [LintResult MoreThanOneRootNode [0,1,3], LintResult NoMetadata []]
-> ]
+> ]
Metadata
++++++++
-.. _resultsused:
-
-.. class:: todo
-
-root node can be alternative too?
-
The graph’s root node must be a result. It contains yield (amount and unit) and
title (object) of the recipe.
> extractMetadata nodes edges = case walkRoot nodes edges of
-> [n@(i, Result q@(Quantity _ _ title))] ->
+> [(i, Result q@(Quantity _ _ title))] ->
> Just $ (i, ("title", MetaStr title))
> :(i, ("yield", MetaQty q))
-> :foldl f [] (incomingNodes nodes edges n)
-> _ -> Nothing
+> :foldl f [] (incomingNodes nodes edges i)
+> _ -> Nothing
> where
Additional key-value metadata for the whole recipe can be added as annotations
-to the root node. If multiple annotations with the same key exist the key maps
+to the root node. If multiple annotations with the same key exist, the key maps
to a list of those values. Annotations that are unparseable key-value pairs are
added as recipe description instead.
@@ -109,8 +103,8 @@ colon char. A value may be empty.
> checkKey xs (_, (k, _)) | isKeyKnown k = xs
> checkKey xs (i, _) = LintResult UnknownMetadataKey [i]:xs
-Valid metadata keys are listed below. Additionally applications may add keys by
-prefixing them with “x-myapp-”, thus an application called “basil” adding
+Valid metadata keys are listed below. Additionally, applications may add keys by
+prefixing them with “x-myapp-”. Thus an application called “basil” adding
“some-key” would use the full key “x-basil-some-key”.
> isKeyKnown k = k `elem` knownKeys || "x-" `isPrefixOf` k
@@ -119,19 +113,19 @@ The following metadata keys are permitted:
> knownKeys = [
-Both, title and description, are implicit.
+The title, description and yield are implicit.
-> "title"
+> "title"
> , "description"
+> , "yield"
The recipe’s language, as 2 character code (`ISO 639-1
<http://www.loc.gov/standards/iso639-2/php/English_list.php>`_).
> , "language"
-Yield and time both must be a quantity.
+Time both must be a time-unit quantity.
-> , "yield"
> , "time"
An image can be a relative file reference or URI
@@ -140,18 +134,14 @@ An image can be a relative file reference or URI
> , "author"
> ]
-.. class:: todo
-
-Check the metadata’s value format. I.e. yield/time must be quantity
-
For instance a german language recipe for one person would look like this:
> testMetadata = [
-> cmpLintMeta "+foo >1 ml foobar (language: de) (x-app-key: value)"
-> []
-> (Just [(1, ("title", MetaStr "foobar"))
-> , (1, ("yield", MetaQty (Quantity (Exact (AmountRatio (1%1))) "ml" "foobar")))
-> , (2, ("language", MetaStr "de"))
+> cmpLintMeta "+foo >1 _ foobar (language: de) (x-app-key: value)"
+> []
+> (Just [(1, ("title", MetaStr "foobar"))
+> , (1, ("yield", MetaQty (Quantity (Exact (AmountRatio (1%1))) "" "foobar")))
+> , (2, ("language", MetaStr "de"))
> , (3, ("x-app-key", MetaStr "value"))])
Unparseable annotations or unknown keys are linting errors:
@@ -159,7 +149,7 @@ Unparseable annotations or unknown keys are linting errors:
> , cmpLintMeta "+foo >foobar (unknown-key: value)"
> [LintResult UnknownMetadataKey [2]]
> (Just [(1, ("title", MetaStr "foobar"))
-> , (1, ("yield", MetaQty (strQuantity "foobar")))
+> , (1, ("yield", MetaQty (strQuantity "foobar")))
> , (2, ("unknown-key", MetaStr "value"))])
Root node annotations not containing a parseable key-value pair are assigned
@@ -179,7 +169,7 @@ the key “description”.
Time is a tool
++++++++++++++
-By definition time is a tool and not an ingredient.
+By definition, time is a tool and not an ingredient.
> timeUnits = ["s", "min", "h", "d"]
>
@@ -189,7 +179,7 @@ By definition time is a tool and not an ingredient.
> timeIsATool nodes _ = foldl f [] nodes
> where
> f xs (nodeid, Ingredient q) | isTime q = LintResult TimeIsATool [nodeid]:xs
-> f xs _ = xs
+> f xs _ = xs
> testLintQuantity = [
> cmpLint "+10 min >foo" [LintResult TimeIsATool [0]]
@@ -199,19 +189,19 @@ By definition time is a tool and not an ingredient.
> , cmpLint "&10 min [bar] >foo" []
> ]
-Only actions can be annotated with a time. It can be used to indicate how long
-a certain action is *expected* to take (i.e. peeling potatoes takes two
-minutes) or how long the action is supposed to be executed (i.e. cook five
+Only actions can be annotated like this. It can be used to indicate how long
+a particular action is *expected* to take (i.e., peeling potatoes takes two
+minutes) or how long the action is supposed to be executed (i.e. cook for five
minutes). More time annotations improve the software’s scheduling capabilities.
> timeAnnotatesAction nodes edges = foldl f [] nodes
> where
-> f xs n@(nodeid, Tool q) | isTime q && (not . allActions) (outgoingEdges edges n) = LintResult TimeAnnotatesAction [nodeid]:xs
+> f xs (nodeid, Tool q) | isTime q && (not . allActions) (outgoingEdges edges nodeid) = LintResult TimeAnnotatesAction [nodeid]:xs
> f xs _ = xs
> toNodelist = (!!) nodes . snd
> allActions = all (isAction . snd . toNodelist)
-For example “cook 10 minutes” can be expressed with
+For example, “cook 10 minutes” can be expressed with:
> testTimeAnnotatesAction = [
> cmpLint "&10 min [cook] >soup" []
@@ -226,10 +216,10 @@ For example “cook 10 minutes” can be expressed with
Well-known units
++++++++++++++++
-Units can be an arbitrary strings, but implementations should recognize the
-common metric units g (gram), l (litre) and m (metre). One of these prefixes
-may be used with each of them: m (milli-), c (centi-), d (dezi-) and k (kilo-).
-Additionally time in s (second), min (minute), h (hour), d (day) should be
+Units can be arbitrary strings, but implementations should recognize the
+standard metric units g (gram), l (liter), and m (meter). One of these prefixes
+may be used with each of them: m (milli-), c (centi-), d (dezi-), and k (kilo-).
+Additionally, time in s (second), min (minute), h (hour), and d (day) should be
accepted.
> wellKnownUnit nodes _ = foldl f [] nodes
@@ -242,7 +232,7 @@ accepted.
> extractQty _ = Nothing
> f xs (nodeid, instr) | fromMaybe False (extractQty instr >>= (return . not . known)) =
> LintResult UnitNotWellKnown [nodeid]:xs
-> f xs _ = xs
+> f xs _ = xs
> known (Quantity _ unit _) = unit `elem` knownUnits
> knownUnits = [
> ""
@@ -251,16 +241,11 @@ accepted.
> , "cm", "dm", "m"
> ] ++ timeUnits
-Usage of imperial units (inch, pound, …) as well as non-XXX units like
-“teaspoon”, “cup”, … is discouraged because the former is used by just three
-countries in the world right now and the latter is language- and
-country-dependent. The implementation may provide the user with a conversion
-utility.
-
-.. class:: todo
-
-- example: 1 oz ~= 28.349523125 g, can only be approximated by rational number, for instance 29767/1050 g
-- 15 oz would are :math:`\frac{29767}{70} \mathrm{g} = 425+\frac{17}{70} \mathrm{g}`, since nobody sells 17/70 g the implementation would round down to ~425 g (although <1g is not really enough to justify adding approx)
+Usage of imperial units (inch, pound, …), non-standard
+units like “teaspoon,” “cup,” or similar is discouraged because
+the former is used by just three countries in the world right now, and
+the latter is language- and country-dependent. The implementation may
+provide the user with a conversion utility.
> testLintWellKnownUnit = [
> cmpLint "+1 in foobar >bar" [LintResult UnitNotWellKnown [0]]
@@ -275,11 +260,6 @@ utility.
The unit is case-sensitive, thus
-.. class:: todo
-
-Should we allow case-insensitive units? References are case-insensitive as
-well…
-
> , cmpLint "+1 Mg foobar >bar" [LintResult UnitNotWellKnown [0]]
> , cmpLint "+1 kG foobar >bar" [LintResult UnitNotWellKnown [0]]
> , cmpLint "&1 MIN [foo] >bar" [LintResult UnitNotWellKnown [0]]
@@ -288,28 +268,51 @@ well…
References
++++++++++
-All references must be resolved. An `earlier check <#resultsused>`_ makes sure
+All references must be resolved. An `earlier check <#resultsused>`_ ensures
all results and alternatives are referenced at some point.
> referencesResolved nodes edges = foldl f [] nodes
> where
-> f xs n@(nodeid, Reference _) | null (incomingEdges edges n) =
+> f xs (nodeid, Reference _) | null (incomingEdges edges nodeid) =
> LintResult UndefinedReference [nodeid]:xs
-> f xs _ = xs
+> f xs _ = xs
> testLintRefs = [
-> cmpLint "*foobar >foobar >barbaz" []
+> cmpLint "*foobar >foobar >barbaz" [LintResult CircularLoop [0, 1]]
> , cmpLint "*foobar >foo" [LintResult UndefinedReference [0]]
> ]
+Results and alternatives must not have duplicate names, so collect
+their lower-case object names into a ``Map`` and flag those which
+reference multiple nodes.
+
+> uniqueNames nodes _ = M.foldl f [] nameMap
+> where
+> f xs fnodes | length fnodes > 1 = LintResult DuplicateReferenceName fnodes:xs
+> f xs _ = xs
+> nameMap = foldl buildMap M.empty nodes
+> buildMap m (nodeid, Result qty) = M.insertWith append (getObject qty) [nodeid] m
+> buildMap m (nodeid, Alternative qty) = M.insertWith append (getObject qty) [nodeid] m
+> buildMap m _ = m
+> getObject (Quantity _ _ object) = map toLower object
+> append a b = insert (head a) b
+
+> testUniqueNames = [
+> cmpLint "+a >x +b >y *x *y >foo" []
+> , cmpLint "+a >x +b >x *x >y" [LintResult DuplicateReferenceName [1, 3]]
+> , cmpLint "+a >x +b +c |x *x >y" [LintResult DuplicateReferenceName [1, 4]]
+> , cmpLint "+a >1 _ foo +a >2 _ FOO +a >3 _ foO *Foo >y"
+> [LintResult DuplicateReferenceName [1, 3, 5]]
+> ]
+
A result must have at least one incoming edge. This is a special case and can
only occur at the beginning of a recipe.
> resultNonempty nodes edges = foldl f [] nodes
> where
-> f xs n@(nodeid, Result _) | null (incomingEdges edges n) =
+> f xs (nodeid, Result _) | null (incomingEdges edges nodeid) =
> LintResult TooFewChildren [nodeid]:xs
-> f xs _ = xs
+> f xs _ = xs
> testLintResultNonempty = [
> cmpLint ">bar *bar >baz" [LintResult TooFewChildren [0]]
@@ -322,27 +325,50 @@ make the alternative pointless.
> twoAlternatives nodes edges = foldl f [] nodes
> where
-> f xs n@(nodeid, Alternative _) | length (incomingEdges edges n) < 2 =
+> f xs (nodeid, Alternative _) | length (incomingEdges edges nodeid) < 2 =
> LintResult TooFewChildren [nodeid]:xs
-> f xs _ = xs
+> f xs _ = xs
> testLintTwoAlternatives = [
> cmpLint "+A |foo *foo >bar" [LintResult TooFewChildren [1]]
> , cmpLint "+A +B |foo *foo >bar" []
-
-.. class:: todo
-
-should we allow this? it does not make sense imo
-
> , cmpLint "+A &B |foo *foo >bar" []
> ]
.. _reject-loops:
-.. class:: todo
+References cannot loop because, well, you cannot cook something and
+use an ingredient you have not made yet. It is possible to branch out
+and merge again if an ingredient is split into multiple parts
+and added to different outputs.
-- reject loops
-- reject multiple results/alternatives with the same name
+> circularLoops nodes edges = map (LintResult CircularLoop) circles
+> where
+> allReferences = foldl referenceNodes [] nodes
+> referenceNodes xs (nodeid, Reference _) = nodeid:xs
+> referenceNodes xs _ = xs
+> circles = filter (not . null) $ map (visitIncoming [] . singleton) allReferences
+> singleton x = [x]
+> visitIncoming _ [] = []
+> visitIncoming visited next = case length (intersect visited nextNext) of
+> 0 -> visitIncoming nextVisited nextNext
+> _ -> nextVisited
+> where
+> nextVisited = visited ++ next
+> nextNext = map fst $ concat $ map (incomingNodes nodes edges) next
+
+> testLintCircularLoops = [
+> cmpLint "*y >x *x >y >foobar"
+> [LintResult CircularLoop [0, 3, 2, 1] , LintResult CircularLoop [2, 1, 0, 3]]
+> , cmpLint "*z >x *x >y *y >z *z >foobar" [
+> LintResult CircularLoop [0, 5, 4, 3, 2, 1]
+> , LintResult CircularLoop [2, 1, 0, 5, 4, 3]
+> , LintResult CircularLoop [4, 3, 2, 1, 0, 5]
+> , LintResult CircularLoop [6, 5, 4, 3, 2, 1, 0]
+> ]
+> , cmpLint "+a >foobar *1/2 _ foobar >x *1/2 _ foobar >y *x *y >final" []
+> , cmpLint "+a >foobar *1/2 _ foobar >x *x *1/2 _ foobar >final" []
+> ]
Ranges
++++++
@@ -352,11 +378,11 @@ This limitation is not enforced for ranges containing strings.
> rangeFromLargerThanTo nodes _ = foldl f [] nodes
> where
-> f xs (nodeid, Ingredient q) | not $ rangeOk q =
+> f xs (nodeid, Ingredient q) | not $ rangeOk q =
> LintResult RangeFromLargerThanTo [nodeid]:xs
-> f xs (nodeid, Reference q) | not $ rangeOk q =
+> f xs (nodeid, Reference q) | not $ rangeOk q =
> LintResult RangeFromLargerThanTo [nodeid]:xs
-> f xs _ = xs
+> f xs _ = xs
> rangeOk (Quantity (Range (AmountRatio a) (AmountRatio b)) _ _) = a < b
> rangeOk _ = True
@@ -377,10 +403,12 @@ Appendix
> | NonResultRootNode
> | MoreThanOneRootNode
> | UndefinedReference
+> | DuplicateReferenceName
+> | CircularLoop
> | TooFewChildren
-> | TimeIsATool
+> | TimeIsATool
> | TimeAnnotatesAction
-> | UnitNotWellKnown
+> | UnitNotWellKnown
> | InvalidNode
> | RangeFromLargerThanTo
> | NoMetadata
@@ -393,9 +421,11 @@ Every lint test checks a single aspect of the graph.
> lintTests = [
> rootIsResult
-> , referencesResolved
-> , resultNonempty
-> , twoAlternatives
+> , referencesResolved
+> , uniqueNames
+> , circularLoops
+> , resultNonempty
+> , twoAlternatives
> , timeIsATool
> , timeAnnotatesAction
> , wellKnownUnit
@@ -404,7 +434,7 @@ Every lint test checks a single aspect of the graph.
> ]
> withGraph doc f = f nodes edges
-> where
+> where
> (Right op) = (head . extract . snd . unzip) <$> parse ("%pesto " ++ doc)
> nodes = zip [firstNodeId..] op
> edges = toGraph nodes ++ resolveReferences nodes
@@ -424,6 +454,8 @@ Every lint test checks a single aspect of the graph.
> testConnectivity
> , testMetadata
> , testLintRefs
+> , testUniqueNames
+> , testLintCircularLoops
> , testLintQuantity
> , testLintWellKnownUnit
> , testTimeAnnotatesAction