diff options
Diffstat (limited to 'src/lib/Codec/Pesto/Lint.lhs')
-rw-r--r-- | src/lib/Codec/Pesto/Lint.lhs | 212 |
1 files changed, 122 insertions, 90 deletions
diff --git a/src/lib/Codec/Pesto/Lint.lhs b/src/lib/Codec/Pesto/Lint.lhs index 919eeef..3ecdfa1 100644 --- a/src/lib/Codec/Pesto/Lint.lhs +++ b/src/lib/Codec/Pesto/Lint.lhs @@ -12,11 +12,12 @@ Linting > , Metadata(..) > , LintResult(..)) where > import Test.HUnit hiding (test, Node) -> import Data.List (sort, isPrefixOf) +> import Data.List (sort, isPrefixOf, insert, intersect) > import Text.Parsec hiding (parse) -> import Data.Char (isSpace) +> import Data.Char (isSpace, toLower) > import Data.Ratio ((%)) > import Data.Maybe (fromMaybe) +> import qualified Data.Map.Strict as M > > import Codec.Pesto.Graph hiding (test) > import Codec.Pesto.Parse hiding (test) @@ -24,22 +25,21 @@ Linting Not every graph generated in the previous section is a useful recipe. Some instruction sequences just do not make sense. The tests in this section can detect those. Failing any of them does not render a stream of instructions or -graph invalid. They just does not describe a *useful* recipe. Thus -implementations must not generate or export such documents. However they should +graph invalid. They just do not describe a *useful* recipe. Thus +implementations must not generate or export such documents. However, they should accept input that fails any of the tests and warn the user about the failure. -Additionally this section provides guidance on how to use the instructions +Additionally, this section provides guidance on how to use the instructions provided by the Pesto language properly. Graph properties ++++++++++++++++ -- weakly connected, no dangling nodes/subgraphs -- acyclic +.. _resultsused: -The graph must have exactly one root node (i.e. a node with incoming edges +The graph must have exactly one root node (i.e., a node with incoming edges only). This also requires all results and alternatives to be referenced -somewhere. Directives are either consumed when parsing, generating a graph and +somewhere. Directives are either consumed when parsing, generating a graph, and linting. Otherwise they are dangling as well. Unknown instructions are always dangling. @@ -54,38 +54,32 @@ Empty recipes or circular references have no root node: > testConnectivity = [ > cmpLint "" [LintResult NoRootNode [], LintResult NoMetadata []] > , cmpLint "*foobar >foobar" -> [LintResult NoRootNode [], LintResult NoMetadata []] +> [LintResult NoRootNode [], LintResult CircularLoop [0, 1], LintResult NoMetadata []] > , cmpLint "+foobar" -> [LintResult NonResultRootNode [0], LintResult NoMetadata []] +> [LintResult NonResultRootNode [0], LintResult NoMetadata []] Directives and unknown instructions are dangling and thus root nodes. > , cmpLint "invalid %invalid +foo >bar" > [LintResult MoreThanOneRootNode [0,1,3], LintResult NoMetadata []] -> ] +> ] Metadata ++++++++ -.. _resultsused: - -.. class:: todo - -root node can be alternative too? - The graph’s root node must be a result. It contains yield (amount and unit) and title (object) of the recipe. > extractMetadata nodes edges = case walkRoot nodes edges of -> [n@(i, Result q@(Quantity _ _ title))] -> +> [(i, Result q@(Quantity _ _ title))] -> > Just $ (i, ("title", MetaStr title)) > :(i, ("yield", MetaQty q)) -> :foldl f [] (incomingNodes nodes edges n) -> _ -> Nothing +> :foldl f [] (incomingNodes nodes edges i) +> _ -> Nothing > where Additional key-value metadata for the whole recipe can be added as annotations -to the root node. If multiple annotations with the same key exist the key maps +to the root node. If multiple annotations with the same key exist, the key maps to a list of those values. Annotations that are unparseable key-value pairs are added as recipe description instead. @@ -109,8 +103,8 @@ colon char. A value may be empty. > checkKey xs (_, (k, _)) | isKeyKnown k = xs > checkKey xs (i, _) = LintResult UnknownMetadataKey [i]:xs -Valid metadata keys are listed below. Additionally applications may add keys by -prefixing them with “x-myapp-”, thus an application called “basil” adding +Valid metadata keys are listed below. Additionally, applications may add keys by +prefixing them with “x-myapp-”. Thus an application called “basil” adding “some-key” would use the full key “x-basil-some-key”. > isKeyKnown k = k `elem` knownKeys || "x-" `isPrefixOf` k @@ -119,19 +113,19 @@ The following metadata keys are permitted: > knownKeys = [ -Both, title and description, are implicit. +The title, description and yield are implicit. -> "title" +> "title" > , "description" +> , "yield" The recipe’s language, as 2 character code (`ISO 639-1 <http://www.loc.gov/standards/iso639-2/php/English_list.php>`_). > , "language" -Yield and time both must be a quantity. +Time both must be a time-unit quantity. -> , "yield" > , "time" An image can be a relative file reference or URI @@ -140,18 +134,14 @@ An image can be a relative file reference or URI > , "author" > ] -.. class:: todo - -Check the metadata’s value format. I.e. yield/time must be quantity - For instance a german language recipe for one person would look like this: > testMetadata = [ -> cmpLintMeta "+foo >1 ml foobar (language: de) (x-app-key: value)" -> [] -> (Just [(1, ("title", MetaStr "foobar")) -> , (1, ("yield", MetaQty (Quantity (Exact (AmountRatio (1%1))) "ml" "foobar"))) -> , (2, ("language", MetaStr "de")) +> cmpLintMeta "+foo >1 _ foobar (language: de) (x-app-key: value)" +> [] +> (Just [(1, ("title", MetaStr "foobar")) +> , (1, ("yield", MetaQty (Quantity (Exact (AmountRatio (1%1))) "" "foobar"))) +> , (2, ("language", MetaStr "de")) > , (3, ("x-app-key", MetaStr "value"))]) Unparseable annotations or unknown keys are linting errors: @@ -159,7 +149,7 @@ Unparseable annotations or unknown keys are linting errors: > , cmpLintMeta "+foo >foobar (unknown-key: value)" > [LintResult UnknownMetadataKey [2]] > (Just [(1, ("title", MetaStr "foobar")) -> , (1, ("yield", MetaQty (strQuantity "foobar"))) +> , (1, ("yield", MetaQty (strQuantity "foobar"))) > , (2, ("unknown-key", MetaStr "value"))]) Root node annotations not containing a parseable key-value pair are assigned @@ -179,7 +169,7 @@ the key “description”. Time is a tool ++++++++++++++ -By definition time is a tool and not an ingredient. +By definition, time is a tool and not an ingredient. > timeUnits = ["s", "min", "h", "d"] > @@ -189,7 +179,7 @@ By definition time is a tool and not an ingredient. > timeIsATool nodes _ = foldl f [] nodes > where > f xs (nodeid, Ingredient q) | isTime q = LintResult TimeIsATool [nodeid]:xs -> f xs _ = xs +> f xs _ = xs > testLintQuantity = [ > cmpLint "+10 min >foo" [LintResult TimeIsATool [0]] @@ -199,19 +189,19 @@ By definition time is a tool and not an ingredient. > , cmpLint "&10 min [bar] >foo" [] > ] -Only actions can be annotated with a time. It can be used to indicate how long -a certain action is *expected* to take (i.e. peeling potatoes takes two -minutes) or how long the action is supposed to be executed (i.e. cook five +Only actions can be annotated like this. It can be used to indicate how long +a particular action is *expected* to take (i.e., peeling potatoes takes two +minutes) or how long the action is supposed to be executed (i.e. cook for five minutes). More time annotations improve the software’s scheduling capabilities. > timeAnnotatesAction nodes edges = foldl f [] nodes > where -> f xs n@(nodeid, Tool q) | isTime q && (not . allActions) (outgoingEdges edges n) = LintResult TimeAnnotatesAction [nodeid]:xs +> f xs (nodeid, Tool q) | isTime q && (not . allActions) (outgoingEdges edges nodeid) = LintResult TimeAnnotatesAction [nodeid]:xs > f xs _ = xs > toNodelist = (!!) nodes . snd > allActions = all (isAction . snd . toNodelist) -For example “cook 10 minutes” can be expressed with +For example, “cook 10 minutes” can be expressed with: > testTimeAnnotatesAction = [ > cmpLint "&10 min [cook] >soup" [] @@ -226,10 +216,10 @@ For example “cook 10 minutes” can be expressed with Well-known units ++++++++++++++++ -Units can be an arbitrary strings, but implementations should recognize the -common metric units g (gram), l (litre) and m (metre). One of these prefixes -may be used with each of them: m (milli-), c (centi-), d (dezi-) and k (kilo-). -Additionally time in s (second), min (minute), h (hour), d (day) should be +Units can be arbitrary strings, but implementations should recognize the +standard metric units g (gram), l (liter), and m (meter). One of these prefixes +may be used with each of them: m (milli-), c (centi-), d (dezi-), and k (kilo-). +Additionally, time in s (second), min (minute), h (hour), and d (day) should be accepted. > wellKnownUnit nodes _ = foldl f [] nodes @@ -242,7 +232,7 @@ accepted. > extractQty _ = Nothing > f xs (nodeid, instr) | fromMaybe False (extractQty instr >>= (return . not . known)) = > LintResult UnitNotWellKnown [nodeid]:xs -> f xs _ = xs +> f xs _ = xs > known (Quantity _ unit _) = unit `elem` knownUnits > knownUnits = [ > "" @@ -251,16 +241,11 @@ accepted. > , "cm", "dm", "m" > ] ++ timeUnits -Usage of imperial units (inch, pound, …) as well as non-XXX units like -“teaspoon”, “cup”, … is discouraged because the former is used by just three -countries in the world right now and the latter is language- and -country-dependent. The implementation may provide the user with a conversion -utility. - -.. class:: todo - -- example: 1 oz ~= 28.349523125 g, can only be approximated by rational number, for instance 29767/1050 g -- 15 oz would are :math:`\frac{29767}{70} \mathrm{g} = 425+\frac{17}{70} \mathrm{g}`, since nobody sells 17/70 g the implementation would round down to ~425 g (although <1g is not really enough to justify adding approx) +Usage of imperial units (inch, pound, …), non-standard +units like “teaspoon,” “cup,” or similar is discouraged because +the former is used by just three countries in the world right now, and +the latter is language- and country-dependent. The implementation may +provide the user with a conversion utility. > testLintWellKnownUnit = [ > cmpLint "+1 in foobar >bar" [LintResult UnitNotWellKnown [0]] @@ -275,11 +260,6 @@ utility. The unit is case-sensitive, thus -.. class:: todo - -Should we allow case-insensitive units? References are case-insensitive as -well… - > , cmpLint "+1 Mg foobar >bar" [LintResult UnitNotWellKnown [0]] > , cmpLint "+1 kG foobar >bar" [LintResult UnitNotWellKnown [0]] > , cmpLint "&1 MIN [foo] >bar" [LintResult UnitNotWellKnown [0]] @@ -288,28 +268,51 @@ well… References ++++++++++ -All references must be resolved. An `earlier check <#resultsused>`_ makes sure +All references must be resolved. An `earlier check <#resultsused>`_ ensures all results and alternatives are referenced at some point. > referencesResolved nodes edges = foldl f [] nodes > where -> f xs n@(nodeid, Reference _) | null (incomingEdges edges n) = +> f xs (nodeid, Reference _) | null (incomingEdges edges nodeid) = > LintResult UndefinedReference [nodeid]:xs -> f xs _ = xs +> f xs _ = xs > testLintRefs = [ -> cmpLint "*foobar >foobar >barbaz" [] +> cmpLint "*foobar >foobar >barbaz" [LintResult CircularLoop [0, 1]] > , cmpLint "*foobar >foo" [LintResult UndefinedReference [0]] > ] +Results and alternatives must not have duplicate names, so collect +their lower-case object names into a ``Map`` and flag those which +reference multiple nodes. + +> uniqueNames nodes _ = M.foldl f [] nameMap +> where +> f xs fnodes | length fnodes > 1 = LintResult DuplicateReferenceName fnodes:xs +> f xs _ = xs +> nameMap = foldl buildMap M.empty nodes +> buildMap m (nodeid, Result qty) = M.insertWith append (getObject qty) [nodeid] m +> buildMap m (nodeid, Alternative qty) = M.insertWith append (getObject qty) [nodeid] m +> buildMap m _ = m +> getObject (Quantity _ _ object) = map toLower object +> append a b = insert (head a) b + +> testUniqueNames = [ +> cmpLint "+a >x +b >y *x *y >foo" [] +> , cmpLint "+a >x +b >x *x >y" [LintResult DuplicateReferenceName [1, 3]] +> , cmpLint "+a >x +b +c |x *x >y" [LintResult DuplicateReferenceName [1, 4]] +> , cmpLint "+a >1 _ foo +a >2 _ FOO +a >3 _ foO *Foo >y" +> [LintResult DuplicateReferenceName [1, 3, 5]] +> ] + A result must have at least one incoming edge. This is a special case and can only occur at the beginning of a recipe. > resultNonempty nodes edges = foldl f [] nodes > where -> f xs n@(nodeid, Result _) | null (incomingEdges edges n) = +> f xs (nodeid, Result _) | null (incomingEdges edges nodeid) = > LintResult TooFewChildren [nodeid]:xs -> f xs _ = xs +> f xs _ = xs > testLintResultNonempty = [ > cmpLint ">bar *bar >baz" [LintResult TooFewChildren [0]] @@ -322,27 +325,50 @@ make the alternative pointless. > twoAlternatives nodes edges = foldl f [] nodes > where -> f xs n@(nodeid, Alternative _) | length (incomingEdges edges n) < 2 = +> f xs (nodeid, Alternative _) | length (incomingEdges edges nodeid) < 2 = > LintResult TooFewChildren [nodeid]:xs -> f xs _ = xs +> f xs _ = xs > testLintTwoAlternatives = [ > cmpLint "+A |foo *foo >bar" [LintResult TooFewChildren [1]] > , cmpLint "+A +B |foo *foo >bar" [] - -.. class:: todo - -should we allow this? it does not make sense imo - > , cmpLint "+A &B |foo *foo >bar" [] > ] .. _reject-loops: -.. class:: todo +References cannot loop because, well, you cannot cook something and +use an ingredient you have not made yet. It is possible to branch out +and merge again if an ingredient is split into multiple parts +and added to different outputs. -- reject loops -- reject multiple results/alternatives with the same name +> circularLoops nodes edges = map (LintResult CircularLoop) circles +> where +> allReferences = foldl referenceNodes [] nodes +> referenceNodes xs (nodeid, Reference _) = nodeid:xs +> referenceNodes xs _ = xs +> circles = filter (not . null) $ map (visitIncoming [] . singleton) allReferences +> singleton x = [x] +> visitIncoming _ [] = [] +> visitIncoming visited next = case length (intersect visited nextNext) of +> 0 -> visitIncoming nextVisited nextNext +> _ -> nextVisited +> where +> nextVisited = visited ++ next +> nextNext = map fst $ concat $ map (incomingNodes nodes edges) next + +> testLintCircularLoops = [ +> cmpLint "*y >x *x >y >foobar" +> [LintResult CircularLoop [0, 3, 2, 1] , LintResult CircularLoop [2, 1, 0, 3]] +> , cmpLint "*z >x *x >y *y >z *z >foobar" [ +> LintResult CircularLoop [0, 5, 4, 3, 2, 1] +> , LintResult CircularLoop [2, 1, 0, 5, 4, 3] +> , LintResult CircularLoop [4, 3, 2, 1, 0, 5] +> , LintResult CircularLoop [6, 5, 4, 3, 2, 1, 0] +> ] +> , cmpLint "+a >foobar *1/2 _ foobar >x *1/2 _ foobar >y *x *y >final" [] +> , cmpLint "+a >foobar *1/2 _ foobar >x *x *1/2 _ foobar >final" [] +> ] Ranges ++++++ @@ -352,11 +378,11 @@ This limitation is not enforced for ranges containing strings. > rangeFromLargerThanTo nodes _ = foldl f [] nodes > where -> f xs (nodeid, Ingredient q) | not $ rangeOk q = +> f xs (nodeid, Ingredient q) | not $ rangeOk q = > LintResult RangeFromLargerThanTo [nodeid]:xs -> f xs (nodeid, Reference q) | not $ rangeOk q = +> f xs (nodeid, Reference q) | not $ rangeOk q = > LintResult RangeFromLargerThanTo [nodeid]:xs -> f xs _ = xs +> f xs _ = xs > rangeOk (Quantity (Range (AmountRatio a) (AmountRatio b)) _ _) = a < b > rangeOk _ = True @@ -377,10 +403,12 @@ Appendix > | NonResultRootNode > | MoreThanOneRootNode > | UndefinedReference +> | DuplicateReferenceName +> | CircularLoop > | TooFewChildren -> | TimeIsATool +> | TimeIsATool > | TimeAnnotatesAction -> | UnitNotWellKnown +> | UnitNotWellKnown > | InvalidNode > | RangeFromLargerThanTo > | NoMetadata @@ -393,9 +421,11 @@ Every lint test checks a single aspect of the graph. > lintTests = [ > rootIsResult -> , referencesResolved -> , resultNonempty -> , twoAlternatives +> , referencesResolved +> , uniqueNames +> , circularLoops +> , resultNonempty +> , twoAlternatives > , timeIsATool > , timeAnnotatesAction > , wellKnownUnit @@ -404,7 +434,7 @@ Every lint test checks a single aspect of the graph. > ] > withGraph doc f = f nodes edges -> where +> where > (Right op) = (head . extract . snd . unzip) <$> parse ("%pesto " ++ doc) > nodes = zip [firstNodeId..] op > edges = toGraph nodes ++ resolveReferences nodes @@ -424,6 +454,8 @@ Every lint test checks a single aspect of the graph. > testConnectivity > , testMetadata > , testLintRefs +> , testUniqueNames +> , testLintCircularLoops > , testLintQuantity > , testLintWellKnownUnit > , testTimeAnnotatesAction |