diff options
Diffstat (limited to 'src/lib/Codec/Pesto/Lint.lhs')
-rw-r--r-- | src/lib/Codec/Pesto/Lint.lhs | 466 |
1 files changed, 466 insertions, 0 deletions
diff --git a/src/lib/Codec/Pesto/Lint.lhs b/src/lib/Codec/Pesto/Lint.lhs new file mode 100644 index 0000000..3ecdfa1 --- /dev/null +++ b/src/lib/Codec/Pesto/Lint.lhs @@ -0,0 +1,466 @@ +.. _linting: + +Linting +------- + +.. class:: nodoc + +> module Codec.Pesto.Lint (lint +> , test +> , parseMetadata +> , extractMetadata +> , Metadata(..) +> , LintResult(..)) where +> import Test.HUnit hiding (test, Node) +> import Data.List (sort, isPrefixOf, insert, intersect) +> import Text.Parsec hiding (parse) +> import Data.Char (isSpace, toLower) +> import Data.Ratio ((%)) +> import Data.Maybe (fromMaybe) +> import qualified Data.Map.Strict as M +> +> import Codec.Pesto.Graph hiding (test) +> import Codec.Pesto.Parse hiding (test) + +Not every graph generated in the previous section is a useful recipe. Some +instruction sequences just do not make sense. The tests in this section can +detect those. Failing any of them does not render a stream of instructions or +graph invalid. They just do not describe a *useful* recipe. Thus +implementations must not generate or export such documents. However, they should +accept input that fails any of the tests and warn the user about the failure. + +Additionally, this section provides guidance on how to use the instructions +provided by the Pesto language properly. + +Graph properties +++++++++++++++++ + +.. _resultsused: + +The graph must have exactly one root node (i.e., a node with incoming edges +only). This also requires all results and alternatives to be referenced +somewhere. Directives are either consumed when parsing, generating a graph, and +linting. Otherwise they are dangling as well. Unknown instructions are always +dangling. + +> rootIsResult nodes edges = case walkRoot nodes edges of +> [] -> [LintResult NoRootNode []] +> (_, Result _):[] -> [] +> (i, _):[] -> [LintResult NonResultRootNode [i]] +> xs -> [LintResult MoreThanOneRootNode (map fst xs)] + +Empty recipes or circular references have no root node: + +> testConnectivity = [ +> cmpLint "" [LintResult NoRootNode [], LintResult NoMetadata []] +> , cmpLint "*foobar >foobar" +> [LintResult NoRootNode [], LintResult CircularLoop [0, 1], LintResult NoMetadata []] +> , cmpLint "+foobar" +> [LintResult NonResultRootNode [0], LintResult NoMetadata []] + +Directives and unknown instructions are dangling and thus root nodes. + +> , cmpLint "invalid %invalid +foo >bar" +> [LintResult MoreThanOneRootNode [0,1,3], LintResult NoMetadata []] +> ] + +Metadata +++++++++ + +The graph’s root node must be a result. It contains yield (amount and unit) and +title (object) of the recipe. + +> extractMetadata nodes edges = case walkRoot nodes edges of +> [(i, Result q@(Quantity _ _ title))] -> +> Just $ (i, ("title", MetaStr title)) +> :(i, ("yield", MetaQty q)) +> :foldl f [] (incomingNodes nodes edges i) +> _ -> Nothing +> where + +Additional key-value metadata for the whole recipe can be added as annotations +to the root node. If multiple annotations with the same key exist, the key maps +to a list of those values. Annotations that are unparseable key-value pairs are +added as recipe description instead. + +> f xs (i, Annotation s) = case parseMetadata s of +> Left _ -> (i, ("description", MetaStr s)):xs +> Right (k, v) -> (i, (k, MetaStr v)):xs +> f xs _ = xs + +Key and value are separated by a colon. Keys must not contain whitespace or the +colon char. A value may be empty. + +> parseMetadata = runParser metadata () "" +> metadata = let keychars = satisfy (\x -> not (isSpace x) && x /= ':') in (,) +> <$> many1 keychars +> <*> (char ':' *> spaces *> many anyChar) + +> lintMetadata nodes edges = case extractMetadata nodes edges of +> Just result -> foldl checkKey [] result +> Nothing -> [LintResult NoMetadata []] +> where +> checkKey xs (_, (k, _)) | isKeyKnown k = xs +> checkKey xs (i, _) = LintResult UnknownMetadataKey [i]:xs + +Valid metadata keys are listed below. Additionally, applications may add keys by +prefixing them with “x-myapp-”. Thus an application called “basil” adding +“some-key” would use the full key “x-basil-some-key”. + +> isKeyKnown k = k `elem` knownKeys || "x-" `isPrefixOf` k + +The following metadata keys are permitted: + +> knownKeys = [ + +The title, description and yield are implicit. + +> "title" +> , "description" +> , "yield" + +The recipe’s language, as 2 character code (`ISO 639-1 +<http://www.loc.gov/standards/iso639-2/php/English_list.php>`_). + +> , "language" + +Time both must be a time-unit quantity. + +> , "time" + +An image can be a relative file reference or URI + +> , "image" +> , "author" +> ] + +For instance a german language recipe for one person would look like this: + +> testMetadata = [ +> cmpLintMeta "+foo >1 _ foobar (language: de) (x-app-key: value)" +> [] +> (Just [(1, ("title", MetaStr "foobar")) +> , (1, ("yield", MetaQty (Quantity (Exact (AmountRatio (1%1))) "" "foobar"))) +> , (2, ("language", MetaStr "de")) +> , (3, ("x-app-key", MetaStr "value"))]) + +Unparseable annotations or unknown keys are linting errors: + +> , cmpLintMeta "+foo >foobar (unknown-key: value)" +> [LintResult UnknownMetadataKey [2]] +> (Just [(1, ("title", MetaStr "foobar")) +> , (1, ("yield", MetaQty (strQuantity "foobar"))) +> , (2, ("unknown-key", MetaStr "value"))]) + +Root node annotations not containing a parseable key-value pair are assigned +the key “description”. + +> , cmpLintMeta "+foo >foobar ( some description ) (another one: with colon) (another: valid key-value)" +> [LintResult UnknownMetadataKey [4]] +> (Just [(1, ("title", MetaStr "foobar")) +> , (1, ("yield", MetaQty (strQuantity "foobar"))) +> , (2, ("description", MetaStr " some description ")) +> , (3, ("description", MetaStr "another one: with colon")) +> , (4, ("another", MetaStr "valid key-value"))]) +> ] + +.. _time-is-a-tool: + +Time is a tool +++++++++++++++ + +By definition, time is a tool and not an ingredient. + +> timeUnits = ["s", "min", "h", "d"] +> +> isTime (Quantity _ unit "") | unit `elem` timeUnits = True +> isTime _ = False + +> timeIsATool nodes _ = foldl f [] nodes +> where +> f xs (nodeid, Ingredient q) | isTime q = LintResult TimeIsATool [nodeid]:xs +> f xs _ = xs + +> testLintQuantity = [ +> cmpLint "+10 min >foo" [LintResult TimeIsATool [0]] +> , cmpLint "+10-12 h >foo" [LintResult TimeIsATool [0]] +> , cmpLint "+90/120 s >foo" [LintResult TimeIsATool [0]] +> , cmpLint "+~12 s >foo" [LintResult TimeIsATool [0]] +> , cmpLint "&10 min [bar] >foo" [] +> ] + +Only actions can be annotated like this. It can be used to indicate how long +a particular action is *expected* to take (i.e., peeling potatoes takes two +minutes) or how long the action is supposed to be executed (i.e. cook for five +minutes). More time annotations improve the software’s scheduling capabilities. + +> timeAnnotatesAction nodes edges = foldl f [] nodes +> where +> f xs (nodeid, Tool q) | isTime q && (not . allActions) (outgoingEdges edges nodeid) = LintResult TimeAnnotatesAction [nodeid]:xs +> f xs _ = xs +> toNodelist = (!!) nodes . snd +> allActions = all (isAction . snd . toNodelist) + +For example, “cook 10 minutes” can be expressed with: + +> testTimeAnnotatesAction = [ +> cmpLint "&10 min [cook] >soup" [] +> , cmpLint "&10 min [cook] &5-6 h [cook again] >soup" [] +> , cmpLint "&10 min >soup" [LintResult TimeAnnotatesAction [0]] +> , cmpLint "&10 min &15 min |time *time [cook] >soup" +> [LintResult TimeAnnotatesAction [0], LintResult TimeAnnotatesAction [1]] +> ] + +.. _well-known-units: + +Well-known units +++++++++++++++++ + +Units can be arbitrary strings, but implementations should recognize the +standard metric units g (gram), l (liter), and m (meter). One of these prefixes +may be used with each of them: m (milli-), c (centi-), d (dezi-), and k (kilo-). +Additionally, time in s (second), min (minute), h (hour), and d (day) should be +accepted. + +> wellKnownUnit nodes _ = foldl f [] nodes +> where +> extractQty (Ingredient q) = Just q +> extractQty (Tool q) = Just q +> extractQty (Result q) = Just q +> extractQty (Alternative q) = Just q +> extractQty (Reference q) = Just q +> extractQty _ = Nothing +> f xs (nodeid, instr) | fromMaybe False (extractQty instr >>= (return . not . known)) = +> LintResult UnitNotWellKnown [nodeid]:xs +> f xs _ = xs +> known (Quantity _ unit _) = unit `elem` knownUnits +> knownUnits = [ +> "" +> , "mg", "g", "kg" +> , "ml", "cl", "dl", "l" +> , "cm", "dm", "m" +> ] ++ timeUnits + +Usage of imperial units (inch, pound, …), non-standard +units like “teaspoon,” “cup,” or similar is discouraged because +the former is used by just three countries in the world right now, and +the latter is language- and country-dependent. The implementation may +provide the user with a conversion utility. + +> testLintWellKnownUnit = [ +> cmpLint "+1 in foobar >bar" [LintResult UnitNotWellKnown [0]] +> , cmpLint "+2 teaspoons foobar >bar" [LintResult UnitNotWellKnown [0]] +> , cmpLint "+3 cups foobar >bar" [LintResult UnitNotWellKnown [0]] +> , cmpLint "+1 ml foobar >bar" [] +> , cmpLint "+1 cl foobar >bar" [] +> , cmpLint "+1 dl foobar >bar" [] +> , cmpLint "+1 l foobar >bar" [] +> , cmpLint "+2 _ something >bar" [] +> , cmpLint "&1 min [foo] >bar" [] + +The unit is case-sensitive, thus + +> , cmpLint "+1 Mg foobar >bar" [LintResult UnitNotWellKnown [0]] +> , cmpLint "+1 kG foobar >bar" [LintResult UnitNotWellKnown [0]] +> , cmpLint "&1 MIN [foo] >bar" [LintResult UnitNotWellKnown [0]] +> ] + +References +++++++++++ + +All references must be resolved. An `earlier check <#resultsused>`_ ensures +all results and alternatives are referenced at some point. + +> referencesResolved nodes edges = foldl f [] nodes +> where +> f xs (nodeid, Reference _) | null (incomingEdges edges nodeid) = +> LintResult UndefinedReference [nodeid]:xs +> f xs _ = xs + +> testLintRefs = [ +> cmpLint "*foobar >foobar >barbaz" [LintResult CircularLoop [0, 1]] +> , cmpLint "*foobar >foo" [LintResult UndefinedReference [0]] +> ] + +Results and alternatives must not have duplicate names, so collect +their lower-case object names into a ``Map`` and flag those which +reference multiple nodes. + +> uniqueNames nodes _ = M.foldl f [] nameMap +> where +> f xs fnodes | length fnodes > 1 = LintResult DuplicateReferenceName fnodes:xs +> f xs _ = xs +> nameMap = foldl buildMap M.empty nodes +> buildMap m (nodeid, Result qty) = M.insertWith append (getObject qty) [nodeid] m +> buildMap m (nodeid, Alternative qty) = M.insertWith append (getObject qty) [nodeid] m +> buildMap m _ = m +> getObject (Quantity _ _ object) = map toLower object +> append a b = insert (head a) b + +> testUniqueNames = [ +> cmpLint "+a >x +b >y *x *y >foo" [] +> , cmpLint "+a >x +b >x *x >y" [LintResult DuplicateReferenceName [1, 3]] +> , cmpLint "+a >x +b +c |x *x >y" [LintResult DuplicateReferenceName [1, 4]] +> , cmpLint "+a >1 _ foo +a >2 _ FOO +a >3 _ foO *Foo >y" +> [LintResult DuplicateReferenceName [1, 3, 5]] +> ] + +A result must have at least one incoming edge. This is a special case and can +only occur at the beginning of a recipe. + +> resultNonempty nodes edges = foldl f [] nodes +> where +> f xs (nodeid, Result _) | null (incomingEdges edges nodeid) = +> LintResult TooFewChildren [nodeid]:xs +> f xs _ = xs + +> testLintResultNonempty = [ +> cmpLint ">bar *bar >baz" [LintResult TooFewChildren [0]] +> , cmpLint "+A >bar *bar >baz" [] +> , cmpLint "+A >bar >foo *bar *foo >baz" [] +> ] + +Alternatives must have at least two incoming edges since a smaller amount would +make the alternative pointless. + +> twoAlternatives nodes edges = foldl f [] nodes +> where +> f xs (nodeid, Alternative _) | length (incomingEdges edges nodeid) < 2 = +> LintResult TooFewChildren [nodeid]:xs +> f xs _ = xs + +> testLintTwoAlternatives = [ +> cmpLint "+A |foo *foo >bar" [LintResult TooFewChildren [1]] +> , cmpLint "+A +B |foo *foo >bar" [] +> , cmpLint "+A &B |foo *foo >bar" [] +> ] + +.. _reject-loops: + +References cannot loop because, well, you cannot cook something and +use an ingredient you have not made yet. It is possible to branch out +and merge again if an ingredient is split into multiple parts +and added to different outputs. + +> circularLoops nodes edges = map (LintResult CircularLoop) circles +> where +> allReferences = foldl referenceNodes [] nodes +> referenceNodes xs (nodeid, Reference _) = nodeid:xs +> referenceNodes xs _ = xs +> circles = filter (not . null) $ map (visitIncoming [] . singleton) allReferences +> singleton x = [x] +> visitIncoming _ [] = [] +> visitIncoming visited next = case length (intersect visited nextNext) of +> 0 -> visitIncoming nextVisited nextNext +> _ -> nextVisited +> where +> nextVisited = visited ++ next +> nextNext = map fst $ concat $ map (incomingNodes nodes edges) next + +> testLintCircularLoops = [ +> cmpLint "*y >x *x >y >foobar" +> [LintResult CircularLoop [0, 3, 2, 1] , LintResult CircularLoop [2, 1, 0, 3]] +> , cmpLint "*z >x *x >y *y >z *z >foobar" [ +> LintResult CircularLoop [0, 5, 4, 3, 2, 1] +> , LintResult CircularLoop [2, 1, 0, 5, 4, 3] +> , LintResult CircularLoop [4, 3, 2, 1, 0, 5] +> , LintResult CircularLoop [6, 5, 4, 3, 2, 1, 0] +> ] +> , cmpLint "+a >foobar *1/2 _ foobar >x *1/2 _ foobar >y *x *y >final" [] +> , cmpLint "+a >foobar *1/2 _ foobar >x *x *1/2 _ foobar >final" [] +> ] + +Ranges +++++++ + +The first amount of a range ratio must be strictly smaller than the second. +This limitation is not enforced for ranges containing strings. + +> rangeFromLargerThanTo nodes _ = foldl f [] nodes +> where +> f xs (nodeid, Ingredient q) | not $ rangeOk q = +> LintResult RangeFromLargerThanTo [nodeid]:xs +> f xs (nodeid, Reference q) | not $ rangeOk q = +> LintResult RangeFromLargerThanTo [nodeid]:xs +> f xs _ = xs +> rangeOk (Quantity (Range (AmountRatio a) (AmountRatio b)) _ _) = a < b +> rangeOk _ = True + +> testRangeFromLargerThanTo = [ +> cmpLint "+2-3 l water >bar" [] +> , cmpLint "+3-2 l water >bar" [LintResult RangeFromLargerThanTo [0]] +> , cmpLint "+2/3-1/3 l water >bar" [LintResult RangeFromLargerThanTo [0]] +> , cmpLint "+some-many _ eggs >bar" [] +> , cmpLint "+1-\"a few\" _ eggs >bar" [] +> ] + +Appendix +++++++++ + +> data LintResult = LintResult LintStatus [NodeId] deriving (Show, Eq, Ord) +> data LintStatus = +> NoRootNode +> | NonResultRootNode +> | MoreThanOneRootNode +> | UndefinedReference +> | DuplicateReferenceName +> | CircularLoop +> | TooFewChildren +> | TimeIsATool +> | TimeAnnotatesAction +> | UnitNotWellKnown +> | InvalidNode +> | RangeFromLargerThanTo +> | NoMetadata +> | UnknownMetadataKey +> deriving (Show, Eq, Ord) + +Every lint test checks a single aspect of the graph. + +> lint nodes edges = concatMap (\f -> f nodes edges) lintTests + +> lintTests = [ +> rootIsResult +> , referencesResolved +> , uniqueNames +> , circularLoops +> , resultNonempty +> , twoAlternatives +> , timeIsATool +> , timeAnnotatesAction +> , wellKnownUnit +> , lintMetadata +> , rangeFromLargerThanTo +> ] + +> withGraph doc f = f nodes edges +> where +> (Right op) = (head . extract . snd . unzip) <$> parse ("%pesto " ++ doc) +> nodes = zip [firstNodeId..] op +> edges = toGraph nodes ++ resolveReferences nodes + +> cmpLint doc expect = withGraph doc (\nodes edges -> doc ~: sort (lint nodes edges) ~?= sort expect) + + +> data Metadata = MetaQty Quantity | MetaStr String deriving (Show, Eq) + +> cmpLintMeta doc expectLint expectMeta = withGraph doc (\nodes edges -> doc ~: [ +> sort (lint nodes edges) ~?= sort expectLint +> , extractMetadata nodes edges ~?= expectMeta +> ]) +> strQuantity = Quantity (Exact (AmountStr "")) "" + +> test = [ +> testConnectivity +> , testMetadata +> , testLintRefs +> , testUniqueNames +> , testLintCircularLoops +> , testLintQuantity +> , testLintWellKnownUnit +> , testTimeAnnotatesAction +> , testLintTwoAlternatives +> , testLintResultNonempty +> , testRangeFromLargerThanTo +> ] + |