From 949dd6d2a14f11036d251ef7d11607a214389d17 Mon Sep 17 00:00:00 2001 From: Lars-Dominik Braun Date: Mon, 29 Jul 2019 15:34:50 +0200 Subject: doc: Auto-generate list of supported click selectors Using Sphinx plugin. Also improve click selector descriptions for this purpose. --- crocoite/data/click.yaml | 42 ++++++++++++++++++++++-------------------- doc/_ext/clicklist.py | 45 +++++++++++++++++++++++++++++++++++++++++++++ doc/conf.py | 3 +++ doc/index.rst | 4 +++- doc/plugins.rst | 16 ++++++++++++++++ 5 files changed, 89 insertions(+), 21 deletions(-) create mode 100644 doc/_ext/clicklist.py create mode 100644 doc/plugins.rst diff --git a/crocoite/data/click.yaml b/crocoite/data/click.yaml index 757dfb0..c802573 100644 --- a/crocoite/data/click.yaml +++ b/crocoite/data/click.yaml @@ -2,107 +2,109 @@ # Example URLs are random. Believe me. match: ^www\.facebook\.com$ selector: - - description: Show comments and replies/nested comments on user pages + - description: Show comments and replies/nested comments on user pages. selector: form[action="/ajax/ufi/modify.php"] a[data-testid^="UFI2CommentsPagerRenderer/pager_depth_"] urls: ["https://www.facebook.com/tagesschau"] - - description: initially show comments below a single post/video, i.e. /user/post/123 + - description: Initially show comments below a single post/video, i.e. /user/post/123. selector: form[action="/ajax/ufi/modify.php"] a[data-testid="UFI2CommentsCount/root"] urls: ["https://www.facebook.com/tagesschau/posts/10157061068659407"] - - description: close the “register now” nag screen. for better screen shots + - description: Close the “register now” nag screen. For screenshots. selector: a#expanding_cta_close_button[role=button] urls: ["https://www.facebook.com/tagesschau"] --- match: ^twitter\.com$ selector: - - description: expand threads + - description: Expand threads.
selector: a.ThreadedConversation-moreRepliesLink urls: ["https://twitter.com/realDonaldTrump/status/1068826073775964160"] - - description: show hidden profiles + - description: Show hidden profiles. selector: button.ProfileWarningTimeline-button urls: ["https://twitter.com/CookieCyboid"] - - description: show hidden/sensitive media. For screen-/snapshots. + - description: Show hidden/sensitive media. For screen-/snapshots. selector: button.Tombstone-action.js-display-this-media urls: ["https://twitter.com/CookieCyboid/status/1070807283305713665"] --- match: ^disqus\.com$ selector: - - description: load more comments + - description: Load more comments. selector: a.load-more__button multi: True --- # new layout match: ^www\.reddit\.com$ selector: - - description: show more comments, reddit’s javascript ignores events if too frequent + - description: Show more comments. selector: div[id^=moreComments-] > div > p + # reddit’s javascript ignores events if too frequent throttle: 500 urls: ["https://www.reddit.com/r/subredditcancer/comments/b2b80f/we_are_moderators_of_rwatchpeopledie_amaa_just/"] --- # old layout match: ^(old|np)\.reddit\.com$ selector: - - description: show more comments, reddit’s javascript ignores events if too frequent + - description: Show more comments. selector: span.morecomments a + # reddit’s javascript ignores events if too frequent throttle: 500 urls: ["https://old.reddit.com/r/subredditcancer/comments/b2b80f/we_are_moderators_of_rwatchpeopledie_amaa_just/"] --- match: ^www\.youtube\.com$ selector: - - description: expand comment + - description: Expand single comment. selector: ytd-comment-thread-renderer span.more-button urls: ["https://www.youtube.com/watch?v=udtFqQuBFSc"] - - description: show more comment thread replies + - description: Show more comment thread replies. 
selector: div.ytd-comment-replies-renderer > yt-next-continuation > paper-button urls: ["https://www.youtube.com/watch?v=Lov0T3eXI2k"] multi: True --- match: ^www\.patreon\.com$ selector: - - description: load more comments + - description: Load more comments. selector: div[data-tag=post-card] button[data-tag=loadMoreCommentsCta] urls: ["https://www.patreon.com/posts/what-im-on-22124040"] --- match: ^(www\.)?gab\.com$ selector: - - description: more posts + - description: Load more posts. selector: div.item-list[role=feed] button.load-more multi: True urls: ["https://gab.com/gab"] --- match: ^(www\.)?github\.com$ selector: - - description: show hidden issue items + - description: Show hidden issue items. urls: ["https://github.com/dominictarr/event-stream/issues/116"] selector: div#discussion_bucket form.ajax-pagination-form button.ajax-pagination-btn --- match: ^www\.gamasutra\.com$ selector: - - description: Load more comments + - description: Load more comments. urls: ["http://www.gamasutra.com/blogs/RaminShokrizade/20130626/194933/The_Top_F2P_Monetization_Tricks.php"] selector: div#dynamiccomments div.viewTopCmts a --- match: ^(www\.)?steamcommunity\.com$ selector: - - description: Load more content + - description: Load more content. urls: ["https://steamcommunity.com/app/252950/reviews/?p=1&browsefilter=toprated&filterLanguage=all"] selector: "#GetMoreContentBtn a" multi: True --- match: ^imgur\.com$ selector: - - description: Load more image of an album + - description: Load more images of an album. urls: ["https://imgur.com/a/JG1yc"] selector: div.js-post-truncated a.post-loadall - - description: Expand all comments (for snapshots) + - description: Expand all comments. For snapshots. urls: ["https://imgur.com/a/JG1yc"] selector: div.comments-info span.comments-expand - - description: Show bad replies (for snapshots) + - description: Show bad replies. For snapshots.
urls: ["https://imgur.com/gallery/0l6Yo4r"] selector: div#comments div.bad-captions a.link --- match: ^(www\.)?vimeo\.com$ selector: - - description: Load more videos on profile page + - description: Load more videos on profile page. urls: ["https://vimeo.com/dsam4a"] selector: div.profile_main div.profile-load-more__button--wrapper button.profile-load-more__button # XXX: this works when using a non-headless browser, but does not otherwise diff --git a/doc/_ext/clicklist.py b/doc/_ext/clicklist.py new file mode 100644 index 0000000..a69452c --- /dev/null +++ b/doc/_ext/clicklist.py @@ -0,0 +1,45 @@ +""" +Render click.yaml config file into human-readable list of supported sites +""" + +import pkg_resources, yaml +from docutils import nodes +from docutils.parsers.rst import Directive +from yarl import URL + +class ClickList (Directive): + def run(self): + # XXX: do this once only + fd = pkg_resources.resource_stream ('crocoite', 'data/click.yaml') + config = list (yaml.safe_load_all (fd)) + + l = nodes.definition_list () + for site in config: + urls = set () + v = nodes.definition () + vl = nodes.bullet_list () + v += vl + for s in site['selector']: + i = nodes.list_item () + i += nodes.paragraph (text=s['description']) + vl += i + urls.update (map (lambda x: URL(x).with_path ('/'), s.get ('urls', []))) + + item = nodes.definition_list_item () + term = ', '.join (map (lambda x: x.host, urls)) if urls else site['match'] + k = nodes.term (text=term) + item += k + + item += v + l += item + return [l] + +def setup(app): + app.add_directive ("clicklist", ClickList) + + return { + 'version': '0.1', + 'parallel_read_safe': True, + 'parallel_write_safe': True, + } + diff --git a/doc/conf.py b/doc/conf.py index 26747b4..8336c27 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +import os, sys # -- Project information ----------------------------------------------------- @@ -8,9 +9,11 @@ author = 'crocoite contributors' # -- General 
configuration --------------------------------------------------- +sys.path.append(os.path.abspath("./_ext")) extensions = [ 'sphinx.ext.viewcode', 'sphinx.ext.autodoc', + 'clicklist', ] # Add any paths that contain templates here, relative to this directory. diff --git a/doc/index.rst b/doc/index.rst index 39c2f73..53f5f77 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -11,6 +11,7 @@ Chrome`_. :hidden: usage.rst + plugins.rst rationale.rst develop.rst related.rst @@ -24,7 +25,7 @@ Google Chrome-powered WARC output Includes all network requests made by the browser Site interaction - Auto-expand on-click content, infinite-scrolling + :ref:`Auto-expand on-click content <click>`, infinite-scrolling DOM snapshot Contains the page’s state, renderable without JavaScript Image screenshot @@ -32,3 +33,4 @@ Image screenshot Machine-readable interface Easy integration into custom tools/scripts + diff --git a/doc/plugins.rst b/doc/plugins.rst new file mode 100644 index 0000000..062e1bf --- /dev/null +++ b/doc/plugins.rst @@ -0,0 +1,16 @@ +Plugins +======= + +crocoite comes with plug-ins that modify loaded sites or interact with them. + +.. _click: + +click +----- + +The following sites are currently supported. Note this is an ongoing +battle against layout changes and thus older software versions will stop +working very soon. + +.. clicklist:: + -- cgit v1.2.3