summary refs log tree commit diff
path: root/crocoite/data
diff options
context:
space:
mode:
Diffstat (limited to 'crocoite/data')
-rw-r--r-- crocoite/data/click.js 194
-rw-r--r-- crocoite/data/click.yaml 117
-rw-r--r-- crocoite/data/cookies.txt 9
-rw-r--r-- crocoite/data/extract-links.js 21
-rw-r--r-- crocoite/data/screenshot.js 20
-rw-r--r-- crocoite/data/scroll.js 41
6 files changed, 265 insertions, 137 deletions
diff --git a/crocoite/data/click.js b/crocoite/data/click.js
index c51a690..ae189da 100644
--- a/crocoite/data/click.js
+++ b/crocoite/data/click.js
@@ -4,109 +4,7 @@
* like navigating to a different location. Thus whitelist known elements.
*/
-(function(){
-const selectorFlag = Object.freeze ({
- none: 0,
- multi: 1, /* click item multiple times */
-});
-const defaultClickThrottle = 50; /* in ms */
-const discoverInterval = 1000; /* 1 second */
-const sites = Object.freeze ([
- {
- hostname: /^www\.facebook\.com$/i,
- selector: [
- /* show more comments */
- {s: 'a.UFIPagerLink[role=button]', flags: selectorFlag.none},
- /* show nested comments*/
- {s: 'a.UFICommentLink[role=button]', flags: selectorFlag.none},
- ],
- }, {
- hostname: /^twitter\.com$/i,
- selector: [
- /* expand threads */
- {s: 'a.ThreadedConversation-moreRepliesLink', flags: selectorFlag.none},
- /* show hidden profiles */
- {s: 'button.ProfileWarningTimeline-button', flags: selectorFlag.none},
- /* show hidden/sensitive media */
- {s: 'button.Tombstone-action.js-display-this-media', flags: selectorFlag.none},
- ],
- }, {
- hostname: /^disqus\.com$/i,
- selector: [
- /* load more comments */
- {s: 'a.load-more__button', flags: selectorFlag.multi},
- ],
- }, {
- hostname: /^(www|np)\.reddit\.com$/i,
- selector: [
- /* show more comments, reddit’s javascript ignores events if too
- * frequent */
- {s: 'span.morecomments a', flags: selectorFlag.none, throttle: 500},
- ],
- }, {
- hostname: /^www\.instagram\.com$/i,
- selector: [
- /* posts may have multiple images that load dynamically, click the arrow */
- {s: 'a[role=button].coreSpriteRightChevron', flags: selectorFlag.multi, throttle: 500},
- /* load more comments */
- {s: 'article div ul li a[role=button]', flags: selectorFlag.multi},
- ],
- }, {
- hostname: /^www\.youtube\.com$/i,
- selector: [
- /* expand comment thread */
- {s: 'ytd-comment-thread-renderer div.more-button', flags: selectorFlag.none},
- ],
- }, {
- hostname: /^www\.patreon\.com$/i,
- selector: [
- /* load more content */
- {s: 'div[display=flex] div[display=block] button[color=gray][type=button]', flags: selectorFlag.multi},
- /* load more comments */
- {s: 'div.stackable[display=block] > div > div > a[color=dark][target=_self]', flags: selectorFlag.none},
- /* load more replies */
- {s: 'div > a[scale="0"][color=blue][size="1"]', flags: selectorFlag.none},
- ],
- }
- ]);
-
-/* pick selectors matching current location */
-let hostname = document.location.hostname;
-let selector = [];
-for (let s of sites) {
- if (s.hostname.test (hostname)) {
- selector = selector.concat (s.selector);
- }
-}
-
-function makeClickEvent () {
- return new MouseEvent('click', {
- view: window,
- bubbles: true,
- cancelable: true
- });
-}
-
-/* throttle clicking */
-let queue = [];
-let clickTimeout = null;
-function click () {
- if (queue.length > 0) {
- const item = queue.shift ();
- const o = item.o;
- const selector = item.selector;
- o.dispatchEvent (makeClickEvent ());
-
- if (queue.length > 0) {
- const nextTimeout = 'throttle' in selector ?
- selector.throttle : defaultClickThrottle;
- clickTimeout = window.setTimeout (click, nextTimeout);
- } else {
- clickTimeout = null;
- }
- }
-}
-
+(function() {
/* Element is visible if itself and all of its parents are
*/
function isVisible (o) {
@@ -128,28 +26,82 @@ function isClickable (o) {
return !o.hasAttribute ('disabled') && isVisible (o);
}
-/* some sites don’t remove/replace the element immediately, so keep track of
- * which ones we already clicked */
-let have = new Set ();
-function discover () {
- for (let s of selector) {
- let obj = document.querySelectorAll (s.s);
- for (let o of obj) {
- if (!have.has (o) && isClickable (o)) {
- queue.push ({o: o, selector: s});
- if (!(s.flags & selectorFlag.multi)) {
- have.add (o);
+const defaultClickThrottle = 50; /* in ms */
+const discoverInterval = 1000; /* 1 second */
+
+class Click {
+ constructor(options) {
+ /* pick selectors matching current location */
+ let hostname = document.location.hostname;
+ this.selector = [];
+ for (let s of options['sites']) {
+ let r = new RegExp (s.match, 'i');
+ if (r.test (hostname)) {
+ this.selector = this.selector.concat (s.selector);
+ }
+ }
+ /* throttle clicking */
+ this.queue = [];
+ this.clickTimeout = null;
+
+ /* some sites don’t remove/replace the element immediately, so keep track of
+ * which ones we already clicked */
+ this.have = new Set ();
+
+ /* XXX: can we use a mutation observer instead? */
+ this.interval = window.setInterval (this.discover.bind (this), discoverInterval);
+ }
+
+ makeClickEvent () {
+ return new MouseEvent('click', {
+ view: window,
+ bubbles: true,
+ cancelable: true
+ });
+ }
+
+ click () {
+ if (this.queue.length > 0) {
+ const item = this.queue.shift ();
+ const o = item.o;
+ const selector = item.selector;
+ o.dispatchEvent (this.makeClickEvent ());
+
+ if (this.queue.length > 0) {
+ const nextTimeout = 'throttle' in selector ?
+ selector.throttle : defaultClickThrottle;
+ this.clickTimeout = window.setTimeout (this.click.bind (this), nextTimeout);
+ } else {
+ this.clickTimeout = null;
+ }
+ }
+ }
+
+ discover () {
+ for (let s of this.selector) {
+ let obj = document.querySelectorAll (s.selector);
+ for (let o of obj) {
+ if (!this.have.has (o) && isClickable (o)) {
+ this.queue.push ({o: o, selector: s});
+ if (!s.multi) {
+ this.have.add (o);
+ }
}
}
}
+ if (this.queue.length > 0 && this.clickTimeout === null) {
+ /* start clicking immediately */
+ this.clickTimeout = window.setTimeout (this.click.bind (this), 0);
+ }
+ return true;
}
- if (queue.length > 0 && clickTimeout === null) {
- /* start clicking immediately */
- clickTimeout = window.setTimeout (click, 0);
+
+
+ stop () {
+ window.clearInterval (this.interval);
+ window.clearTimeout (this.clickTimeout);
}
- return true;
}
-/* XXX: can we use a mutation observer instead? */
-window.setInterval (discover, discoverInterval);
-}());
+return Click;
+}())
diff --git a/crocoite/data/click.yaml b/crocoite/data/click.yaml
new file mode 100644
index 0000000..78278b9
--- /dev/null
+++ b/crocoite/data/click.yaml
@@ -0,0 +1,117 @@
+# Configuration for behavior.py:Click
+# Example URLs are random. Believe me.
+match: ^www\.facebook\.com$
+selector:
+ - description: Show comments and replies/nested comments on user pages.
+ selector: form[action="/ajax/ufi/modify.php"] a[data-testid^="UFI2CommentsPagerRenderer/pager_depth_"]
+ urls: ["https://www.facebook.com/tagesschau"]
+ - description: Initially show comments below a single post/video, i.e. /user/post/123.
+ selector: form[action="/ajax/ufi/modify.php"] a[data-testid="UFI2CommentsCount/root"]
+ urls: ["https://www.facebook.com/tagesschau/posts/10157061068659407"]
+ - description: Close the “register now” nag screen. For screenshots.
+ selector: a#expanding_cta_close_button[role=button]
+ urls: ["https://www.facebook.com/tagesschau"]
+---
+match: ^twitter\.com$
+selector:
+ - description: Expand threads.
+ selector: a.ThreadedConversation-moreRepliesLink
+ urls: ["https://twitter.com/realDonaldTrump/status/1068826073775964160"]
+ - description: Show hidden profiles.
+ selector: button.ProfileWarningTimeline-button
+ urls: ["https://twitter.com/CookieCyboid"]
+ - description: Show hidden/sensitive media. For screen-/snapshots.
+ selector: button.Tombstone-action.js-display-this-media
+ urls: ["https://twitter.com/CookieCyboid/status/1070807283305713665"]
+ - description: Show more replies.
+ selector: button.ThreadedConversation-showMoreThreadsButton
+ urls: ["https://twitter.com/fuglydug/status/1172160128101076995"]
+---
+match: ^disqus\.com$
+selector:
+ - description: Load more comments.
+ selector: a.load-more__button
+ multi: True
+---
+# new layout
+match: ^www\.reddit\.com$
+selector:
+ - description: Show more comments.
+ selector: div[id^=moreComments-] > div > p
+ # reddit’s javascript ignores events if too frequent
+ throttle: 500
+ urls: ["https://www.reddit.com/r/subredditcancer/comments/b2b80f/we_are_moderators_of_rwatchpeopledie_amaa_just/"]
+---
+# old layout
+match: ^(old|np)\.reddit\.com$
+selector:
+ - description: Show more comments.
+ selector: span.morecomments a
+ # reddit’s javascript ignores events if too frequent
+ throttle: 500
+ urls: ["https://old.reddit.com/r/subredditcancer/comments/b2b80f/we_are_moderators_of_rwatchpeopledie_amaa_just/"]
+---
+match: ^www\.youtube\.com$
+selector:
+ - description: Expand single comment.
+ selector: ytd-comment-thread-renderer span[slot=more-button]
+ urls: ["https://www.youtube.com/watch?v=udtFqQuBFSc"]
+ - description: Show more comment thread replies.
+ selector: div.ytd-comment-replies-renderer > yt-next-continuation > paper-button
+ urls: ["https://www.youtube.com/watch?v=Lov0T3eXI2k"]
+ multi: True
+---
+match: ^www\.patreon\.com$
+selector:
+ - description: Load more comments.
+ selector: div[data-tag=post-card] button[data-tag=loadMoreCommentsCta]
+ urls: ["https://www.patreon.com/posts/what-im-on-22124040"]
+---
+match: ^(www\.)?gab\.com$
+selector:
+ - description: Load more posts.
+ selector: div.item-list[role=feed] button.load-more
+ multi: True
+ urls: ["https://gab.com/gab"]
+---
+match: ^(www\.)?github\.com$
+selector:
+ - description: Show hidden issue items.
+ urls: ["https://github.com/dominictarr/event-stream/issues/116"]
+ selector: div#discussion_bucket form.ajax-pagination-form button.ajax-pagination-btn
+---
+match: ^www\.gamasutra\.com$
+selector:
+ - description: Load more comments.
+ urls: ["http://www.gamasutra.com/blogs/RaminShokrizade/20130626/194933/The_Top_F2P_Monetization_Tricks.php"]
+ selector: div#dynamiccomments div.viewTopCmts a
+---
+match: ^(www\.)?steamcommunity\.com$
+selector:
+ - description: Load more content.
+ urls: ["https://steamcommunity.com/app/252950/reviews/?p=1&browsefilter=toprated&filterLanguage=all"]
+ selector: "#GetMoreContentBtn a"
+ multi: True
+---
+match: ^imgur\.com$
+selector:
+ - description: Load more images of an album.
+ urls: ["https://imgur.com/a/JG1yc"]
+ selector: div.js-post-truncated a.post-loadall
+ - description: Expand all comments. For snapshots.
+ urls: ["https://imgur.com/a/JG1yc"]
+ selector: div.comments-info span.comments-expand
+ - description: Show bad replies. For snapshots.
+ urls: ["https://imgur.com/gallery/jRzMfRG"]
+ selector: div#comments div.bad-captions a.link
+---
+match: ^(www\.)?vimeo\.com$
+selector:
+ - description: Load more videos on profile page.
+ urls: ["https://vimeo.com/dsam4a"]
+ selector: div.profile_main div.profile-load-more__button--wrapper button
+# XXX: this works when using a non-headless browser, but does not otherwise
+# - description: Expand video comments
+# urls: ["https://vimeo.com/22439234"]
+# selector: section#comments button.iris_comment-more
+# multi: True
diff --git a/crocoite/data/cookies.txt b/crocoite/data/cookies.txt
new file mode 100644
index 0000000..6ac62c3
--- /dev/null
+++ b/crocoite/data/cookies.txt
@@ -0,0 +1,9 @@
+# Default cookies for crocoite. This file does *not* use Netscape’s cookie
+# file format. Lines are expected to be in Set-Cookie format.
+# And this line is a comment.
+
+# Reddit:
+# skip over 18 prompt
+over18=1; Domain=www.reddit.com
+# skip quarantined subreddit prompt
+_options={%22pref_quarantine_optin%22:true}; Domain=www.reddit.com
diff --git a/crocoite/data/extract-links.js b/crocoite/data/extract-links.js
index 4d1a3d0..5a4f9f0 100644
--- a/crocoite/data/extract-links.js
+++ b/crocoite/data/extract-links.js
@@ -25,11 +25,26 @@ function isClickable (o) {
}
/* --- end copy&paste */
-let x = document.body.querySelectorAll('a[href]');
let ret = [];
+['a[href]', 'area[href]'].forEach (function (s) {
+ let x = document.querySelectorAll(s);
+ for (let i=0; i < x.length; i++) {
+ if (isClickable (x[i])) {
+ ret.push (x[i].href);
+ }
+ }
+});
+
+/* If Chrome loads plain-text documents it’ll wrap them into <pre>. Check those
+ * for links as well, assuming the whole line is a link (i.e. list of links). */
+let x = document.querySelectorAll ('body > pre');
for (let i=0; i < x.length; i++) {
- if (isClickable (x[i])) {
- ret.push (x[i].href);
+ if (isVisible (x[i])) {
+ x[i].innerText.split ('\n').forEach (function (s) {
+ if (s.match ('^https?://')) {
+ ret.push (s);
+ }
+ });
}
}
return ret; /* immediately return results, for use with Runtime.evaluate() */
diff --git a/crocoite/data/screenshot.js b/crocoite/data/screenshot.js
new file mode 100644
index 0000000..a9a41e1
--- /dev/null
+++ b/crocoite/data/screenshot.js
@@ -0,0 +1,20 @@
+/* Find scrollable full-screen elements and return their actual size
+ */
+(function () {
+/* limit the number of elements queried */
+let elem = document.querySelectorAll ('body > div');
+let ret = [];
+for (let i = 0; i < elem.length; i++) {
+ let e = elem[i];
+ let s = window.getComputedStyle (e);
+ if (s.getPropertyValue ('position') == 'fixed' &&
+ s.getPropertyValue ('overflow') == 'auto' &&
+ s.getPropertyValue ('left') == '0px' &&
+ s.getPropertyValue ('right') == '0px' &&
+ s.getPropertyValue ('top') == '0px' &&
+ s.getPropertyValue ('bottom') == '0px') {
+ ret.push (e.scrollHeight);
+ }
+}
+return ret; /* immediately return results, for use with Runtime.evaluate() */
+})();
diff --git a/crocoite/data/scroll.js b/crocoite/data/scroll.js
index 13e856d..be88edf 100644
--- a/crocoite/data/scroll.js
+++ b/crocoite/data/scroll.js
@@ -1,23 +1,38 @@
/* Continuously scrolls the page
*/
-var __crocoite_stop__ = false;
(function(){
-function scroll (event) {
- if (__crocoite_stop__) {
- return false;
- } else {
+class Scroll {
+ constructor (options) {
+ this.scrolled = new Map ();
+ this.interval = window.setInterval (this.scroll.bind (this), 200);
+ }
+
+ stop() {
+ window.clearInterval (this.interval);
+ window.scrollTo (0, 0);
+ this.scrolled.forEach (function (value, key, map) {
+ key.scrollTop = value;
+ });
+ }
+ /* save initial scroll state */
+ save(obj) {
+ if (!this.scrolled.has (obj)) {
+ this.scrolled.set (obj, obj.scrollTop);
+ }
+ }
+ /* perform a single scroll step */
+ scroll (event) {
window.scrollBy (0, window.innerHeight/2);
- document.querySelectorAll ('*').forEach (
+ document.querySelectorAll ('html body *').forEach (
function (d) {
- if (d.clientHeight < d.scrollHeight) {
+ if (d.scrollHeight-d.scrollTop > d.clientHeight) {
+ this.save (d);
d.scrollBy (0, d.clientHeight/2);
}
- });
+ }.bind (this));
return true;
}
}
-function onload (event) {
- window.setInterval (scroll, 200);
-}
-document.addEventListener("DOMContentLoaded", onload);
-}());
+
+return Scroll;
+}())