diff options
Diffstat (limited to 'crocoite/data')
| -rw-r--r-- | crocoite/data/click.js | 194 | ||||
| -rw-r--r-- | crocoite/data/click.yaml | 117 | ||||
| -rw-r--r-- | crocoite/data/cookies.txt | 9 | ||||
| -rw-r--r-- | crocoite/data/extract-links.js | 21 | ||||
| -rw-r--r-- | crocoite/data/screenshot.js | 20 | ||||
| -rw-r--r-- | crocoite/data/scroll.js | 41 | 
6 files changed, 265 insertions, 137 deletions
| diff --git a/crocoite/data/click.js b/crocoite/data/click.js index c51a690..ae189da 100644 --- a/crocoite/data/click.js +++ b/crocoite/data/click.js @@ -4,109 +4,7 @@   *  like navigating to a different location. Thus whitelist known elements.   */ -(function(){ -const selectorFlag = Object.freeze ({ -	none: 0, -	multi: 1, /* click item multiple times */ -}); -const defaultClickThrottle = 50; /* in ms */ -const discoverInterval = 1000; /* 1 second */ -const sites = Object.freeze ([ -	{ -		hostname: /^www\.facebook\.com$/i, -		selector: [ -			/* show more comments */ -			{s: 'a.UFIPagerLink[role=button]', flags: selectorFlag.none}, -			/* show nested comments*/ -			{s: 'a.UFICommentLink[role=button]', flags: selectorFlag.none}, -			], -	}, { -		hostname: /^twitter\.com$/i, -		selector: [ -			/* expand threads */ -			{s: 'a.ThreadedConversation-moreRepliesLink', flags: selectorFlag.none}, -			/* show hidden profiles */ -			{s: 'button.ProfileWarningTimeline-button', flags: selectorFlag.none}, -			/* show hidden/sensitive media */ -			{s: 'button.Tombstone-action.js-display-this-media', flags: selectorFlag.none}, -			], -	}, { -		hostname: /^disqus\.com$/i, -		selector: [ -			/* load more comments */ -			{s: 'a.load-more__button', flags: selectorFlag.multi}, -			], -	}, { -		hostname: /^(www|np)\.reddit\.com$/i, -		selector: [ -			/* show more comments, reddit’s javascript ignores events if too -			 * frequent */ -			{s: 'span.morecomments a', flags: selectorFlag.none, throttle: 500}, -			], -	}, { -		hostname: /^www\.instagram\.com$/i, -		selector: [ -			/* posts may have multiple images that load dynamically, click the arrow */ -			{s: 'a[role=button].coreSpriteRightChevron', flags: selectorFlag.multi, throttle: 500}, -			/* load more comments */ -			{s: 'article div ul li a[role=button]', flags: selectorFlag.multi}, -			], -	}, { -		hostname: /^www\.youtube\.com$/i, -		selector: [ -			/* expand comment thread */ -			{s: 'ytd-comment-thread-renderer div.more-button', flags: selectorFlag.none}, -			], -	}, { -		hostname: /^www\.patreon\.com$/i, -		selector: [ -			/* load more content */ -			{s: 'div[display=flex] div[display=block] button[color=gray][type=button]', flags: selectorFlag.multi}, -			/* load more comments */ -			{s: 'div.stackable[display=block] > div  > div  > a[color=dark][target=_self]', flags: selectorFlag.none}, -			/* load more replies */ -			{s: 'div > a[scale="0"][color=blue][size="1"]', flags: selectorFlag.none}, -			], -	} -	]); - -/* pick selectors matching current location */ -let hostname = document.location.hostname; -let selector = []; -for (let s of sites) { -	if (s.hostname.test (hostname)) { -		selector = selector.concat (s.selector); -	} -} - -function makeClickEvent () { -	return new MouseEvent('click', { -				view: window, -				bubbles: true, -				cancelable: true -				}); -} - -/* throttle clicking */ -let queue = []; -let clickTimeout = null; -function click () { -	if (queue.length > 0) { -		const item = queue.shift (); -		const o = item.o; -		const selector = item.selector; -		o.dispatchEvent (makeClickEvent ()); - -		if (queue.length > 0) { -			const nextTimeout = 'throttle' in selector ? -					selector.throttle : defaultClickThrottle; -			clickTimeout = window.setTimeout (click, nextTimeout); -		} else { -			clickTimeout = null; -		} -	} -} - +(function() {  /*	Element is visible if itself and all of its parents are   */  function isVisible (o) { @@ -128,28 +26,82 @@ function isClickable (o) {  	return !o.hasAttribute ('disabled') && isVisible (o);  } -/* some sites don’t remove/replace the element immediately, so keep track of - * which ones we already clicked */ -let have = new Set (); -function discover () { -	for (let s of selector) { -		let obj = document.querySelectorAll (s.s); -		for (let o of obj) { -			if (!have.has (o) && isClickable (o)) { -				queue.push ({o: o, selector: s}); -				if (!(s.flags & selectorFlag.multi)) { -					have.add (o); +const defaultClickThrottle = 50; /* in ms */ +const discoverInterval = 1000; /* 1 second */ + +class Click { +	constructor(options) { +		/* pick selectors matching current location */ +		let hostname = document.location.hostname; +		this.selector = []; +		for (let s of options['sites']) { +			let r = new RegExp (s.match, 'i'); +			if (r.test (hostname)) { +				this.selector = this.selector.concat (s.selector); +			} +		} +		/* throttle clicking */ +		this.queue = []; +		this.clickTimeout = null; + +		/* some sites don’t remove/replace the element immediately, so keep track of +		 * which ones we already clicked */ +		this.have = new Set (); + +		/* XXX: can we use a mutation observer instead? */ +		this.interval = window.setInterval (this.discover.bind (this), discoverInterval); +	} + +	makeClickEvent () { +		return new MouseEvent('click', { +					view: window, +					bubbles: true, +					cancelable: true +					}); +	} + +	click () { +		if (this.queue.length > 0) { +			const item = this.queue.shift (); +			const o = item.o; +			const selector = item.selector; +			o.dispatchEvent (this.makeClickEvent ()); + +			if (this.queue.length > 0) { +				const nextTimeout = 'throttle' in selector ? +						selector.throttle : defaultClickThrottle; +				this.clickTimeout = window.setTimeout (this.click.bind (this), nextTimeout); +			} else { +				this.clickTimeout = null; +			} +		} +	} + +	discover () { +		for (let s of this.selector) { +			let obj = document.querySelectorAll (s.selector); +			for (let o of obj) { +				if (!this.have.has (o) && isClickable (o)) { +					this.queue.push ({o: o, selector: s}); +					if (!s.multi) { +						this.have.add (o); +					}  				}  			}  		} +		if (this.queue.length > 0 && this.clickTimeout === null) { +			/* start clicking immediately */ +			this.clickTimeout = window.setTimeout (this.click.bind (this), 0); +		} +		return true;  	} -	if (queue.length > 0 && clickTimeout === null) { -		/* start clicking immediately */ -		clickTimeout = window.setTimeout (click, 0); + + +	stop () { +		window.clearInterval (this.interval); +		window.clearTimeout (this.clickTimeout);  	} -	return true;  } -/* XXX: can we use a mutation observer instead? */ -window.setInterval (discover, discoverInterval); -}()); +return Click; +}()) diff --git a/crocoite/data/click.yaml b/crocoite/data/click.yaml new file mode 100644 index 0000000..78278b9 --- /dev/null +++ b/crocoite/data/click.yaml @@ -0,0 +1,117 @@ +# Configuration for behavior.py:Click +# Example URLs are random. Believe me. +match: ^www\.facebook\.com$ +selector: +  - description: Show comments and replies/nested comments on user pages. +    selector: form[action="/ajax/ufi/modify.php"] a[data-testid^="UFI2CommentsPagerRenderer/pager_depth_"] +    urls: ["https://www.facebook.com/tagesschau"] +  - description: Initially show comments below a single post/video, i.e. /user/post/123. +    selector: form[action="/ajax/ufi/modify.php"] a[data-testid="UFI2CommentsCount/root"] +    urls: ["https://www.facebook.com/tagesschau/posts/10157061068659407"] +  - description: Close the “register now” nag screen. For screenshots. +    selector: a#expanding_cta_close_button[role=button] +    urls: ["https://www.facebook.com/tagesschau"] +--- +match: ^twitter\.com$ +selector: +  - description: Expand threads. +    selector: a.ThreadedConversation-moreRepliesLink +    urls: ["https://twitter.com/realDonaldTrump/status/1068826073775964160"] +  - description: Show hidden profiles. +    selector: button.ProfileWarningTimeline-button +    urls: ["https://twitter.com/CookieCyboid"] +  - description: Show hidden/sensitive media. For screen-/snapshots. +    selector: button.Tombstone-action.js-display-this-media +    urls: ["https://twitter.com/CookieCyboid/status/1070807283305713665"] +  - description: Show more replies. +    selector: button.ThreadedConversation-showMoreThreadsButton +    urls: ["https://twitter.com/fuglydug/status/1172160128101076995"] +--- +match: ^disqus\.com$ +selector: +  - description: Load more comments. +    selector: a.load-more__button +    multi: True +--- +# new layout +match: ^www\.reddit\.com$ +selector: +  - description: Show more comments. +    selector: div[id^=moreComments-] > div > p +    # reddit’s javascript ignores events if too frequent +    throttle: 500 +    urls: ["https://www.reddit.com/r/subredditcancer/comments/b2b80f/we_are_moderators_of_rwatchpeopledie_amaa_just/"] +--- +# old layout +match: ^(old|np)\.reddit\.com$ +selector: +  - description: Show more comments. +    selector: span.morecomments a +    # reddit’s javascript ignores events if too frequent +    throttle: 500 +    urls: ["https://old.reddit.com/r/subredditcancer/comments/b2b80f/we_are_moderators_of_rwatchpeopledie_amaa_just/"] +--- +match: ^www\.youtube\.com$ +selector: +  - description: Expand single comment. +    selector: ytd-comment-thread-renderer span[slot=more-button] +    urls: ["https://www.youtube.com/watch?v=udtFqQuBFSc"] +  - description: Show more comment thread replies. +    selector: div.ytd-comment-replies-renderer > yt-next-continuation > paper-button +    urls: ["https://www.youtube.com/watch?v=Lov0T3eXI2k"] +    multi: True +--- +match: ^www\.patreon\.com$ +selector: +  - description: Load more comments. +    selector: div[data-tag=post-card] button[data-tag=loadMoreCommentsCta] +    urls: ["https://www.patreon.com/posts/what-im-on-22124040"] +--- +match: ^(www\.)?gab\.com$ +selector: +  - description: Load more posts. +    selector: div.item-list[role=feed] button.load-more +    multi: True +    urls: ["https://gab.com/gab"] +--- +match: ^(www\.)?github\.com$ +selector: +  - description: Show hidden issue items. +    urls: ["https://github.com/dominictarr/event-stream/issues/116"] +    selector: div#discussion_bucket form.ajax-pagination-form button.ajax-pagination-btn +--- +match: ^www\.gamasutra\.com$ +selector: +    - description: Load more comments. +      urls: ["http://www.gamasutra.com/blogs/RaminShokrizade/20130626/194933/The_Top_F2P_Monetization_Tricks.php"] +      selector: div#dynamiccomments div.viewTopCmts a +--- +match: ^(www\.)?steamcommunity\.com$ +selector: +    - description: Load more content. +      urls: ["https://steamcommunity.com/app/252950/reviews/?p=1&browsefilter=toprated&filterLanguage=all"] +      selector: "#GetMoreContentBtn a" +      multi: True +--- +match: ^imgur\.com$ +selector: +    - description: Load more images of an album. +      urls: ["https://imgur.com/a/JG1yc"] +      selector: div.js-post-truncated a.post-loadall +    - description: Expand all comments. For snapshots. +      urls: ["https://imgur.com/a/JG1yc"] +      selector: div.comments-info span.comments-expand +    - description: Show bad replies. for snapshots. +      urls: ["https://imgur.com/gallery/jRzMfRG"] +      selector: div#comments div.bad-captions a.link +--- +match: ^(www\.)?vimeo\.com$ +selector: +    - description: Load more videos on profile page. +      urls: ["https://vimeo.com/dsam4a"] +      selector: div.profile_main div.profile-load-more__button--wrapper button +#    XXX: this works when using a non-headless browser, but does not otherwise +#    - description: Expand video comments +#      urls: ["https://vimeo.com/22439234"] +#      selector: section#comments button.iris_comment-more +#      multi: True diff --git a/crocoite/data/cookies.txt b/crocoite/data/cookies.txt new file mode 100644 index 0000000..6ac62c3 --- /dev/null +++ b/crocoite/data/cookies.txt @@ -0,0 +1,9 @@ +# Default cookies for crocoite. This file does *not* use Netscape’s cookie +# file format. Lines are expected to be in Set-Cookie format. +# And this line is a comment. + +# Reddit: +# skip over 18 prompt +over18=1; Domain=www.reddit.com +# skip quarantined subreddit prompt +_options={%22pref_quarantine_optin%22:true}; Domain=www.reddit.com diff --git a/crocoite/data/extract-links.js b/crocoite/data/extract-links.js index 4d1a3d0..5a4f9f0 100644 --- a/crocoite/data/extract-links.js +++ b/crocoite/data/extract-links.js @@ -25,11 +25,26 @@ function isClickable (o) {  }  /* --- end copy&paste */ -let x = document.body.querySelectorAll('a[href]');  let ret = []; +['a[href]', 'area[href]'].forEach (function (s) { +	let x = document.querySelectorAll(s); +	for (let i=0; i < x.length; i++) { +		if (isClickable (x[i])) { +			ret.push (x[i].href); +		} +	} +}); + +/* If Chrome loads plain-text documents it’ll wrap them into <pre>. Check those + * for links as well, assuming the whole line is a link (i.e. list of links). */ +let x = document.querySelectorAll ('body > pre');  for (let i=0; i < x.length; i++) { -	if (isClickable (x[i])) { -		ret.push (x[i].href); +	if (isVisible (x[i])) { +		x[i].innerText.split ('\n').forEach (function (s) { +			if (s.match ('^https?://')) { +				ret.push (s); +			} +		});  	}  }  return ret; /* immediately return results, for use with Runtime.evaluate() */ diff --git a/crocoite/data/screenshot.js b/crocoite/data/screenshot.js new file mode 100644 index 0000000..a9a41e1 --- /dev/null +++ b/crocoite/data/screenshot.js @@ -0,0 +1,20 @@ +/* Find and scrollable full-screen elements and return their actual size + */ +(function () { +/* limit the number of elements queried */ +let elem = document.querySelectorAll ('body > div'); +let ret = []; +for (let i = 0; i < elem.length; i++) { +	let e = elem[i]; +	let s = window.getComputedStyle (e); +	if (s.getPropertyValue ('position') == 'fixed' && +			s.getPropertyValue ('overflow') == 'auto' && +			s.getPropertyValue ('left') == '0px' && +			s.getPropertyValue ('right') == '0px' && +			s.getPropertyValue ('top') == '0px' && +			s.getPropertyValue ('bottom') == '0px') { +		ret.push (e.scrollHeight); +	} +} +return ret; /* immediately return results, for use with Runtime.evaluate() */ +})(); diff --git a/crocoite/data/scroll.js b/crocoite/data/scroll.js index 13e856d..be88edf 100644 --- a/crocoite/data/scroll.js +++ b/crocoite/data/scroll.js @@ -1,23 +1,38 @@  /*	Continuously scrolls the page   */ -var __crocoite_stop__ = false;  (function(){ -function scroll (event) { -	if (__crocoite_stop__) { -		return false; -	} else { +class Scroll { +	constructor (options) { +		this.scrolled = new Map (); +		this.interval = window.setInterval (this.scroll.bind (this), 200); +	} + +	stop() { +		window.clearInterval (this.interval); +		window.scrollTo (0, 0); +		this.scrolled.forEach (function (value, key, map) { +			key.scrollTop = value; +		}); +	} +	/* save initial scroll state */ +	save(obj) { +		if (!this.scrolled.has (obj)) { +			this.scrolled.set (obj, obj.scrollTop); +		} +	} +	/* perform a single scroll step */ +	scroll (event) {  		window.scrollBy (0, window.innerHeight/2); -		document.querySelectorAll ('*').forEach ( +		document.querySelectorAll ('html body *').forEach (  			function (d) { -				if (d.clientHeight < d.scrollHeight) { +				if (d.scrollHeight-d.scrollTop > d.clientHeight) { +					this.save (d);  					d.scrollBy (0, d.clientHeight/2);  				} -			}); +			}.bind (this));  		return true;  	}  } -function onload (event) { -    window.setInterval (scroll, 200); -} -document.addEventListener("DOMContentLoaded", onload); -}()); + +return Scroll; +}()) | 
