From 84c3f69293fa79d752127410c7468038c907c96a Mon Sep 17 00:00:00 2001 From: Lars-Dominik Braun Date: Sun, 10 Dec 2017 12:31:07 +0100 Subject: Add distributed archiving Using celery. Also adds a plugin for the IRC bot sopel. Code still needs some love, but it should work. --- README.rst | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'README.rst') diff --git a/README.rst b/README.rst index 3a7aa7c..3d5af5f 100644 --- a/README.rst +++ b/README.rst @@ -66,3 +66,41 @@ also saved. This causes its own set of issues though: - JavaScript-based navigation does not work. +Distributed crawling +-------------------- + +Configure using celeryconfig.py + +.. code:: python + + broker_url = 'pyamqp://' + result_backend = 'rpc://' + warc_filename = '{domain}-{date}-{id}.warc.gz' + temp_dir = '/tmp/' + finished_dir = '/tmp/finished' + +Start a Celery worker:: + + celery -A crocoite.cli worker --loglevel=info + +Then queue an archive job:: + + crocoite-standalone --distributed … + +Alternative: IRC bot using sopel_. Use contrib/celerycrocoite.py + +~/.sopel/default.cfg + +.. code:: ini + + [core] + nick = chromebot + host = irc.efnet.fr + port = 6667 + owner = someone + extra = /path/to/crocoite/contrib + enable = celerycrocoite + channels = #somechannel + +Then in #somechannel ``chromebot: ao <url>`` + -- cgit v1.2.3