summary | refs | log | tree | commit | diff
path: root/corpus/arwiki
diff options
context:
space:
mode:
author: Lars-Dominik Braun <lars@6xq.net> 2019-11-30 14:19:00 +0100
committer: Lars-Dominik Braun <lars@6xq.net> 2019-11-30 14:19:00 +0100
commit: 810c8ff0bea17214b4e4c5ce802ad89b5ad6e2c9 (patch)
tree: f3db99cc0870af583c4b1d7d3f2376e943014fc1 /corpus/arwiki
parent: feb84c69b4e3e7294f69456b50a1fb678566cf24 (diff)
download: lulua-810c8ff0bea17214b4e4c5ce802ad89b5ad6e2c9.tar.gz
lulua-810c8ff0bea17214b4e4c5ce802ad89b5ad6e2c9.tar.bz2
lulua-810c8ff0bea17214b4e4c5ce802ad89b5ad6e2c9.zip
Add missing corpus metadata files
Diffstat (limited to 'corpus/arwiki')
-rw-r--r-- corpus/arwiki/metadata.yaml 8
1 files changed, 8 insertions, 0 deletions
diff --git a/corpus/arwiki/metadata.yaml b/corpus/arwiki/metadata.yaml
new file mode 100644
index 0000000..2a1ff72
--- /dev/null
+++ b/corpus/arwiki/metadata.yaml
@@ -0,0 +1,8 @@
+source:
+ name: Arabic Wikipedia
+ url: https://ar.wikipedia.org/
+extractor:
+ name: wikiextractor
+ url: https://github.com/attardi/wikiextractor/tree/3162bb6c3c9ebd2d15be507aa11d6fa818a454ac
+date: 2019-07-01
+count: [857386, articles]