summaryrefslogtreecommitdiff
path: root/corpus/bbcarabic
diff options
context:
space:
mode:
authorLars-Dominik Braun <lars@6xq.net>2019-11-30 14:19:00 +0100
committerLars-Dominik Braun <lars@6xq.net>2019-11-30 14:19:00 +0100
commit810c8ff0bea17214b4e4c5ce802ad89b5ad6e2c9 (patch)
treef3db99cc0870af583c4b1d7d3f2376e943014fc1 /corpus/bbcarabic
parentfeb84c69b4e3e7294f69456b50a1fb678566cf24 (diff)
downloadlulua-810c8ff0bea17214b4e4c5ce802ad89b5ad6e2c9.tar.gz
lulua-810c8ff0bea17214b4e4c5ce802ad89b5ad6e2c9.tar.bz2
lulua-810c8ff0bea17214b4e4c5ce802ad89b5ad6e2c9.zip
Add missing corpus metadata files
Diffstat (limited to 'corpus/bbcarabic')
-rw-r--r--corpus/bbcarabic/metadata.yaml7
1 files changed, 7 insertions, 0 deletions
diff --git a/corpus/bbcarabic/metadata.yaml b/corpus/bbcarabic/metadata.yaml
new file mode 100644
index 0000000..d1c06a5
--- /dev/null
+++ b/corpus/bbcarabic/metadata.yaml
@@ -0,0 +1,7 @@
+source:
+ name: BBC Arabic
+ url: http://www.bbc.com/arabic
+extractor:
+ name: Custom scripts
+date: 2019-07
+count: [149901, articles]