From 41f342e12b975e785de9d755d38eb92cf38f5ec5 Mon Sep 17 00:00:00 2001
From: Lars-Dominik Braun
Date: Sat, 16 Nov 2019 13:40:39 +0100
Subject: Add OpenStreetMap label corpus
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Extract node labels (name:ar) from OpenStreetMap’s planet dump.
Heavily leans towards a few common words (“street”, obviously),
but we should be fine since the corpus is not that large.
---
 corpus/osm/metadata.yaml | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 corpus/osm/metadata.yaml

(limited to 'corpus')

diff --git a/corpus/osm/metadata.yaml b/corpus/osm/metadata.yaml
new file mode 100644
index 0000000..e3aff73
--- /dev/null
+++ b/corpus/osm/metadata.yaml
@@ -0,0 +1,5 @@
+source:
+  name: OpenStreetMap Arabic Labels
+  url: https://planet.openstreetmap.org/
+date: 2019-11-04
+count: [376148, labels]
-- 
cgit v1.2.3