From 013d15a127a93a60fd27b7554b31a32fe91a39c1 Mon Sep 17 00:00:00 2001 From: Yury Lebedev Date: Tue, 17 Oct 2017 00:43:02 +0300 Subject: [PATCH 1/2] Grammar notes update for Swift implementation --- Grammar.md | 21 +++++++++++++++++---- languages/grammar/ru.json | 21 +++++++-------------- 2 files changed, 24 insertions(+), 18 deletions(-) diff --git a/Grammar.md b/Grammar.md index 8a6d8874d..127899452 100644 --- a/Grammar.md +++ b/Grammar.md @@ -15,7 +15,7 @@ The quite universal and simplier solution is the changing street names with the The required grammatical case should be specified right in instruction's substitution variables: - `{way_name}` and `{rotary_name}` variables in translated instructions should be appended with required grammar case name after colon: `{way_name:accusative}` for example -- [languages/grammar](languages/grammar/) folder should contain language-specific JSON file with regular expressions for specified grammar case: +- [languages/grammar](languages/grammar/) folder should contain language-specific JSON file with regular expressions for specified grammatical case: ```json { "v5": { @@ -28,9 +28,21 @@ The required grammatical case should be specified right in instruction's substit - Instruction text formatter ([index.js](index.js) in this module) should: - check `{way_name}` and `{rotary_name}` variables for optional grammar case after colon: `{way_name:accusative}` - find appropriate regular expressions block for target language and specified grammar case - - call standard [string replace with regular expression](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/replace) for each expression in block passing result from previous call to the next; the first call should enclose original street name with whitespaces to make parsing words in names a bit simplier. 
+ - call standard [string replace with regular expression](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/replace) for each expression in block passing result from previous call to the next; the first call should enclose original street name with whitespaces to make parsing several words inside name a bit simpler. - Strings replacement with regular expression is available in almost all other programming language and so this should not be the problem for other code used OSRM Text Instructions' data only. -- If there is no regular expression matched source name (that's for names from foreign country for example), original name is returned without changes. This is also expected behavior of standard [string replace with regular expression](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/replace). And the same behavior is expected in case of missing grammar JSON file or grammar case inside it. +- Grammar JSON could have [regular expression flags in JS notation](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp): +```json +{ + "meta": { + "regExpFlags": "ig" + }, +``` +- Please note, not all JS regular expression flags could be supported in other languages. + For example, [OSRM Text Instructions for Swift](https://github.com/Project-OSRM/osrm-text-instructions.swift/) doesn't support "non-global match" and so always assumes the `g` flag is turned on. + So if some regular expressions rely on stopping after their first match, please include `^` and/or `$` in the patterns for exact matching or return the "finished" string in the replace expression without enclosing whitespaces. +- If no regular expression matches the source name (for example, for names from a foreign country), the original name is returned without changes. 
+ This is also the expected behavior of standard [string replace with regular expression](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/replace). + And the same behavior is expected in case of missing grammar JSON file or grammar case inside it. ### Example @@ -50,4 +62,5 @@ Russian _"Большая Монетная улица"_ street from St Petersburg - Russian regular expressions are based on [Garmin Russian TTS voices update](https://github.com/yuryleb/garmin-russian-tts-voices) project; see [file with regular expressions to apply to source text before pronouncing by TTS](https://github.com/yuryleb/garmin-russian-tts-voices/blob/master/src/Pycckuu__Milena%202.10/RULESET.TXT). - There is another grammar-supporting module - [jquery.i18n](https://github.com/wikimedia/jquery.i18n) - but unfortunately it has very poor implementation in part of grammatical case applying and is supposed to work with single words only. -- Actually it would be great to get street names also in target language not from default OSM `name` only - there are several multi-lingual countries supporting several `name:` names for streets. But this the subject to address to [OSRM engine](https://github.com/Project-OSRM/osrm-backend) first. +- Actually it would be great to get street names also in target language not from default OSM `name` only - there are several multi-lingual countries supporting several `name:` names for streets. + But this is a subject to address to the [OSRM engine](https://github.com/Project-OSRM/osrm-backend). 
diff --git a/languages/grammar/ru.json b/languages/grammar/ru.json index 91e69485b..23006a329 100644 --- a/languages/grammar/ru.json +++ b/languages/grammar/ru.json @@ -370,12 +370,9 @@ ["^ (\\d+)-е (\\S+[ео])е [Шш]оссе ", " $1му $2му шоссе "], ["^ (\\d+)-е (\\S+ье) [Шш]оссе ", " $1му $2му шоссе "], - [" Третому ", " Третьему "], - [" третому ", " третьему "], - ["жому ", "жьему "], - ["жой ", "жей "], - ["чому ", "чьему "], - ["чой ", "чей "] + [" ([Тт])ретому ", " $1ретьему "], + ["([жч])ому ", "$1ьему "], + ["([жч])ой ", "$1ей "] ], "genitive": [ ["^ (\\S+)ая [Аа]ллея ", " $1ой аллеи "], @@ -663,10 +660,8 @@ ["^ (\\d+)-е (\\S+[ео])е [Шш]оссе ", " $1го $2го шоссе "], ["^ (\\d+)-е (\\S+ье) [Шш]оссе ", " $1го $2го шоссе "], - [" Третого ", " Третьего "], - [" третого ", " третьего "], - ["жого ", "жьего "], - ["чого ", "чьего "] + [" ([Тт])ретого ", " $1ретьего "], + ["([жч])ого ", "$1ьего "] ], "prepositional": [ ["^ (\\S+)ая [Аа]ллея ", " $1ой аллее "], @@ -954,10 +949,8 @@ ["^ (\\d+)-е (\\S+[ео])е [Шш]оссе ", " $1м $2м шоссе "], ["^ (\\d+)-е (\\S+ье) [Шш]оссе ", " $1м $2м шоссе "], - [" Третом ", " Третьем "], - [" третом ", " третьем "], - ["жом ", "жьем "], - ["чом ", "чьем "] + [" ([Тт])ретом ", " $1ретьем "], + ["([жч])ом ", "$1ьем "] ] } } From 30816aa5db038d7c33ee4de0c84275272e824d3d Mon Sep 17 00:00:00 2001 From: Yury Lebedev Date: Tue, 17 Oct 2017 23:24:27 +0300 Subject: [PATCH 2/2] Missing dash in numerals endings --- languages/grammar/ru.json | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/languages/grammar/ru.json b/languages/grammar/ru.json index 23006a329..b6948e7b9 100644 --- a/languages/grammar/ru.json +++ b/languages/grammar/ru.json @@ -649,16 +649,16 @@ ["^ (\\S+ье) ([Пп]олу)?[Кк]ольцо ", " $1го $2кольца "], ["^ (\\S+[ео])е (\\S+[ео])е ([Пп]олу)?[Кк]ольцо ", " $1го $2го $3кольца "], ["^ (\\S+ье) (\\S+[ео])е ([Пп]олу)?[Кк]ольцо ", " $1го $2го $3кольца "], - ["^ (\\d+)-е (\\S+[ео])е ([Пп]олу)?[Кк]ольцо ", " $1го 
$2го $3кольца "], - ["^ (\\d+)-е (\\S+ье) ([Пп]олу)?[Кк]ольцо ", " $1го $2го $3кольца "], + ["^ (\\d+)-е (\\S+[ео])е ([Пп]олу)?[Кк]ольцо ", " $1-го $2го $3кольца "], + ["^ (\\d+)-е (\\S+ье) ([Пп]олу)?[Кк]ольцо ", " $1-го $2го $3кольца "], ["^ ([Пп]олу)?[Кк]ольцо ", " $1кольца "], ["^ (\\S+[ео])е [Шш]оссе ", " $1го шоссе "], ["^ (\\S+ье) [Шш]оссе ", " $1го шоссе "], ["^ (\\S+[ео])е (\\S+[ео])е [Шш]оссе ", " $1го $2го шоссе "], ["^ (\\S+ье) (\\S+[ео])е [Шш]оссе ", " $1го $2го шоссе "], - ["^ (\\d+)-е (\\S+[ео])е [Шш]оссе ", " $1го $2го шоссе "], - ["^ (\\d+)-е (\\S+ье) [Шш]оссе ", " $1го $2го шоссе "], + ["^ (\\d+)-е (\\S+[ео])е [Шш]оссе ", " $1-го $2го шоссе "], + ["^ (\\d+)-е (\\S+ье) [Шш]оссе ", " $1-го $2го шоссе "], [" ([Тт])ретого ", " $1ретьего "], ["([жч])ого ", "$1ьего "] @@ -938,16 +938,16 @@ ["^ (\\S+ье) ([Пп]олу)?[Кк]ольцо ", " $1м $2кольце "], ["^ (\\S+[ео])е (\\S+[ео])е ([Пп]олу)?[Кк]ольцо ", " $1м $2м $3кольце "], ["^ (\\S+ье) (\\S+[ео])е ([Пп]олу)?[Кк]ольцо ", " $1м $2м $3кольце "], - ["^ (\\d+)-е (\\S+[ео])е ([Пп]олу)?[Кк]ольцо ", " $1м $2м $3кольце "], - ["^ (\\d+)-е (\\S+ье) ([Пп]олу)?[Кк]ольцо ", " $1м $2м $3кольце "], + ["^ (\\d+)-е (\\S+[ео])е ([Пп]олу)?[Кк]ольцо ", " $1-м $2м $3кольце "], + ["^ (\\d+)-е (\\S+ье) ([Пп]олу)?[Кк]ольцо ", " $1-м $2м $3кольце "], ["^ ([Пп]олу)?[Кк]ольцо ", " $1кольце "], ["^ (\\S+[ео])е [Шш]оссе ", " $1м шоссе "], ["^ (\\S+ье) [Шш]оссе ", " $1м шоссе "], ["^ (\\S+[ео])е (\\S+[ео])е [Шш]оссе ", " $1м $2м шоссе "], ["^ (\\S+ье) (\\S+[ео])е [Шш]оссе ", " $1м $2м шоссе "], - ["^ (\\d+)-е (\\S+[ео])е [Шш]оссе ", " $1м $2м шоссе "], - ["^ (\\d+)-е (\\S+ье) [Шш]оссе ", " $1м $2м шоссе "], + ["^ (\\d+)-е (\\S+[ео])е [Шш]оссе ", " $1-м $2м шоссе "], + ["^ (\\d+)-е (\\S+ье) [Шш]оссе ", " $1-м $2м шоссе "], [" ([Тт])ретом ", " $1ретьем "], ["([жч])ом ", "$1ьем "]