Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Apply grammar rules to token replacement values #49

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
129 changes: 120 additions & 9 deletions OSRMTextInstructions/OSRMTextInstructions.swift
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,55 @@ import MapboxDirections

// Will automatically read localized Instructions.plist
let OSRMTextInstructionsStrings = NSDictionary(contentsOfFile: Bundle(for: OSRMInstructionFormatter.self).path(forResource: "Instructions", ofType: "plist")!)!
/// Contents of the `Grammar.plist` resource found in the bundle that contains `OSRMInstructionFormatter`,
/// or `nil` when that resource is missing or cannot be loaded as a dictionary.
let OSRMTextInstructionsGrammar: NSDictionary? = Bundle(for: OSRMInstructionFormatter.self)
    .path(forResource: "Grammar", ofType: "plist")
    .flatMap { NSDictionary(contentsOfFile: $0) }

extension NSRegularExpression.Options {
    /**
     Creates regular expression options from a string of JavaScript `RegExp` flags.

     Each character of the string is treated as one flag. Flags without a corresponding option are ignored.

     - parameter javaScriptFlags: The flags to convert, for example `"gi"`.
     */
    init(javaScriptFlags: String) {
        var options: NSRegularExpression.Options = []
        for flag in javaScriptFlags {
            switch flag {
            case "i":
                options.insert(.caseInsensitive)
            case "m":
                options.insert(.anchorsMatchLines)
            case "s":
                // JavaScript's dotAll flag: `.` also matches line separators.
                options.insert(.dotMatchesLineSeparators)
            case "u":
                // Character classes are always Unicode-aware in ICU regular expressions.
                options.insert(.useUnicodeWordBoundaries)
            default:
                // `g`, `y` and any unrecognized flag map to no option.
                break
            }
        }
        self.init(rawValue: options.rawValue)
    }
}

/**
A text type whose `{token}` placeholders can be replaced and whose contents can be inflected by grammar rules.
*/
protocol Tokenized {
associatedtype T

/**
Replaces `{tokens}` in the receiver using the given closure.
*/
func replacingTokens(using interpolator: ((TokenType) -> T)) -> T
func replacingTokens(using interpolator: ((TokenType, String?) -> T)) -> T

/**
Returns the receiver inflected by the grammar rules stored under `variant` within the grammar for `version`.

The conforming types in this file return the receiver unchanged when no matching rules are found.
*/
func inflected(into variant: String, version: String) -> T
}

extension String: Tokenized {
/// A copy of the receiver with its first character uppercased and every other character left as is.
public var sentenceCased: String {
    let initial = String(prefix(1))
    let remainder = String(dropFirst())
    return initial.uppercased() + remainder
}

public func replacingTokens(using interpolator: ((TokenType) -> String)) -> String {
public func replacingTokens(using interpolator: ((TokenType, String?) -> String)) -> String {
let scanner = Scanner(string: self)
scanner.charactersToBeSkipped = nil
var result = ""
Expand All @@ -38,9 +71,17 @@ extension String: Tokenized {
continue
}

var variant: NSString?
if scanner.scanString(":", into: nil) {
guard scanner.scanUpTo("}", into: &variant) else {
result += ":"
continue
}
}

if scanner.scanString("}", into: nil) {
if let tokenType = TokenType(description: token! as String) {
result += interpolator(tokenType)
result += interpolator(tokenType, variant as String?)
} else {
result += "{\(token!)}"
}
Expand All @@ -59,10 +100,34 @@ extension String: Tokenized {
}
return result
}

func inflected(into variant: String, version: String) -> String {
guard let grammar = OSRMTextInstructionsGrammar?[version] as? [String: Any] else {
return self
}

guard let rules = grammar[variant] as? [[String]] else {
return self
}

var grammaticalReplacement = " \(self) "
var regularExpressionOptions: NSRegularExpression.Options = []
if let meta = OSRMTextInstructionsGrammar?["meta"] as? [String: String],
let flags = meta["regExpFlags"] {
regularExpressionOptions = NSRegularExpression.Options(javaScriptFlags: flags)
}

for rule in rules {
let regularExpression = try! NSRegularExpression(pattern: rule[0], options: regularExpressionOptions)
grammaticalReplacement = regularExpression.stringByReplacingMatches(in: grammaticalReplacement, options: [], range: NSRange(location: 0, length: grammaticalReplacement.characters.count), withTemplate: rule[1])
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this method replace all matches rather than only the first (as if the JS RegExp g flag were always on)? I hope this will not cause problems, but it should be documented in the OSRMTI Grammar feature description.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, the NSRegularExpression.stringByReplacingMatches(in:options:range:withTemplate:) method always replaces all matches. Replacing just one occurrence would be a more manual affair, either finding the first matching substring and replacing it, or enumerating the matches but stopping the enumeration after the first iteration.

I was thinking that the presence of ^ in each of the regular expression patterns would avoid issues where a rule unexpectedly matches multiple times, but that would be a brittle assumption unless we remove the ^ anchors from the JSON file and add them to the patterns at runtime.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, adding ^ at runtime is a bad idea. So we just need to warn developers in the Grammar description about the implicitly turned-on g flag in Swift, and perhaps add some tests for this.

}

return grammaticalReplacement.trimmingCharacters(in: .whitespaces)
}
}

extension NSAttributedString: Tokenized {
public func replacingTokens(using interpolator: ((TokenType) -> NSAttributedString)) -> NSAttributedString {
public func replacingTokens(using interpolator: ((TokenType, String?) -> NSAttributedString)) -> NSAttributedString {
let scanner = Scanner(string: string)
scanner.charactersToBeSkipped = nil
let result = NSMutableAttributedString()
Expand All @@ -78,12 +143,21 @@ extension NSAttributedString: Tokenized {

var token: NSString?
guard scanner.scanUpTo("}", into: &token) else {
result.append(NSAttributedString(string: "}"))
continue
}

var variant: NSString?
if scanner.scanString(":", into: nil) {
guard scanner.scanUpTo("}", into: &variant) else {
result.append(NSAttributedString(string: "}"))
continue
}
}

if scanner.scanString("}", into: nil) {
if let tokenType = TokenType(description: token! as String) {
result.append(interpolator(tokenType))
result.append(interpolator(tokenType, variant as String?))
}
} else {
result.append(NSAttributedString(string: token! as String))
Expand All @@ -101,6 +175,34 @@ extension NSAttributedString: Tokenized {
}
return result as NSAttributedString
}

/**
 Returns the receiver with the grammar rules for the given variant applied to its text.

 Rules are looked up in `OSRMTextInstructionsGrammar` under `version`, then under `variant`. Each rule is a
 pair of strings: a regular expression pattern and a replacement template. Every match of every rule is
 replaced, in rule order.

 - parameter variant: The key of the rule list inside the grammar for `version`.
 - parameter version: The key of the grammar inside `OSRMTextInstructionsGrammar`.
 - returns: The inflected attributed string, or the receiver itself when no rules are found.
 */
@nonobjc func inflected(into variant: String, version: String) -> NSAttributedString {
    guard let grammar = OSRMTextInstructionsGrammar?[version] as? [String: Any],
        let rules = grammar[variant] as? [[String]] else {
        return self
    }

    // Surround the text with single spaces; they are stripped again before returning.
    // NOTE(review): presumably so rule patterns can anchor on a leading or trailing space — confirm against the grammar files.
    let grammaticalReplacement = NSMutableAttributedString(string: " ")
    grammaticalReplacement.append(self)
    grammaticalReplacement.append(NSAttributedString(string: " "))

    var regularExpressionOptions: NSRegularExpression.Options = []
    if let meta = OSRMTextInstructionsGrammar?["meta"] as? [String: String],
        let flags = meta["regExpFlags"] {
        regularExpressionOptions = NSRegularExpression.Options(javaScriptFlags: flags)
    }

    let text = grammaticalReplacement.mutableString
    for rule in rules {
        // A rule needs both a pattern and a template, and the pattern must compile.
        // Flag a malformed rule in debug builds, but skip it instead of crashing in release builds.
        guard rule.count >= 2,
            let regularExpression = try? NSRegularExpression(pattern: rule[0], options: regularExpressionOptions) else {
            assertionFailure("Malformed grammar rule \(rule) for variant \(variant)")
            continue
        }
        regularExpression.replaceMatches(in: text, options: [], range: NSRange(location: 0, length: text.length), withTemplate: rule[1])
    }

    // Remove the padding spaces (and any other leading or trailing spaces).
    text.replaceOccurrences(of: "^ +| +$", with: "", options: .regularExpression, range: NSRange(location: 0, length: text.length))
    return grammaticalReplacement
}
}

public class OSRMInstructionFormatter: Formatter {
Expand Down Expand Up @@ -323,15 +425,21 @@ public class OSRMInstructionFormatter: Formatter {
let attributedName = NSAttributedString(string: name, attributes: attrs)
let attributedRef = NSAttributedString(string: ref, attributes: attrs)
let phrase = NSAttributedString(string: self.phrase(named: .nameWithCode), attributes: attrs)
wayName = phrase.replacingTokens(using: { (tokenType) -> NSAttributedString in
wayName = phrase.replacingTokens(using: { (tokenType, variant) -> NSAttributedString in
var replacement: NSAttributedString
switch tokenType {
case .wayName:
return modifyValueByKey?(.wayName, attributedName) ?? attributedName
replacement = attributedName
case .code:
return modifyValueByKey?(.code, attributedRef) ?? attributedRef
replacement = attributedRef
default:
fatalError("Unexpected token type \(tokenType) in name-and-ref phrase")
}

if let variant = variant {
replacement = replacement.inflected(into: variant, version: version)
}
return modifyValueByKey?(tokenType, replacement) ?? replacement
})
} else if let ref = ref, isMotorway, let decimalRange = ref.rangeOfCharacter(from: .decimalDigits), !decimalRange.isEmpty {
let attributedRef = NSAttributedString(string: ref, attributes: attrs)
Expand Down Expand Up @@ -411,7 +519,7 @@ public class OSRMInstructionFormatter: Formatter {
if step.finalHeading != nil { bearing = Int(step.finalHeading! as Double) }

// Replace tokens
let result = NSAttributedString(string: instruction, attributes: attrs).replacingTokens { (tokenType) -> NSAttributedString in
let result = NSAttributedString(string: instruction, attributes: attrs).replacingTokens { (tokenType, variant) -> NSAttributedString in
var replacement: String
switch tokenType {
case .code: replacement = step.codes?.first ?? ""
Expand All @@ -430,6 +538,9 @@ public class OSRMInstructionFormatter: Formatter {
if tokenType == .wayName {
return wayName // already modified above
} else {
if let variant = variant {
replacement = replacement.inflected(into: variant, version: version)
}
let attributedReplacement = NSAttributedString(string: replacement, attributes: attrs)
return modifyValueByKey?(tokenType, attributedReplacement) ?? attributedReplacement
}
Expand Down
2 changes: 1 addition & 1 deletion OSRMTextInstructionsTests/OSRMTextInstructionsTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ class OSRMTextInstructionsTests: XCTestCase {
let fixtureOptions = json["options"] as! [String: String]

let expectedValue = (json["phrases"] as! [String: String])["en"]
let actualValue = phrase?.replacingTokens(using: { (tokenType) -> String in
let actualValue = phrase?.replacingTokens(using: { (tokenType, variant) -> String in
var replacement: String?
switch tokenType {
case .firstInstruction:
Expand Down
12 changes: 10 additions & 2 deletions OSRMTextInstructionsTests/TokenTests.swift
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import XCTest
import OSRMTextInstructions
@testable import OSRMTextInstructions

class TokenTests: XCTestCase {
func testReplacingTokens() {
XCTAssertEqual("Dead Beef", "Dead Beef".replacingTokens { _ in "" })
XCTAssertEqual("Food", "F{ref}{ref}d".replacingTokens { _ in "o" })

XCTAssertEqual("Take the left stairs to the 20th floor", "Take the {modifier} stairs to the {nth} floor".replacingTokens { (tokenType) -> String in
XCTAssertEqual("Take the left stairs to the 20th floor", "Take the {modifier} stairs to the {nth} floor".replacingTokens { (tokenType, variant) -> String in
switch tokenType {
case .modifier:
return "left"
Expand All @@ -19,8 +19,16 @@ class TokenTests: XCTestCase {
})

XCTAssertEqual("{👿}", "{👿}".replacingTokens { _ in "👼" })
XCTAssertEqual("{👿:}", "{👿:}".replacingTokens { _ in "👼" })
XCTAssertEqual("{👿:💣}", "{👿:💣}".replacingTokens { _ in "👼" })
XCTAssertEqual("{", "{".replacingTokens { _ in "🕳" })
XCTAssertEqual("{💣", "{💣".replacingTokens { _ in "🕳" })
XCTAssertEqual("}", "}".replacingTokens { _ in "🕳" })
}

/// Checks accusative inflection of a Russian street name.
/// The assertion only runs when the formatter's bundle prefers a localization whose identifier starts with "ru".
func testInflectingStrings() {
    let localizations = Bundle(for: OSRMInstructionFormatter.self).preferredLocalizations
    guard localizations.contains(where: { $0.starts(with: "ru") }) else {
        return
    }

    XCTAssertEqual("Бармалееву улицу", "Бармалеева улица".inflected(into: "accusative", version: "v5"))
}
}
16 changes: 15 additions & 1 deletion json2plist.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
# Transform select osrm-text-instructions language files from json to plist
git submodule init
git submodule update
cd "./osrm-text-instructions/languages/translations/" || exit 1

cd "./osrm-text-instructions/languages/translations/" || exit 1
for file in ./*; do
if [ "$file" = "./en.json" ]; then
LANGUAGE="Base"
Expand All @@ -18,4 +18,18 @@ for file in ./*; do
plutil -convert xml1 "./${file}" -o "${LANGUAGE_DIR}/Instructions.plist"
done

# Convert every grammar JSON file into a Grammar.plist inside the matching .lproj directory.
cd "../grammar/" || exit 1
for file in ./*; do
    if [ "$file" = "./en.json" ]; then
        # en.json is written to Base.lproj.
        LANGUAGE="Base"
    else
        # Quote the path so that unusual file names are not word-split or glob-expanded.
        LANGUAGE=$(basename "$file")
        LANGUAGE=${LANGUAGE%.json}
    fi

    # Falls back to the source tree when the Xcode build variables are not set.
    LANGUAGE_DIR="${BUILT_PRODUCTS_DIR:-../../../OSRMTextInstructions/}/${UNLOCALIZED_RESOURCES_FOLDER_PATH:-}/${LANGUAGE}.lproj"
    mkdir -p "${LANGUAGE_DIR}"
    plutil -convert xml1 "./${file}" -o "${LANGUAGE_DIR}/Grammar.plist"
done

cd - || exit 1