Skip to content

Commit

Permalink
'Island' doesn't register as 'Iceland' (IS) anymore if there is no la…
Browse files Browse the repository at this point in the history
…nguage specified
  • Loading branch information
pleplenier committed Nov 5, 2024
1 parent f8487fe commit 0e2236c
Show file tree
Hide file tree
Showing 5 changed files with 21 additions and 3 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
## [5.2.3] - 2024-11-04

- "Pacific Island" no longer matches Iceland.
- "Island" no longer matches Iceland when language is not set.

## [5.2.2] - 2024-08-23

Expand Down
12 changes: 10 additions & 2 deletions geoconvert/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
DE_POSTCODE_RANGE,
NUTS_CODES_BY_COUNTRY,
all_nuts_regex,
ambiguous_countries,
br_postcode_regex,
br_state_code_regex,
br_state_name_regex,
Expand Down Expand Up @@ -413,8 +414,15 @@ def _full_name_to_country_code(text, lang, language_to_full_names):
# If the language is unknown, just do not use any language.
language_to_full_names = {}

for lang, full_names in language_to_full_names.items():
country_code = _full_name_to_country_code_for_lang(text, lang, full_names)
for language, full_names in language_to_full_names.items():
# When no language is given, drop ambiguous country names from the lookup so that common words are not matched as countries
if not lang:
full_names = {
name: code
for name, code in full_names.items()
if name not in ambiguous_countries
}
country_code = _full_name_to_country_code_for_lang(text, language, full_names)
if country_code:
return country_code

Expand Down
1 change: 1 addition & 0 deletions geoconvert/data/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from .capitals import capitals_de, capitals_en, capitals_fr, language_to_capital_names
from .countries import (
ambiguous_countries,
countries_de,
countries_en,
countries_fr,
Expand Down
5 changes: 5 additions & 0 deletions geoconvert/data/countries.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,11 @@
"pacific island": None, # en
}

# Country names that are also common words in another language (e.g. "island",
# which is the German name for Iceland). They are excluded from matching when
# no language is specified.
ambiguous_countries = ["island"]


countries_fr = {
**special_countries,
**{
Expand Down
5 changes: 4 additions & 1 deletion tests/test_countries.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,10 @@ class TestCountries:
("Rhode Island", {}, "US"), # en
("Pacific island", {}, None), # en
("Fiji/Pacific island", {}, "FJ"), # en
("island", {}, None), # en
# However, when "island" is singular instead of plural, the name is not
# recognized; without a language it must not be mistaken for Iceland.
("Solomon Island Nationals", {}, "IS"), # en
("Solomon Island Nationals", {}, None), # en
# Any capitalization for lang works
("Germany", {"lang": "en"}, "DE"),
("Germany", {"lang": "En"}, "DE"),
Expand Down Expand Up @@ -166,6 +167,7 @@ def test_country_name_to_country_code_fr(self, input_data, expected):
("state of palestine", "PS"),
("palestine, state of", "PS"),
("Irak", None),
("island", None),
],
)
def test_country_name_to_country_code_en(self, input_data, expected):
Expand Down Expand Up @@ -194,6 +196,7 @@ def test_country_name_to_country_code_en(self, input_data, expected):
("Dschibuti-Stadt", "DJ"),
("elfenbeinküste und ecuador ", "CI"),
("Fiji/Pacific island", None), # en
("island", "IS"),
],
)
def test_country_name_to_country_code_de(self, input_data, expected):
Expand Down

0 comments on commit 0e2236c

Please sign in to comment.