Skip to content

Commit

Permalink
'Pacific Island' no longer matches Iceland (IS)
Browse files Browse the repository at this point in the history
  • Loading branch information
pleplenier committed Nov 4, 2024
1 parent c64ba41 commit f8487fe
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 2 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@ All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)

## [5.2.3] - 2024-11-04

- "Pacific Island" no longer matches Iceland.

## [5.2.2] - 2024-08-23

- Detection for "moldavie" as "MD" in French.
Expand Down
7 changes: 7 additions & 0 deletions geoconvert/data/countries.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,12 @@
"haute vienne": "FR", # fr
}

# We want to make sure "pacific island" is not registered as IS (Iceland) in German.
# Other languages don't have issues with it, and we shouldn't prevent them from
# retrieving other countries such as FJ (Fiji) in "Fiji/Pacific Island".
special_countries_de = {
"pacific island": None, # en; None = no country code for this phrase
}

countries_fr = {
**special_countries,
Expand Down Expand Up @@ -597,6 +603,7 @@


countries_de = {
**special_countries_de,
**special_countries,
**{
"agypten": "EG",
Expand Down
6 changes: 4 additions & 2 deletions geoconvert/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,12 @@ def safe_string(text):
'how are you'
>>> safe_string("l’ocean")
"l'ocean"
>>> safe_string('Fiji/Pacific Island')
'fiji pacific island'
"""
text = remove_accents(text)
# Replace "-" and ":" with a whitespace
text = re.sub(r"[-:]", " ", text)
# Replace "-", ":" and "/" with a whitespace
text = re.sub(r"[-:/]", " ", text)
# Replace weird '
text = re.sub(r"[ʼ]", "'", text)
# Only keep word or space characters as well as "_", and "'".
Expand Down
3 changes: 3 additions & 0 deletions tests/test_countries.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ class TestCountries:
("Solomon Islands", {}, "SB"), # en
("Prince Edward Island", {}, "CA"), # en
("Rhode Island", {}, "US"), # en
("Pacific island", {}, None), # en
("Fiji/Pacific island", {}, "FJ"), # en
# However, in cases where island is singular instead of plural,
# there can be confusion.
("Solomon Island Nationals", {}, "IS"), # en
Expand Down Expand Up @@ -191,6 +193,7 @@ def test_country_name_to_country_code_en(self, input_data, expected):
(" Land der Hinrichtung : Deutschland", "DE"),
("Dschibuti-Stadt", "DJ"),
("elfenbeinküste und ecuador ", "CI"),
("Fiji/Pacific island", None), # en
],
)
def test_country_name_to_country_code_de(self, input_data, expected):
Expand Down

0 comments on commit f8487fe

Please sign in to comment.