Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

use cldr #24

Merged
merged 2 commits into from
Jul 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 1 addition & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,14 @@

Elixir library for accessing ISO3166-1 (country) and ISO3166-2 (subdivision) data as well as geoname data for cities. Source data comes from the upstream [debian iso-codes](https://salsa.debian.org/iso-codes-team/iso-codes) package and the [Geonames](http://www.geonames.org/) project.



### Countries

The data for countries comes primarily from the [debian iso-codes](https://salsa.debian.org/iso-codes-team/iso-codes) package. The data file for that is stored in `priv/iso_3166-1.json`. We also
manually add some data that is missing from upstream. Overrides can be found in `priv/override/iso_3166-1.json`.

### Subdivisions

The data for subdivisions comes primarily from the [debian iso-codes](https://salsa.debian.org/iso-codes-team/iso-codes) package. The data file for that is stored in `priv/iso_3166-2.json`. The
subdivision names in this file are mostly in local language (i.e. Wien instead of Vienna). English translations are obtained from Wikipedia using a scraper. The translations found in `priv/iso_3166-2.en-translations.json` are used when available instead of the original name.
The data for subdivisions comes primarily from the [debian iso-codes](https://salsa.debian.org/iso-codes-team/iso-codes) package. The data file for that is stored in `priv/iso_3166-2.json`. The subdivision names in this file are sometimes in the local language (e.g. Berne instead of Bern). English translations are obtained from CLDR and Wikipedia.

We also add some data manually that is missing from upstream. Overrides can be found in `priv/override/iso_3166-2.json`

Expand Down
4 changes: 4 additions & 0 deletions config/config.exs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
import Config

# Use Location.Cldr as the default ex_cldr backend.
config :ex_cldr, default_backend: Location.Cldr
3 changes: 3 additions & 0 deletions config/runtime.exs
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
import Config

# The default ex_cldr backend is set once here, unconditionally, so it applies
# to every environment and does not need repeating in the block below.
config :ex_cldr, default_backend: Location.Cldr

# Settings that apply to every environment except :prod.
if config_env() != :prod do
  config :location, :lightweight, true
end
18 changes: 0 additions & 18 deletions lib/location/subdivision.ex
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,16 @@ defmodule Location.Subdivision do
def load() do
ets = :ets.new(@ets_table, [:named_table])

translations = File.read!(translations_file()) |> Jason.decode!()

File.read!(source_file())
|> Jason.decode!()
|> Map.fetch!("3166-2")
|> Enum.each(fn entry ->
entry = translate_entry(translations, entry)
:ets.insert(ets, {entry["code"], to_struct(entry)})
end)

File.read!(restore_source_file())
|> Jason.decode!()
|> Enum.each(fn entry ->
entry = translate_entry(translations, entry)
:ets.insert(ets, {entry["code"], to_struct(entry)})
end)

Expand Down Expand Up @@ -65,16 +61,6 @@ defmodule Location.Subdivision do
}
end

# Replaces the entry's "name" with the translation stored under the entry's
# "code", if there is one; otherwise the entry is returned untouched.
defp translate_entry(translations, entry) do
  translations
  |> Map.get(entry["code"])
  |> apply_translation(entry)
end

# No translation found: keep the entry as it is.
defp apply_translation(nil, entry), do: entry

# Translation found: it overrides the original name.
defp apply_translation(translation, entry), do: Map.put(entry, "name", translation)

# Path of the ISO 3166-2 data file inside the :location application directory.
defp source_file, do: Application.app_dir(:location, "priv/iso_3166-2.json")
Expand All @@ -83,10 +69,6 @@ defmodule Location.Subdivision do
Application.app_dir(:location, "priv/restore/iso_3166-2.json")
end

# Path of the English translations file inside the :location application directory.
defp translations_file,
  do: Application.app_dir(:location, "priv/iso_3166-2.en-translations.json")

# Path of the ISO 3166-2 override file inside the :location application directory.
defp override_source_file,
  do: Application.app_dir(:location, "priv/override/iso_3166-2.json")
Expand Down
1 change: 1 addition & 0 deletions mix.exs
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ defmodule Location.MixProject do
[
{:jason, "~> 1.3"},
{:nimble_csv, "~> 1.1"},
{:ex_cldr_territories, "~> 2.9", only: [:dev, :test]},
{:floki, "~> 0.36.0", only: [:dev, :test]},
{:flow, "~> 1.0", only: [:dev, :test]}
]
Expand Down
4 changes: 4 additions & 0 deletions mix.lock
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
%{
"cldr_utils": {:hex, :cldr_utils, "2.27.0", "a75d5cdaaf6b7432eb10f547e6abe635c94746985c5b78e35bbbd08b16473b6c", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:certifi, "~> 2.5", [hex: :certifi, repo: "hexpm", optional: true]}, {:decimal, "~> 1.9 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}], "hexpm", "516f601e28da10b8f1f3af565321c4e3da3b898a0b50a5e5be425eff76d587e1"},
"decimal": {:hex, :decimal, "2.1.1", "5611dca5d4b2c3dd497dec8f68751f1f1a54755e8ed2a966c2633cf885973ad6", [:mix], [], "hexpm", "53cfe5f497ed0e7771ae1a475575603d77425099ba5faef9394932b35020ffcc"},
"ex_cldr": {:hex, :ex_cldr, "2.39.2", "4a3a77797da8f900369822ea9353adfa035a5bbbbfff09b2d3d1b6fa461768e3", [:mix], [{:cldr_utils, "~> 2.25", [hex: :cldr_utils, repo: "hexpm", optional: false]}, {:decimal, "~> 1.6 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:gettext, "~> 0.19", [hex: :gettext, repo: "hexpm", optional: true]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:nimble_parsec, "~> 0.5 or ~> 1.0", [hex: :nimble_parsec, repo: "hexpm", optional: true]}], "hexpm", "02fd8913ef28d1b2a4190fd8016c2dec1f2291c9ce56c17d7649848c0261a6eb"},
"ex_cldr_territories": {:hex, :ex_cldr_territories, "2.9.0", "6db9bc0741688201f6730caa91805181ea4f3d40f3e56ea1bdd8c9ed73edaf73", [:mix], [{:ex_cldr, "~> 2.38", [hex: :ex_cldr, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}], "hexpm", "93ec0ae059a23680d418101e1972922efe7bdca7f177207a2a8932e7dd51d33f"},
"floki": {:hex, :floki, "0.36.2", "a7da0193538c93f937714a6704369711998a51a6164a222d710ebd54020aa7a3", [:mix], [], "hexpm", "a8766c0bc92f074e5cb36c4f9961982eda84c5d2b8e979ca67f5c268ec8ed580"},
"flow": {:hex, :flow, "1.2.4", "1dd58918287eb286656008777cb32714b5123d3855956f29aa141ebae456922d", [:mix], [{:gen_stage, "~> 1.0", [hex: :gen_stage, repo: "hexpm", optional: false]}], "hexpm", "874adde96368e71870f3510b91e35bc31652291858c86c0e75359cbdd35eb211"},
"gen_stage": {:hex, :gen_stage, "1.2.1", "19d8b5e9a5996d813b8245338a28246307fd8b9c99d1237de199d21efc4c76a1", [:mix], [], "hexpm", "83e8be657fa05b992ffa6ac1e3af6d57aa50aace8f691fcf696ff02f8335b001"},
Expand Down
4 changes: 4 additions & 0 deletions mix_tasks/cldr.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
defmodule Location.Cldr do
  # Cldr backend module: English is the only locale and the default locale,
  # and Cldr.Territory is the only provider.
  require Cldr.Territory.Backend

  use Cldr,
    default_locale: "en",
    locales: ["en"],
    providers: [Cldr.Territory]
end
100 changes: 0 additions & 100 deletions mix_tasks/scraper.ex

This file was deleted.

8 changes: 0 additions & 8 deletions mix_tasks/update_english_translations.ex

This file was deleted.

81 changes: 80 additions & 1 deletion mix_tasks/update_iso_data.ex
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
defmodule Mix.Tasks.UpdateIsoData do
require Logger
use Mix.Task

@countries_src "https://salsa.debian.org/iso-codes-team/iso-codes/-/raw/main/data/iso_3166-1.json"
Expand All @@ -14,7 +15,17 @@ defmodule Mix.Tasks.UpdateIsoData do
%{"3166-2" => new_subdivisions} = Jason.decode!(new_subdivisions)

new_subdivisions =
Enum.map(new_subdivisions, fn subdivision -> Map.delete(subdivision, "parent") end)
Enum.map(new_subdivisions, fn %{"code" => code} = subdivision ->
subdivision
|> Map.delete("parent")
|> Map.update!(
"name",
fn debian_name ->
short_code = String.replace(code, "-", "")
cldr_name(short_code) || manual_en_name(code, debian_name)
end
)
end)

new_subdivisions_codes = MapSet.new(new_subdivisions, fn %{"code" => code} -> code end)

Expand Down Expand Up @@ -51,4 +62,72 @@ defmodule Mix.Tasks.UpdateIsoData do
new_subdivisions = Jason.encode_to_iodata!(%{"3166-2" => new_subdivisions}, pretty: true)
File.write!(@subdivisions_dest, new_subdivisions)
end

# Looks up the name for `short_code` through the Location.Cldr territory backend.
# Returns the name on `{:ok, name}` and nil for any other result.
defp cldr_name(short_code) do
  case Location.Cldr.Territory.from_subdivision_code(short_code) do
    {:ok, en_name} -> en_name
    _other -> nil
  end
end

# TODO: add these to CLDR https://github.com/pedberg-icu/cldr/blob/main/common/subdivisions/en.xml
# or https://salsa.debian.org/iso-codes-team/iso-codes/-/blob/main/iso_3166-2/en.po
#
# Manual English names keyed by the hyphenated subdivision code. This map only
# exists at compile time: it feeds the `for` below, which turns every entry
# into a `manual_en_name/2` clause.
manual_code_to_en_name = %{
# https://en.wikipedia.org/wiki/ISO_3166-2:ID
"ID-PD" => "Southwest Papua",
"ID-PE" => "Highland Papua",
"ID-PS" => "South Papua",
"ID-PT" => "Central Papua",
# https://en.wikipedia.org/wiki/ISO_3166-2:IS
"IS-HUG" => "Huna Settlement",
"IS-SKR" => "Skagafjordur",
# https://en.wikipedia.org/wiki/ISO_3166-2:KP
"KP-15" => "Kaesong",
# https://en.wikipedia.org/wiki/ISO_3166-2:KZ
"KZ-10" => "Abai",
"KZ-11" => "Akmola",
"KZ-15" => "Aktobe",
"KZ-19" => "Almaty",
"KZ-23" => "Atyrau",
"KZ-27" => "West Kazakhstan",
"KZ-31" => "Jambyl",
"KZ-33" => "Jetisu",
"KZ-35" => "Karaganda",
"KZ-39" => "Kostanay",
"KZ-43" => "Kyzylorda",
"KZ-47" => "Mangystau",
"KZ-55" => "Pavlodar",
"KZ-59" => "North Kazakhstan",
"KZ-61" => "Turkistan",
"KZ-62" => "Ulytau",
"KZ-63" => "East Kazakhstan",
"KZ-71" => "Astana",
"KZ-75" => "Almaty City",
"KZ-79" => "Shymkent"
}

# Generates one `manual_en_name/2` clause per map entry. Each clause matches
# its code as a literal and returns the manual name through
# `warn_if_debian_got_english/3`, which also receives the Debian name.
for {code, en_name} <- manual_code_to_en_name do
defp manual_en_name(unquote(code), debian_name) do
warn_if_debian_got_english(unquote(code), unquote(en_name), debian_name)
end
end

# Fallback clause for codes that have no manual override: logs a warning
# asking for the name to be checked, then returns the Debian name unchanged.
defp manual_en_name(code, debian_name) do
  Logger.warning(
    "no translation override for #{code}, assuming Debian name is English, please check: #{debian_name}"
  )

  debian_name
end

# Always returns `manual_name`. As a side effect, logs a warning when the
# Debian name is already equal to the manual override for `code`.
defp warn_if_debian_got_english(code, manual_name, debian_name) do
  case manual_name == debian_name do
    true ->
      Logger.warning(
        "Debian seems to have localized #{code}, the manual translation can be removed"
      )

    false ->
      :ok
  end

  manual_name
end
end
Loading
Loading