Skip to content

Commit

Permalink
add russian names to geo data
Browse files Browse the repository at this point in the history
  • Loading branch information
anmarchenko committed Sep 3, 2024
1 parent 1b26a4b commit 83921cb
Show file tree
Hide file tree
Showing 5 changed files with 107 additions and 50 deletions.
6 changes: 5 additions & 1 deletion lib/hamster_travel/geo/geonames.ex
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,12 @@ defmodule HamsterTravel.Geo.Geonames do
def import do
Logger.info("Starting geonames data import...")

# import countries first
Countries.import()

Enum.each(Geo.list_country_iso_codes(), &Features.import/1)
# import features (regions and cities) and translations for each country
Enum.each(Geo.list_country_iso_codes(), fn country_code ->
Features.import(country_code)
end)
end
end
69 changes: 30 additions & 39 deletions lib/hamster_travel/geo/geonames/client.ex
Original file line number Diff line number Diff line change
Expand Up @@ -9,74 +9,65 @@ defmodule HamsterTravel.Geo.Geonames.Client do
case Req.get("#{@base_url}/countryInfo.txt", options()) do
{:ok, %Req.Response{body: body} = resp} ->
if resp.status < 400 do
Logger.info("Countries downloaded")
{:ok, body}
else
:telemetry.execute(
[:hamster_travel, :geonames, :fetch_countries],
%{error: 1},
%{
reason: "status_#{resp.status}"
}
)

Logger.error("Failed to download countries: #{inspect(resp)}")
{:error, "HTTP error: #{resp.status}"}
end

{:error, reason} = error_tuple ->
:telemetry.execute(
[:hamster_travel, :geonames, :fetch_countries],
%{error: 1},
%{
reason: "network"
}
)

Logger.error("Failed to download countries: #{inspect(reason)}")
error_tuple
end
end

def fetch_features_for_country(iso_code) do
Logger.info("Downloading features for #{iso_code}...")
def fetch_features_for_country(country_code) do
Logger.info("Downloading features for #{country_code}...")

case Req.get("#{@base_url}/#{iso_code}.zip", options()) do
case Req.get("#{@base_url}/#{country_code}.zip", options()) do
{:ok, resp} ->
if resp.status < 400 do
parse_features_response(resp)
else
:telemetry.execute(
[:hamster_travel, :geonames, :fetch_features],
%{error: 1},
%{
reason: "status_#{resp.status}"
}
)
Logger.info("Features downloaded for #{country_code}")

Logger.error("Failed to download features for #{iso_code}: #{inspect(resp)}")
parse_geonames_archive(resp)
else
Logger.error("Failed to download features for #{country_code}: #{inspect(resp)}")
{:error, "HTTP error: #{resp.status}"}
end

{:error, reason} = error_tuple ->
:telemetry.execute(
[:hamster_travel, :geonames, :fetch_features],
%{error: 1},
%{
reason: "network"
}
)
Logger.error("Failed to download features for #{country_code}: #{inspect(reason)}")
error_tuple
end
end

def fetch_alternate_names_for_country(country_code) do
Logger.info("Downloading alternate names for #{country_code}...")

Logger.error("Failed to download features for #{iso_code}: #{inspect(reason)}")
case Req.get("#{@base_url}/alternatenames/#{country_code}.zip", options()) do
{:ok, resp} ->
if resp.status < 400 do
Logger.info("Alternate names downloaded for #{country_code}")

parse_geonames_archive(resp)
else
Logger.error("Failed to download alternate names for #{country_code}: #{inspect(resp)}")
{:error, "HTTP error: #{resp.status}"}
end

{:error, reason} = error_tuple ->
Logger.error("Failed to download alternate names for #{country_code}: #{inspect(reason)}")
error_tuple
end
end

defp parse_features_response(%Req.Response{body: [{~c"readme.txt", _}, {_, csv}]}) do
defp parse_geonames_archive(%Req.Response{body: [{~c"readme.txt", _}, {_, csv}]}) do
{:ok, csv}
end

# suboptimal - test-only case in production code!!
defp parse_features_response(%Req.Response{body: csv}), do: {:ok, csv}
defp parse_geonames_archive(%Req.Response{body: csv}), do: {:ok, csv}

defp options do
Application.get_env(:hamster_travel, :geonames_req_options, [])
Expand Down
10 changes: 6 additions & 4 deletions lib/hamster_travel/geo/geonames/features.ex
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
defmodule HamsterTravel.Geo.Geonames.Features do
alias HamsterTravel.Geo.Geonames.{Client, FeaturesImporter}
alias HamsterTravel.Geo.Geonames.{Client, FeaturesImporter, Translations}
require Logger

def import(country_code) do
case Client.fetch_features_for_country(country_code) do
{:ok, features} ->
FeaturesImporter.process(features, country_code)
download_translations = Task.async(fn -> Translations.fetch(country_code) end)

with {:ok, features} <- Client.fetch_features_for_country(country_code),
{:ok, translations} <- Task.await(download_translations) do
FeaturesImporter.process(features, country_code, translations)
else
_ ->
nil
end
Expand Down
16 changes: 10 additions & 6 deletions lib/hamster_travel/geo/geonames/features_importer.ex
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ defmodule HamsterTravel.Geo.Geonames.FeaturesImporter do

require Logger

def process(features, country_code) do
def process(features, country_code, translations) do
Logger.info("Importing features for #{country_code}...")

features =
Expand All @@ -26,14 +26,16 @@ defmodule HamsterTravel.Geo.Geonames.FeaturesImporter do
features
|> Enum.reduce(
%FeaturesImportData{regions: [], cities: [], valid_region_codes: valid_region_codes},
&parse_feature/2
fn feature, import_data ->
parse_feature(feature, import_data, translations)
end
)

{regions_count, _} =
Repo.insert_all(
Region,
features.regions,
on_conflict: {:replace_all_except, [:id, :inserted_at, :name_ru]},
on_conflict: {:replace_all_except, [:id, :inserted_at]},
conflict_target: :geonames_id
)

Expand All @@ -48,7 +50,7 @@ defmodule HamsterTravel.Geo.Geonames.FeaturesImporter do
Repo.insert_all(
City,
chunk,
on_conflict: {:replace_all_except, [:id, :inserted_at, :name_ru]},
on_conflict: {:replace_all_except, [:id, :inserted_at]},
conflict_target: :geonames_id
)

Expand Down Expand Up @@ -80,7 +82,8 @@ defmodule HamsterTravel.Geo.Geonames.FeaturesImporter do
_,
_
],
%FeaturesImportData{} = import_data
%FeaturesImportData{} = import_data,
translations
) do
admin1_code = nilify_invalid_region_code(admin1_code, import_data.valid_region_codes)

Expand All @@ -89,6 +92,7 @@ defmodule HamsterTravel.Geo.Geonames.FeaturesImporter do

geo_map = %{
name: name,
name_ru: Map.get(translations, geoname_id),
country_code: country_code,
region_code: admin1_code,
geonames_id: geoname_id,
Expand Down Expand Up @@ -117,7 +121,7 @@ defmodule HamsterTravel.Geo.Geonames.FeaturesImporter do
end
end

defp parse_feature(_, import_data) do
defp parse_feature(_, import_data, _) do
import_data
end

Expand Down
56 changes: 56 additions & 0 deletions lib/hamster_travel/geo/geonames/translations.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
defmodule HamsterTravel.Geo.Geonames.Translations do
  @moduledoc """
  Builds a lookup of Russian names for the geonames entries of one country.

  The raw data is obtained from `Client.fetch_alternate_names_for_country/1`
  and is treated as tab-separated text with one alternate name per line.
  """

  require Logger

  alias HamsterTravel.Geo.Geonames.Client

  @doc """
  Downloads the alternate names for `country_code` and indexes the Russian ones.

  Returns `{:ok, translations}` where `translations` maps a geonames id (as it
  appears in the downloaded text) to a Russian name. Any result of the download
  other than `{:ok, body}` is reported as `{:error, :network}`.
  """
  def fetch(country_code) do
    Logger.info("Fetching translations for #{country_code}...")

    with {:ok, alternative_names} <- Client.fetch_alternate_names_for_country(country_code) do
      {:ok, index_by_geonames_id(alternative_names)}
    else
      _ -> {:error, :network}
    end
  end

  @doc """
  Merges a single parsed `translation` into the `acc` map.

  A geonames id that is not yet in `acc` always gets the translation's name.
  For an id that is already present, the stored name is replaced only when the
  incoming translation is flagged as preferred.
  """
  def store_translation(translation, acc) do
    id = translation.geonames_id
    candidate = translation.name_ru

    case acc do
      %{^id => current} ->
        %{acc | id => if(translation.preferred, do: candidate, else: current)}

      _ ->
        Map.put(acc, id, candidate)
    end
  end

  # Walks the text line by line, in order, keeping only rows that parse into a
  # usable Russian translation.
  defp index_by_geonames_id(raw_text) do
    raw_text
    |> String.split("\n")
    |> Enum.reduce(%{}, fn line, index ->
      case line |> String.split("\t") |> parse_translation() do
        nil -> index
        translation -> store_translation(translation, index)
      end
    end)
  end

  # A usable row has exactly ten columns and the language column set to "ru".
  # Rows flagged as short, colloquial or historic names are discarded.
  defp parse_translation([
         _,
         geonames_id,
         "ru",
         altname,
         is_preferred,
         is_short,
         is_colloquial,
         is_historic,
         _,
         _
       ]) do
    if "1" in [is_short, is_colloquial, is_historic] do
      nil
    else
      %{geonames_id: geonames_id, name_ru: altname, preferred: is_preferred == "1"}
    end
  end

  # Any other language or column count is ignored.
  defp parse_translation(_), do: nil
end

0 comments on commit 83921cb

Please sign in to comment.