Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pronunciations refactoring #1377

Merged
merged 2 commits into from
Sep 3, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 0 additions & 17 deletions tests/test_2_render.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,6 @@
import pytest

from wikidict import render
from wikidict.lang import pronunciation


@pytest.mark.parametrize(
"regexp, code, expected",
[
(pronunciation["ca"], "{{ca-pron|/as/}}", ["as"]),
(pronunciation["ca"], "{{ca-pron|or=/əɫ/}}", ["əɫ"]),
(pronunciation["ca"], "{{ca-pron|or=/əɫ/|occ=/eɫ/}}", ["əɫ"]),
(pronunciation["ca"], "{{ca-pron|q=àton|or=/əɫ/|occ=/eɫ/|rima=}}", ["əɫ"]),
(pronunciation["en"], "{{IPA|en|/ʌs/}}", ["ʌs"]),
(pronunciation["en"], "{{IPA|en|/ʌs/}}, {{IPA|en|/ʌz/}}", ["ʌs", "ʌz"]),
(pronunciation["en"], "{{IPA|en|/ʌs/|/ʌz/}}", ["ʌs", "ʌz"]),
],
)
def test_find_pronunciations(regexp, code, expected):
assert render.find_pronunciations(code, regexp) == expected


def test_simple():
Expand Down
25 changes: 20 additions & 5 deletions tests/test_ca.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,30 @@
import pytest

from wikidict.lang.ca import find_pronunciations
from wikidict.render import parse_word
from wikidict.utils import process_templates


@pytest.mark.parametrize(
"code, expected",
[
("", []),
("{{ca-pron|/as/}}", ["/as/"]),
("{{ca-pron|or=/əɫ/}}", ["/əɫ/"]),
("{{ca-pron|or=/əɫ/|occ=/eɫ/}}", ["/əɫ/"]),
("{{ca-pron|q=àton|or=/əɫ/|occ=/eɫ/|rima=}}", ["/əɫ/"]),
],
)
def test_find_pronunciations(code, expected):
assert find_pronunciations(code) == expected


@pytest.mark.parametrize(
"word, pronunciations, gender, etymology, definitions",
[
(
"-ass-",
["as"],
["/as/"],
"",
["Del sufix <i>-às</i> amb valor augmentatiu."],
["Infix que afegeix un matís augmentatiu."],
Expand All @@ -25,7 +40,7 @@
),
(
"AFI",
["ˈa.fi"],
["/ˈa.fi/"],
"",
["sigles"],
[
Expand Down Expand Up @@ -95,7 +110,7 @@
),
(
"cas",
["ˈkas"],
["/ˈkas/"],
"m",
["Del llatí <i>casus</i>."],
[
Expand Down Expand Up @@ -181,7 +196,7 @@
),
(
"el",
["əɫ"],
["/əɫ/"],
"f",
[],
[
Expand All @@ -200,7 +215,7 @@
["Cobert per a protegir plantes del vent o del fred extrem."],
),
("Mn.", [], "", [], ["mossèn com a tractament davant el nom"]),
("PMF", ["ˌpeˈe.məˌe.fə"], "", [], ["Preguntes Més Freqüents."]),
("PMF", ["/ˌpeˈe.məˌe.fə/"], "", [], ["Preguntes Més Freqüents."]),
("pen", [], "", [], []),
(
"si",
Expand Down
25 changes: 22 additions & 3 deletions tests/test_de.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,42 @@
import pytest

from wikidict.lang.de import find_pronunciations
from wikidict.render import parse_word
from wikidict.utils import process_templates


@pytest.mark.parametrize(
"code, expected",
[
("", []),
(
":{{IPA}} {{Lautschrift|ˈʁɪndɐˌsteːk}}",
["[ˈʁɪndɐˌsteːk]"],
),
(
":{{IPA}} {{Lautschrift|ˈʁɪndɐˌsteːk}}, {{Lautschrift|ˈʁɪndɐˌʃteːk}}, {{Lautschrift|ˈʁɪndɐˌsteɪ̯k}}",
["[ˈʁɪndɐˌsteːk]", "[ˈʁɪndɐˌʃteːk]", "[ˈʁɪndɐˌsteɪ̯k]"],
),
],
)
def test_find_pronunciations(code, expected):
assert find_pronunciations(code) == expected


@pytest.mark.parametrize(
"word, pronunciations, gender, etymology, definitions, variants",
[
(
"CIA",
["siːaɪ̯ˈɛɪ̯"],
["[siːaɪ̯ˈɛɪ̯]"],
"mf",
["Abkürzung von Central Intelligence Agency"],
["US-amerikanischer Auslandsnachrichtendienst"],
[],
),
(
"volley",
["ˈvɔli", "ˈvɔle", "ˈvɔlɛɪ̯"],
["[ˈvɔli]", "[ˈvɔle]", "[ˈvɔlɛɪ̯]"],
"",
[
"Dem seit 1960 im Duden lexikalisierten Wort liegt die englische Kollokation <i>at/on the <i>volley</i></i> ‚aus der Luft‘ zugrunde.", # noqa
Expand All @@ -27,7 +46,7 @@
],
[],
),
("trage", ["ˈtʁaːɡə"], "", [], [], ["tragen"]),
("trage", ["[ˈtʁaːɡə]"], "", [], [], ["tragen"]),
("daß", [], "", [], [], ["dass"]),
],
)
Expand Down
13 changes: 13 additions & 0 deletions tests/test_el.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,22 @@
import pytest

from wikidict.lang.el import find_pronunciations
from wikidict.render import parse_word
from wikidict.utils import process_templates


@pytest.mark.parametrize(
"code, expected",
[
("", []),
("{{ΔΦΑ|tɾeˈlos|γλ=el}}", ["tɾeˈlos"]),
("{{ΔΦΑ|γλ=el|ˈni.xta}}", ["ˈni.xta"]),
],
)
def test_find_pronunciations(code, expected):
assert find_pronunciations(code) == expected


@pytest.mark.parametrize(
"word, pronunciations, gender, etymology, definitions, variants",
[
Expand Down
50 changes: 38 additions & 12 deletions tests/test_en.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,31 @@
import pytest

from wikidict.lang.en import find_pronunciations
from wikidict.render import parse_word
from wikidict.utils import process_templates


@pytest.mark.parametrize(
"code, expected",
[
("", []),
("{{IPA|en|/ʌs/}}", ["/ʌs/"]),
("{{IPA|en|/ʌs/|/ʌs/}}", ["/ʌs/"]),
("{{IPA|en|/ʌs/}} {{IPA|en|/ʌs/}}", ["/ʌs/"]),
("{{IPA|en|/ʌs/}}, {{IPA|en|/ʌz/}}", ["/ʌs/", "/ʌz/"]),
("{{IPA|en|/ʌs/|/ʌz/}}", ["/ʌs/", "/ʌz/"]),
],
)
def test_find_pronunciations(code, expected):
assert find_pronunciations(code) == expected


@pytest.mark.parametrize(
"word, pronunciations, etymology, definitions",
[
(
"ab",
["æb"],
["/æb/"],
["Abbreviation of <b>abdominal</b> <b>muscles</b>."],
[
"<i>(informal)</i> abdominal muscle. <small>[Mid 20<sup>th</sup> century.]</small>",
Expand All @@ -23,7 +39,7 @@
),
(
"cum",
["kʌm", "kʊm"],
["/kʌm/", "/kʊm/"],
["Learned borrowing from Latin <i>cum</i> (“with”)."],
[
"<i>Used in indicating a thing with two roles, functions, or natures, or a thing that has changed from one to another.</i>", # noqa
Expand All @@ -39,7 +55,7 @@
),
(
"efficient",
["ɪˈfɪʃənt"],
["/ɪˈfɪʃənt/", "/əˈfɪʃənt/"],
[
"1398, “making,” from Old French, from Latin <i>efficientem</i>, nominative <i>efficiēns</i>, participle of <i>efficere</i> (“work out, accomplish”) (see <b>effect</b>). Meaning “productive, skilled” is from 1787. <i>Efficiency apartment</i> is first recorded 1930, American English." # noqa
],
Expand All @@ -53,7 +69,7 @@
),
(
"it's",
["ɪts"],
["/ɪts/"],
["Contraction of ‘it is’ or ‘it has’."],
[
"<i>Contraction of</i> <b>it is</b>.",
Expand All @@ -65,7 +81,7 @@
),
(
"Mars",
["ˈmɑːz"],
["/ˈmɑːz/", "/ˈmɑɹz/"],
[
"From Middle English <i>Mars</i>, from Latin <i>Mārs</i> (“god of war”), from older Latin (older than 75 <small>B.C.E.</small>) <i>Māvors</i>. <i>𐌌𐌀𐌌𐌄𐌓𐌔</i> was his Oscan name. He was also known as <i>Marmor</i>, <i>Marmar</i> and <i>Maris</i>, the latter from the Etruscan deity Maris." # noqa
],
Expand All @@ -79,7 +95,7 @@
),
(
"portmanteau",
["pɔːtˈmæn.təʊ"],
["/pɔːtˈmæn.təʊ/", "/pɔːɹtˈmæntoʊ/", "/ˌpɔːɹtmænˈtoʊ/"],
[
"Middle French <i>portemanteau</i> (“coat stand”), from <i>porte</i> (“carry”) + <i>manteau</i> (“coat”)." # noqa
],
Expand All @@ -95,7 +111,7 @@
),
(
"someone",
["ˈsʌmwʌn"],
["/ˈsʌmwʌn/"],
["From <i>some</i>&nbsp;+&nbsp;<i>one</i>."],
[
"some person.",
Expand All @@ -105,7 +121,7 @@
),
(
"the",
["ˈðiː"],
["/ˈðiː/", "/ˈðʌ/", "/ði/", "/ðɪ/", "/ðə/"],
[
"From Middle English <i>þe</i>, from Old English <i>þē</i> <i>m</i> (“the, that”, demonstrative pronoun), a late variant of <i>sē</i>, the <i>s-</i> (which occurred in the masculine and feminine nominative singular only) having been replaced by the <i>þ-</i> from the oblique stem.", # noqa
"Originally neutral nominative, in Middle English it superseded all previous Old English nominative forms (<i>sē</i> <i>m</i>, <i>sēo</i> <i>f</i>, <i>þæt</i> <i>n</i>, <i>þā</i> <i>p</i>); <i>sē</i> is from Proto-West Germanic <i>*siz</i>, from Proto-Germanic <i>*sa</i>, ultimately from Proto-Indo-European <i>*só</i>.", # noqa
Expand All @@ -129,7 +145,7 @@
),
(
"um",
["ʌm", "əːm"],
["/ʌm/", "/əːm/"],
["Onomatopoeic."],
[
"<i>Expression of hesitation, uncertainty or space filler in conversation</i>. See uh.",
Expand All @@ -141,7 +157,7 @@
),
(
"us",
["ʌs", "ʌz"],
["/ʌs/", "/ʌz/", "/əs/", "/əz/"],
[
"From Middle English <i>us</i>, from Old English <i>ūs</i> (“us”, dative personal pronoun), from Proto-Germanic <i>*uns</i> (“us”), from Proto-Indo-European <i>*ne-</i>, <i>*nō-</i>, <i>*n-ge-</i>, <i>*n̥smé</i> (“us”). Cognate with Saterland Frisian <i>uus</i> (“us”), West Frisian <i>us</i>, <i>ús</i> (“us”), Low German <i>us</i> (“us”), Dutch <i>ons</i> (“us”), German <i>uns</i> (“us”), Danish <i>os</i> (“us”), Latin <i>nōs</i> (“we, us”)." # noqa
],
Expand All @@ -157,7 +173,17 @@
),
(
"water",
["ˈwɔːtə"],
[
"/ˈwɔːtə/",
"/ˈwɔtər/",
"/ˈwɒtə/",
"/ˈwɒtəɹ/",
"/ˈwɔtəɹ/",
"/ˈwɑtəɹ/",
"/ˈwʊtəɹ/",
"/ˈwoːtə/",
"/ˈwætəɹ/",
],
[
"From Middle English <i>water</i>, from Old English <i>wæter</i> (“water”), from Proto-West Germanic <i>*watar</i>, from Proto-Germanic <i>*watōr</i> (“water”), from Proto-Indo-European <i>*wódr̥</i> (“water”).", # noqa
"Cognate with cf, North Frisian <i>weeter</i> (“water”), Saterland Frisian <i>Woater</i> (“water”), West Frisian <i>wetter</i> (“water”), Dutch <i>water</i> (“water”), Low German <i>Water</i> (“water”), German <i>Wasser</i>, Old Norse <i>vatn</i> (Swedish <i>vatten</i> (“water”), Danish <i>vand</i> (“water”), Norwegian Bokmål <i>vann</i> (“water”), Norwegian Nynorsk and Icelandic <i>vatn</i> (“water”)), Old Irish <i>coin fodorne</i> (“otters”, literally “water-dogs”), Latin <i>unda</i> (“wave”), Lithuanian <i>vanduõ</i> (“water”), Russian <i>вода́</i> (<i>voda</i>, “water”), Albanian <i>ujë</i> (“water”), Ancient Greek <i>ὕδωρ</i> (“water”), Armenian <i>գետ</i> (<i>get</i>, “river”), Sanskrit <i>उदन्</i> (<i>udán</i>, “wave, water”), Hittite <i>𒉿𒀀𒋻</i> (<i>wa-a-tar</i>).", # noqa
Expand Down Expand Up @@ -198,7 +224,7 @@
),
(
"word",
["wɜːd"],
["/wɜːd/", "/wɝd/"],
[
"From Middle English <i>word</i>, from Old English <i>word</i>, from Proto-West Germanic <i>*word</i>, from Proto-Germanic <i>*wurdą</i>, from Proto-Indo-European <i>*wr̥dʰh₁om</i>. Doublet of <i>verb</i> and <i>verve</i>; further related to <b>vrata</b>." # noqa
],
Expand Down
Loading