Skip to content

Commit

Permalink
better time for listening
Browse files Browse the repository at this point in the history
  • Loading branch information
PasaOpasen committed May 30, 2020
1 parent d4cabca commit bf7563f
Show file tree
Hide file tree
Showing 13 changed files with 119,941 additions and 2 deletions.
118,256 changes: 118,256 additions & 0 deletions 4th/cedict_ts.u8

Large diffs are not rendered by default.

45 changes: 45 additions & 0 deletions 4th/create_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# -*- coding: utf-8 -*-
"""
Created on Sun May 24 13:40:25 2020
@author: qtckp
"""


import os, shutil
import json
import googletrans


# Folder that this script rebuilds and fills with copied files and JSON data.
directory='./text_logger'


def copy(filename):
    """Copy the file *filename* into ``directory`` under its base name.

    Only the last path component of *filename* is used for the destination,
    so a source given with a directory part (relative or absolute) still
    lands directly inside ``directory``. For a bare file name the result is
    the same as joining ``directory`` and *filename*.

    Errors from ``shutil.copyfile`` (for example a missing source file or a
    missing ``directory``) propagate to the caller.
    """
    # Joining the raw path would point into a sub-folder that does not exist
    # (relative path) or back at the source itself (absolute path).
    target = os.path.join(directory, os.path.basename(filename))
    shutil.copyfile(filename, target)


# Start from a clean output folder: remove whatever is already there,
# then create it again empty.
if os.path.exists(directory):
    shutil.rmtree(directory)
os.makedirs(directory)

# Files that are copied into the output folder unchanged.
for bundled_name in ('text_logger6.py', "languges_for_transcription.json"):
    copy(bundled_name)



# Swap keys and values of googletrans.LANGUAGES so the former values
# become the lookup keys.
langs = dict(
    (name, code) for code, name in googletrans.LANGUAGES.items()
)

# Store the inverted mapping as indented JSON inside the output folder.
languages_path = os.path.join(directory, "languges.json")
with open(languages_path, "w") as sink:
    sink.write(json.dumps(langs, indent=4))


# Default settings written next to the copied files. 'languages' and
# 'need_to_transcript' are lists of the same length.
settings = dict(
    languages=['ru', 'en', 'fa'],
    need_to_transcript=[True, False, True],
    listen_time=3,
    stop_word='+',
)

# Store the settings as indented JSON inside the output folder.
settings_path = os.path.join(directory, "settings.json")
with open(settings_path, "w") as sink:
    sink.write(json.dumps(settings, indent=4))
69 changes: 69 additions & 0 deletions 4th/epitran tables/all_supported.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
| aar-Latn | Afar |
| amh-Ethi | Amharic |
| ara-Arab | Literary Arabic |
| aze-Cyrl | Azerbaijani (Cyrillic) |
| aze-Latn | Azerbaijani (Latin) |
| ben-Beng | Bengali |
| ben-Beng-red| Bengali (reduced) |
| cat-Latn | Catalan |
| ceb-Latn | Cebuano |
| cmn-Hans | Mandarin (Simplified)\* |
| cmn-Hant | Mandarin (Traditional)\*|
| ckb-Arab | Sorani |
| deu-Latn | German |
| deu-Latn-np | German† |
| deu-Latn-nar| German (more phonetic) |
| eng-Latn | English‡ |
| fas-Arab | Farsi (Perso-Arabic) |
| fra-Latn | French |
| fra-Latn-np | French† |
| hau-Latn | Hausa |
| hin-Deva | Hindi |
| hun-Latn | Hungarian |
| ilo-Latn | Ilocano |
| ind-Latn | Indonesian |
| ita-Latn | Italian |
| jav-Latn | Javanese |
| kaz-Cyrl | Kazakh (Cyrillic) |
| kaz-Latn | Kazakh (Latin) |
| kin-Latn | Kinyarwanda |
| kir-Arab | Kyrgyz (Perso-Arabic) |
| kir-Cyrl | Kyrgyz (Cyrillic) |
| kir-Latn | Kyrgyz (Latin) |
| kmr-Latn | Kurmanji |
| lao-Laoo | Lao |
| mar-Deva | Marathi |
| mlt-Latn | Maltese |
| mya-Mymr | Burmese |
| msa-Latn | Malay |
| nld-Latn | Dutch |
| nya-Latn | Chichewa |
| orm-Latn | Oromo |
| pan-Guru | Punjabi (Eastern) |
| pol-Latn | Polish |
| por-Latn | Portuguese |
| ron-Latn | Romanian |
| rus-Cyrl | Russian |
| sna-Latn | Shona |
| som-Latn | Somali |
| spa-Latn | Spanish |
| swa-Latn | Swahili |
| swe-Latn | Swedish |
| tam-Taml | Tamil |
| tel-Telu | Telugu |
| tgk-Cyrl | Tajik |
| tgl-Latn | Tagalog |
| tha-Thai | Thai |
| tir-Ethi | Tigrinya |
| tpi-Latn | Tok Pisin |
| tuk-Cyrl | Turkmen (Cyrillic) |
| tuk-Latn | Turkmen (Latin) |
| tur-Latn | Turkish (Latin) |
| ukr-Cyrl | Ukrainian |
| uig-Arab | Uyghur (Perso-Arabic) |
| uzb-Cyrl | Uzbek (Cyrillic) |
| uzb-Latn | Uzbek (Latin) |
| vie-Latn | Vietnamese |
| xho-Latn | Xhosa |
| yor-Latn | Yoruba |
| zul-Latn | Zulu |
41 changes: 41 additions & 0 deletions 4th/epitran tables/create_tables.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# -*- coding: utf-8 -*-
"""
Created on Tue May 26 13:54:12 2020
@author: qtckp
"""


import json

def to_pair(lst):
    """Return the first two items of *lst* as a 2-tuple.

    Any further items are ignored. Raises ``IndexError`` when *lst* has
    fewer than two items.
    """
    first = lst[0]
    second = lst[1]
    return (first, second)

# Read the table of all supported languages. Each row has the form
# "| code | name |": dropping the leading "| " and splitting on "|" leaves
# the code and the name as the first two fields.
# The table contains non-ASCII marks (e.g. "†", "‡"), so decode it explicitly
# as UTF-8 instead of relying on the platform's default encoding.
with open('./epitran tables/all_supported.txt', 'r', encoding='utf-8') as fl:
    # Blank lines (such as a trailing empty line) carry no row and would
    # make to_pair fail, so they are skipped.
    pairs = [to_pair(s[2:].split('|')) for s in fl if s.strip()]

# Map language code -> language name, with surrounding padding removed.
all_langs = {key.strip(): value.strip() for key, value in pairs}


# Read the language codes listed in limited_supported.txt. Each row has the
# form "| code | name |", so the code is the field after the first "|".
# The table contains a non-ASCII mark ("†"), so decode it explicitly as UTF-8
# instead of relying on the platform's default encoding.
with open('./epitran tables/limited_supported.txt', 'r', encoding='utf-8') as fl:
    # Blank lines have no "|" separator and would raise IndexError; skip them.
    arr = [s.split('|')[1].strip() for s in fl if s.strip()]


# Replace every name with a (name, flag) tuple, where the flag is True when
# the language code is present in `arr` and False otherwise. The new values
# are computed in full first and then written back in place, so the key
# order is unchanged.
all_langs.update({
    code: (title, code in arr)
    for code, title in all_langs.items()
})



# Save the mapping as indented JSON in the current working directory.
with open("languges_for_transcription.json", "w") as sink:
    sink.write(json.dumps(all_langs, indent=4))









8 changes: 8 additions & 0 deletions 4th/epitran tables/limited_supported.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
| ara-Arab | Arabic |
| cat-Latn | Catalan |
| ckb-Arab | Sorani |
| fas-Arab | Farsi (Perso-Arabic) |
| fra-Latn | French |
| fra-Latn-np | French† |
| mya-Mymr | Burmese |
| por-Latn | Portuguese |
Loading

0 comments on commit bf7563f

Please sign in to comment.