forked from kovidgoyal/calibre
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
When converting non-English text to English, use the user's current calibre interface language. This allows Japanese/Korean/Vietnamese characters to be converted correctly; previously they were assumed to be Chinese. Fixes #7622 (Calibre need to switch logic when converting Unicode filename into ASCII)
- Loading branch information
Showing
27 changed files
with
146,309 additions
and
3,292 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -193,6 +193,33 @@ License: GPL-3 | |
The full text of the GPL is distributed as in | ||
/usr/share/common-licenses/GPL-3 on Debian systems. | ||
|
||
Files: src/calibre/ebooks/unihandecode/pykakasi/* | ||
Copyright: 2011, Hiroshi Miura <[email protected]> | ||
Copyright: 1992, Hironobu Takahashi | ||
License: GPL-2+ | ||
The full text of the GPL is distributed as in | ||
/usr/share/common-licenses/GPL on Debian systems. | ||
|
||
Files: resources/kanwadict2.db | ||
Files: resources/itaijidict2.pickle | ||
Copyright: 2011, Hiroshi Miura <[email protected]> | ||
Copyright: 1992 1993 1994, Hironobu Takahashi ([email protected]), | ||
Copyright: 1992 1993 1994, Masahiko Sato ([email protected]), | ||
Copyright: 1992 1993 1994, Yukiyoshi Kameyama, Miki Inooka, Akihiko Sasaki, Dai Ando, Junichi Okukawa, | ||
Copyright: 1992 1993 1994, Katsushi Sato and Nobuhiro Yamagishi | ||
License: GPL-2+ | ||
The full text of the GPL is distributed as in | ||
/usr/share/common-licenses/GPL on Debian systems. | ||
|
||
Files: src/calibre/ebooks/unihandecode/* | ||
Copyright: 2010-2011, Hiroshi Miura <[email protected]> | ||
Copyright: 2009, John Schember | ||
Copyright: 2007, Russell Norris | ||
Copyright: 2001, Sean M. Burke | ||
License: GPL-3, Perl | ||
The full text of the GPL is distributed as in | ||
/usr/share/common-licenses/GPL-3 on Debian systems. | ||
|
||
Files: src/encutils/__init__.py | ||
Copyright: 2005-2008: Christof Hoeke | ||
License: LGPL-3+, CC-BY-3.0 | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,9 +6,10 @@ | |
__copyright__ = '2009, Kovid Goyal <[email protected]>' | ||
__docformat__ = 'restructuredtext en' | ||
|
||
import os, cPickle | ||
import os, cPickle, re, anydbm, shutil | ||
from zlib import compress | ||
|
||
from setup import Command, basenames | ||
from setup import Command, basenames, __appname__ | ||
|
||
def get_opts_from_parser(parser): | ||
def do_opt(opt): | ||
|
@@ -26,6 +27,9 @@ class Resources(Command): | |
|
||
description = 'Compile various needed calibre resources' | ||
|
||
KAKASI_PATH = os.path.join(Command.SRC, __appname__, | ||
'ebooks', 'unihandecode', 'pykakasi') | ||
|
||
def run(self, opts): | ||
scripts = {} | ||
for x in ('console', 'gui'): | ||
|
@@ -101,11 +105,113 @@ def run(self, opts): | |
import json | ||
json.dump(function_dict, open(dest, 'wb'), indent=4) | ||
|
||
self.run_kakasi(opts) | ||
|
||
def run_kakasi(self, opts):
    """Build the pykakasi lookup tables under the resources directory.

    Three outputs are written to ``localization/pykakasi`` below
    ``self.RESOURCES``, each from a source file in ``self.KAKASI_PATH``:

    * ``kanwadict2.db`` from ``kakasidict.utf8``
    * ``itaijidict2.pickle`` from ``itaijidict.utf8``
    * ``kanadict2.pickle`` from ``kanadict.utf8``

    An output is regenerated only when ``self.newer(dest, src)`` is true;
    otherwise an "up to date" message is logged.

    :param opts: Command options; not referenced by this method.
    """
    # Accumulator filled by parsekdict()/updaterec() and written by kanwaout().
    self.records = {}
    src = self.j(self.KAKASI_PATH, 'kakasidict.utf8')
    dest = self.j(self.RESOURCES, 'localization',
                  'pykakasi', 'kanwadict2.db')
    # All three outputs share this directory, so create it once up front.
    base = os.path.dirname(dest)
    if not os.path.exists(base):
        os.makedirs(base)

    if not self.newer(dest, src):
        self.info('\tKanwadict is up to date')
    else:
        self.info('\tGenerating Kanwadict')
        # Close the source deterministically instead of leaking the handle.
        with open(src, "r") as f:
            for line in f:
                self.parsekdict(line)
        self.kanwaout(dest)

    src = self.j(self.KAKASI_PATH, 'itaijidict.utf8')
    dest = self.j(self.RESOURCES, 'localization',
                  'pykakasi', 'itaijidict2.pickle')

    if not self.newer(dest, src):
        self.info('\tItaijidict is up to date')
    else:
        self.info('\tGenerating Itaijidict')
        self.mkitaiji(src, dest)

    src = self.j(self.KAKASI_PATH, 'kanadict.utf8')
    dest = self.j(self.RESOURCES, 'localization',
                  'pykakasi', 'kanadict2.pickle')

    if not self.newer(dest, src):
        self.info('\tKanadict is up to date')
    else:
        self.info('\tGenerating kanadict')
        self.mkkanadict(src, dest)
|
||
|
||
def mkitaiji(self, src, dst):
    """Compile the itaiji source table into a pickled dict.

    Each line of *src* is UTF-8 decoded and stripped. Blank lines and
    lines starting with ``;;`` are skipped. On the remaining lines every
    backslash-u escape followed by four hex digits is replaced by the
    character it names, and the first character of the result is mapped
    to the second.

    :param src: Path of the source file to read.
    :param dst: Path of the pickle file to write.
    """
    dic = {}
    with open(src, "r") as f:
        for line in f:
            line = line.decode("utf-8").strip()
            if not line or line.startswith(';;'):  # skip blanks and comments
                continue
            pair = re.sub(r'\\u([0-9a-fA-F]{4})',
                          lambda x: unichr(int(x.group(1), 16)), line)
            dic[pair[0]] = pair[1]
    # protocol=-1 selects a binary pickle format, so the output must be
    # opened in binary mode; text mode corrupts the data on Windows.
    with open(dst, 'wb') as f:
        cPickle.dump(dic, f, protocol=-1)
|
||
def mkkanadict(self, src, dst):
    """Compile the kana source table into a pickled dict.

    Each line of *src* is UTF-8 decoded and stripped. Blank lines and
    lines starting with ``;;`` are skipped. Every remaining line must
    consist of exactly two fields separated by a single space; the second
    field becomes the key and the first field the value.

    :param src: Path of the source file to read.
    :param dst: Path of the pickle file to write.
    :raises ValueError: If a data line does not split into two fields.
    """
    dic = {}
    with open(src, "r") as f:
        for line in f:
            line = line.decode("utf-8").strip()
            if not line or line.startswith(';;'):  # skip blanks and comments
                continue
            (alpha, kana) = line.split(' ')
            dic[kana] = alpha
    # protocol=-1 selects a binary pickle format, so the output must be
    # opened in binary mode; text mode corrupts the data on Windows.
    with open(dst, 'wb') as f:
        cPickle.dump(dic, f, protocol=-1)
|
||
def parsekdict(self, line):
    """Parse one dictionary line and hand its reading to ``updaterec``.

    The line is UTF-8 decoded and stripped. Blank lines and lines starting
    with ``;;`` are ignored. Any other line must be two fields separated
    by a single space: the reading (yomi) followed by the kanji. If the
    last character of the reading has a code point no greater than that
    of ``'z'``, it is split off and passed as the tail; otherwise the
    tail is the empty string.

    :param line: One raw (byte string) line of the dictionary source.
    :raises ValueError: If a data line does not split into two fields.
    """
    line = line.decode("utf-8").strip()
    # Blank lines are skipped like comments so they cannot break the unpack
    # below; the sibling table builders already treat them this way.
    if not line or line.startswith(';;'):
        return
    (yomi, kanji) = line.split(' ')
    last = yomi[-1:]
    if ord(last) <= ord('z'):
        tail = last
        yomi = yomi[:-1]
    else:
        tail = ''
    self.updaterec(kanji, yomi, tail)
|
||
def updaterec(self, kanji, yomi, tail):
    """Append one ``(yomi, tail)`` reading for *kanji* to ``self.records``.

    ``self.records`` is a two-level dict. The outer key is the code point
    of the first character of *kanji*, formatted as lowercase hex
    zero-padded to at least four digits. The inner key is *kanji* itself
    and the value is a list of ``(yomi, tail)`` tuples in insertion order.

    :param kanji: Non-empty kanji string the reading belongs to.
    :param yomi: Reading of the kanji.
    :param tail: Trailing character split from the reading, or ``''``.
    """
    key = "%04x" % ord(kanji[0])
    # setdefault creates the missing levels on first use, so no separate
    # branches are needed for new buckets or new kanji.
    self.records.setdefault(key, {}).setdefault(kanji, []).append((yomi, tail))
|
||
def kanwaout(self, out):
    """Write ``self.records`` to the dbm database at *out*.

    Each value is pickled (protocol ``-1``) and zlib-compressed before
    being stored under its key.

    :param out: Path of the database, opened with flag ``'c'``.
    """
    dic = anydbm.open(out, 'c')
    try:
        for (k, v) in self.records.iteritems():
            dic[k] = compress(cPickle.dumps(v, -1))
    finally:
        # Release the database handle even if serialisation fails part-way.
        dic.close()
|
||
|
||
def clean(self):
    """Delete the generated pickle files and the pykakasi output directory.

    Removes ``scripts.pickle``, ``recipes.pickle`` and
    ``ebook-convert-complete.pickle`` from ``self.RESOURCES`` when they
    exist, then removes the ``localization/pykakasi`` directory tree
    below ``self.RESOURCES`` when it exists.
    """
    for name in ('scripts', 'recipes', 'ebook-convert-complete'):
        path = self.j(self.RESOURCES, name + '.pickle')
        if os.path.exists(path):
            os.remove(path)
    kakasi = self.j(self.RESOURCES, 'localization', 'pykakasi')
    if os.path.exists(kakasi):
        shutil.rmtree(kakasi)
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
Oops, something went wrong.