Skip to content

Commit

Permalink
De-vendor hunspell
Browse files Browse the repository at this point in the history
  • Loading branch information
kovidgoyal committed Sep 13, 2019
1 parent 9601d1c commit 22a1481
Show file tree
Hide file tree
Showing 43 changed files with 75 additions and 39,119 deletions.
4 changes: 0 additions & 4 deletions COPYRIGHT
Original file line number Diff line number Diff line change
Expand Up @@ -254,10 +254,6 @@ License: BSD
The full text of the BSD license is distributed as in
/usr/share/common-licenses/BSD on Debian systems.

Files: src/hunspell/*
Copyright: Various
License: GPL-2+

Files: src/calibre/gui2/tweak_book/diff/_patiencediff_c.c
Copyright: Canonical
License: GPL-2+
Expand Down
9 changes: 9 additions & 0 deletions bypy/sources.json
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,15 @@
}
},

{
"name": "hunspell",
"unix": {
"filename": "hunspell-1.7.0.tar.gz",
"hash": "sha256:57be4e03ae9dd62c3471f667a0d81a14513e314d4d92081292b90435944ff951",
"urls": ["https://github.com/hunspell/hunspell/files/2573619/{filename}"]
}
},

{
"name": "qt-base",
"version": "5.13.0",
Expand Down
5 changes: 5 additions & 0 deletions setup/build_environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,8 @@ def get_sip_dir():
icu_lib_dirs = []
zlib_inc_dirs = []
zlib_lib_dirs = []
hunspell_inc_dirs = []
hunspell_lib_dirs = []
openssl_inc_dirs, openssl_lib_dirs = [], []
ICU = sw = ''

Expand All @@ -152,6 +154,7 @@ def get_sip_dir():
sw_inc_dir = os.path.join(sw, 'include')
sw_lib_dir = os.path.join(sw, 'lib')
podofo_inc = os.path.join(sw_inc_dir, 'podofo')
hunspell_inc_dirs = [os.path.join(sw_inc_dir, 'hunspell')]
podofo_lib = sw_lib_dir
ft_libs = ['freetype']
ft_inc_dirs = [sw + '/include/freetype2']
Expand All @@ -163,6 +166,8 @@ def get_sip_dir():
'/usr/include/freetype2')
ft_lib_dirs = pkgconfig_lib_dirs('freetype2', 'FT_LIB_DIR', '/usr/lib')
ft_libs = pkgconfig_libs('freetype2', '', '')
hunspell_inc_dirs = pkgconfig_include_dirs('hunspell', 'HUNSPELL_INC_DIR', '/usr/include/hunspell')
hunspell_lib_dirs = pkgconfig_lib_dirs('hunspell', 'HUNSPELL_LIB_DIR', '/usr/lib')
sw = os.environ.get('SW', os.path.expanduser('~/sw'))
podofo_inc = '/usr/include/podofo'
podofo_lib = '/usr/lib'
Expand Down
10 changes: 5 additions & 5 deletions setup/extensions.json
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
[
{
"name": "hunspell",
"sources": "hunspell/affentry.cxx hunspell/affixmgr.cxx hunspell/csutil.cxx hunspell/dictmgr.cxx hunspell/filemgr.cxx hunspell/hashmgr.cxx hunspell/hunspell.cxx hunspell/phonet.cxx hunspell/replist.cxx hunspell/suggestmgr.cxx calibre/utils/spell/hunspell_wrapper.cpp",
"inc_dirs": "hunspell",
"defines": "HUNSPELL_STATIC",
"windows_defines": "HUNSPELL_STATIC _CRT_SECURE_NO_WARNINGS UNICODE _UNICODE",
"optimize_level": 2
"sources": "calibre/utils/spell/hunspell_wrapper.cpp",
"inc_dirs": "!hunspell_inc_dirs",
"lib_dirs": "!hunspell_lib_dirs",
"libraries": "hunspell",
"needs_c++11": true
},
{
"name": "monotonic",
Expand Down
47 changes: 34 additions & 13 deletions src/calibre/spell/dictionary.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,27 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2014, Kovid Goyal <kovid at kovidgoyal.net>
from __future__ import absolute_import, division, print_function, unicode_literals

__license__ = 'GPL v3'
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'

import os, glob, shutil, re, sys
from collections import namedtuple, defaultdict
from itertools import chain
import glob
import os
import re
import shutil
import sys
from collections import defaultdict, namedtuple
from functools import partial
from itertools import chain

from calibre import prints
from calibre.constants import plugins, config_dir
from calibre.constants import (
config_dir, filesystem_encoding, ispy3, iswindows, plugins
)
from calibre.spell import parse_lang_code
from calibre.utils.config import JSONConfig
from calibre.utils.icu import capitalize
from calibre.utils.localization import get_lang, get_system_locale
from polyglot.builtins import iteritems, itervalues, unicode_type, filter
from polyglot.builtins import filter, iteritems, itervalues, unicode_type


Dictionary = namedtuple('Dictionary', 'primary_locale locales dicpath affpath builtin name id')
LoadedDictionary = namedtuple('Dictionary', 'primary_locale locales obj builtin name id')
Expand Down Expand Up @@ -163,11 +168,18 @@ def get_dictionary(locale, exact_match=False):


def load_dictionary(dictionary):
    """Open the hunspell dictionary described by ``dictionary``.

    The ``.dic`` and ``.aff`` files are handed to ``hunspell.Dictionary`` as
    file system paths (not as file contents).

    :param dictionary: record providing ``dicpath``, ``affpath``,
        ``primary_locale``, ``locales``, ``builtin``, ``name`` and ``id``.
    :return: a ``LoadedDictionary`` carrying the same metadata plus the
        opened ``hunspell.Dictionary`` object in its ``obj`` field.
    """

    def fix_path(path):
        # Work on text: byte paths are decoded with the file system encoding.
        if isinstance(path, bytes):
            path = path.decode(filesystem_encoding)
        path = os.path.abspath(path)
        # On Windows use the extended-length form. Paths that already carry a
        # \\?\ or \\.\ prefix are left untouched so the prefix is never
        # doubled, and UNC shares (\\server\share\...) get the \\?\UNC\ form
        # instead of a bare \\?\ which would produce an invalid path.
        if iswindows and not path.startswith(('\\\\?\\', '\\\\.\\')):
            if path.startswith('\\\\'):
                path = r'\\?\UNC\{}'.format(path[2:])
            else:
                path = r'\\?\{}'.format(path)
        if not ispy3:
            # On Python 2 the path is passed on as UTF-8 encoded bytes.
            path = path.encode('utf-8')
        return path

    obj = hunspell.Dictionary(fix_path(dictionary.dicpath), fix_path(dictionary.affpath))
    return LoadedDictionary(dictionary.primary_locale, dictionary.locales, obj, dictionary.builtin, dictionary.name, dictionary.id)


Expand Down Expand Up @@ -414,6 +426,14 @@ def add_suggestion(w, ans):
return ans


def build_test():
    """Smoke test for the hunspell extension.

    Initializes a ``Dictionaries`` instance and checks that the word
    "recognized" is accepted for the locale parsed from ``'en'``.

    :raises AssertionError: if the word is not recognized.
    """
    checker = Dictionaries()
    checker.initialize()
    english = parse_lang_code('en')
    if checker.recognized('recognized', locale=english):
        return
    raise AssertionError('The word recognized was not recognized')


def find_tests():
import unittest

Expand All @@ -438,5 +458,6 @@ def test_dictionaries(self):
self.assertIn('one\u2010half', self.suggestions('oone\u2010half'))
self.assertIn('adequately', self.suggestions('ade-quately'))
self.assertIn('magic. Wand', self.suggestions('magic.wand'))
self.assertIn('List', self.suggestions('Lis𝑘t'))

return unittest.TestLoader().loadTestsFromTestCase(TestDictionaries)
4 changes: 4 additions & 0 deletions src/calibre/test_build.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ def test_regex(self):
self.assertEqual(regex.findall(r'(?i)(a)(b)', 'ab cd AB 1a1b'), [('a', 'b'), ('A', 'B')])
self.assertEqual(regex.escape('a b', literal_spaces=True), 'a b')

def test_hunspell(self):
    """Run the spell-checker build test from ``calibre.spell.dictionary``."""
    import calibre.spell.dictionary as spell_dictionary
    spell_dictionary.build_test()

def test_chardet(self):
from chardet import detect
raw = 'mūsi Füße'.encode('utf-8')
Expand Down
39 changes: 18 additions & 21 deletions src/calibre/utils/spell/hunspell_wrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,14 @@ static PyObject *HunspellError = NULL;
static int
init_type(Dictionary *self, PyObject *args, PyObject *kwds) {
char *dic = NULL, *aff = NULL;
Py_ssize_t diclen, afflen;

self->handle = NULL;
self->encoding = NULL;

if (!PyArg_ParseTuple(args, "s#s#", &dic, &diclen, &aff, &afflen)) return 1;
if (!PyArg_ParseTuple(args, "ss", &dic, &aff)) return 1;

try {
self->handle = new (std::nothrow) Hunspell(aff, afflen, dic, diclen);
self->handle = new (std::nothrow) Hunspell(aff, dic);
} catch (const std::exception &ex) {
PyErr_SetString(HunspellError, ex.what());
return 1;
Expand All @@ -58,35 +57,33 @@ dealloc(Dictionary *self) {

// Dictionary.recognized(word) -> bool
//
// Returns True when the dictionary's spell() accepts `word`, False otherwise.
// Returns NULL with a Python exception set if the argument cannot be parsed
// or if the C++ side throws (reported as HunspellError, as in init_type).
static PyObject *
recognized(Dictionary *self, PyObject *args) {
    char *w = NULL;
    // "es" encodes the argument with self->encoding into a buffer allocated
    // for us; that buffer has to be released with PyMem_Free.
    if (!PyArg_ParseTuple(args, "es", self->encoding, &w)) return NULL;

    bool known = false;
    try {
        const std::string word(w);
        PyMem_Free(w); w = NULL;
        known = self->handle->spell(word);
    } catch (const std::exception &ex) {
        // A C++ exception must not propagate into the interpreter.
        PyMem_Free(w);  // no-op if the buffer was already released
        PyErr_SetString(HunspellError, ex.what());
        return NULL;
    }

    if (known) { Py_RETURN_TRUE; }
    Py_RETURN_FALSE;
}

// Dictionary.suggest(word) -> tuple of str
//
// Returns the dictionary's suggestions for `word`, each decoded from
// self->encoding. Returns NULL with a Python exception set if the argument
// cannot be parsed, the tuple cannot be allocated, a suggestion cannot be
// decoded, or the C++ side throws (reported as HunspellError, as in
// init_type).
static PyObject *
suggest(Dictionary *self, PyObject *args) {
    char *w = NULL;
    // "es" encodes the argument with self->encoding into a buffer allocated
    // for us; that buffer has to be released with PyMem_Free.
    if (!PyArg_ParseTuple(args, "es", self->encoding, &w)) return NULL;

    try {
        const std::string word(w);
        PyMem_Free(w); w = NULL;

        const std::vector<std::string> word_list = self->handle->suggest(word);
        PyObject *ans = PyTuple_New(static_cast<Py_ssize_t>(word_list.size()));
        // Stop here on allocation failure: filling or releasing a NULL tuple
        // below would dereference NULL. PyTuple_New has already set the error.
        if (ans == NULL) return NULL;

        Py_ssize_t i = 0;
        for (const auto &s : word_list) {
            PyObject *item = PyUnicode_Decode(s.c_str(), static_cast<Py_ssize_t>(s.size()), self->encoding, "strict");
            if (item == NULL) { Py_DECREF(ans); return NULL; }
            PyTuple_SET_ITEM(ans, i++, item);  // steals the reference to item
        }
        return ans;
    } catch (const std::exception &ex) {
        // A C++ exception must not propagate into the interpreter.
        PyMem_Free(w);  // no-op if the buffer was already released
        PyErr_SetString(HunspellError, ex.what());
        return NULL;
    }
}

Expand Down
19 changes: 0 additions & 19 deletions src/hunspell/Makefile.am

This file was deleted.

Loading

0 comments on commit 22a1481

Please sign in to comment.