Skip to content

Commit

Permalink
Initial import of hunspell for spell checking
Browse files Browse the repository at this point in the history
  • Loading branch information
kovidgoyal committed Dec 21, 2013
1 parent 497fea0 commit 5f41105
Show file tree
Hide file tree
Showing 44 changed files with 39,516 additions and 1 deletion.
4 changes: 4 additions & 0 deletions COPYRIGHT
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ Files: srx/regex/*
Copyright: Matthew Barnett
License: Python Software Foundation License

Files: src/hunspell/*
Copyright: Various
License: GPL-2+

Files: src/calibre/ebooks/hyphenate.py
Copyright: Copyright (C) 1990, 2004, 2005 Gerard D.C. Kuiken.
License: other
Expand Down
9 changes: 9 additions & 0 deletions setup/extensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,15 @@ def preflight(self, obj_dir, compiler, linker, builder, cflags, ldflags):

extensions = [

Extension('hunspell',
['hunspell/'+x for x in
'affentry.cxx affixmgr.cxx csutil.cxx dictmgr.cxx filemgr.cxx hashmgr.cxx hunspell.cxx hunzip.cxx phonet.cxx replist.cxx suggestmgr.cxx'.split()
] + ['calibre/utils/spell/hunspell_wrapper.cpp',],
inc_dirs=['hunspell'],
cflags='/DHUNSPELL_STATIC /D_CRT_SECURE_NO_WARNINGS /DUNICODE /D_UNICODE'.split() if iswindows else ['-DHUNSPELL_STATIC'],
optimize_level=2,
),

Extension('_regex',
['regex/_regex.c', 'regex/_regex_unicode.c'],
headers=['regex/_regex.h']
Expand Down
3 changes: 2 additions & 1 deletion src/calibre/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,8 @@ def __init__(self):
'woff',
'unrar',
'qt_hack',
'_regex'
'_regex',
'hunspell',
]
if iswindows:
plugins.extend(['winutil', 'wpd', 'winfonts'])
Expand Down
10 changes: 10 additions & 0 deletions src/calibre/utils/spell/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import (unicode_literals, division, absolute_import,
print_function)

__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'



182 changes: 182 additions & 0 deletions src/calibre/utils/spell/hunspell_wrapper.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
/*
* hunspell_wrapper.cpp
* Python wrapper for the hunspell library.
* Copyright (C) 2013 Kovid Goyal <kovid at kovidgoyal.net>
*
* Distributed under terms of the GPL3 license.
*/

#include <Python.h>
#include <new>
#include <string>
#include <hunspell.hxx>

/* Python object that wraps a single Hunspell dictionary instance. */
typedef struct {
PyObject_HEAD
/* Hunspell instance created in init_type and deleted in dealloc; NULL until init succeeds. */
Hunspell *handle;
/* Encoding name obtained from handle->get_dic_encoding(); managed by hunspell, never freed here. */
char *encoding;
} Dictionary;

/* Exception type used to report hunspell failures; created in inithunspell as "hunspell.HunspellError". */
static PyObject *HunspellError = NULL;

/*
 * tp_init slot for Dictionary.
 *
 * Expects two string arguments: the path of the dictionary (.dic) file
 * followed by the path of the affix (.aff) file. Creates the underlying
 * Hunspell object and caches the dictionary's encoding name.
 *
 * Returns 0 on success and -1 with a Python exception set on failure.
 * The failure value must be negative: the interpreter only treats a
 * negative tp_init result as an error, so a positive value would hand a
 * half-initialised object back to the caller with an exception pending.
 */
static int
init_type(Dictionary *self, PyObject *args, PyObject *kwds) {
    char *dpath = NULL, *apath = NULL;

    self->handle = NULL;
    self->encoding = NULL;

    if (!PyArg_ParseTuple(args, "ss", &dpath, &apath)) return -1;

    try {
        /* Hunspell takes the affix path first, then the dictionary path. */
        self->handle = new (std::nothrow) Hunspell(apath, dpath);
    } catch (const std::exception &ex) {
        PyErr_SetString(HunspellError, ex.what());
        return -1;
    } catch (const std::string &ex) {
        PyErr_SetString(HunspellError, ex.c_str());
        return -1;
    } catch (...) {
        PyErr_SetString(HunspellError, "Failed to create dictionary, unknown error");
        return -1;
    }

    /* nothrow new reports allocation failure by returning NULL. */
    if (self->handle == NULL) { PyErr_NoMemory(); return -1; }

    self->encoding = self->handle->get_dic_encoding();
    if (self->encoding == NULL) {
        delete self->handle;
        self->handle = NULL;
        PyErr_SetString(HunspellError, "Failed to get dictionary encoding");
        return -1;
    }
    return 0;
}

/*
 * tp_dealloc slot for Dictionary: destroys the wrapped Hunspell object
 * (if one was created) and releases the Python object itself.
 */
static void
dealloc(Dictionary *self) {
    Hunspell *engine = self->handle;

    if (engine != NULL) {
        delete engine;
    }

    /* The encoding string is managed by hunspell, so it is only cleared, never freed. */
    self->handle = NULL;
    self->encoding = NULL;

    self->ob_type->tp_free((PyObject *)self);
}

/*
 * Dictionary.recognized(word) -> bool
 *
 * Encodes the word into the dictionary's encoding and asks hunspell to
 * spell-check it. Returns False when hunspell's spell() returns 0 and
 * True otherwise. Returns NULL with an exception set if argument
 * parsing/encoding fails.
 */
static PyObject *
recognized(Dictionary *self, PyObject *args) {
    char *word = NULL;
    int found;

    if (!PyArg_ParseTuple(args, "es", self->encoding, &word)) return NULL;

    found = self->handle->spell(word);
    /* The "es" format allocates the encoded buffer; the caller must free it. */
    PyMem_Free(word);

    if (found == 0) Py_RETURN_FALSE;
    Py_RETURN_TRUE;
}

/*
 * Dictionary.suggest(word) -> tuple of unicode
 *
 * Encodes the word into the dictionary's encoding, asks hunspell for
 * suggestions and returns them as a tuple of unicode objects decoded
 * from the dictionary's encoding. Returns NULL with an exception set if
 * encoding the input, allocating the tuple or decoding a suggestion fails.
 */
static PyObject *
suggest(Dictionary *self, PyObject *args) {
    char *word = NULL, **slist = NULL;
    int i, num_slist;
    PyObject *ans, *temp;

    if (!PyArg_ParseTuple(args, "es", self->encoding, &word)) return NULL;

    num_slist = self->handle->suggest(&slist, word);
    /* The "es" format allocates the encoded buffer; the caller must free it. */
    PyMem_Free(word);

    ans = PyTuple_New(num_slist);
    if (ans == NULL) PyErr_NoMemory();
    else {
        for (i = 0; i < num_slist; i++) {
            temp = PyUnicode_Decode(slist[i], strlen(slist[i]), self->encoding, "strict");
            if (temp == NULL) { Py_DECREF(ans); ans = NULL; break; }
            /* PyTuple_SET_ITEM takes over the reference to temp. */
            PyTuple_SET_ITEM(ans, i, temp);
        }
    }

    /* The suggestion list is allocated by hunspell and must be released through it. */
    if (slist != NULL) self->handle->free_list(&slist, num_slist);
    return ans;
}

/*
 * Dictionary.add(word) -> bool
 *
 * Encodes the word into the dictionary's encoding and passes it to
 * hunspell's add(). Returns True when add() returns 0, False otherwise.
 * Returns NULL with an exception set if argument parsing/encoding fails.
 */
static PyObject *
add(Dictionary *self, PyObject *args) {
    char *word = NULL;
    int rc;

    if (!PyArg_ParseTuple(args, "es", self->encoding, &word)) return NULL;

    rc = self->handle->add(word);
    /* The "es" format allocates the encoded buffer; the caller must free it. */
    PyMem_Free(word);

    if (rc == 0) Py_RETURN_TRUE;
    Py_RETURN_FALSE;
}

/*
 * Dictionary.remove(word) -> bool
 *
 * Encodes the word into the dictionary's encoding and passes it to
 * hunspell's remove(). Returns True when remove() returns 0, False
 * otherwise. Returns NULL with an exception set if argument
 * parsing/encoding fails.
 */
static PyObject *
remove_word(Dictionary *self, PyObject *args) {
    char *word = NULL;
    int rc;

    if (!PyArg_ParseTuple(args, "es", self->encoding, &word)) return NULL;

    rc = self->handle->remove(word);
    /* The "es" format allocates the encoded buffer; the caller must free it. */
    PyMem_Free(word);

    if (rc == 0) Py_RETURN_TRUE;
    Py_RETURN_FALSE;
}

/*
 * Method table for the Dictionary type. Every entry takes positional
 * arguments only (METH_VARARGS); the table ends with an all-zero sentinel.
 */
static PyMethodDef HunSpell_methods[] = {
    {"recognized", (PyCFunction)recognized, METH_VARARGS,
     "Checks the spelling of the given word. The word must be a unicode object. If encoding of the word to the encoding of the dictionary fails, a UnicodeEncodeError is raised. Returns False if the input word is not recognized."},
    {"suggest", (PyCFunction)suggest, METH_VARARGS,
     "Provide suggestions for the given word. The input word must be a unicode object. If encoding of the word to the encoding of the dictionary fails, a UnicodeEncodeError is raised. Returns a tuple of suggested words as unicode objects."},
    {"add", (PyCFunction)add, METH_VARARGS,
     "Adds the given word into the runtime dictionary"},
    {"remove", (PyCFunction)remove_word, METH_VARARGS,
     "Removes the given word from the runtime dictionary"},
    {NULL, NULL, 0, NULL}  /* sentinel */
};

/*
 * Type object for Dictionary. The initializer is positional, so the order
 * of the entries below must follow the PyTypeObject slot order. Only the
 * name, size, dealloc, flags, doc, methods and init slots are filled in
 * here; tp_new is assigned in inithunspell before PyType_Ready is called.
 */
static PyTypeObject DictionaryType = {
PyObject_HEAD_INIT(NULL)
0, /* ob_size */
"Dictionary", /* tp_name */
sizeof(Dictionary), /* tp_basicsize */
0, /* tp_itemsize */
(destructor) dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_compare */
0, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
0, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
"Dictionary object", /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
HunSpell_methods, /* tp_methods */
0, /* tp_members */
0, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
(initproc) init_type, /* tp_init */
0, /* tp_alloc */
0, /* tp_new */
};


/*
 * Module initialisation entry point for the "hunspell" extension module.
 *
 * Creates the module, registers the HunspellError exception and the
 * Dictionary type on it. On any failure the function returns early,
 * leaving whatever Python exception was set by the failing call.
 */
PyMODINIT_FUNC
inithunspell(void) {
    PyObject *mod;

    // Create the module
    mod = Py_InitModule3("hunspell", NULL,
            "A wrapper for the hunspell spell checking library");
    if (mod == NULL) return;

    HunspellError = PyErr_NewException((char*)"hunspell.HunspellError", NULL, NULL);
    if (HunspellError == NULL) return;
    // PyModule_AddObject steals a reference on success. Take an extra one so
    // the file-level HunspellError pointer owns its own reference and stays
    // valid even if the attribute is later removed from the module.
    Py_INCREF(HunspellError);
    if (PyModule_AddObject(mod, "HunspellError", HunspellError) < 0) {
        // On failure the reference was not stolen; drop the extra one.
        Py_DECREF(HunspellError);
        return;
    }

    // Fill in some slots in the type, and make it ready
    DictionaryType.tp_new = PyType_GenericNew;
    if (PyType_Ready(&DictionaryType) < 0) return;

    // Add the type to the module.
    Py_INCREF(&DictionaryType);
    if (PyModule_AddObject(mod, "Dictionary", (PyObject *)&DictionaryType) < 0) {
        Py_DECREF(&DictionaryType);
        return;
    }
}

19 changes: 19 additions & 0 deletions src/hunspell/Makefile.am
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Automake rules for building the hunspell library as a libtool library.

# The library to build, and the directory its public headers are installed into.
lib_LTLIBRARIES = libhunspell-1.3.la
libhunspell_1_3_includedir = $(includedir)/hunspell
# Sources of the library; the two license files are listed here alongside the .cxx files.
libhunspell_1_3_la_SOURCES=affentry.cxx affixmgr.cxx csutil.cxx \
dictmgr.cxx hashmgr.cxx hunspell.cxx \
suggestmgr.cxx license.myspell license.hunspell \
phonet.cxx filemgr.cxx hunzip.cxx replist.cxx

# Headers installed into libhunspell_1_3_includedir.
libhunspell_1_3_include_HEADERS=affentry.hxx htypes.hxx affixmgr.hxx \
csutil.hxx hunspell.hxx atypes.hxx dictmgr.hxx hunspell.h \
suggestmgr.hxx baseaffix.hxx hashmgr.hxx langnum.hxx \
phonet.hxx filemgr.hxx hunzip.hxx w_char.hxx replist.hxx \
hunvisapi.h

# utf_info.cxx is not in SOURCES; it is declared as a dependency of the
# library and shipped via EXTRA_DIST below.
libhunspell_1_3_la_DEPENDENCIES=utf_info.cxx
libhunspell_1_3_la_LDFLAGS=-no-undefined

# Compile every C++ source with the visibility flags and BUILDING_LIBHUNSPELL defined.
AM_CXXFLAGS=$(CFLAG_VISIBILITY) -DBUILDING_LIBHUNSPELL

# Extra files to include in the distribution tarball.
EXTRA_DIST=hunspell.dsp makefile.mk README utf_info.cxx
Loading

0 comments on commit 5f41105

Please sign in to comment.