diff --git a/pke/base.py b/pke/base.py index 14d9df32..f2c6b4ca 100644 --- a/pke/base.py +++ b/pke/base.py @@ -58,7 +58,8 @@ def load_document(self, input, language=None, stoplist=None, stoplist (list): custom list of stopwords, defaults to pke.lang.stopwords[language]. normalization (str): word normalization method, defaults to - 'stemming'. Other possible value is 'none' + 'stemming'. Other possible values are 'lemmatization' + for using lemmas as stems and 'none' for using word surface forms instead of stems/lemmas. spacy_model (spacy.lang): preloaded spacy model when input is a string. @@ -119,6 +120,10 @@ def load_document(self, input, language=None, stoplist=None, for i, sentence in enumerate(self.sentences): self.sentences[i].stems = [stemmer.stem(w).lower() for w in sentence.words] + elif self.normalization == 'lemmatization': + for i, sentence in enumerate(self.sentences): + self.sentences[i].stems = sentence.meta['lemmas'] + else: for i, sentence in enumerate(self.sentences): self.sentences[i].stems = [w.lower() for w in sentence.words]