-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
37 changed files
with
678 additions
and
620 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
<component name="ProjectRunConfigurationManager"> | ||
<configuration default="false" name="Lint, format and test project" type="CompoundRunConfigurationType"> | ||
<toRun name="Pre-commit Hooks" type="PythonConfigurationType" /> | ||
<toRun name="Backend Tests" type="tests" /> | ||
<toRun name="Tests" type="tests" /> | ||
<method v="2" /> | ||
</configuration> | ||
</component> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,3 @@ | ||
from nerwhal.core import recognize, evaluate, Pii, Config # noqa: F401 | ||
from nerwhal.core import recognize, evaluate # noqa: F401 | ||
from nerwhal.types import NamedEntity, Config # noqa: F401 | ||
import nerwhal.recognizer_bases # noqa: F401 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
from typing import Type | ||
|
||
from spacy.pipeline import EntityRuler | ||
|
||
from .base import Backend | ||
from nerwhal.recognizer_bases import EntityRulerRecognizer | ||
from nerwhal.types import NamedEntity | ||
from ..nlp_utils import load_nlp, configure_entity_extension_attributes, set_entity_extension_attributes | ||
|
||
# Runs once when this module is imported, before any backend is constructed.
# NOTE(review): presumably registers the `score` / `model` extension attributes
# that `EntityRulerBackend.run` reads via `ent._` - confirm in nlp_utils.
configure_entity_extension_attributes() | ||
|
||
|
||
class EntityRulerBackend(Backend):
    """Backend that detects entities through spaCy ``EntityRuler`` pipeline components.

    Every registered recognizer adds two components to the shared pipeline:
    an ``EntityRuler`` holding the recognizer's patterns, followed directly by
    the component returned from ``set_entity_extension_attributes``.
    """

    def __init__(self, model_name):
        """Load the pipeline for ``model_name`` via ``load_nlp``."""
        self.nlp = load_nlp(model_name, "tokenize,pos,lemma")

    def register_recognizer(self, recognizer_cls: Type[EntityRulerRecognizer]):
        """Instantiate ``recognizer_cls`` and append its ruler to the pipeline.

        The recognizer's class name is used as the pipeline component name.
        """
        recognizer = recognizer_cls(self.nlp)
        pipe_name = recognizer_cls.__name__

        ruler = EntityRuler(self.nlp)
        self.nlp.add_pipe(ruler, pipe_name)
        ruler.add_patterns([{"label": recognizer.TAG, "pattern": pattern} for pattern in recognizer.patterns])

        # Placed right after the ruler it belongs to.
        attribute_setter = set_entity_extension_attributes(recognizer.SCORE, pipe_name)
        self.nlp.add_pipe(attribute_setter, name="label_" + pipe_name, after=pipe_name)

    def run(self, text):
        """Run the pipeline on ``text`` and return one ``NamedEntity`` per entity in ``doc.ents``."""
        doc = self.nlp(text)
        return [
            NamedEntity(ent.start_char, ent.end_char, ent.label_, ent.text, ent._.score, ent._.model)
            for ent in doc.ents
        ]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,24 +1,29 @@ | ||
from typing import Type | ||
|
||
from flashtext import KeywordProcessor | ||
|
||
from nerwhal import Pii | ||
from nerwhal.backends.base import Backend | ||
from .base import Backend | ||
from nerwhal.types import NamedEntity | ||
from ..recognizer_bases import FlashtextRecognizer | ||
|
||
|
||
class FlashtextBackend(Backend):
    """Backend that detects entities by keyword lookup with flashtext.

    Each registered recognizer contributes one ``KeywordProcessor`` together
    with the tag and score attached to every match that processor produces.
    """

    def __init__(self):
        # Parallel lists: index i describes the i-th registered recognizer.
        self.keyword_processors = []
        self.entities = []
        self.score = []

    def register_recognizer(self, recognizer_cls: Type[FlashtextRecognizer]):
        """Instantiate ``recognizer_cls`` and register its keywords, tag and score.

        The recognizer is expected to expose ``keywords``, ``TAG`` and ``SCORE``.
        """
        recognizer = recognizer_cls()

        keyword_processor = KeywordProcessor()
        # add_keywords_from_list() fills the processor in place and returns
        # None, so the processor itself must be stored - storing the call's
        # return value would make run() fail on None.extract_keywords().
        keyword_processor.add_keywords_from_list(recognizer.keywords)
        self.keyword_processors.append(keyword_processor)
        self.entities.append(recognizer.TAG)
        self.score.append(recognizer.SCORE)

    def run(self, text):
        """Return a list with one ``NamedEntity`` per keyword match found in ``text``."""
        ents = []
        for keyword_processor, entity, score in zip(self.keyword_processors, self.entities, self.score):
            # span_info=True yields (keyword, start, end) tuples.
            keywords = keyword_processor.extract_keywords(text, span_info=True)
            ents.extend(
                NamedEntity(start, end, entity, keyword, score, "flashtext") for keyword, start, end in keywords
            )
        return ents
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,22 +1,26 @@ | ||
import re | ||
from typing import Type | ||
|
||
from nerwhal import Pii | ||
from nerwhal.backends.base import Backend | ||
from .base import Backend | ||
from nerwhal.types import NamedEntity | ||
from ..recognizer_bases import ReRecognizer | ||
|
||
|
||
class ReBackend(Backend):
    """Backend that detects entities with compiled regular expressions.

    Each registered recognizer contributes one compiled pattern together with
    the tag and score attached to every match of that pattern.
    """

    def __init__(self):
        # Parallel lists: index i describes the i-th registered recognizer.
        self.compiled_regexps = []
        self.entities = []
        self.score = []

    def register_recognizer(self, recognizer_cls: Type[ReRecognizer]):
        """Instantiate ``recognizer_cls`` and store its compiled pattern, tag and score."""
        recognizer = recognizer_cls()

        compiled = re.compile(recognizer.regexp, flags=recognizer.flags)
        self.compiled_regexps.append(compiled)
        self.entities.append(recognizer.TAG)
        self.score.append(recognizer.SCORE)

    def run(self, text):
        """Return a list with one ``NamedEntity`` per regexp match found in ``text``."""
        found = []
        for regexp, tag, score in zip(self.compiled_regexps, self.entities, self.score):
            for match in regexp.finditer(text):
                found.append(NamedEntity(match.start(), match.end(), tag, match.group(), score, "re"))
        return found
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.