Skip to content

Commit

Permalink
Support span._. in component decorator attrs (explosion#4555)
Browse files Browse the repository at this point in the history
* Support span._. in component decorator attrs

* Adjust error [ci skip]
  • Loading branch information
ines authored Oct 30, 2019
1 parent 4e1de85 commit 85f2b04
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 7 deletions.
13 changes: 8 additions & 5 deletions spacy/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from collections import OrderedDict
from wasabi import Printer

from .tokens import Doc, Token
from .tokens import Doc, Token, Span
from .errors import Errors, Warnings, user_warning


Expand Down Expand Up @@ -78,12 +78,15 @@ def validate_attrs(values):
RETURNS (iterable): The checked attributes.
"""
data = dot_to_dict(values)
objs = {"doc": Doc, "token": Token}
objs = {"doc": Doc, "token": Token, "span": Span}
for obj_key, attrs in data.items():
if obj_key not in objs: # first element is not doc/token
if obj_key == "span":
span_attrs = [attr for attr in values if attr.startswith("span.")]
if obj_key == "span":
# Support Span only for custom extension attributes
span_attrs = [attr for attr in values if attr.startswith("span.")]
span_attrs = [attr for attr in span_attrs if not attr.startswith("span._.")]
if span_attrs:
raise ValueError(Errors.E180.format(attrs=", ".join(span_attrs)))
if obj_key not in objs: # first element is not doc/token/span
invalid_attrs = ", ".join(a for a in values if a.startswith(obj_key))
raise ValueError(Errors.E181.format(obj=obj_key, attrs=invalid_attrs))
if not isinstance(attrs, dict): # attr is something like "doc"
Expand Down
3 changes: 2 additions & 1 deletion spacy/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -515,7 +515,8 @@ class Errors(object):
"in a list. For example: matcher.add('{key}', [doc])")
E180 = ("Span attributes can't be declared as required or assigned by "
"components, since spans are only views of the Doc. Use Doc and "
"Token attributes only and remove the following: {attrs}")
"Token attributes (or custom extension attributes) only and remove "
"the following: {attrs}")
E181 = ("Received invalid attributes for unkown object {obj}: {attrs}. "
"Only Doc and Token attributes are supported.")
E182 = ("Received invalid attribute declaration: {attr}\nDid you forget "
Expand Down
3 changes: 2 additions & 1 deletion spacy/tests/pipeline/test_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def __call__(self, doc):


def test_analysis_validate_attrs_valid():
attrs = ["doc.sents", "doc.ents", "token.tag", "token._.xyz"]
attrs = ["doc.sents", "doc.ents", "token.tag", "token._.xyz", "span._.xyz"]
assert validate_attrs(attrs)
for attr in attrs:
assert validate_attrs([attr])
Expand All @@ -139,6 +139,7 @@ def test_analysis_validate_attrs_valid():
"token.tag_",
"token.tag.xyz",
"token._.xyz.abc",
"span.label",
],
)
def test_analysis_validate_attrs_invalid(attr):
Expand Down

0 comments on commit 85f2b04

Please sign in to comment.