Skip to content

Commit

Permalink
Add support for Universal Dependencies v2.0
Browse files Browse the repository at this point in the history
  • Loading branch information
rominf committed Mar 3, 2017
1 parent 8dff040 commit 66e1109
Show file tree
Hide file tree
Showing 14 changed files with 155 additions and 74 deletions.
6 changes: 4 additions & 2 deletions spacy/attrs.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ NAMES = [key for key, value in sorted(IDS.items(), key=lambda item: item[1])]

def intify_attrs(stringy_attrs, strings_map=None, _do_deprecated=False):
'''Normalize a dictionary of attributes, converting them to ints.
Arguments:
stringy_attrs (dict):
Dictionary keyed by attribute string names. Values can be ints or strings.
Expand Down Expand Up @@ -125,7 +125,9 @@ def intify_attrs(stringy_attrs, strings_map=None, _do_deprecated=False):
'VerbForm', 'PronType', 'Aspect', 'Tense', 'PartType', 'Poss',
'Hyph', 'ConjType', 'NumType', 'Foreign', 'VerbType', 'NounType',
'Number', 'PronType', 'AdjType', 'Person', 'Variant', 'AdpType',
'Reflex', 'Negative', 'Mood', 'Aspect', 'Case']
'Reflex', 'Negative', 'Mood', 'Aspect', 'Case',
'Polarity', # U20
]
for key in morph_keys:
if key in stringy_attrs:
stringy_attrs.pop(key)
Expand Down
2 changes: 1 addition & 1 deletion spacy/de/tag_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
"PRF": {POS: PRON, "PronType": "prs", "Reflex": "yes"},
"PTKA": {POS: PART},
"PTKANT": {POS: PART, "PartType": "res"},
"PTKNEG": {POS: PART, "Negative": "yes"},
"PTKNEG": {POS: PART, "Polarity": "Neg"},
"PTKVZ": {POS: PART, "PartType": "vbp"},
"PTKZU": {POS: PART, "PartType": "inf"},
"PWAT": {POS: DET, "PronType": "int"},
Expand Down
2 changes: 1 addition & 1 deletion spacy/en/tag_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
"$": {POS: SYM, "Other": {"SymType": "currency"}},
"#": {POS: SYM, "Other": {"SymType": "numbersign"}},
"AFX": {POS: ADJ, "Hyph": "yes"},
"CC": {POS: CONJ, "ConjType": "coor"},
"CC": {POS: CCONJ, "ConjType": "coor"},
"CD": {POS: NUM, "NumType": "card"},
"DT": {POS: DET},
"EX": {POS: ADV, "AdvType": "ex"},
Expand Down
1 change: 1 addition & 0 deletions spacy/language_data/tag_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
"AUX": {POS: AUX},
"X": {POS: X},
"CONJ": {POS: CONJ},
"CCONJ": {POS: CCONJ}, # U20
"ADJ": {POS: ADJ},
"VERB": {POS: VERB},
"PART": {POS: PART}
Expand Down
6 changes: 5 additions & 1 deletion spacy/morphology.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ cdef class Morphology:
cdef int assign_tag(self, TokenC* token, tag) except -1

cdef int assign_tag_id(self, TokenC* token, int tag_id) except -1

cdef int assign_feature(self, uint64_t* morph, univ_morph_t feat_id, bint value) except -1


Expand Down Expand Up @@ -80,6 +80,7 @@ cpdef enum univ_morph_t:
Definite_two
Definite_def
Definite_red
Definite_cons # U20
Definite_ind
Degree_cmp
Degree_comp
Expand All @@ -103,6 +104,8 @@ cpdef enum univ_morph_t:
Negative_neg
Negative_pos
Negative_yes
Polarity_neg # U20
Polarity_pos # U20
Number_com
Number_dual
Number_none
Expand Down Expand Up @@ -151,6 +154,7 @@ cpdef enum univ_morph_t:
VerbForm_partPres
VerbForm_sup
VerbForm_trans
VerbForm_conv # U20
VerbForm_gdv # la
Voice_act
Voice_cau
Expand Down
4 changes: 4 additions & 0 deletions spacy/morphology.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ IDS = {
"Definite_two": Definite_two,
"Definite_def": Definite_def,
"Definite_red": Definite_red,
"Definite_cons": Definite_cons, # U20
"Definite_ind": Definite_ind,
"Degree_cmp": Degree_cmp,
"Degree_comp": Degree_comp,
Expand All @@ -215,6 +216,8 @@ IDS = {
"Negative_neg": Negative_neg,
"Negative_pos": Negative_pos,
"Negative_yes": Negative_yes,
"Polarity_neg": Polarity_neg, # U20
"Polarity_pos": Polarity_pos, # U20
"Number_com": Number_com,
"Number_dual": Number_dual,
"Number_none": Number_none,
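Expand Down Expand Up @@ -263,6 +266,7 @@ IDS = {
"VerbForm_partPres": VerbForm_partPres,
"VerbForm_sup": VerbForm_sup,
"VerbForm_trans": VerbForm_trans,
"VerbForm_conv": VerbForm_conv, # U20
"VerbForm_gdv ": VerbForm_gdv, # la; NOTE(review): key has a trailing space, unlike every other key here - likely a typo, confirm before relying on "VerbForm_gdv" lookups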
"Voice_act": Voice_act,
"Voice_cau": Voice_cau,
Expand Down
1 change: 1 addition & 0 deletions spacy/parts_of_speech.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ cpdef enum univ_pos_t:
ADV
AUX
CONJ
CCONJ # U20
DET
INTJ
NOUN
Expand Down
3 changes: 2 additions & 1 deletion spacy/parts_of_speech.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ IDS = {
"ADP": ADP,
"ADV": ADV,
"AUX": AUX,
"CONJ": CONJ,
"CONJ": CONJ,
"CCONJ": CCONJ, # U20
"DET": DET,
"INTJ": INTJ,
"NOUN": NOUN,
Expand Down
42 changes: 38 additions & 4 deletions spacy/symbols.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ cpdef enum symbol_t:
LIKE_EMAIL
IS_STOP
IS_OOV

FLAG14 = 14
FLAG15
FLAG16
Expand Down Expand Up @@ -90,6 +90,7 @@ cpdef enum symbol_t:
ADV
AUX
CONJ
CCONJ # U20
DET
INTJ
NOUN
Expand All @@ -107,11 +108,14 @@ cpdef enum symbol_t:

Animacy_anim
Animacy_inam
Animacy_hum # U20
Aspect_freq
Aspect_imp
Aspect_mod
Aspect_none
Aspect_perf
Aspect_iter # U20
Aspect_hab # U20
Case_abe
Case_abl
Case_abs
Expand All @@ -120,10 +124,12 @@ cpdef enum symbol_t:
Case_all
Case_cau
Case_com
Case_cmp # U20
Case_dat
Case_del
Case_dis
Case_ela
Case_equ # U20
Case_ess
Case_gen
Case_ill
Expand All @@ -142,7 +148,9 @@ cpdef enum symbol_t:
Definite_two
Definite_def
Definite_red
Definite_cons # U20
Definite_ind
Definite_spec # U20
Degree_cmp
Degree_comp
Degree_none
Expand All @@ -151,6 +159,8 @@ cpdef enum symbol_t:
Degree_abs
Degree_com
Degree_dim # du
Degree_equ # U20
Evident_nfh # U20
Gender_com
Gender_fem
Gender_masc
Expand All @@ -162,16 +172,21 @@ cpdef enum symbol_t:
Mood_pot
Mood_sub
Mood_opt
Mood_prp # U20
Mood_adm # U20
Negative_neg
Negative_pos
Negative_yes
Polarity_neg # U20
Polarity_pos # U20
Number_com
Number_dual
Number_none
Number_plur
Number_sing
Number_ptan # bg
Number_count # bg
Number_count # bg, U20
Number_tri # U20
NumType_card
NumType_dist
NumType_frac
Expand All @@ -197,7 +212,8 @@ cpdef enum symbol_t:
PronType_rel
PronType_tot
PronType_clit
PronType_exc # es, ca, it, fa
PronType_exc # es, ca, it, fa, U20
PronType_emp # U20
Reflex_yes
Tense_fut
Tense_imp
Expand All @@ -213,12 +229,17 @@ cpdef enum symbol_t:
VerbForm_partPres
VerbForm_sup
VerbForm_trans
VerbForm_conv # U20
VerbForm_gdv # la
VerbForm_vnoun # U20
Voice_act
Voice_cau
Voice_pass
Voice_mid # gkc
Voice_mid # gkc, U20
Voice_int # hb
Voice_antip # U20
Voice_dir # U20
Voice_inv # U20
Abbr_yes # cz, fi, sl, U
AdpType_prep # cz, U
AdpType_post # U
Expand Down Expand Up @@ -284,6 +305,10 @@ cpdef enum symbol_t:
Number_psee_plur # U
Number_psor_sing # cz, fi, sl, U
Number_psor_plur # cz, fi, sl, U
Number_pauc # U20
Number_grpa # U20
Number_grpl # U20
Number_inv # U20
NumForm_digit # cz, sl, U
NumForm_roman # cz, sl, U
NumForm_word # cz, sl, U
Expand Down Expand Up @@ -311,6 +336,8 @@ cpdef enum symbol_t:
Person_psor_one # fi, U
Person_psor_two # fi, U
Person_psor_three # fi, U
Person_zero # U20
Person_four # U20
Polite_inf # bq, U
Polite_pol # bq, U
Polite_abs_inf # bq, U
Expand All @@ -319,6 +346,10 @@ cpdef enum symbol_t:
Polite_erg_pol # bq, U
Polite_dat_inf # bq, U
Polite_dat_pol # bq, U
Polite_infm # U20
Polite_form # U20
Polite_form_elev # U20
Polite_form_humb # U20
Prefix_yes # U
PrepCase_npr # cz
PrepCase_pre # U
Expand Down Expand Up @@ -383,6 +414,7 @@ cpdef enum symbol_t:
ccomp
complm
conj
cop # U20
csubj
csubjpass
dep
Expand All @@ -405,6 +437,8 @@ cpdef enum symbol_t:
num
number
oprd
obj # U20
obl # U20
parataxis
partmod
pcomp
Expand Down
Loading

0 comments on commit 66e1109

Please sign in to comment.