-
Notifications
You must be signed in to change notification settings - Fork 93
/
Copy pathtext_readability_transformers.py
103 lines (60 loc) · 2.56 KB
/
text_readability_transformers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
"""
Custom recipe that extracts readability features from text data using textstat.
"""
from h2oaicore.transformer_utils import CustomTransformer
import datatable as dt
import numpy as np
import string
class ReadabilityTransformer:
    """Shared implementation for single-column textstat readability features.

    Concrete transformers set ``_method`` to the name of a ``textstat``
    attribute; ``transform`` resolves that name and calls it on every value of
    the input column.
    """

    # NOTE(review): presumably the pip requirement the recipe framework
    # installs before running this transformer — confirm.
    _modules_needed_by_name = ['textstat==0.6.0']
    # Placeholder; each concrete transformer overrides it with a textstat name.
    _method = NotImplemented
    _parallel_task = False
    # Tests should treat a failure as a real failure (no skipping).
    _testing_can_skip_failure = False

    @staticmethod
    def do_acceptance_test():
        """Return True (presumably opts in to the framework's acceptance test)."""
        return True

    @staticmethod
    def get_default_properties():
        """Return the default properties: exactly one column of type "text"."""
        return {
            "col_type": "text",
            "min_cols": 1,
            "max_cols": 1,
            "relative_importance": 1,
        }

    def fit_transform(self, X: dt.Frame, y: np.array = None):
        """Delegate to ``transform``; ``y`` is accepted but not used."""
        return self.transform(X)

    def transform(self, X: dt.Frame):
        """Apply the configured textstat function to each value of the first column.

        ``X`` is converted to pandas and cast to ``str`` before the function is
        applied. The return value is whatever ``Series.apply`` produces for the
        first column.
        """
        # Imported here rather than at module level, as in the original.
        import textstat

        owner = self.__class__
        score = getattr(textstat, owner._method)
        as_text = X.to_pandas().astype(str)
        first_column = as_text.iloc[:, 0]
        return first_column.apply(lambda doc: score(doc))
class AvgSentenceLengthTransformer(ReadabilityTransformer, CustomTransformer):
    """Readability feature computed with ``textstat.avg_sentence_length``."""

    # Name of the textstat attribute resolved by ReadabilityTransformer.transform.
    _method = "avg_sentence_length"
    # NOTE(review): presumably marks the recipe as not needing a target —
    # confirm against CustomTransformer.
    _unsupervised = True
class AvgSyllablesPerWordTransformer(ReadabilityTransformer, CustomTransformer):
    """Readability feature computed with ``textstat.avg_syllables_per_word``."""

    # Name of the textstat attribute resolved by ReadabilityTransformer.transform.
    _method = "avg_syllables_per_word"
    # NOTE(review): presumably marks the recipe as not needing a target —
    # confirm against CustomTransformer.
    _unsupervised = True
class AvgCharacterPerWordTransformer(ReadabilityTransformer, CustomTransformer):
    """Readability feature computed with ``textstat.avg_character_per_word``."""

    # Name of the textstat attribute resolved by ReadabilityTransformer.transform.
    _method = "avg_character_per_word"
    # NOTE(review): presumably marks the recipe as not needing a target —
    # confirm against CustomTransformer.
    _unsupervised = True
class SyllableCountTransformer(ReadabilityTransformer, CustomTransformer):
    """Readability feature computed with ``textstat.syllable_count``."""

    # Name of the textstat attribute resolved by ReadabilityTransformer.transform.
    _method = "syllable_count"
    # NOTE(review): presumably marks the recipe as not needing a target —
    # confirm against CustomTransformer.
    _unsupervised = True
class PolySyllableCountTransformer(ReadabilityTransformer, CustomTransformer):
    """Readability feature computed with ``textstat.polysyllabcount``."""

    # Name of the textstat attribute resolved by ReadabilityTransformer.transform.
    _method = "polysyllabcount"
    # NOTE(review): presumably marks the recipe as not needing a target —
    # confirm against CustomTransformer.
    _unsupervised = True
class SmogIndexTransformer(ReadabilityTransformer, CustomTransformer):
    """Readability feature computed with ``textstat.smog_index``."""

    # Name of the textstat attribute resolved by ReadabilityTransformer.transform.
    _method = "smog_index"
    # NOTE(review): presumably marks the recipe as not needing a target —
    # confirm against CustomTransformer.
    _unsupervised = True
class GunningFogTransformer(ReadabilityTransformer, CustomTransformer):
    """Readability feature computed with ``textstat.gunning_fog``."""

    # Name of the textstat attribute resolved by ReadabilityTransformer.transform.
    _method = "gunning_fog"
    # NOTE(review): presumably marks the recipe as not needing a target —
    # confirm against CustomTransformer.
    _unsupervised = True
class FleschReadingEaseTransformer(ReadabilityTransformer, CustomTransformer):
    """Readability feature computed with ``textstat.flesch_reading_ease``."""

    # Name of the textstat attribute resolved by ReadabilityTransformer.transform.
    _method = "flesch_reading_ease"
    # NOTE(review): presumably marks the recipe as not needing a target —
    # confirm against CustomTransformer.
    _unsupervised = True
class ColemanLiauIndexTransformer(ReadabilityTransformer, CustomTransformer):
    """Readability feature computed with ``textstat.coleman_liau_index``."""

    # Name of the textstat attribute resolved by ReadabilityTransformer.transform.
    _method = "coleman_liau_index"
    # NOTE(review): presumably marks the recipe as not needing a target —
    # confirm against CustomTransformer.
    _unsupervised = True
class AutomatedReadabilityIndexTransformer(ReadabilityTransformer, CustomTransformer):
    """Readability feature computed with ``textstat.automated_readability_index``."""

    # Name of the textstat attribute resolved by ReadabilityTransformer.transform.
    _method = "automated_readability_index"
    # NOTE(review): presumably marks the recipe as not needing a target —
    # confirm against CustomTransformer.
    _unsupervised = True
class DaleChallReadabilityScoreTransformer(ReadabilityTransformer, CustomTransformer):
    """Readability feature computed with ``textstat.dale_chall_readability_score``."""

    # Name of the textstat attribute resolved by ReadabilityTransformer.transform.
    _method = "dale_chall_readability_score"
    # NOTE(review): presumably marks the recipe as not needing a target —
    # confirm against CustomTransformer.
    _unsupervised = True
class LinsearWriteFormulaTransformer(ReadabilityTransformer, CustomTransformer):
    """Readability feature computed with ``textstat.linsear_write_formula``."""

    # Name of the textstat attribute resolved by ReadabilityTransformer.transform.
    _method = "linsear_write_formula"
    # NOTE(review): presumably marks the recipe as not needing a target —
    # confirm against CustomTransformer.
    _unsupervised = True