-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathimprover.py
executable file
·79 lines (54 loc) · 1.81 KB
/
improver.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/usr/bin/python
from __future__ import division
import re
try:
import nltk.corpus
except ImportError:
print "install python-nltk"
exit(1)
try:
import numpy as np
except ImportError:
print "install python-numpy"
exit(1)
print "all modules have been installed"
# Directory that is scanned for the input documents.
corpus_root = './texts/'
# Load every file under corpus_root whose name matches '.*txt'.
# The reader class is reached through nltk.corpus, the only nltk name the
# import above binds; the bare name PlaintextCorpusReader was never defined
# in this module and raised NameError.
text = nltk.corpus.PlaintextCorpusReader(corpus_root, '.*txt')
#class DocFixer:
def rule_sentlen(text, maxlimit):
    """Check that the average sentence length stays below a threshold.

    Args:
        text: corpus-like object exposing ``words()`` and ``sents()``; both
            results must support ``len()``.
        maxlimit: exclusive upper bound for the average number of word
            tokens per sentence.

    Returns:
        True if the average is strictly below ``maxlimit``. A text without
        any sentences has nothing that is too long and also yields True.
    """
    sentence_count = len(text.sents())
    if sentence_count == 0:
        # Nothing to average; avoids ZeroDivisionError on an empty corpus.
        return True
    # float() keeps this a true division even when the module-level
    # ``from __future__ import division`` is not in effect.
    average_length = float(len(text.words())) / sentence_count
    return average_length < maxlimit
def rule_commas(text, maxlimit):
    """Report every sentence that contains more than ``maxlimit`` commas.

    Args:
        text: corpus-like object whose ``sents()`` yields sentences as
            sequences of string tokens.
        maxlimit: largest number of commas a sentence may contain without
            being reported.

    Returns:
        None. Each offending sentence is printed (tokens joined by single
        spaces) and followed by a blank line.
    """
    for sent in text.sents():
        # Count comma characters inside every token instead of tokens equal
        # to ',', so a comma that shares a token with other punctuation
        # (for example ',"') is not missed.
        comma_count = sum(token.count(',') for token in sent)
        if comma_count > maxlimit:
            # Single-argument print(...) behaves exactly like the print
            # statement used elsewhere in this file.
            print("More commas than %s in this sentence: %s"
                  % (maxlimit, ' '.join(sent)))
            print("")
def rule_variations(text, varlist):
    """Suggest a single spelling when several variants of a word are used.

    Args:
        text: corpus-like object whose ``words()`` yields the string tokens
            of the whole text.
        varlist: iterable of groups (e.g. tuples); each group lists the
            interchangeable spellings of one expression.

    Returns:
        None. For every group with more than one spelling present in the
        text, a message naming the spellings that were actually found is
        printed, followed by a blank line.
    """
    # Build the vocabulary once: membership tests against a set are O(1),
    # whereas testing against the token sequence rescans the whole text for
    # every option.
    vocabulary = set(text.words())
    for var in varlist:
        # Only the spellings that really occur are reported; listing the
        # whole group would blame variants the author never used.
        used = [opt for opt in var if opt in vocabulary]
        if len(used) > 1:
            # Single-argument print(...) behaves exactly like the print
            # statement used elsewhere in this file.
            print("You have used these variations of the same word: %s. Consider using only one."
                  % (','.join(used)))
            print("")
def rule_wrong_context(text, word, context, alt):
    """Suggest ``alt`` wherever ``word`` appears together with ``context``.

    Args:
        text: corpus-like object whose ``sents()`` yields sentences as
            sequences of string tokens.
        word: expression that is considered wrong in this context.
        context: phrase that must occur in the same sentence (matched as a
            substring of the space-joined sentence).
        alt: replacement to suggest for ``word``.

    Returns:
        None. One suggestion line is printed per matching sentence.
    """
    # Match ``word`` only as a complete expression: a plain substring test
    # would find 'Letter' inside 'Letters' and flag sentences that already
    # use the suggested alternative. Lookarounds are used instead of \b so
    # that a ``word`` starting or ending with punctuation still works.
    whole_word = re.compile(r'(?<!\w)' + re.escape(word) + r'(?!\w)',
                            re.UNICODE)
    for sent in text.sents():
        concat = ' '.join(sent)
        if context in concat and whole_word.search(concat):
            # Single-argument print(...) behaves exactly like the print
            # statement used elsewhere in this file.
            print("Consider using '%s' instead of '%s' in this sentence: %s"
                  % (alt, word, concat))
# Run every check against the loaded corpus and print the findings.
# Single-argument print(...) behaves exactly like the print statement used
# elsewhere in this file.
sentlen_verdict = "Ok" if rule_sentlen(text, 12) else "too long"
print("Average length of sentences is: %s" % sentlen_verdict)
print("")
rule_commas(text, 4)
rule_variations(text, [("Fig", "Figure")])
rule_wrong_context(text, 'Letter', 'Physical Review', 'Letters')
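# Notes (not yet reflected in the code above):
# {prl} profile -- presumably Physical Review Letters; confirm:
#   sentence length < 15
#   abstract length < 300
# Output for each suggestion:
#   line number / paragraph number
#   error code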