Skip to content

Commit

Permalink
ENH: Check for whitespace (#616)
Browse files Browse the repository at this point in the history
  • Loading branch information
larsoner authored Aug 12, 2018
1 parent 0e92056 commit e3c9652
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 65 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,6 @@ dist
ld
codespell.egg-info
*.pyc
*.orig
.cache/
.pytest_cache/
33 changes: 0 additions & 33 deletions codespell_lib/data/dictionary.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
1nd->1st
a a->a
a bout->about, a bout,
aare->are
abailable->available
abandonded->abandoned
Expand Down Expand Up @@ -419,7 +417,6 @@ affilate->affiliate
affilliate->affiliate
affort->afford, effort,
affraid->afraid
afore mentioned->aforementioned
aforememtioned->aforementioned
aforementionned->aforementioned
aformentioned->aforementioned
Expand Down Expand Up @@ -678,12 +675,6 @@ amoutns->amounts
amuch->much
amung->among
amunition->ammunition
an an->an
an other->another
an we->and we
an will->and will
an window->a window
an with->and with
analagous->analogous
analises->analysis, analyses,
analitic->analytic
Expand Down Expand Up @@ -711,7 +702,6 @@ ancapsulate->encapsulate
ancestory->ancestry
anchord->anchored
ancilliary->ancillary
and and->and
andd->and
andlers->handlers, antlers,
androgenous->androgynous
Expand Down Expand Up @@ -1398,7 +1388,6 @@ basicly->basically
batery->battery
bcak->back
bcause->because
be be->be
beacause->because
beachead->beachhead
beacuse->because
Expand Down Expand Up @@ -2197,7 +2186,6 @@ cnat->can't
cnter->center
co-incided->coincided
cobvers->covers
Coca Cola->Coca-Cola
coctail->cocktail
codepoitn->codepoint
codespel->codespell
Expand Down Expand Up @@ -4169,7 +4157,6 @@ dota->data
doub->doubt, daub,
doube->double
doubel->double
double click->double-click
doubleclick->double-click
doucment->document
doulbe->double
Expand Down Expand Up @@ -4200,7 +4187,6 @@ dreasm->dreams
dreawn->drawn
driectly->directly
drnik->drink
drop down->drop-down
dropable->droppable
droped->dropped
droping->dropping
Expand Down Expand Up @@ -5988,7 +5974,6 @@ idicates->indicates
idicating->indicating
idiosyncracy->idiosyncrasy
idividual->individual
if if->if
iff->if, disabled due to valid mathematical concept
ignonre->ignore
ignorence->ignorance
Expand Down Expand Up @@ -6925,7 +6910,6 @@ irrelvant->irrelevant
irreplacable->irreplaceable
irresistable->irresistible
irresistably->irresistibly
is is->is, it is, is it,
is'nt->isn't
isconnection->isconnected
iserting->inserting
Expand All @@ -6948,7 +6932,6 @@ istance->instance
istead->instead
istener->listeners
isue->issue
it it->it, it is, is it,
iteger->integer
iterater->iterator
iteratered->iterated
Expand All @@ -6967,7 +6950,6 @@ itializing->initializing
itnernal->internal
itnervals->intervals
itnroduced->introduced
its is->it is, it's,
itsef->itself
itselfs->itself
itselt->itself
Expand Down Expand Up @@ -7889,7 +7871,6 @@ muscial->musical
muscician->musician
muscicians->musicians
musn't->mustn't
must aligned->must align, must be aligned,
mustator->mutator
muste->must
mut->must, mutt, moot,
Expand Down Expand Up @@ -8065,8 +8046,6 @@ normnal->normal
northen->northern
northereastern->northeastern
nortmally->normally
not either->neither
not quit->not quite
notabley->notably
notaion->notation
notasion->notation
Expand Down Expand Up @@ -8233,7 +8212,6 @@ ocurrence->occurrence
ocurrences->occurrences
oder->order, odor,
oen->one
of of->of
offcers->officers
offcially->officially
offereings->offerings
Expand Down Expand Up @@ -8389,7 +8367,6 @@ optmizations->optimizations
optmize->optimize
optmized->optimized
optomism->optimism
or or->or
orded->ordered
orderd->ordered
orgamise->organise
Expand Down Expand Up @@ -8481,7 +8458,6 @@ ouputarea->outputarea
ouputs->outputs
ouputted->outputted
ouputting->outputting
our our->our
ourselfs->ourselves
ourselve->ourselves
ourselvs->ourselves
Expand Down Expand Up @@ -11862,14 +11838,12 @@ thansk->thanks
thant->than
thare->there
thast->that, that's,
that that->that that, that, that the, that they, that this,
that;s->that's
thatn->that, than,
thats'->that's
thats->that's
thats;->that's
thck->thick
the the->the
theather->theater
theer->there
theese->these
Expand Down Expand Up @@ -11899,7 +11873,6 @@ thess->this, these,
thest->test
thether->tether, whether,
thev->the
they they->they
theyre->they're
thgat->that
thge->the
Expand All @@ -11917,7 +11890,6 @@ thikns->thinks
thimngs->things
thinigs->things
thinn->thin
this this->this, this is, is this,
thise->these
thist->this
thiunk->think
Expand Down Expand Up @@ -11959,7 +11931,6 @@ thron->thrown, throne,
throrough->thorough
throttoling->throttling
throug->through
through out->throughout
throughly->thoroughly
throught->thought, through, throughout,
througout->throughout
Expand Down Expand Up @@ -12023,7 +11994,6 @@ tkae->take
tkaes->takes
tkaing->taking
tlaking->talking
to to->to, to do,
tobbaco->tobacco
tobot->robot
toches->touches
Expand Down Expand Up @@ -12958,8 +12928,6 @@ warnig->warning
warnigs->warnings
warrent->warrant
warrriors->warriors
was occured->has occurred
was occurred->has occurred
was'nt->wasn't
was't->wasn't
was;t->wasn't
Expand All @@ -12978,7 +12946,6 @@ wavelenght->wavelength
wavelenghts->wavelengths
wavelnes->wavelines
wayword->wayward
we we->we
weant->want, wean,
weaponary->weaponry
weas->was
Expand Down
32 changes: 0 additions & 32 deletions codespell_lib/tests/test_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import os
import os.path as op
import subprocess
import re
import sys
import tempfile
import warnings
Expand Down Expand Up @@ -398,37 +397,6 @@ def FakeStdin(text):
sys.stdin = oldin


def test_dictionary_formatting():
    """Validate the formatting of every line in ``data/dictionary.txt``.

    Each line is split on ``->`` into an error and its correction text.
    For every line this asserts that:

    * the error, compared case-insensitively, did not already appear on
      an earlier line;
    * the correction text is non-empty and does not start with whitespace;
    * correction text containing a comma either ends with a comma or has
      ``disabled`` in the text following its last comma;
    * the stripped, lower-cased corrections contain no duplicates.
    """
    # Raw string literal: '\s' in a plain literal is an invalid escape
    # sequence and triggers a warning on modern Python versions.
    leading_ws = re.compile(r'^\s.*')
    seen_errors = set()
    dict_path = op.join(op.dirname(__file__), '..', 'data', 'dictionary.txt')
    with open(dict_path, 'rb') as fid:
        for line in fid:
            err, rep = line.decode('utf-8').split('->')
            # Duplicate detection is case-insensitive.
            err = err.lower()
            assert err not in seen_errors, 'entry already exists'
            rep = rep.rstrip('\n')
            assert len(rep) > 0, ('%s: correction %r must be non-empty'
                                  % (err, rep))
            assert not leading_ws.match(rep), ('%s: correction %r cannot '
                                               'start with whitespace'
                                               % (err, rep))
            if ',' in rep and not rep.endswith(','):
                assert 'disabled' in rep.split(',')[-1], \
                    ('currently corrections must end with trailing "," (if'
                     ' multiple corrections are available) or '
                     'have "disabled" in the comment')
            seen_errors.add(err)
            # Drop the empty piece left by a trailing comma before
            # checking that no correction is listed twice.
            reps = [r.strip() for r in rep.lower().split(',')]
            reps = [r for r in reps if r]
            assert len(set(reps)) == len(reps), \
                'entries are not (lower-case) unique'


def test_case_handling(reload_codespell_lib):
"""Test that capitalized entries get detected properly."""
# Some simple Unicode things
Expand Down
37 changes: 37 additions & 0 deletions codespell_lib/tests/test_dictionary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# -*- coding: utf-8 -*-

import os.path as op
import re


def test_dictionary_formatting():
    """Validate the formatting of every line in ``data/dictionary.txt``.

    Each line is split on ``->`` into an error and its correction text.
    For every line this asserts that:

    * the error, compared case-insensitively, did not already appear on
      an earlier line;
    * the error contains no whitespace;
    * the correction text is non-empty and does not start with whitespace;
    * correction text containing a comma either ends with a comma or has
      ``disabled`` in the text following its last comma;
    * the stripped, lower-cased corrections contain no duplicates.
    """
    # Both patterns are raw strings: '\s' in a plain literal is an invalid
    # escape sequence and triggers a warning on modern Python versions.
    any_ws = re.compile(r'\s')  # whitespace anywhere (used with search)
    leading_ws = re.compile(r'^\s.*')  # whitespace at the very start
    seen_errors = set()
    dict_path = op.join(op.dirname(__file__), '..', 'data', 'dictionary.txt')
    with open(dict_path, 'rb') as fid:
        for line in fid:
            err, rep = line.decode('utf-8').split('->')
            # Duplicate detection is case-insensitive.
            err = err.lower()
            assert err not in seen_errors, 'error %r already exists' % err
            assert any_ws.search(err) is None, \
                'error %r has whitespace' % err
            rep = rep.rstrip('\n')
            assert len(rep) > 0, ('error %s: correction %r must be non-empty'
                                  % (err, rep))
            assert not leading_ws.match(rep), ('error %s: correction %r '
                                               'cannot start with whitespace'
                                               % (err, rep))
            if ',' in rep and not rep.endswith(','):
                assert 'disabled' in rep.split(',')[-1], \
                    ('currently corrections must end with trailing "," (if'
                     ' multiple corrections are available) or '
                     'have "disabled" in the comment')
            seen_errors.add(err)
            # Drop the empty piece left by a trailing comma before
            # checking that no correction is listed twice.
            reps = [r.strip() for r in rep.lower().split(',')]
            reps = [r for r in reps if r]
            assert len(set(reps)) == len(reps), \
                'entries are not (lower-case) unique'

0 comments on commit e3c9652

Please sign in to comment.