forked from FilippoBovo/production-data-science
-
Notifications
You must be signed in to change notification settings - Fork 0
/
test_data.py
71 lines (65 loc) · 3.05 KB
/
test_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
from titanic import data
import pandas as pd
from pandas.util.testing import assert_frame_equal
def test_extract_title():
    """Check the ``Title`` column produced by ``data.extract_title``.

    The input frame has a single ``Name`` column holding one sample name
    per raw title, with the raw title used as the row label. The result of
    ``data.extract_title`` is expected to equal that same ``Name`` column
    plus a categorical ``Title`` column containing the grouped titles
    listed in ``titles`` below.
    """
    # One sample name per raw title; defined once so the input frame and
    # the expected frame cannot drift apart.
    names = {
        'Capt': 'Crosby, Capt. Edward Gifford',
        'Col': 'Simonius-Blumer, Col. Oberst Alfons',
        'Don': 'Uruchurtu, Don. Manuel E',
        'Dr': 'Minahan, Dr. William Edward',
        'Jonkheer': 'Reuchlin, Jonkheer. John George',
        'Lady': 'Duff Gordon, Lady. (Lucille Christiana Sutherland) ("Mrs Morgan")',
        'Major': 'Peuchen, Major. Arthur Godfrey',
        'Master': 'Palsson, Master. Gosta Leonard',
        'Miss': 'Heikkinen, Miss. Laina',
        'Mlle': 'Sagesser, Mlle. Emma',
        'Mme': 'Aubart, Mme. Leontine Pauline',
        'Mr': 'Braund, Mr. Owen Harris',
        'Mrs': 'Cumings, Mrs. John Bradley (Florence Briggs Thayer)',
        'Ms': 'Reynaldo, Ms. Encarnacion',
        'Rev': 'Byles, Rev. Thomas Roussel Davids',
        'Sir': 'Duff Gordon, Sir. Cosmo Edmund ("Mr Morgan")',
        'the Countess': 'Rothes, the Countess. of (Lucy Noel Martha Dyer-Edwards)',
    }
    # Expected grouped title for each raw title, in the same row order.
    titles = {
        'Capt': 'Officer',
        'Col': 'Officer',
        'Don': 'Royalty',
        'Dr': 'Officer',
        'Jonkheer': 'Royalty',
        'Lady': 'Royalty',
        'Major': 'Officer',
        'Master': 'Master',
        'Miss': 'Miss',
        'Mlle': 'Miss',
        'Mme': 'Mrs',
        'Mr': 'Mr',
        'Mrs': 'Mrs',
        'Ms': 'Mrs',
        'Rev': 'Officer',
        'Sir': 'Royalty',
        'the Countess': 'Royalty',
    }

    # orient='index' turns each dict key into a row label and puts the
    # values in a column named 0, which is then renamed to 'Name'.
    df = pd.DataFrame.from_dict(names, orient='index').rename(
        columns={0: 'Name'}
    )

    result = data.extract_title(df)

    expected = pd.DataFrame.from_dict({'Name': names, 'Title': titles})
    expected['Title'] = expected['Title'].astype('category')

    # Use the public pandas.testing API; pandas.util.testing is deprecated.
    pd.testing.assert_frame_equal(result, expected)