-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathlesson3-series.py
90 lines (58 loc) · 1.54 KB
/
lesson3-series.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Jan 25 08:18:05 2018
@author: lpa2a
"""
# import convention
from pandas import Series, DataFrame
import pandas as pd
import numpy as np
# --- Series basics -----------------------------------------------------------
# A Series is a one-dimensional labelled array. When printed, the index is
# shown on the left and the values on the right.
ser = Series([4, 7, -5, 3])
print(ser)

# Elements do not have to share a type.
ser = Series([4, 'pizza', -5, 3])
print(ser)

# NOTE: the bare expressions in this lesson (type(ser), ser.values, ser.index,
# and the final ser4) only display a result when run line-by-line in an
# interactive session; when run as a script they are evaluated and discarded.
type(ser)

# To recover the values:
ser.values
type(ser.values)  # numpy.ndarray

# To recover the index:
ser.index

# --- Setting the index explicitly ---------------------------------------------
ser2 = Series([4, 7, -5, 3], index=['d', 'b', 'a', 'c'])

# Two ways to index: by position and by label.
# Positional access goes through .iloc: using an integer key with [] on a
# string-labelled Series is deprecated and is treated as a label lookup
# (KeyError) in newer pandas releases.
print(ser2.iloc[0])
print(ser2['d'])

# The return of fancy indexing: select several labels at once.
print(ser2[['a', 'b', 'c']])

# Our usual NumPy-style tricks still work, and the index is preserved.
print(ser2[ser2 > 0])
print(ser2 * 2)
print(np.exp(ser2))
# ... and many more.

# --- Building a Series from a dict --------------------------------------------
# So far we have generated Series from lists; a dict works too, with the keys
# becoming the index.
sdata = {'Ohio': 35000, 'Texas': 71000, 'Oregon': 16000, 'Utah': 5000}
ser3 = Series(sdata)
print(ser3)

# What if the dict values are not uniform? Here one value is itself a dict.
sdata2 = {'Ohio': {'baseball': 35000}, 'Texas': 71000, 'Oregon': 16000, 'Utah': 5000}
ser3b = Series(sdata2)
print(ser3b)

# Now let's set the index by hand. 'California' is not a key of sdata, so it
# gets NaN; 'Utah' is not in states, so it is left out.
states = ['California', 'Ohio', 'Oregon', 'Texas']
ser4 = Series(sdata, index=states)
print(ser4)

# --- Missing data -------------------------------------------------------------
# pandas uses NaN ("not a number") to mark missing / NA values.
# Use these functions to detect them:
print(pd.isnull(ser4))
print(pd.notnull(ser4))

# --- Major point: operations align on index labels ----------------------------
# Labels present in only one operand produce NaN in the result.
print(ser3)
print(ser4)
print(ser3 + ser4)

# You can replace a Series' index in place by assignment.
ser4.index = ['Bob', 'Steve', 'Jeff', 'Ryan']
print(ser4)
ser4