-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtestProvisioning.py
292 lines (239 loc) · 13.2 KB
/
testProvisioning.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
import json
import os
from datetime import datetime, timedelta

import boto3
import numpy as np
import pandas as pd
import requests

from creditCalculation import calculate_credits, plot_credits
# Catalogue of the EC2 instance types considered when recommending a size.
#   name      - EC2 instance type name
#   price     - hourly price (multiplied by 24 elsewhere to get a daily cost);
#               NOTE(review): presumably on-demand USD for one region - confirm
#   vCPU      - number of virtual CPUs
#   base_util - burstable (t3) types only: baseline utilisation per vCPU as a
#               fraction, i.e. the load sustainable without spending credits
#   memory    - memory size as a display string
# Within each family the entries are listed from cheapest to most expensive;
# the t-type search relies on that order to find the cheapest fit first.
instance_types = [
    {"name": "t3.nano", "price": 0.0052, "vCPU": 2, "base_util": 0.05, "memory": "0.5GiB"},
    {"name": "t3.micro", "price": 0.0104, "vCPU": 2, "base_util": 0.1, "memory": "1GiB"},
    {"name": "t3.small", "price": 0.0208, "vCPU": 2, "base_util": 0.2, "memory": "2GiB"},
    {"name": "t3.medium", "price": 0.0416, "vCPU": 2, "base_util": 0.2, "memory": "4GiB"},
    {"name": "t3.large", "price": 0.0832, "vCPU": 2, "base_util": 0.3, "memory": "8GiB"},
    # t3.xlarge earns 96 credits/hour over 4 vCPUs => 40% baseline per vCPU
    # (the previous value of 0.3 understated it).
    {"name": "t3.xlarge", "price": 0.1664, "vCPU": 4, "base_util": 0.4, "memory": "16GiB"},
    {"name": "t3.2xlarge", "price": 0.3328, "vCPU": 8, "base_util": 0.4, "memory": "32GiB"},
    {"name": "m4.large", "price": 0.10, "vCPU": 2, "memory": "8GiB"},
    {"name": "m4.xlarge", "price": 0.20, "vCPU": 4, "memory": "16GiB"},
    {"name": "m4.2xlarge", "price": 0.40, "vCPU": 8, "memory": "32GiB"},
    {"name": "m4.4xlarge", "price": 0.80, "vCPU": 16, "memory": "64GiB"},
    {"name": "m4.10xlarge", "price": 2.00, "vCPU": 40, "memory": "160GiB"},
    {"name": "m4.16xlarge", "price": 3.20, "vCPU": 64, "memory": "256GiB"},
    {"name": "m5.large", "price": 0.096, "vCPU": 2, "memory": "8GiB"},
    {"name": "m5.xlarge", "price": 0.192, "vCPU": 4, "memory": "16GiB"},
    {"name": "m5.2xlarge", "price": 0.384, "vCPU": 8, "memory": "32GiB"},
    {"name": "m5.4xlarge", "price": 0.768, "vCPU": 16, "memory": "64GiB"},
    {"name": "m5.8xlarge", "price": 1.536, "vCPU": 32, "memory": "128GiB"},
    {"name": "m5.12xlarge", "price": 2.304, "vCPU": 48, "memory": "192GiB"},
    {"name": "m5.16xlarge", "price": 3.072, "vCPU": 64, "memory": "256GiB"},
    {"name": "m5.24xlarge", "price": 4.608, "vCPU": 96, "memory": "384GiB"},
    {"name": "m5.metal", "price": 4.608, "vCPU": 96, "memory": "384GiB"},
    {"name": "c4.large", "price": 0.10, "vCPU": 2, "memory": "3.75GiB"},
    {"name": "c4.xlarge", "price": 0.199, "vCPU": 4, "memory": "7.5GiB"},
    {"name": "c4.2xlarge", "price": 0.398, "vCPU": 8, "memory": "15GiB"},
    {"name": "c4.4xlarge", "price": 0.796, "vCPU": 16, "memory": "30GiB"},
    {"name": "c4.8xlarge", "price": 1.591, "vCPU": 36, "memory": "60GiB"},
    {"name": "c5.large", "price": 0.085, "vCPU": 2, "memory": "4GiB"},
    {"name": "c5.xlarge", "price": 0.17, "vCPU": 4, "memory": "8GiB"},
    {"name": "c5.2xlarge", "price": 0.34, "vCPU": 8, "memory": "16GiB"},
    {"name": "c5.4xlarge", "price": 0.68, "vCPU": 16, "memory": "32GiB"},
    {"name": "c5.9xlarge", "price": 1.53, "vCPU": 36, "memory": "72GiB"},
    {"name": "c5.12xlarge", "price": 2.04, "vCPU": 48, "memory": "96GiB"},
    {"name": "c5.18xlarge", "price": 3.06, "vCPU": 72, "memory": "144GiB"},
    {"name": "c5.24xlarge", "price": 4.08, "vCPU": 96, "memory": "192GiB"},
    {"name": "c5.metal", "price": 4.08, "vCPU": 96, "memory": "192GiB"},
]
# CloudWatch client used by get_metric_data().
cloudwatch = boto3.client('cloudwatch')

# Query window: the last 24 hours, ending now (naive UTC timestamps).
end_time = datetime.utcnow()
start_time = end_time - timedelta(days=1)
metrics = ['CPUUtilization', 'CPUCreditBalance', 'CPUCreditUsage', 'mem_used_percent']

# Collect the IDs of every instance whose Name tag starts with 'eta-'.
ec2 = boto3.client('ec2')
eta_instance_ids = []
# describe_instances returns results in pages; walk all of them so instances
# beyond the first page are not silently ignored.
for response in ec2.get_paginator('describe_instances').paginate():
    for reservation in response["Reservations"]:
        for instance in reservation["Instances"]:
            instance_id = instance["InstanceId"]
            # The "Tags" key is absent for untagged instances.
            for tag in instance.get("Tags", []):
                if tag["Key"] == "Name" and tag["Value"].startswith("eta-"):
                    eta_instance_ids.append(instance_id)

# List for collected server information.
servers = []
# Fetch instance metric data at 5-minute granularity.
def get_metric_data(metric_name, namespace):
    """Return the 5-minute average datapoints of one metric as a DataFrame.

    The query targets the instance named by the module-level ``instance_id``
    and the window given by the module-level ``start_time`` / ``end_time``,
    using the module-level ``cloudwatch`` client.

    Args:
        metric_name: CloudWatch metric name, e.g. ``'CPUUtilization'``.
        namespace: CloudWatch namespace the metric lives in.

    Returns:
        A DataFrame indexed by ``Timestamp`` in ascending order, holding the
        datapoint fields returned by CloudWatch (including ``Average``), or
        an empty DataFrame when CloudWatch returned no datapoints.
    """
    # NOTE(review): only the InstanceId dimension is supplied; metrics that
    # were published with additional dimensions will not match - confirm
    # this is right for the CWAgent namespace.
    response = cloudwatch.get_metric_statistics(
        Namespace=namespace,
        MetricName=metric_name,
        Dimensions=[{'Name': 'InstanceId', 'Value': instance_id}],
        StartTime=start_time,
        EndTime=end_time,
        Period=300,
        Statistics=['Average']
    )
    datapoints = response['Datapoints']
    if not datapoints:
        print(f"No data for {metric_name}")
        return pd.DataFrame()

    df = pd.DataFrame(datapoints)
    df['Timestamp'] = pd.to_datetime(df['Timestamp'])
    # CloudWatch does not guarantee datapoint order, so sort by time.
    return df.set_index('Timestamp').sort_index()
# Number of peaks and how long they last.
def count_peaks_and_duration(data, threshold, metric_name):
    """Summarise the periods during which a metric stays above a threshold.

    A peak is a maximal run of consecutive samples whose ``Average`` is
    strictly greater than ``threshold``. Its duration runs from the first
    sample of the run to the first sample back at or below the threshold,
    or to the last sample of the series when the run never ends.

    Args:
        data: mapping of metric name to a time-indexed DataFrame with an
            ``Average`` column, sorted by time.
        threshold: value the average must exceed to count as a peak.
        metric_name: key of the metric to analyse in ``data``.

    Returns:
        A 4-tuple ``(peak_count, mean_duration_seconds,
        mean_of_peak_averages, highest_peak_value)``. ``(0, 0, 0, 0)`` is
        returned when there is no data or no peak.
    """
    frame = data[metric_name]
    # An empty DataFrame (no datapoints) has no 'Average' column at all.
    if frame is None or len(frame) == 0 or 'Average' not in frame:
        print(f"{metric_name}에서 피크를 찾을 수 없습니다.")
        return 0, 0, 0, 0

    averages = frame['Average']
    timestamps = averages.index
    above = (averages > threshold).to_numpy()
    # State of the preceding sample; the series is treated as starting below
    # the threshold so that a run beginning at the first sample is a peak.
    previous = np.concatenate(([False], above[:-1]))

    starts = list(timestamps[above & ~previous])
    ends = list(timestamps[~above & previous])
    if not starts:
        print(f"{metric_name}에서 피크를 찾을 수 없습니다.")
        return 0, 0, 0, 0
    # The last peak is still running at the end of the window: close it at
    # the final sample. This also covers a series that never drops below
    # the threshold, which previously was reported as having no peak.
    if len(starts) > len(ends):
        ends.append(timestamps[-1])

    durations_in_seconds = np.array(
        [(end - start).total_seconds() for start, end in zip(starts, ends)]
    )

    # Mean and maximum utilisation inside each peak. Only samples above the
    # threshold are used, so the closing below-threshold sample does not
    # drag the peak average down.
    peak_means = []
    peak_maxima = []
    for start, end in zip(starts, ends):
        in_window = (timestamps >= start) & (timestamps <= end)
        peak_values = averages[in_window & above]
        peak_means.append(peak_values.mean())
        peak_maxima.append(peak_values.max())

    return len(starts), durations_in_seconds.mean(), np.mean(peak_means), max(peak_maxima)
# Decide whether an instance is over-provisioned, under-provisioned or optimal.
def check_provisioning_status(data):
    """Build the sizing report for the instance currently being processed.

    Reads the module-level ``instance_type`` and ``instance_name`` (set by the
    driver loop) and the ``instance_types`` catalogue.

    Args:
        data: mapping of metric name to the DataFrame returned by
            ``get_metric_data`` (may be empty for metrics without data).

    Returns:
        A dict with the keys ``name``, ``type``, ``cpu``, ``memory``,
        ``credit``, ``peak_num``, ``avg_duration``, ``avg_cpu_usage_peak``,
        ``max_cpu_usage_peak``, ``recommended_type``, ``current_cost`` and
        ``recommended_cost`` (costs are per day), or ``None`` when the
        instance cannot be evaluated (unknown type or no CPU data).
    """
    catalog = {entry['name']: entry for entry in instance_types}
    current = catalog.get(instance_type)
    if current is None:
        # Without a catalogue entry there is no price or vCPU count to use.
        print(f"Unknown instance type {instance_type} for {instance_name}; skipping.")
        return None

    cpu_frame = data.get('CPUUtilization')
    if cpu_frame is None or 'Average' not in cpu_frame:
        print(f"No CPUUtilization data for {instance_name}; skipping.")
        return None
    cpu_avg = cpu_frame['Average'].mean()

    # The memory metric is missing when the CloudWatch agent is not installed.
    try:
        memory_avg = data['mem_used_percent']['Average'].mean()
    except (KeyError, AttributeError):
        memory_avg = 25

    # vCPUs needed to run the observed load at the target utilisation,
    # scaled from the vCPU count of the instance that produced the data.
    target_cpu_usage = 50
    required_vcpus = current['vCPU'] * (cpu_avg / target_cpu_usage)

    is_burstable = instance_type.startswith('t')
    credit_avg = None
    if is_burstable:
        try:
            credit_avg = data['CPUCreditBalance']['Average'].mean()
        except (KeyError, AttributeError):
            credit_avg = None

    peak_num, avg_duration, avg_cpu_usage_peak, max_cpu_usage_peak = count_peaks_and_duration(
        data, 40, 'CPUUtilization'
    )
    cpu_usage_values = cpu_frame['Average'].tolist()

    # t types are cheaper than the other families, so always consider them.
    # The catalogue lists them cheapest first; take the first one that fits.
    t_recommand_type = None
    for candidate in instance_types:
        if not candidate['name'].startswith('t'):
            continue
        # Too few vCPUs for the load: try the next t type.
        if candidate['vCPU'] < required_vcpus:
            continue
        credits = calculate_credits(cpu_usage_values, candidate['vCPU'], candidate['base_util'])
        # NOTE(review): None presumably means the simulated credit balance
        # would run out on this type - confirm against creditCalculation.
        if credits is None:
            continue
        t_recommand_type = candidate['name']
        plot_credits(credits, instance_name, instance_type)
        break

    # Cheapest fitting 'm' or 'c' type; ValueError means nothing is big enough.
    try:
        fixed_type, fixed_price = recommend_instance_type(required_vcpus)
    except ValueError:
        fixed_type, fixed_price = None, None

    if t_recommand_type is None and fixed_type is None:
        # Nothing in the catalogue fits the load: keep the current type.
        recommand_type = instance_type
    elif t_recommand_type is None:
        recommand_type = fixed_type
    elif fixed_type is None or catalog[t_recommand_type]['price'] <= fixed_price:
        # Prefer the t type unless the m/c type is strictly cheaper.
        recommand_type = t_recommand_type
    else:
        recommand_type = fixed_type

    # Hourly prices converted to daily costs.
    current_daily_cost = current['price'] * 24
    recommand_daily_cost = catalog[recommand_type]['price'] * 24

    server_info = {
        'name': instance_name,
        'type': instance_type,
        'cpu': round(cpu_avg, 3),
        'memory': round(memory_avg, 3),
        'credit': round(credit_avg, 3) if credit_avg is not None else "N/A",
        'peak_num': peak_num,
        'avg_duration': round(avg_duration, 3) if is_burstable else "N/A",
        'avg_cpu_usage_peak': round(avg_cpu_usage_peak, 3) if is_burstable else "N/A",
        'max_cpu_usage_peak': round(max_cpu_usage_peak, 3) if is_burstable else "N/A",
        'recommended_type': recommand_type,
        'current_cost': round(current_daily_cost, 3),
        'recommended_cost': round(recommand_daily_cost, 3)
    }
    return server_info
def recommend_instance_type(required_vcpus, catalog=None):
    """Pick the cheapest 'm' or 'c' instance type with enough vCPUs.

    Args:
        required_vcpus: minimum number of vCPUs the instance must offer.
        catalog: optional list of instance dicts with ``name``, ``price`` and
            ``vCPU`` keys; defaults to the module-level ``instance_types``.

    Returns:
        A ``(name, price)`` tuple for the selected instance type. On a price
        tie between the families the 'm' type is chosen.

    Raises:
        ValueError: if no 'm' or 'c' type offers ``required_vcpus`` vCPUs.
    """
    if catalog is None:
        catalog = instance_types

    family_winners = []
    for family_prefix in ('m', 'c'):
        cheapest = min(
            (
                entry for entry in catalog
                if entry['name'].startswith(family_prefix) and entry['vCPU'] >= required_vcpus
            ),
            key=lambda entry: entry['price'],
            default=None,
        )
        # A family with no large-enough type must not rule out the other one.
        if cheapest is not None:
            family_winners.append(cheapest)

    if not family_winners:
        raise ValueError(
            f"no 'm' or 'c' instance type offers at least {required_vcpus} vCPUs"
        )

    selected_instance = min(family_winners, key=lambda entry: entry['price'])
    return selected_instance['name'], selected_instance['price']
def send_to_slack(server):
    """Post the performance report of one server to Slack.

    The webhook URL is read from the ``SLACK_WEBHOOK_URL`` environment
    variable.

    Args:
        server: report dict as produced by ``check_provisioning_status``
            (keys ``name``, ``type``, ``cpu``, ``memory``, ``credit``,
            ``peak_num``, ``avg_duration``, ``avg_cpu_usage_peak``,
            ``max_cpu_usage_peak``, ``recommended_type``, ``current_cost``,
            ``recommended_cost``).

    Raises:
        KeyError: if ``SLACK_WEBHOOK_URL`` is not set.
        ValueError: if Slack answers with a status other than 200.
    """
    webhook_url = os.environ["SLACK_WEBHOOK_URL"]

    # Build the message.
    message = f"안녕하세요! :wave: \n *{server['type']} type의 {server['name']}* 서버의 성능 리포트를 전해드릴게요:mag_right:\n\n"
    message += ":cloud: *현재 CPU 사용량*\n"
    server_message = f" - 평균 CPU 사용률: {server['cpu']}%\n - 평균 메모리 사용률: {server['memory']}%\n"

    # Credit balance and peak analysis only apply to burstable (t) types.
    if server['type'].startswith('t'):
        server_message += f" - CPU 크레딧 밸런스: {server['credit']}\n"
        if server['peak_num'] > 0:
            server_message += f":cloud: *T type Peak 분석*\n - 피크 수: {server['peak_num']}\n - 평균 피크 지속시간: {server['avg_duration']}s\n - 피크 기간 동안 CPU 평균 활용률: {server['avg_cpu_usage_peak']}%\n - 피크 기간 동안 CPU 최대 활용률: {server['max_cpu_usage_peak']}%\n"
        else:
            server_message += ":cloud: 피크가 존재하지 않습니다.\n"

    # Both costs are daily figures, so the saving is per day as well.
    daily_saving = round(server['current_cost'] - server['recommended_cost'], 3)
    server_message += ":rocket: *사용량을 기반으로 EC2 type 을 추천합니다*\n"
    server_message += f" - 추천 서버 타입: {server['recommended_type']}\n - 현재 일일 비용: ${server['current_cost']}\n - 추천된 일일 비용: ${server['recommended_cost']}\n - 일일 절감될 비용: ${daily_saving}\n"

    if server['recommended_type'].startswith('t'):
        # NOTE(review): the object key uses the server's current type and
        # today's local date - presumably matching what plot_credits
        # uploaded; confirm against creditCalculation.
        image_url = f"https://eta-credit-balance-graph.s3.us-east-2.amazonaws.com/{datetime.today().strftime('%Y-%m-%d')}/{server['name']}_{server['type']}.png"
        server_message += f"\n*추천된 서버의 예상 CreditBalance 사용량 그래프*\n"
        server_message += f"{image_url}"

    message += server_message
    message += "\n잘못된 점이나 개선할 사항이 있다면 언제든지 알려주세요!"

    # Send the message to Slack.
    slack_data = {
        'attachments': [
            {
                'fallback': 'Required plain-text summary of the attachment.',
                'color': '#FF9900',  # colour of the side bar, as a HEX code
                'text': message  # text shown next to the colour bar
            }
        ]
    }
    # Without a timeout a stalled connection would block the run forever.
    response = requests.post(
        webhook_url, data=json.dumps(slack_data),
        headers={'Content-Type': 'application/json'},
        timeout=10
    )
    if response.status_code != 200:
        raise ValueError(
            'Request to slack returned an error %s, the response is:%s' % (response.status_code, response.text)
        )
# Driver: evaluate every 'eta-' instance and report it to Slack.
# The loop deliberately binds the module-level names instance_id,
# instance_type and instance_name: get_metric_data() and
# check_provisioning_status() read them as globals.

# The EC2 resource handle does not depend on the instance; create it once.
ec2 = boto3.resource('ec2')
for instance_id in eta_instance_ids:
    # Look up the instance's type and Name tag.
    instance = ec2.Instance(instance_id)
    instance_type = instance.instance_type
    instance_name = ""
    # tags is None when the instance carries no tags.
    for tag in instance.tags or []:
        if tag['Key'] == 'Name':
            instance_name = tag['Value']
            break

    # Request the CloudWatch data for every metric.
    data = {}
    for metric in metrics:
        # The CPU metrics come from EC2 itself; anything else (the memory
        # metric) is queried from the CWAgent namespace.
        if metric in ('CPUUtilization', 'CPUCreditBalance', 'CPUCreditUsage'):
            namespace = 'AWS/EC2'
        else:
            namespace = 'CWAgent'
        data[metric] = get_metric_data(metric, namespace)

    server_info = check_provisioning_status(data)
    if server_info is not None:
        send_to_slack(server_info)