# aimet_pruner_utils.py

import os
import pickle
from decimal import Decimal
import math
from typing import Tuple, List, Optional, Dict
import statistics
import torch
from aimet_common.defs import CostMetric, RankSelectScheme, EvalFunction, LayerCompRatioPair, GreedyCompressionRatioSelectionStats
from aimet_common.cost_calculator import SpatialSvdCostCalculator, WeightSvdCostCalculator
from aimet_common.comp_ratio_select import CompRatioSelectAlgo, TarRankSelectAlgo, ManualCompRatioSelectAlgo
from aimet_common.comp_ratio_rounder import RankRounder, ChannelRounder, CompRatioRounder
from aimet_common.compression_algo import CompressionAlgo
from aimet_common.bokeh_plots import BokehServerSession
from aimet_common.pruner import Pruner
from aimet_common import cost_calculator as cc
from aimet_common.utils import AimetLogger
from aimet_common.layer_database import Layer
from aimet_common.bokeh_plots import LinePlot, DataTable, ProgressBar
from aimet_common.curve_fit import MonotonicIncreasingCurveFit
from aimet_torch.utils import create_rand_tensors_given_shapes, get_device
from aimet_torch.defs import SpatialSvdParameters, WeightSvdParameters, ChannelPruningParameters, ModuleCompRatioPair
from aimet_torch.layer_selector import ConvFcLayerSelector, ConvNoDepthwiseLayerSelector, ManualLayerSelector
from aimet_torch.layer_database import LayerDatabase
from aimet_torch.svd.svd_pruner import SpatialSvdPruner, WeightSvdPruner
from aimet_torch.channel_pruning.channel_pruner import InputChannelPruner, ChannelPruningCostCalculator
from aimet_torch import pymo_utils
from aimet_torch.compression_factory import CompressionFactory

logger = AimetLogger.get_area_logger(AimetLogger.LogAreas.CompRatioSelect)


class CustomInputChannelPruner(InputChannelPruner):
    """Input-channel pruner that skips the data sub-sampling and weight-reconstruction step."""

    def _data_subsample_and_reconstruction(self, orig_layer: torch.nn.Conv2d, pruned_layer: torch.nn.Conv2d,
                                           orig_model: torch.nn.Module, comp_model: torch.nn.Module):
        # Disable the weight-reconstruction step: pruned layers keep their original weights
        pass


class CustomGreedyCompRatioSelectAlgo(CompRatioSelectAlgo):
    """
    Implements the greedy compression-ratio selection algorithm, with candidate
    ratios spread between min_comp_ratio and 1.0
    """
    # pylint: disable=too-many-locals
    def __init__(self, layer_db: LayerDatabase, pruner: Pruner, cost_calculator: cc.CostCalculator,
                 eval_func: EvalFunction, eval_iterations, cost_metric: CostMetric, target_comp_ratio: float,
                 num_candidates: int, use_monotonic_fit: bool, saved_eval_scores_dict: Optional[str],
                 comp_ratio_rounding_algo: CompRatioRounder, use_cuda: bool, bokeh_session, min_comp_ratio):
        # pylint: disable=too-many-arguments
        CompRatioSelectAlgo.__init__(self, layer_db, cost_calculator, cost_metric, comp_ratio_rounding_algo)

        self._eval_func = eval_func
        self.bokeh_session = bokeh_session
        self._eval_iter = eval_iterations
        self._is_cuda = use_cuda
        self._pruner = pruner
        self._saved_eval_scores_dict = saved_eval_scores_dict
        self._target_comp_ratio = target_comp_ratio
        self._use_monotonic_fit = use_monotonic_fit

        if saved_eval_scores_dict:
            # Candidates will be read from the saved eval-scores dict, so none are generated here
            self._comp_ratio_candidates = 0
            self.target_ratios = []
        else:
            # Spread the candidate comp-ratios evenly between min_comp_ratio and 1.0
            ratios = []
            step = (1 - min_comp_ratio) / num_candidates
            for i in range(num_candidates):
                ratios.append(Decimal(min_comp_ratio + step * i))
            self._comp_ratio_candidates = ratios
            self.target_ratios = ratios

    def _pickle_eval_scores_dict(self, eval_scores_dict):
        if not os.path.exists('./data'):
            os.makedirs('./data')

        with open(self.PICKLE_FILE_EVAL_DICT, 'wb') as file:
            pickle.dump(eval_scores_dict, file)

        logger.info("Greedy selection: Saved eval dict to %s", self.PICKLE_FILE_EVAL_DICT)

    @staticmethod
    def _unpickle_eval_scores_dict(saved_eval_scores_dict_path: str):
        with open(saved_eval_scores_dict_path, 'rb') as f:
            eval_dict = pickle.load(f)

        logger.info("Greedy selection: Read eval dict from %s", saved_eval_scores_dict_path)
        return eval_dict

    @staticmethod
    def _calculate_function_value_by_interpolation(comp_ratio: Decimal, layer_eval_score_dict: dict,
                                                   comp_ratio_list: List):
        """
        Calculates the eval score for a comp ratio by linear interpolation
        :param comp_ratio: Compression ratio for which to estimate an eval score
        :param layer_eval_score_dict: Dictionary of {comp_ratio: eval_score} for one layer
        :param comp_ratio_list: Comp-ratio candidates, sorted in increasing order
        :return: Interpolated eval score
        """
        if comp_ratio in comp_ratio_list:
            eval_score = layer_eval_score_dict[comp_ratio]
        else:
            ind = 0
            for ind, _ in enumerate(comp_ratio_list, start=0):
                if comp_ratio < comp_ratio_list[ind]:
                    break

            if ind == len(comp_ratio_list) - 1:
                eval_score = layer_eval_score_dict[comp_ratio_list[-1]]
            else:
                x1 = comp_ratio_list[ind]
                y1 = layer_eval_score_dict[comp_ratio_list[ind]]
                x2 = comp_ratio_list[ind - 1]
                y2 = layer_eval_score_dict[comp_ratio_list[ind - 1]]
                eval_score = (float(comp_ratio) - float(x1)) * (y1 - y2) / (float(x1) - float(x2)) + y1

        return eval_score

    def _update_eval_dict_with_rounding(self, eval_scores_dict, rounding_algo, cost_metric):
        updated_eval_dict = {}

        for layer_name in eval_scores_dict:
            layer_eval_dict = eval_scores_dict[layer_name]
            eval_dict_per_layer = {}

            layer = self._layer_db.find_layer_by_name(layer_name)
            comp_ratio_list = sorted(list(layer_eval_dict.keys()), key=float)
            for comp_ratio in layer_eval_dict:
                rounded_comp_ratio = rounding_algo.round(layer, comp_ratio, cost_metric)
                eval_score = self._calculate_function_value_by_interpolation(rounded_comp_ratio, layer_eval_dict,
                                                                             comp_ratio_list)
                eval_dict_per_layer[Decimal(rounded_comp_ratio)] = eval_score

            updated_eval_dict[layer_name] = eval_dict_per_layer

        return updated_eval_dict

    @staticmethod
    def _fit_eval_dict_to_monotonic_function(eval_scores_dict):
        for layer in eval_scores_dict:
            layer_eval_dict = eval_scores_dict[layer]

            # Convert dict of eval-scores and comp-ratios to lists
            eval_scores = list(layer_eval_dict.values())
            comp_ratios = list(layer_eval_dict.keys())

            eval_scores, polynomial_coefficients = MonotonicIncreasingCurveFit.fit(comp_ratios, eval_scores)
            logger.debug("The coefficients for layer %s are %s", layer, str(polynomial_coefficients))

            # Update the layer_eval_dict
            for index, comp_ratio in enumerate(comp_ratios):
                layer_eval_dict[comp_ratio] = eval_scores[index]

    def _construct_eval_dict(self):
        # If the user already passed in a previously saved eval scores dict, we just use that
        if self._saved_eval_scores_dict:
            eval_scores_dict = self._unpickle_eval_scores_dict(self._saved_eval_scores_dict)
        else:
            # Create the eval scores dictionary
            eval_scores_dict = self._compute_eval_scores_for_all_comp_ratio_candidates()

            # Save the dictionary to file (in case the user wants to reuse the dictionary in the future)
            self._pickle_eval_scores_dict(eval_scores_dict)

        return eval_scores_dict

    def select_per_layer_comp_ratios(self):
        # Compute eval scores for each candidate comp-ratio in each layer
        eval_scores_dict = self._construct_eval_dict()

        # Fit the scores to a monotonically increasing function
        if self._use_monotonic_fit:
            self._fit_eval_dict_to_monotonic_function(eval_scores_dict)
        updated_eval_scores_dict = self._update_eval_dict_with_rounding(eval_scores_dict, self._rounding_algo,
                                                                        self._cost_metric)

        # Get the overall min and max scores
        current_min_score, current_max_score = self._find_min_max_eval_scores(updated_eval_scores_dict)
        exit_threshold = (current_max_score - current_min_score) * 0.0001
        logger.info("Greedy selection: overall_min_score=%f, overall_max_score=%f",
                    current_min_score, current_max_score)

        # Base cost
        original_model_cost = self._cost_calculator.compute_model_cost(self._layer_db)
        logger.info("Greedy selection: Original model cost=%s", original_model_cost)

        while True:
            # Current mid-point score
            current_mid_score = statistics.mean([current_max_score, current_min_score])
            current_comp_ratio = self._calculate_model_comp_ratio_for_given_eval_score(current_mid_score,
                                                                                       updated_eval_scores_dict,
                                                                                       original_model_cost)

            logger.debug("Greedy selection: current candidate - comp_ratio=%f, score=%f, search-window=[%f:%f]",
                         current_comp_ratio, current_mid_score, current_min_score, current_max_score)

            # Exit condition: is the binary search window too small to continue?
            should_exit, selected_score = self._evaluate_exit_condition(current_min_score, current_max_score,
                                                                        exit_threshold,
                                                                        current_comp_ratio, self._target_comp_ratio)
            if should_exit:
                break

            if current_comp_ratio > self._target_comp_ratio:
                # Not enough compression: binary-search the lower half of the scores
                current_max_score = current_mid_score
            else:
                # Too much compression: binary-search the upper half of the scores
                current_min_score = current_mid_score

        # Search finished, return the selected comp ratios per layer
        # Calculate the compression ratios for each layer based on this score
        layer_ratio_list = self._find_all_comp_ratios_given_eval_score(selected_score, updated_eval_scores_dict)
        selected_comp_ratio = self._calculate_model_comp_ratio_for_given_eval_score(selected_score,
                                                                                    updated_eval_scores_dict,
                                                                                    original_model_cost)

        logger.info("Greedy selection: final choice - comp_ratio=%f, score=%f",
                    selected_comp_ratio, selected_score)

        return layer_ratio_list, GreedyCompressionRatioSelectionStats(updated_eval_scores_dict)

    @staticmethod
    def _evaluate_exit_condition(min_score, max_score, exit_threshold, current_comp_ratio, target_comp_ratio):
        if math.isclose(min_score, max_score, abs_tol=exit_threshold):
            return True, min_score

        if math.isclose(current_comp_ratio, target_comp_ratio, abs_tol=0.001):
            return True, statistics.mean([max_score, min_score])

        return False, None

    def _calculate_model_comp_ratio_for_given_eval_score(self, eval_score, eval_scores_dict,
                                                         original_model_cost):
        # Calculate the compression ratios for each layer based on this score
        layer_ratio_list = self._find_all_comp_ratios_given_eval_score(eval_score, eval_scores_dict)
        for layer in self._layer_db:
            if layer not in self._layer_db.get_selected_layers():
                layer_ratio_list.append(LayerCompRatioPair(layer, None))

        # Calculate compressed model cost
        compressed_model_cost = self._cost_calculator.calculate_compressed_cost(self._layer_db,
                                                                                layer_ratio_list,
                                                                                self._cost_metric)

        if self._cost_metric == CostMetric.memory:
            current_comp_ratio = Decimal(compressed_model_cost.memory / original_model_cost.memory)
        else:
            current_comp_ratio = Decimal(compressed_model_cost.mac / original_model_cost.mac)

        return current_comp_ratio

    def _find_all_comp_ratios_given_eval_score(self, given_eval_score, eval_scores_dict):
        layer_ratio_list = []
        for layer in self._layer_db.get_selected_layers():
            comp_ratio = self._find_layer_comp_ratio_given_eval_score(eval_scores_dict,
                                                                      given_eval_score, layer)
            layer_ratio_list.append(LayerCompRatioPair(layer, comp_ratio))

        return layer_ratio_list

    @staticmethod
    def _find_layer_comp_ratio_given_eval_score(eval_scores_dict: Dict[str, Dict[Decimal, float]],
                                                given_eval_score, layer: Layer):
        # Find the closest comp-ratio candidate for the current eval score
        eval_scores_for_layer = eval_scores_dict[layer.name]

        # Sort the eval scores by increasing order of compression
        comp_ratios = list(eval_scores_for_layer.keys())
        sorted_comp_ratios = sorted(comp_ratios, reverse=True)

        # Special cases
        # Case 1: Eval score is higher than even our most conservative comp ratio: then no compression
        if given_eval_score > eval_scores_for_layer[sorted_comp_ratios[0]]:
            return None

        # Case 2: Eval score is lower than even our most aggressive comp ratio: use the most aggressive candidate
        if given_eval_score < eval_scores_for_layer[sorted_comp_ratios[-1]]:
            return sorted_comp_ratios[-1]

        # Start with a default of no compression
        selected_comp_ratio = None

        for index, comp_ratio in enumerate(sorted_comp_ratios[1:]):
            if given_eval_score > eval_scores_for_layer[comp_ratio]:
                selected_comp_ratio = sorted_comp_ratios[index]
                break

        return selected_comp_ratio

    @staticmethod
    def _find_min_max_eval_scores(eval_scores_dict: Dict[str, Dict[Decimal, float]]):
        first_layer_scores = list(eval_scores_dict.values())[0]
        first_score = list(first_layer_scores.values())[0]

        min_score = first_score
        max_score = first_score

        for layer_scores in eval_scores_dict.values():
            for eval_score in layer_scores.values():
                if eval_score < min_score:
                    min_score = eval_score

                if eval_score > max_score:
                    max_score = eval_score

        return min_score, max_score

    def _compute_eval_scores_for_all_comp_ratio_candidates(self) -> Dict[str, Dict[Decimal, float]]:
        """
        Creates and returns the eval scores dictionary
        :return: Dictionary of {layer_name: {compression_ratio: eval_score}} for all selected layers
                 and all compression-ratio candidates
        """
        selected_layers = self._layer_db.get_selected_layers()

        # Inputs to initialize a TabularProgress object
        num_candidates = len(self._comp_ratio_candidates)
        num_layers = len(selected_layers)

        if self.bokeh_session:
            column_names = [str(i) for i in self._comp_ratio_candidates]
            layer_names = [i.name for i in selected_layers]

            progress_bar = ProgressBar(total=num_layers * num_candidates, title="Eval Scores Table", color="green",
                                       bokeh_session=self.bokeh_session)
            data_table = DataTable(num_layers, num_candidates, column_names, bokeh_session=self.bokeh_session,
                                   row_index_names=layer_names)
        else:
            data_table = None
            progress_bar = None

        eval_scores_dict = {}
        for layer in selected_layers:
            layer_wise_eval_scores = self._compute_layerwise_eval_score_per_comp_ratio_candidate(data_table,
                                                                                                 progress_bar, layer)
            eval_scores_dict[layer.name] = layer_wise_eval_scores

        return eval_scores_dict

    def _compute_layerwise_eval_score_per_comp_ratio_candidate(self, tabular_progress_object, progress_bar,
                                                               layer: Layer) -> Dict[Decimal, float]:
        """
        Computes eval scores for each compression-ratio candidate for a given layer
        :param layer: Layer for which to calculate eval scores
        :return: Dictionary of {compression_ratio: eval_score} for each compression-ratio candidate
        """
        layer_wise_eval_scores_dict = {}

        # Only publish plots to a document if a bokeh server session exists
        if self.bokeh_session:
            # Plot to visualize the evaluation scores as they update for each layer
            layer_wise_eval_scores_plot = LinePlot(x_axis_label="Compression Ratios", y_axis_label="Eval Scores",
                                                   title=layer.name, bokeh_session=self.bokeh_session)

        # Loop over each candidate
        # logger.info("Candidate Ratios", self.target_ratios)
        for comp_ratio in self.target_ratios:
            logger.info("Analyzing compression ratio: %s =====================>", comp_ratio)

            # Prune layer given this comp ratio
            pruned_layer_db = self._pruner.prune_model(self._layer_db,
                                                       [LayerCompRatioPair(layer, comp_ratio)],
                                                       self._cost_metric,
                                                       trainer=None)

            eval_score = self._eval_func(pruned_layer_db.model, self._eval_iter, use_cuda=self._is_cuda)
            layer_wise_eval_scores_dict[comp_ratio] = eval_score

            # Destroy the layer database
            pruned_layer_db.destroy()
            pruned_layer_db = None

            logger.info("Layer %s, comp_ratio %f ==> eval_score=%f", layer.name, comp_ratio,
                        eval_score)

            if self.bokeh_session:
                layer_wise_eval_scores_plot.update(new_x_coordinate=comp_ratio, new_y_coordinate=eval_score)
                # Update the data table by adding the computed eval score
                tabular_progress_object.update_table(str(comp_ratio), layer.name, eval_score)
                # Update the progress bar
                progress_bar.update()

        # Remove the plot so that we have a fresh figure to visualize for the next layer
        if self.bokeh_session:
            layer_wise_eval_scores_plot.remove_plot()

        return layer_wise_eval_scores_dict


class CustomCompressionFactory(CompressionFactory):

    @classmethod
    def create_channel_pruning_algo(
            cls,
            model: torch.nn.Module,
            eval_callback: EvalFunction,
            eval_iterations,
            input_shape: Tuple, cost_metric: CostMetric,
            params: ChannelPruningParameters,
            bokeh_session: BokehServerSession,
            min_comp_ratio: float = 0,
    ) -> CompressionAlgo:
        """
        Factory method to construct ChannelPruningCompressionAlgo

        :param model: Model to compress
        :param eval_callback: Evaluation callback for the model
        :param eval_iterations: Evaluation iterations
        :param input_shape: Shape of the input tensor for model
        :param cost_metric: Cost metric (mac or memory)
        :param params: Channel Pruning compression parameters
        :param bokeh_session: The Bokeh session to display plots
        :param min_comp_ratio: Smallest compression-ratio candidate to evaluate in auto mode
        :return: An instance of ChannelPruningCompressionAlgo
        """
        # pylint: disable=too-many-locals
        # Rationale: Factory functions unfortunately need to deal with a lot of parameters
        device = get_device(model)
        dummy_input = create_rand_tensors_given_shapes(input_shape, device)

        # Create a layer database
        layer_db = LayerDatabase(model, dummy_input)
        use_cuda = next(model.parameters()).is_cuda

        # Create a pruner (with the weight-reconstruction step disabled)
        pruner = CustomInputChannelPruner(data_loader=params.data_loader, input_shape=input_shape,
                                          num_reconstruction_samples=params.num_reconstruction_samples,
                                          allow_custom_downsample_ops=params.allow_custom_downsample_ops)

        comp_ratio_rounding_algo = ChannelRounder(params.multiplicity)

        # Create a comp-ratio selection algorithm
        cost_calculator = ChannelPruningCostCalculator(pruner)

        if params.mode == ChannelPruningParameters.Mode.auto:
            greedy_params = params.mode_params.greedy_params
            comp_ratio_select_algo = CustomGreedyCompRatioSelectAlgo(layer_db, pruner, cost_calculator, eval_callback,
                                                                     eval_iterations, cost_metric,
                                                                     greedy_params.target_comp_ratio,
                                                                     greedy_params.num_comp_ratio_candidates,
                                                                     greedy_params.use_monotonic_fit,
                                                                     greedy_params.saved_eval_scores_dict,
                                                                     comp_ratio_rounding_algo, use_cuda,
                                                                     bokeh_session=bokeh_session,
                                                                     min_comp_ratio=min_comp_ratio)
            layer_selector = ConvNoDepthwiseLayerSelector()
            modules_to_ignore = params.mode_params.modules_to_ignore

        else:
            # Convert (module, comp-ratio) pairs to (layer, comp-ratio) pairs
            layer_comp_ratio_pairs = cls._get_layer_pairs(layer_db, params.mode_params.list_of_module_comp_ratio_pairs)

            comp_ratio_select_algo = ManualCompRatioSelectAlgo(layer_db,
                                                               layer_comp_ratio_pairs,
                                                               comp_ratio_rounding_algo, cost_metric=cost_metric)

            layer_selector = ManualLayerSelector(layer_comp_ratio_pairs)
            modules_to_ignore = []

        # Create the overall Channel Pruning compression algorithm
        channel_pruning_algo = CompressionAlgo(layer_db, comp_ratio_select_algo, pruner, eval_callback,
                                               layer_selector, modules_to_ignore, cost_calculator, use_cuda)

        # Log which comp-ratio selection algorithm was wired in (the custom greedy algo in auto mode)
        logger.debug("Comp-ratio selection algorithm in use: %s", channel_pruning_algo._comp_ratio_select_algo)

        return channel_pruning_algo
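

# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not executed as part of this module). It
# shows how the custom factory above could be wired into a standard AIMET
# channel-pruning flow. `model`, `evaluate` (an EvalFunction) and `data_loader`
# are placeholders supplied by the caller, and the GreedySelectionParameters /
# ChannelPruningParameters constructor signatures are assumed to match the
# AIMET release this module targets; verify them against your installed
# aimet_common / aimet_torch version before copying.
#
#   from decimal import Decimal
#   from aimet_common.defs import CostMetric, GreedySelectionParameters
#   from aimet_torch.defs import ChannelPruningParameters
#
#   greedy = GreedySelectionParameters(target_comp_ratio=Decimal(0.5),
#                                      num_comp_ratio_candidates=10)
#   auto_mode = ChannelPruningParameters.AutoModeParams(greedy)
#   cp_params = ChannelPruningParameters(data_loader=data_loader,
#                                        num_reconstruction_samples=500,
#                                        allow_custom_downsample_ops=False,
#                                        mode=ChannelPruningParameters.Mode.auto,
#                                        params=auto_mode)
#
#   algo = CustomCompressionFactory.create_channel_pruning_algo(
#       model, evaluate, eval_iterations=10, input_shape=(1, 3, 224, 224),
#       cost_metric=CostMetric.mac, params=cp_params, bokeh_session=None,
#       min_comp_ratio=0.1)
#
# With bokeh_session=None the progress table and per-layer plots are skipped;
# pass a BokehServerSession to visualize the greedy search as it runs.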