-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSQL Performance Tips and Tricks.sql
448 lines (361 loc) · 14.9 KB
/
SQL Performance Tips and Tricks.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
-- ###############################################################
-- # FETCHING DATA #
-- ###############################################################
-- ============================================
-- Tip 1: Select Only What You Need
-- ============================================
-- Bad Practice
SELECT * FROM Sales.Customers
-- Good Practice
SELECT CustomerID, FirstName, LastName FROM Sales.Customers
-- ============================================
-- Tip 2: Avoid unnecessary DISTINCT & ORDER BY
-- ============================================
-- Bad Practice
SELECT DISTINCT
FirstName
FROM Sales.Customers
ORDER BY FirstName
-- Good Practice
SELECT
FirstName
FROM Sales.Customers
-- ============================================
-- Tip 3: For Exploration Purpose, Limit Rows!
-- ============================================
-- Bad Practice
SELECT
OrderID,
Sales
FROM Sales.Orders
-- Good Practice
SELECT TOP 10
OrderID,
Sales
FROM Sales.Orders
-- ###########################################################
-- # FILTERING #
-- ###########################################################
-- ============================================================================
-- Tip 4: Create nonclustered Index on frequently used Columns in WHERE clause
-- ============================================================================
SELECT * FROM Sales.Orders WHERE OrderStatus = 'Delivered'
CREATE NONCLUSTERED INDEX Idx_Orders_OrderStatus ON Sales.Orders(OrderStatus)
-- ===========================================================
-- Tip 5: Avoid applying functions to columns in WHERE clauses
-- ===========================================================
-- Bad Practice
SELECT * FROM Sales.Orders
WHERE LOWER(OrderStatus) = 'delivered'
-- Good Practice
SELECT * FROM Sales.Orders
WHERE OrderStatus = 'Delivered'
---------------------------------------------------------
-- Bad Practice
SELECT *
FROM Sales.Customers
WHERE SUBSTRING(FirstName, 1, 1) = 'A'
-- Good Practice
SELECT *
FROM Sales.Customers
WHERE FirstName LIKE 'A%'
---------------------------------------------------------
-- Bad Practice
SELECT *
FROM Sales.Orders
WHERE YEAR(OrderDate) = 2025
-- Good Practice
SELECT *
FROM Sales.Orders
WHERE OrderDate BETWEEN '2025-01-01' AND '2025-12-31'
-- ==========================================================
-- Tip 6: Avoid leading wildcards as they prevent index usage
-- ==========================================================
-- Bad Practice
SELECT *
FROM Sales.Customers
WHERE LastName LIKE '%Gold%'
-- Good Practice
SELECT *
FROM Sales.Customers
WHERE LastName LIKE 'Gold%'
-- ======================================
-- Tip 7: Use IN instead of Multiple OR
-- ======================================
-- Bad Practice
SELECT *
FROM Sales.Orders
WHERE CustomerID = 1 OR CustomerID = 2 OR CustomerID = 3
-- Good Practice
SELECT *
FROM Sales.Orders
WHERE CustomerID IN (1, 2, 3)
-- #######################################################
-- # JOINS #
-- #######################################################
-- ===================================================================
-- Tip 8: Understand The Speed of Joins & Use INNER JOIN when possible
-- ===================================================================
-- Best Performance
SELECT c.FirstName, o.OrderID FROM Sales.Customers c INNER JOIN Sales.Orders o ON c.CustomerID = o.CustomerID
-- Slightly Slower Performance
SELECT c.FirstName, o.OrderID FROM Sales.Customers c RIGHT JOIN Sales.Orders o ON c.CustomerID = o.CustomerID
SELECT c.FirstName, o.OrderID FROM Sales.Customers c LEFT JOIN Sales.Orders o ON c.CustomerID = o.CustomerID
-- Worst Performance
SELECT c.FirstName, o.OrderID FROM Sales.Customers c OUTER JOIN Sales.Orders o ON c.CustomerID = o.CustomerID
-- =============================================================================
-- Tip 9: Use Explicit Join (ANSI Join) Instead of Implicit Join (non-ANSI Join)
-- =============================================================================
-- Bad Practice
SELECT o.OrderID, c.FirstName
FROM Sales.Customers c, Sales.Orders o
WHERE c.CustomerID = o.CustomerID
-- Good Practice
SELECT o.OrderID, c.FirstName
FROM Sales.Customers c
INNER JOIN Sales.Orders o
ON c.CustomerID = o.CustomerID
--For simple queries: There is no measurable performance difference if both ANSI and non-ANSI queries are correctly written.
--For complex queries: ANSI joins are usually easier to optimize and debug because their structure makes the intent of the query clearer.
-- ============================================================
-- Tip 10: Make sure to Index the columns used in the ON clause
-- ============================================================
SELECT c.FirstName, o.OrderID
FROM Sales.Orders o
INNER JOIN Sales.Customers c
ON c.CustomerID = o.CustomerID
CREATE NONCLUSTERED INDEX IX_Orders_CustomerID ON Sales.Orders(CustomerID)
-- ==========================================
-- Tip 11: Filter Before Joining (Big Tables)
-- ==========================================
-- Best Practice For Small-Medium Tables
-- Filter After Join (WHERE)
SELECT c.FirstName, o.OrderID
FROM Sales.Customers c
INNER JOIN Sales.Orders o
ON c.CustomerID = o.CustomerID
WHERE o.OrderStatus = 'Delivered'
-- Filter During Join (ON)
SELECT c.FirstName, o.OrderID
FROM Sales.Customers c
INNER JOIN Sales.Orders o
ON c.CustomerID = o.CustomerID
AND o.OrderStatus = 'Delivered'
-- Best Practice For Big Tables
-- Filter Before Join (SUBQUERY)
SELECT c.FirstName, o.OrderID
FROM Sales.Customers c
INNER JOIN (SELECT OrderID, CustomerID FROM Sales.Orders WHERE OrderStatus = 'Delivered') o
ON c.CustomerID = o.CustomerID
-- =============================================
-- Tip 12: Aggregate Before Joining (Big Tables)
-- =============================================
-- Best Practice For Small-Medium Tables
-- Grouping and Joining
SELECT c.CustomerID, c.FirstName, COUNT(o.OrderID) AS OrderCount
FROM Sales.Customers c
INNER JOIN Sales.Orders o
ON c.CustomerID = o.CustomerID
GROUP BY c.CustomerID, c.FirstName
-- Best Practice For Big Tables
-- Pre-aggregated Subquery
SELECT c.CustomerID, c.FirstName, o.OrderCount
FROM Sales.Customers c
INNER JOIN (
SELECT CustomerID, COUNT(OrderID) AS OrderCount
FROM Sales.Orders
GROUP BY CustomerID
) o
ON c.CustomerID = o.CustomerID
-- Bad Practice
-- Correlated Subquery
SELECT
c.CustomerID,
c.FirstName,
(SELECT COUNT(o.OrderID)
FROM Sales.Orders o
WHERE o.CustomerID = c.CustomerID) AS OrderCount
FROM Sales.Customers c
-- ========================================
-- Tip 13: Use Union Instead of OR in Joins
-- ========================================
-- Bad Practice
SELECT o.OrderID, c.FirstName
FROM Sales.Customers c
INNER JOIN Sales.Orders o
ON c.CustomerID = o.CustomerID
OR c.CustomerID = o.SalesPersonID
-- Best Practice
SELECT o.OrderID, c.FirstName
FROM Sales.Customers c
INNER JOIN Sales.Orders o
ON c.CustomerID = o.CustomerID
UNION
SELECT o.OrderID, c.FirstName
FROM Sales.Customers c
INNER JOIN Sales.Orders o
ON c.CustomerID = o.SalesPersonID
-- ================================================
-- Tip 14: Check for Nested Loops and Use SQL HINTS
-- ================================================
SELECT o.OrderID, c.FirstName
FROM Sales.Customers c
INNER JOIN Sales.Orders o
ON c.CustomerID = o.CustomerID
-- Good Practice for Having Big Table & Small Table
SELECT o.OrderID, c.FirstName
FROM Sales.Customers c
INNER JOIN Sales.Orders o
ON c.CustomerID = o.CustomerID
OPTION (HASH JOIN)
-- ################################################################
-- # UNION #
-- ################################################################
-- ========================================================================
-- Tip 15: Use UNION ALL instead of using UNION | duplicates are acceptable
-- ========================================================================
-- Bad Practice
SELECT CustomerID FROM Sales.Orders
UNION
SELECT CustomerID FROM Sales.OrdersArchive
-- Best Practice
SELECT CustomerID FROM Sales.Orders
UNION ALL
SELECT CustomerID FROM Sales.OrdersArchive
-- =======================================================================================
-- Tip 16: Use UNION ALL + Distinct instead of using UNION | duplicates are not acceptable
-- =======================================================================================
-- Bad Practice
SELECT CustomerID FROM Sales.Orders
UNION
SELECT CustomerID FROM Sales.OrdersArchive
-- Best Practice
SELECT DISTINCT CustomerID
FROM (
SELECT CustomerID FROM Sales.Orders
UNION ALL
SELECT CustomerID FROM Sales.OrdersArchive
) AS CombinedData
-- ##########################################################
-- # AGREGATIONS #
-- ##########################################################
-- =============================================================
-- Tip 17: Use Columnstore Index for Aggregations on Large Table
-- =============================================================
SELECT CustomerID, COUNT(OrderID) AS OrderCount
FROM Sales.Orders
GROUP BY CustomerID
CREATE CLUSTERED COLUMNSTORE INDEX Idx_Orders_Columnstore ON Sales.Orders
-- ==================================================================
-- Tip 18: Pre-Aggregate Data and store it in new Table for Reporting
-- ==================================================================
SELECT MONTH(OrderDate) OrderYear, SUM(Sales) AS TotalSales
INTO Sales.SalesSummary
FROM Sales.Orders
GROUP BY MONTH(OrderDate)
SELECT OrderYear, TotalSales FROM Sales.SalesSummary
-- ##############################################################
-- # SUBQUERIES, CTE #
-- ##############################################################
-- ============================================
-- Tip 19: JOIN vs EXISTS vs IN (Avoid using IN)
-- ============================================
-- JOIN (Best Practice: If the Performance equals to EXISTS)
SELECT o.OrderID, o.Sales
FROM Sales.Orders o
INNER JOIN Sales.Customers c
ON o.CustomerID = c.CustomerID
WHERE c.Country = 'USA'
-- EXISTS (Best Practice: Use it for Large Tables)
SELECT o.OrderID, o.Sales
FROM Sales.Orders o
WHERE EXISTS (
SELECT 1
FROM Sales.Customers c
WHERE c.CustomerID = o.CustomerID
AND c.Country = 'USA'
)
-- IN (Bad Practice)
SELECT o.OrderID, o.Sales
FROM Sales.Orders o
WHERE o.CustomerID IN (
SELECT CustomerID
FROM Sales.Customers
WHERE Country = 'USA'
)
-- ===========================================
-- Tip 20: Avoid Redundant Logic in Your Query
-- ===========================================
-- Bad Practice
SELECT EmployeeID, FirstName, 'Above Average' Status
FROM Sales.Employees
WHERE Salary > (SELECT AVG(Salary) FROM Sales.Employees)
UNION ALL
SELECT EmployeeID, FirstName, 'Below Average' Status
FROM Sales.Employees
WHERE Salary < (SELECT AVG(Salary) FROM Sales.Employees)
-- Good Practice
SELECT
EmployeeID,
FirstName,
CASE
WHEN Salary > AVG(Salary) OVER () THEN 'Above Average'
WHEN Salary < AVG(Salary) OVER () THEN 'Below Average'
ELSE 'Average'
END AS Status
FROM Sales.Employees
-- ##############################################################
-- # DDL #
-- ##############################################################
-- =============================================================================
-- Tip 21: Avoid VARCHAR Data Type If Possible
-- =============================================================================
-- Tip 22: Avoid Using MAX or Overly Large Lengths
-- =============================================================================
-- Tip 23: Use NOT NULL If possible
-- =============================================================================
-- Tip 24: Make sure all tables have a CLUSTERED PRIMARY KEY
-- =============================================================================
-- Tip 25: Creeate Nonclustered Index on Foreign Key if they are frequently used
-- =============================================================================
-- Bad Practice
CREATE TABLE CustomersInfo (
CustomerID INT,
FirstName VARCHAR(MAX),
LastName TEXT,
Country VARCHAR(255),
TotalPurchases FLOAT,
Score VARCHAR(255),
BirthDate VARCHAR(255),
EmployeeID INT,
CONSTRAINT FK_Bad_Customers_EmployeeID FOREIGN KEY (EmployeeID) REFERENCES Sales.Employees(EmployeeID)
)
-- Good Practice Practice
CREATE TABLE CustomersInfo (
CustomerID INT PRIMARY KEY CLUSTERED,
FirstName VARCHAR(50) NOT NULL,
LastName VARCHAR(50) NOT NULL,
Country VARCHAR(50) NOT NULL,
TotalPurchases FLOAT,
Score INT,
BirthDate DATE,
EmployeeID INT,
CONSTRAINT FK_CustomersInfo_EmployeeID FOREIGN KEY (EmployeeID)
REFERENCES Sales.Employees(EmployeeID)
)
CREATE NONCLUSTERED INDEX IX_CustomersInfo_EmployeeID
ON CustomersInfo(EmployeeID)
-- ##############################################################
-- # INDEXING #
-- ##############################################################
-- =================================================================================================================================
-- Tip 26: Avoid Over Indexing, as it can slow down insert, update, and delete operations
-- =================================================================================================================================
-- Tip 27: Regularly review and drop unused indexes to save space and improve write performance
-- =================================================================================================================================
-- Tip 28: Update table statistics weekly to ensure the query optimizer has the most up-to-date information
-- =================================================================================================================================
-- Tip 29: Reorganize and rebuild fragmented indexes weekly to maintain query performance.
-- =================================================================================================================================
-- Tip 30: For large tables (e.g., fact tables), partition the data and then apply a columnstore index for best performance results
-- =================================================================================================================================