feat(minor): Avoid rebuilds (#207)
We used to unconditionally build all benchmark targets regardless of operation.

Changed the logic so that baseline read/list/check operations no longer rebuild, since the benchmarks will not be executed for those operations.
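In outline: the plugin now computes a `shouldBuildTargets` flag up front and only enters its build loop when the flag is set. A condensed, self-contained sketch of the rule, with names mirroring the diff below (the standalone function is an illustration; the plugin inlines this logic rather than factoring it out):

```swift
enum BaselineOperation { case read, list, delete, update, compare, check }

/// Build only when the requested operation will actually execute benchmarks.
/// `baselineCount` is the number of baselines named on the command line and
/// `maxBaselines` is the most the operation accepts (2 for compare, 1 for an
/// absolute-threshold check): if every slot is a stored baseline, nothing runs.
func shouldBuildTargets(for operation: BaselineOperation,
                        baselineCount: Int,
                        maxBaselines: Int) -> Bool {
    switch operation {
    case .read, .list, .delete:
        return false // these only touch stored baselines
    case .compare, .check:
        return baselineCount < maxBaselines // all inputs stored, nothing to run
    case .update:
        return true // needs a fresh run to record new results
    }
}

// E.g. `baseline check` against absolute thresholds with one stored baseline:
let build = shouldBuildTargets(for: .check, baselineCount: 1, maxBaselines: 1)
// build == false: the stored baseline is read instead of re-running the suite
```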
hassila authored Dec 15, 2023
1 parent dba4133 commit 05695c9
Showing 9 changed files with 140 additions and 88 deletions.
147 changes: 81 additions & 66 deletions Plugins/BenchmarkCommandPlugin/BenchmarkCommandPlugin.swift
@@ -127,37 +127,7 @@ import PackagePlugin
}

let swiftSourceModuleTargets: [SwiftSourceModuleTarget]

// don't build any targets if we're creating a benchmark, otherwise specified targets
if commandToPerform == .`init` {
swiftSourceModuleTargets = []
} else {
if specifiedTargets.isEmpty {
swiftSourceModuleTargets = context.package.targets(ofType: SwiftSourceModuleTarget.self)
} else {
swiftSourceModuleTargets = specifiedTargets
}
}

let filteredTargets = swiftSourceModuleTargets
.filter { $0.kind == .executable }
.filter { benchmark in
let path = benchmark.directory.removingLastComponent()
return path.lastComponent == "Benchmarks" ? true : false
}
.filter { benchmark in
swiftSourceModuleTargets.first(where: { $0.name == benchmark.name }) != nil ? true : false
}
.filter { benchmark in
skipTargets.first(where: { $0.name == benchmark.name }) == nil ? true : false
}

// Build the targets
if outputFormat == .text {
if quietRunning == 0 {
print("Building benchmark targets in release mode for benchmark run...")
}
}
var shouldBuildTargets = true // We don't rebuild the targets when we don't need to execute them, e.g. baseline read/compare

let benchmarkTool = try context.tool(named: "BenchmarkTool")

@@ -167,39 +137,6 @@ import PackagePlugin
"--format", outputFormat.rawValue,
"--grouping", grouping]

try filteredTargets.forEach { target in
if outputFormat == .text {
if quietRunning == 0 {
print("Building \(target.name)")
}
}

let buildResult = try packageManager.build(
.product(target.name), // .all(includingTests: false),
parameters: .init(configuration: .release)
)

guard buildResult.succeeded else {
print(buildResult.logText)
print("Benchmark failed to run due to build error.")
return
}

// Filter out all executable products which are Benchmarks we should run
let benchmarks = buildResult.builtArtifacts
.filter { benchmark in
filteredTargets.first(where: { $0.name == benchmark.path.lastComponent }) != nil ? true : false
}

if benchmarks.isEmpty {
throw ArgumentParsingError.noMatchingTargetsForRegex
}

benchmarks.forEach { benchmark in
args.append(contentsOf: ["--benchmark-executable-paths", benchmark.path.string])
}
}

metricsToUse.forEach { metric in
args.append(contentsOf: ["--metrics", metric.description])
}
@@ -286,22 +223,97 @@ import PackagePlugin
print("Must specify exactly zero or one baseline for check against absolute thresholds, got: \(positionalArguments)")
throw MyError.invalidArgument
}
if positionalArguments.count == validRange.upperBound { // don't rebuild if we only read existing baselines
shouldBuildTargets = false
}
} else {
let validRange = 1 ... 2
guard validRange.contains(positionalArguments.count) else {
print("Must specify exactly one or two baselines for comparisons or threshold violation checks, got: \(positionalArguments)")
throw MyError.invalidArgument
}
if positionalArguments.count == validRange.upperBound { // don't rebuild if we only read existing baselines
shouldBuildTargets = false
}
}
default:
break
case .read, .list, .delete:
shouldBuildTargets = false
}

positionalArguments.forEach { baseline in
args.append(contentsOf: ["--baseline", baseline])
}
}

// don't build any targets if the operation doesn't need to run them, otherwise build the specified targets
if commandToPerform == .`init` {
swiftSourceModuleTargets = []
} else {
if specifiedTargets.isEmpty {
swiftSourceModuleTargets = context.package.targets(ofType: SwiftSourceModuleTarget.self)
} else {
swiftSourceModuleTargets = specifiedTargets
}
}

let filteredTargets = swiftSourceModuleTargets
.filter { $0.kind == .executable }
.filter { benchmark in
let path = benchmark.directory.removingLastComponent()
return path.lastComponent == "Benchmarks" ? true : false
}
.filter { benchmark in
swiftSourceModuleTargets.first(where: { $0.name == benchmark.name }) != nil ? true : false
}
.filter { benchmark in
skipTargets.first(where: { $0.name == benchmark.name }) == nil ? true : false
}

// Build the targets
if outputFormat == .text {
if quietRunning == 0 && shouldBuildTargets {
print("Building benchmark targets in release mode for benchmark run...")
}
}

// Build targets in release mode
try filteredTargets.forEach { target in
args.append(contentsOf: ["--targets", target.name])

if shouldBuildTargets {
if outputFormat == .text {
if quietRunning == 0 {
print("Building \(target.name)")
}
}

let buildResult = try packageManager.build(
.product(target.name), // .all(includingTests: false),
parameters: .init(configuration: .release)
)

guard buildResult.succeeded else {
print(buildResult.logText)
print("Benchmark failed to run due to build error.")
return
}

// Filter out all executable products which are Benchmarks we should run
let benchmarks = buildResult.builtArtifacts
.filter { benchmark in
filteredTargets.first(where: { $0.name == benchmark.path.lastComponent }) != nil ? true : false
}

if benchmarks.isEmpty {
throw ArgumentParsingError.noMatchingTargetsForRegex
}

benchmarks.forEach { benchmark in
args.append(contentsOf: ["--benchmark-executable-paths", benchmark.path.string])
}
}
}

try withCStrings(args) { cArgs in
let newPath = benchmarkTool.path
// This doesn't work for external dependents
@@ -333,6 +345,8 @@ import PackagePlugin
switch waitStatus {
case .success:
break
case .baselineNotFound:
throw MyError.baselineNotFound
case .genericFailure:
print("One or more benchmark suites crashed during runtime.")
throw MyError.benchmarkCrashed
@@ -362,6 +376,7 @@ import PackagePlugin
case benchmarkThresholdImprovement
case benchmarkCrashed
case benchmarkUnexpectedReturnCode
case baselineNotFound
case invalidArgument
}
}
1 change: 1 addition & 0 deletions Plugins/BenchmarkCommandPlugin/Command+Helpers.swift
@@ -63,4 +63,5 @@ enum ExitCode: Int32 {
case thresholdRegression = 2
case benchmarkJobFailed = 3
case thresholdImprovement = 4
case baselineNotFound = 5
}
1 change: 1 addition & 0 deletions Plugins/BenchmarkHelpGenerator/Command+Helpers.swift
@@ -63,4 +63,5 @@ enum ExitCode: Int32 {
case thresholdRegression = 2
case benchmarkJobFailed = 3
case thresholdImprovement = 4
case baselineNotFound = 5
}
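The `ExitCode` table is kept in sync across three copies: the command plugin (above), this help generator, and `BenchmarkTool` itself (below), because the tool runs as a child process and can only report back through its exit status. A minimal sketch of the intended round trip, assuming `failBenchmark` terminates the process (its signature is inferred from the call sites in the hunks below, and only the enum cases visible in the hunks are shown):

```swift
import Foundation

enum ExitCode: Int32 {
    // tail of the enum as shown in the hunks; earlier cases omitted here
    case thresholdRegression = 2
    case benchmarkJobFailed = 3
    case thresholdImprovement = 4
    case baselineNotFound = 5 // new in this commit
}

// Tool side: print the reason and exit with the shared code so the parent
// plugin can distinguish "baseline missing" from a generic failure.
func failBenchmark(_ message: String, exitCode: ExitCode) -> Never {
    print(message)
    exit(exitCode.rawValue)
}

// Plugin side (see BenchmarkCommandPlugin.swift above): the child's wait
// status is switched over, and .baselineNotFound becomes a thrown MyError.
```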
36 changes: 28 additions & 8 deletions Plugins/BenchmarkTool/BenchmarkTool+Operations.swift
@@ -98,8 +98,7 @@ extension BenchmarkTool {

switch baselineOperation {
case .delete:
benchmarkExecutablePaths.forEach { path in
let target = FilePath(path).lastComponent!.description
targets.forEach { target in
baseline.forEach {
removeBaselinesNamed(target: target, baselineName: $0)
}
@@ -137,16 +136,36 @@
print("Updated baseline '\(baselineName)'")
}
} else {
fatalError("Could not get first baselinename")
failBenchmark("Could not get first baselinename.", exitCode: .baselineNotFound)
}

case .check:
if checkAbsolute {
guard benchmarkBaselines.count == 1 else {
guard benchmarkBaselines.count == 1,
let currentBaseline = benchmarkBaselines.first,
let baselineName = baseline.first else {
print("Can only do absolute threshold violation checks for a single benchmark baseline, got: \(benchmarkBaselines.count) baselines.")
return
}
if let benchmarkPath = checkAbsolutePath { // load statically defined threshods for .p90

if benchmarks.isEmpty { // if we only read a baseline and didn't run the benchmarks, insert placeholder entries for the comparison
currentBaseline.results.keys.forEach { baselineKey in
if let benchmark: Benchmark = .init(baselineKey.name, closure: { _ in }) {
benchmark.target = baselineKey.target
benchmarks.append(benchmark)
}
}
}

benchmarks = benchmarks.filter {
do {
return try shouldIncludeBenchmark($0.name)
} catch {
return false
}
}

if let benchmarkPath = checkAbsolutePath { // load statically defined thresholds for .p90
var thresholdsFound = false
benchmarks.forEach { benchmark in
let thresholds = BenchmarkTool.makeBenchmarkThresholds(path: benchmarkPath,
@@ -167,14 +186,15 @@
}
}
if !thresholdsFound {
print("")
if benchmarks.count == 0 {
failBenchmark("No benchmarks matching filter selection, failing threshold check.",
exitCode: .thresholdRegression)
}
failBenchmark("Could not find any matching absolute thresholds at path [\(benchmarkPath)], failing threshold check.",
exitCode: .thresholdRegression)
}
}
print("")
let currentBaseline = benchmarkBaselines[0]
let baselineName = baseline[0]

let deviationResults = currentBaseline.failsAbsoluteThresholdChecks(benchmarks: benchmarks)

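A note on the placeholder entries introduced above: `failsAbsoluteThresholdChecks(benchmarks:)` pairs stored results with live `Benchmark` instances, so a check that only read a baseline (and therefore executed nothing) would otherwise have an empty list to match against. A self-contained sketch of the pattern with simplified stand-in types (assumptions for illustration, not the library's real definitions):

```swift
// Simplified stand-ins for the real types; assumptions for illustration only.
struct BaselineKey: Hashable {
    let target: String
    let name: String
}

final class Benchmark {
    let name: String
    var target: String = ""
    init?(_ name: String, closure: (Benchmark) -> Void) { self.name = name }
}

var benchmarks: [Benchmark] = [] // empty: nothing was executed this invocation
let storedKeys: [BaselineKey] = [.init(target: "MyBenchmarks", name: "Malloc")]

// The commit's trick: synthesize one no-op Benchmark per stored result so the
// absolute-threshold comparison has live entries to pair with baseline keys.
if benchmarks.isEmpty {
    for key in storedKeys {
        if let benchmark = Benchmark(key.name, closure: { _ in }) {
            benchmark.target = key.target
            benchmarks.append(benchmark)
        }
    }
}
```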
28 changes: 17 additions & 11 deletions Plugins/BenchmarkTool/BenchmarkTool.swift
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,10 @@ typealias BenchmarkResults = [BenchmarkIdentifier: [BenchmarkResult]]
@main
struct BenchmarkTool: AsyncParsableCommand {
@Option(name: .long, help: "The paths to the benchmarks to run")
var benchmarkExecutablePaths: [String]
var benchmarkExecutablePaths: [String] = []

@Option(name: .long, help: "The targets")
var targets: [String]

@Option(name: .long, help: "The command to perform")
var command: BenchmarkOperation
@@ -103,10 +106,6 @@ struct BenchmarkTool: AsyncParsableCommand {
@Option(name: .long, help: "Benchmarks matching the regexp filter that should be skipped")
var skip: [String] = []

var targets: [String] {
benchmarkExecutablePaths.map { FilePath($0).lastComponent!.description }
}

var inputFD: CInt = 0
var outputFD: CInt = 0

@@ -185,9 +184,7 @@
if let baseline = try readBaseline(baselineName) {
benchmarkBaselines.append(baseline)
} else {
if quiet == false {
print("Warning: Failed to load specified baseline '\(baselineName)'.")
}
failBenchmark("Failed to load specified baseline '\(baselineName)'.", exitCode: .baselineNotFound)
}
}
}
@@ -232,9 +229,18 @@
return
}

if let operation = baselineOperation, [.compare, .check].contains(operation), benchmarkBaselines.count > 1 {
try postProcessBenchmarkResults()
return
if let operation = baselineOperation, [.compare, .check].contains(operation) {
if checkAbsolute {
if benchmarkBaselines.count > 0 {
try postProcessBenchmarkResults()
return
}
} else {
if benchmarkBaselines.count > 1 {
try postProcessBenchmarkResults()
return
}
}
}

guard command != .query else {
1 change: 1 addition & 0 deletions Plugins/BenchmarkTool/Command+Helpers.swift
Expand Up @@ -63,4 +63,5 @@ enum ExitCode: Int32 {
case thresholdRegression = 2
case benchmarkJobFailed = 3
case thresholdImprovement = 4
case baselineNotFound = 5
}
8 changes: 8 additions & 0 deletions Sources/Benchmark/Documentation.docc/GettingStarted.md
@@ -115,6 +115,14 @@ The following example shows a benchmark suite named `My-Benchmark` with the req
),
```

### Run benchmark plugin in release mode
If you find that the runtime for e.g. baseline processing is long, try running the benchmark
plugin in the release configuration, e.g.

```
swift package -c release benchmark baseline read myBaseLine
```

### Dedicated GitHub runner instances

For reproducible and comparable results, it is *highly* recommended to set up a private GitHub runner that is completely dedicated to performance benchmark runs; because the standard GitHub CI runners are deployed on shared infrastructure, the deviations between runs can be significant and difficult to assess.
2 changes: 1 addition & 1 deletion Tests/BenchmarkTests/AdditionalTests.swift
Expand Up @@ -24,7 +24,7 @@ final class AdditionalTests: XCTestCase {
}

var results: [ContinuousClock.Duration] = []
var testIterations = 2_000_000
var testIterations = 100_000
for _ in 0 ..< 3 {
results.append(runWork(testIterations))
testIterations *= 10
4 changes: 2 additions & 2 deletions Thresholds/P90AbsoluteThresholdsBenchmark.P90Malloc.p90.json
@@ -1,4 +1,4 @@
{
"syscalls" : 2,
"mallocCountTotal" : 1000
"syscalls" : 6,
"mallocCountTotal" : 1012
}
