forked from ggerganov/whisper.cpp
Commit: examples : add whisper.swiftui demo app (ggerganov#308)

* Add SwiftUI demo project.
* Add -DGGML_USE_ACCELERATE
Showing 18 changed files with 1,023 additions and 0 deletions.
examples/whisper.swiftui/README.md (12 additions & 0 deletions)
@@ -0,0 +1,12 @@
A sample SwiftUI app using [whisper.cpp](https://github.com/ggerganov/whisper.cpp/) to do voice-to-text transcriptions.
See also: [whisper.objc](https://github.com/ggerganov/whisper.cpp/tree/master/examples/whisper.objc).

To use:

1. Select a model from the [whisper.cpp repository](https://github.com/ggerganov/whisper.cpp/tree/master/models).[^1]
2. Add the model to "whisper.swiftui.demo/Resources/models" via Xcode.
3. Select a sample audio file (for example, [jfk.wav](https://github.com/ggerganov/whisper.cpp/raw/master/samples/jfk.wav)).
4. Add the sample audio file to "whisper.swiftui.demo/Resources/samples" via Xcode.
5. Select the "Release" build configuration under "Run", then deploy and run the app on your device.

[^1]: I recommend the tiny, base or small models for running on an iOS device.
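
For these steps to work, the app must be able to find the bundled files at runtime. Below is a minimal sketch of the expected lookups, mirroring the `Bundle` calls in WhisperState.swift further down in this commit; the folder names come from steps 2 and 4, and the file names assume the tiny English model and the jfk.wav sample, so adjust them to whatever you actually bundle.

```swift
import Foundation

// Hypothetical check that the resources from steps 2 and 4 were bundled.
// Folder names ("models", "samples") match WhisperState.swift below; the
// specific file names are assumptions based on the suggested downloads.
let modelUrl = Bundle.main.url(forResource: "ggml-tiny.en",
                               withExtension: "bin",
                               subdirectory: "models")
let sampleUrl = Bundle.main.url(forResource: "jfk",
                                withExtension: "wav",
                                subdirectory: "samples")

if modelUrl == nil { print("Model not found in the app bundle") }
if sampleUrl == nil { print("Sample not found in the app bundle") }
```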
examples/whisper.swiftui/whisper.cpp.swift/LibWhisper.swift (70 additions & 0 deletions)
@@ -0,0 +1,70 @@
import Foundation

enum WhisperError: Error {
    case couldNotInitializeContext
}

// Meet Whisper C++ constraint: Don't access from more than one thread at a time.
actor WhisperContext {
    private var context: OpaquePointer

    init(context: OpaquePointer) {
        self.context = context
    }

    deinit {
        whisper_free(context)
    }

    func fullTranscribe(samples: [Float]) {
        // Leave 2 processors free (i.e. the high-efficiency cores).
        let maxThreads = max(1, min(8, cpuCount() - 2))
        print("Selecting \(maxThreads) threads")
        var params = whisper_full_default_params(WHISPER_SAMPLING_GREEDY)
        "en".withCString { en in
            // Adapted from whisper.objc
            params.print_realtime = true
            params.print_progress = false
            params.print_timestamps = true
            params.print_special = false
            params.translate = false
            params.language = en
            params.n_threads = Int32(maxThreads)
            params.offset_ms = 0
            params.no_context = true
            params.single_segment = false

            whisper_reset_timings(context)
            print("About to run whisper_full")
            samples.withUnsafeBufferPointer { samples in
                if (whisper_full(context, params, samples.baseAddress, Int32(samples.count)) != 0) {
                    print("Failed to run the model")
                } else {
                    whisper_print_timings(context)
                }
            }
        }
    }

    func getTranscription() -> String {
        var transcription = ""
        for i in 0..<whisper_full_n_segments(context) {
            transcription += String.init(cString: whisper_full_get_segment_text(context, i))
        }
        return transcription
    }

    static func createContext(path: String) throws -> WhisperContext {
        let context = whisper_init(path)
        if let context {
            return WhisperContext(context: context)
        } else {
            print("Couldn't load model at \(path)")
            throw WhisperError.couldNotInitializeContext
        }
    }
}

fileprivate func cpuCount() -> Int {
    ProcessInfo.processInfo.processorCount
}
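
The comment in this file captures its key design choice: the whisper.cpp context must not be used from more than one thread at a time, so the wrapper is a Swift actor and every external call is serialized through it. A brief usage sketch under that model (the model path and sample buffer are placeholders; WhisperState.swift below does the same thing with bundled resources):

```swift
// Sketch: driving WhisperContext from async code. Each awaited call hops
// onto the actor, so the underlying C context is only ever touched by one
// task at a time. The path and samples here are placeholders.
func runTranscription(samples: [Float]) async throws -> String {
    let ctx = try WhisperContext.createContext(path: "path/to/ggml-tiny.en.bin")
    await ctx.fullTranscribe(samples: samples)
    return await ctx.getTranscription()
}
```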
examples/whisper.swiftui/whisper.cpp.swift/WhisperCppDemo-Bridging-Header.h (4 additions & 0 deletions)
@@ -0,0 +1,4 @@
//
// Use this file to import your target's public headers that you would like to expose to Swift.
//
#import "whisper.h"
examples/whisper.swiftui/whisper.swiftui.demo/Models/WhisperState.swift (162 additions & 0 deletions)
@@ -0,0 +1,162 @@
import Foundation
import SwiftUI
import AVFoundation

@MainActor
class WhisperState: NSObject, ObservableObject, AVAudioRecorderDelegate {
    @Published var isModelLoaded = false
    @Published var messageLog = ""
    @Published var canTranscribe = false
    @Published var isRecording = false

    private var whisperContext: WhisperContext?
    private let recorder = Recorder()
    private var recordedFile: URL? = nil
    private var audioPlayer: AVAudioPlayer?

    private var modelUrl: URL? {
        Bundle.main.url(forResource: "ggml-tiny.en", withExtension: "bin", subdirectory: "models")
    }

    private var sampleUrl: URL? {
        Bundle.main.url(forResource: "jfk", withExtension: "wav", subdirectory: "samples")
    }

    private enum LoadError: Error {
        case couldNotLocateModel
    }

    override init() {
        super.init()
        do {
            try loadModel()
            canTranscribe = true
        } catch {
            print(error.localizedDescription)
            messageLog += "\(error.localizedDescription)\n"
        }
    }

    private func loadModel() throws {
        messageLog += "Loading model...\n"
        if let modelUrl {
            whisperContext = try WhisperContext.createContext(path: modelUrl.path())
            messageLog += "Loaded model \(modelUrl.lastPathComponent)\n"
        } else {
            messageLog += "Could not locate model\n"
        }
    }

    func transcribeSample() async {
        if let sampleUrl {
            await transcribeAudio(sampleUrl)
        } else {
            messageLog += "Could not locate sample\n"
        }
    }

    private func transcribeAudio(_ url: URL) async {
        if (!canTranscribe) {
            return
        }
        guard let whisperContext else {
            return
        }

        do {
            canTranscribe = false
            messageLog += "Reading wave samples...\n"
            let data = try readAudioSamples(url)
            messageLog += "Transcribing data...\n"
            await whisperContext.fullTranscribe(samples: data)
            let text = await whisperContext.getTranscription()
            messageLog += "Done: \(text)\n"
        } catch {
            print(error.localizedDescription)
            messageLog += "\(error.localizedDescription)\n"
        }

        canTranscribe = true
    }

    private func readAudioSamples(_ url: URL) throws -> [Float] {
        stopPlayback()
        try startPlayback(url)
        return try decodeWaveFile(url)
    }

    func toggleRecord() async {
        if isRecording {
            await recorder.stopRecording()
            isRecording = false
            if let recordedFile {
                await transcribeAudio(recordedFile)
            }
        } else {
            requestRecordPermission { granted in
                if granted {
                    Task {
                        do {
                            self.stopPlayback()
                            let file = try FileManager.default.url(for: .documentDirectory, in: .userDomainMask, appropriateFor: nil, create: true)
                                .appending(path: "output.wav")
                            try await self.recorder.startRecording(toOutputFile: file, delegate: self)
                            self.isRecording = true
                            self.recordedFile = file
                        } catch {
                            print(error.localizedDescription)
                            self.messageLog += "\(error.localizedDescription)\n"
                            self.isRecording = false
                        }
                    }
                }
            }
        }
    }

    private func requestRecordPermission(response: @escaping (Bool) -> Void) {
#if os(macOS)
        response(true)
#else
        AVAudioSession.sharedInstance().requestRecordPermission { granted in
            response(granted)
        }
#endif
    }

    private func startPlayback(_ url: URL) throws {
        audioPlayer = try AVAudioPlayer(contentsOf: url)
        audioPlayer?.play()
    }

    private func stopPlayback() {
        audioPlayer?.stop()
        audioPlayer = nil
    }

    // MARK: AVAudioRecorderDelegate

    nonisolated func audioRecorderEncodeErrorDidOccur(_ recorder: AVAudioRecorder, error: Error?) {
        if let error {
            Task {
                await handleRecError(error)
            }
        }
    }

    private func handleRecError(_ error: Error) {
        print(error.localizedDescription)
        messageLog += "\(error.localizedDescription)\n"
        isRecording = false
    }

    nonisolated func audioRecorderDidFinishRecording(_ recorder: AVAudioRecorder, successfully flag: Bool) {
        Task {
            await onDidFinishRecording()
        }
    }

    private func onDidFinishRecording() {
        isRecording = false
    }
}
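
`readAudioSamples` above hands the file to a `decodeWaveFile` helper (and `toggleRecord` uses a `Recorder` type); both belong to this commit but are not visible in this excerpt. A plausible minimal sketch of the decoder, assuming a canonical 16-bit PCM WAV with a 44-byte header, which is what the suggested jfk.wav sample uses:

```swift
import Foundation

// Assumed sketch of decodeWaveFile: skip a canonical 44-byte WAV header,
// then read little-endian 16-bit PCM samples and scale them to [-1, 1],
// the float format whisper_full expects.
func decodeWaveFile(_ url: URL) throws -> [Float] {
    let data = try Data(contentsOf: url)
    // Stop one byte early so a stray trailing byte can't cause an
    // out-of-bounds slice.
    return stride(from: 44, to: data.count - 1, by: 2).map { offset -> Float in
        let sample = data[offset..<offset + 2].withUnsafeBytes {
            Int16(littleEndian: $0.load(as: Int16.self))
        }
        return max(-1.0, min(Float(sample) / 32767.0, 1.0))
    }
}
```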
.../whisper.swiftui.demo/Supporting files/Assets.xcassets/AccentColor.colorset/Contents.json (11 additions & 0 deletions)
@@ -0,0 +1,11 @@
{
  "colors" : [
    {
      "idiom" : "universal"
    }
  ],
  "info" : {
    "author" : "xcode",
    "version" : 1
  }
}
...ui/whisper.swiftui.demo/Supporting files/Assets.xcassets/AppIcon.appiconset/Contents.json (63 additions & 0 deletions)
@@ -0,0 +1,63 @@
{
  "images" : [
    {
      "idiom" : "universal",
      "platform" : "ios",
      "size" : "1024x1024"
    },
    {
      "idiom" : "mac",
      "scale" : "1x",
      "size" : "16x16"
    },
    {
      "idiom" : "mac",
      "scale" : "2x",
      "size" : "16x16"
    },
    {
      "idiom" : "mac",
      "scale" : "1x",
      "size" : "32x32"
    },
    {
      "idiom" : "mac",
      "scale" : "2x",
      "size" : "32x32"
    },
    {
      "idiom" : "mac",
      "scale" : "1x",
      "size" : "128x128"
    },
    {
      "idiom" : "mac",
      "scale" : "2x",
      "size" : "128x128"
    },
    {
      "idiom" : "mac",
      "scale" : "1x",
      "size" : "256x256"
    },
    {
      "idiom" : "mac",
      "scale" : "2x",
      "size" : "256x256"
    },
    {
      "idiom" : "mac",
      "scale" : "1x",
      "size" : "512x512"
    },
    {
      "idiom" : "mac",
      "scale" : "2x",
      "size" : "512x512"
    }
  ],
  "info" : {
    "author" : "xcode",
    "version" : 1
  }
}
examples/whisper.swiftui/whisper.swiftui.demo/Supporting files/Assets.xcassets/Contents.json (6 additions & 0 deletions)
@@ -0,0 +1,6 @@
{
  "info" : {
    "author" : "xcode",
    "version" : 1
  }
}
...isper.swiftui.demo/Supporting files/Preview Content/Preview Assets.xcassets/Contents.json (6 additions & 0 deletions)
@@ -0,0 +1,6 @@
{
  "info" : {
    "author" : "xcode",
    "version" : 1
  }
}
examples/whisper.swiftui/whisper.swiftui.demo/Supporting files/WhisperCppDemo.entitlements (12 additions & 0 deletions)
@@ -0,0 +1,12 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
    <key>com.apple.security.app-sandbox</key>
    <true/>
    <key>com.apple.security.device.audio-input</key>
    <true/>
    <key>com.apple.security.files.user-selected.read-only</key>
    <true/>
</dict>
</plist>
examples/whisper.swiftui/whisper.swiftui.demo/UI/ContentView.swift (43 additions & 0 deletions)
@@ -0,0 +1,43 @@
import SwiftUI
import AVFoundation

struct ContentView: View {
    @StateObject var whisperState = WhisperState()

    var body: some View {
        NavigationStack {
            VStack {
                HStack {
                    Button("Transcribe", action: {
                        Task {
                            await whisperState.transcribeSample()
                        }
                    })
                    .buttonStyle(.bordered)
                    .disabled(!whisperState.canTranscribe)

                    Button(whisperState.isRecording ? "Stop recording" : "Start recording", action: {
                        Task {
                            await whisperState.toggleRecord()
                        }
                    })
                    .buttonStyle(.bordered)
                    .disabled(!whisperState.canTranscribe)
                }

                ScrollView {
                    Text(verbatim: whisperState.messageLog)
                        .frame(maxWidth: .infinity, alignment: .leading)
                }
            }
            .navigationTitle("Whisper SwiftUI Demo")
            .padding()
        }
    }
}

struct ContentView_Previews: PreviewProvider {
    static var previews: some View {
        ContentView()
    }
}
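
ContentView still needs a SwiftUI entry point to host it. The commit's actual `@main` app struct is in one of the changed files not shown in this excerpt; the sketch below shows the minimal shape such an entry point takes (the type name here is illustrative and may not match the real file).

```swift
import SwiftUI

// Illustrative entry point hosting ContentView; the real app struct in this
// commit is not shown in this excerpt and its name may differ.
@main
struct WhisperDemoApp: App {
    var body: some Scene {
        WindowGroup {
            ContentView()
        }
    }
}
```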