examples : add whisper.swiftui demo app (ggerganov#308)
* Add SwiftUI demo project.

* Add -DGGML_USE_ACCELERATE
Digipom authored Dec 23, 2022
1 parent 4c1fe0c commit 0f4227d
Showing 18 changed files with 1,023 additions and 0 deletions.
12 changes: 12 additions & 0 deletions examples/whisper.swiftui/README.md
@@ -0,0 +1,12 @@
A sample SwiftUI app using [whisper.cpp](https://github.com/ggerganov/whisper.cpp/) to do voice-to-text transcriptions.
See also: [whisper.objc](https://github.com/ggerganov/whisper.cpp/tree/master/examples/whisper.objc).

To use:

1. Select a model from the [whisper.cpp repository](https://github.com/ggerganov/whisper.cpp/tree/master/models).[^1]
2. Add the model to "whisper.swiftui.demo/Resources/models" via Xcode.
3. Select a sample audio file (for example, [jfk.wav](https://github.com/ggerganov/whisper.cpp/raw/master/samples/jfk.wav)).
4. Add the sample audio file to "whisper.swiftui.demo/Resources/samples" via Xcode.
5. Select the "Release" build configuration under "Run", then deploy and run on your device.

[^1]: I recommend the tiny, base or small models for running on an iOS device.
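
For orientation, here is a minimal sketch of the flow those steps exercise, using the APIs this commit adds (`WhisperContext` from `LibWhisper.swift`; `decodeWaveFile` is a WAV helper from a file in this commit not shown in this excerpt). It assumes the Resources layout described above and an async throwing context:

```swift
// Sketch only: bundle paths assume the Resources layout from the steps above.
guard let modelUrl = Bundle.main.url(forResource: "ggml-tiny.en", withExtension: "bin", subdirectory: "models"),
      let audioUrl = Bundle.main.url(forResource: "jfk", withExtension: "wav", subdirectory: "samples") else {
    fatalError("Add the model and sample via Xcode first")
}

let context = try WhisperContext.createContext(path: modelUrl.path())
let samples = try decodeWaveFile(audioUrl)   // [Float] at 16 kHz mono
await context.fullTranscribe(samples: samples)
print(await context.getTranscription())
```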
70 changes: 70 additions & 0 deletions examples/whisper.swiftui/whisper.cpp.swift/LibWhisper.swift
@@ -0,0 +1,70 @@
import Foundation

enum WhisperError: Error {
    case couldNotInitializeContext
}

// Meet Whisper C++ constraint: Don't access from more than one thread at a time.
actor WhisperContext {
    private var context: OpaquePointer

    init(context: OpaquePointer) {
        self.context = context
    }

    deinit {
        whisper_free(context)
    }

    func fullTranscribe(samples: [Float]) {
        // Leave 2 processors free (i.e. the high-efficiency cores).
        let maxThreads = max(1, min(8, cpuCount() - 2))
        print("Selecting \(maxThreads) threads")
        var params = whisper_full_default_params(WHISPER_SAMPLING_GREEDY)
        "en".withCString { en in
            // Adapted from whisper.objc
            params.print_realtime = true
            params.print_progress = false
            params.print_timestamps = true
            params.print_special = false
            params.translate = false
            params.language = en
            params.n_threads = Int32(maxThreads)
            params.offset_ms = 0
            params.no_context = true
            params.single_segment = false

            whisper_reset_timings(context)
            print("About to run whisper_full")
            samples.withUnsafeBufferPointer { samples in
                if whisper_full(context, params, samples.baseAddress, Int32(samples.count)) != 0 {
                    print("Failed to run the model")
                } else {
                    whisper_print_timings(context)
                }
            }
        }
    }

    func getTranscription() -> String {
        var transcription = ""
        for i in 0..<whisper_full_n_segments(context) {
            transcription += String(cString: whisper_full_get_segment_text(context, i))
        }
        return transcription
    }

    static func createContext(path: String) throws -> WhisperContext {
        let context = whisper_init(path)
        if let context {
            return WhisperContext(context: context)
        } else {
            print("Couldn't load model at \(path)")
            throw WhisperError.couldNotInitializeContext
        }
    }
}

fileprivate func cpuCount() -> Int {
    ProcessInfo.processInfo.processorCount
}
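
The actor wrapper is what enforces the single-thread constraint noted in the comment above: concurrent callers are serialized by Swift. A hypothetical illustration (not part of the commit; `context`, `first`, and `second` are placeholders):

```swift
// Both tasks target the same actor instance, so the two transcriptions
// run one after the other instead of racing on the whisper context.
Task { await context.fullTranscribe(samples: first) }
Task { await context.fullTranscribe(samples: second) }
```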
@@ -0,0 +1,4 @@
//
// Use this file to import your target's public headers that you would like to expose to Swift.
//
#import "whisper.h"
@@ -0,0 +1,162 @@
import Foundation
import SwiftUI
import AVFoundation

@MainActor
class WhisperState: NSObject, ObservableObject, AVAudioRecorderDelegate {
    @Published var isModelLoaded = false
    @Published var messageLog = ""
    @Published var canTranscribe = false
    @Published var isRecording = false

    private var whisperContext: WhisperContext?
    private let recorder = Recorder()
    private var recordedFile: URL? = nil
    private var audioPlayer: AVAudioPlayer?

    private var modelUrl: URL? {
        Bundle.main.url(forResource: "ggml-tiny.en", withExtension: "bin", subdirectory: "models")
    }

    private var sampleUrl: URL? {
        Bundle.main.url(forResource: "jfk", withExtension: "wav", subdirectory: "samples")
    }

    private enum LoadError: Error {
        case couldNotLocateModel
    }

    override init() {
        super.init()
        do {
            try loadModel()
            canTranscribe = true
        } catch {
            print(error.localizedDescription)
            messageLog += "\(error.localizedDescription)\n"
        }
    }

    private func loadModel() throws {
        messageLog += "Loading model...\n"
        if let modelUrl {
            whisperContext = try WhisperContext.createContext(path: modelUrl.path())
            messageLog += "Loaded model \(modelUrl.lastPathComponent)\n"
        } else {
            messageLog += "Could not locate model\n"
        }
    }

    func transcribeSample() async {
        if let sampleUrl {
            await transcribeAudio(sampleUrl)
        } else {
            messageLog += "Could not locate sample\n"
        }
    }

    private func transcribeAudio(_ url: URL) async {
        if !canTranscribe {
            return
        }
        guard let whisperContext else {
            return
        }

        do {
            canTranscribe = false
            messageLog += "Reading wave samples...\n"
            let data = try readAudioSamples(url)
            messageLog += "Transcribing data...\n"
            await whisperContext.fullTranscribe(samples: data)
            let text = await whisperContext.getTranscription()
            messageLog += "Done: \(text)\n"
        } catch {
            print(error.localizedDescription)
            messageLog += "\(error.localizedDescription)\n"
        }

        canTranscribe = true
    }

    private func readAudioSamples(_ url: URL) throws -> [Float] {
        stopPlayback()
        try startPlayback(url)
        return try decodeWaveFile(url)
    }

    func toggleRecord() async {
        if isRecording {
            await recorder.stopRecording()
            isRecording = false
            if let recordedFile {
                await transcribeAudio(recordedFile)
            }
        } else {
            requestRecordPermission { granted in
                if granted {
                    Task {
                        do {
                            self.stopPlayback()
                            let file = try FileManager.default.url(for: .documentDirectory, in: .userDomainMask, appropriateFor: nil, create: true)
                                .appending(path: "output.wav")
                            try await self.recorder.startRecording(toOutputFile: file, delegate: self)
                            self.isRecording = true
                            self.recordedFile = file
                        } catch {
                            print(error.localizedDescription)
                            self.messageLog += "\(error.localizedDescription)\n"
                            self.isRecording = false
                        }
                    }
                }
            }
        }
    }

    private func requestRecordPermission(response: @escaping (Bool) -> Void) {
        #if os(macOS)
        response(true)
        #else
        AVAudioSession.sharedInstance().requestRecordPermission { granted in
            response(granted)
        }
        #endif
    }

    private func startPlayback(_ url: URL) throws {
        audioPlayer = try AVAudioPlayer(contentsOf: url)
        audioPlayer?.play()
    }

    private func stopPlayback() {
        audioPlayer?.stop()
        audioPlayer = nil
    }

    // MARK: AVAudioRecorderDelegate

    nonisolated func audioRecorderEncodeErrorDidOccur(_ recorder: AVAudioRecorder, error: Error?) {
        if let error {
            Task {
                await handleRecError(error)
            }
        }
    }

    private func handleRecError(_ error: Error) {
        print(error.localizedDescription)
        messageLog += "\(error.localizedDescription)\n"
        isRecording = false
    }

    nonisolated func audioRecorderDidFinishRecording(_ recorder: AVAudioRecorder, successfully flag: Bool) {
        Task {
            await onDidFinishRecording()
        }
    }

    private func onDidFinishRecording() {
        isRecording = false
    }
}
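
Two helpers used above, `decodeWaveFile` and `Recorder`, live in files that are part of this commit but not shown in this excerpt. A hedged sketch of what a matching WAV decoder could look like, assuming a canonical 44-byte RIFF header and 16-bit little-endian PCM (the 16 kHz mono format whisper.cpp expects):

```swift
import Foundation

// Sketch only: decode 16-bit PCM WAV into normalized Float samples.
// A robust parser would walk the RIFF chunks instead of assuming a 44-byte header.
func decodeWaveFile(_ url: URL) throws -> [Float] {
    let data = try Data(contentsOf: url)
    return stride(from: 44, to: data.count, by: 2).map { offset in
        data[offset..<offset + 2].withUnsafeBytes { raw -> Float in
            let sample = Int16(littleEndian: raw.load(as: Int16.self))
            return max(-1.0, min(Float(sample) / 32767.0, 1.0))
        }
    }
}
```

And a matching sketch of the recorder as an actor around `AVAudioRecorder`, writing 16 kHz mono PCM so the output can be fed straight back into the decoder; the real `Recorder` in the commit may differ in details:

```swift
import AVFoundation

actor Recorder {
    private var recorder: AVAudioRecorder?

    enum RecorderError: Error {
        case couldNotStartRecording
    }

    func startRecording(toOutputFile url: URL, delegate: AVAudioRecorderDelegate?) throws {
        // 16 kHz mono linear PCM, matching what whisper.cpp expects.
        let settings: [String: Any] = [
            AVFormatIDKey: Int(kAudioFormatLinearPCM),
            AVSampleRateKey: 16000.0,
            AVNumberOfChannelsKey: 1,
            AVLinearPCMBitDepthKey: 16
        ]
        #if !os(macOS)
        try AVAudioSession.sharedInstance().setCategory(.playAndRecord, mode: .default)
        #endif
        let recorder = try AVAudioRecorder(url: url, settings: settings)
        recorder.delegate = delegate
        if !recorder.record() {
            throw RecorderError.couldNotStartRecording
        }
        self.recorder = recorder
    }

    func stopRecording() {
        recorder?.stop()
        recorder = nil
    }
}
```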
@@ -0,0 +1,11 @@
{
  "colors" : [
    {
      "idiom" : "universal"
    }
  ],
  "info" : {
    "author" : "xcode",
    "version" : 1
  }
}
@@ -0,0 +1,63 @@
{
  "images" : [
    {
      "idiom" : "universal",
      "platform" : "ios",
      "size" : "1024x1024"
    },
    {
      "idiom" : "mac",
      "scale" : "1x",
      "size" : "16x16"
    },
    {
      "idiom" : "mac",
      "scale" : "2x",
      "size" : "16x16"
    },
    {
      "idiom" : "mac",
      "scale" : "1x",
      "size" : "32x32"
    },
    {
      "idiom" : "mac",
      "scale" : "2x",
      "size" : "32x32"
    },
    {
      "idiom" : "mac",
      "scale" : "1x",
      "size" : "128x128"
    },
    {
      "idiom" : "mac",
      "scale" : "2x",
      "size" : "128x128"
    },
    {
      "idiom" : "mac",
      "scale" : "1x",
      "size" : "256x256"
    },
    {
      "idiom" : "mac",
      "scale" : "2x",
      "size" : "256x256"
    },
    {
      "idiom" : "mac",
      "scale" : "1x",
      "size" : "512x512"
    },
    {
      "idiom" : "mac",
      "scale" : "2x",
      "size" : "512x512"
    }
  ],
  "info" : {
    "author" : "xcode",
    "version" : 1
  }
}
@@ -0,0 +1,6 @@
{
  "info" : {
    "author" : "xcode",
    "version" : 1
  }
}
@@ -0,0 +1,6 @@
{
  "info" : {
    "author" : "xcode",
    "version" : 1
  }
}
@@ -0,0 +1,12 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
	<key>com.apple.security.app-sandbox</key>
	<true/>
	<key>com.apple.security.device.audio-input</key>
	<true/>
	<key>com.apple.security.files.user-selected.read-only</key>
	<true/>
</dict>
</plist>
43 changes: 43 additions & 0 deletions examples/whisper.swiftui/whisper.swiftui.demo/UI/ContentView.swift
@@ -0,0 +1,43 @@
import SwiftUI
import AVFoundation

struct ContentView: View {
    @StateObject var whisperState = WhisperState()

    var body: some View {
        NavigationStack {
            VStack {
                HStack {
                    Button("Transcribe", action: {
                        Task {
                            await whisperState.transcribeSample()
                        }
                    })
                    .buttonStyle(.bordered)
                    .disabled(!whisperState.canTranscribe)

                    Button(whisperState.isRecording ? "Stop recording" : "Start recording", action: {
                        Task {
                            await whisperState.toggleRecord()
                        }
                    })
                    .buttonStyle(.bordered)
                    .disabled(!whisperState.canTranscribe)
                }

                ScrollView {
                    Text(verbatim: whisperState.messageLog)
                        .frame(maxWidth: .infinity, alignment: .leading)
                }
            }
            .navigationTitle("Whisper SwiftUI Demo")
            .padding()
        }
    }
}

struct ContentView_Previews: PreviewProvider {
    static var previews: some View {
        ContentView()
    }
}
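
Not shown in this excerpt is the SwiftUI entry point that hosts `ContentView`. A minimal sketch of what it plausibly contains (the type name here is an assumption, not the commit's actual name):

```swift
import SwiftUI

// Hypothetical entry point; every SwiftUI app needs an @main App type.
@main
struct WhisperSwiftUIDemoApp: App {
    var body: some Scene {
        WindowGroup {
            ContentView()
        }
    }
}
```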