Add pyannote for speaker detection (WIP; currently works only in macOS unit tests)

Telosnex · Dec 8, 2024 · 4a12abe · 4a12abe
1 parent b6da12a
commit 4a12abe
Show file tree

Hide file tree

Showing 15 changed files with 911 additions and 5 deletions.
diff --git a/example/assets/models/pyannote/pyannote_seg3.onnx b/example/assets/models/pyannote/pyannote_seg3.onnx
diff --git a/lib/fonnx.dart b/lib/fonnx.dart
@@ -51,4 +51,18 @@ class Fonnx {
       previousState: previousState,
     );
   }
+
+  /// Runs pyannote on [audioData] via the registered platform implementation.
+  ///
+  /// Every argument is forwarded unchanged to [FonnxPlatform.pyannote] on
+  /// [FonnxPlatform.instance], and that call's future is returned as-is. The
+  /// future may complete with `null`.
+  Future<List<Map<String, dynamic>>?> pyannote({
+    required String modelPath,
+    required String modelName,
+    required Float32List audioData,
+    required double step,
+  }) {
+    final platform = FonnxPlatform.instance;
+    return platform.pyannote(
+      modelPath: modelPath,
+      modelName: modelName,
+      audioData: audioData,
+      step: step,
+    );
+  }
 }
diff --git a/lib/fonnx_platform_interface.dart b/lib/fonnx_platform_interface.dart
@@ -35,14 +35,23 @@ abstract class FonnxPlatform extends PlatformInterface {
   }) {
     throw UnimplementedError('magika() has not been implemented.');
   }
-  
+
   /// Default MiniLM entry point of the platform interface.
   ///
   /// This base version performs no work and always throws
   /// [UnimplementedError]; presumably concrete platforms override it.
   Future<Float32List?> miniLm({
     required String modelPath,
     required List<int> inputs,
   }) => throw UnimplementedError('miniLm() has not been implemented.');
 
+  /// Default pyannote entry point of the platform interface.
+  ///
+  /// This base version performs no work and always throws
+  /// [UnimplementedError]; presumably concrete platforms override it.
+  Future<List<Map<String, dynamic>>?> pyannote({
+    required String modelPath,
+    required String modelName,
+    required Float32List audioData,
+    required double step,
+  }) => throw UnimplementedError('pyannote() has not been implemented.');
+
   Future<String?> whisper({
     required String modelPath,
     required List<int> audioBytes,

diff --git a/lib/models/pyannote/pyannote.dart b/lib/models/pyannote/pyannote.dart
@@ -0,0 +1,35 @@
+import 'dart:typed_data';
+
+import 'pyannote_none.dart'
+    if (dart.library.io) 'pyannote_native.dart'
+    if (dart.library.js) 'pyannote_web.dart';
+
+/// Platform-independent handle to a pyannote model.
+///
+/// Instances are obtained through [load], which delegates construction to the
+/// conditionally imported `getPyannote` factory.
+abstract class Pyannote {
+  /// Most recently created instance, reused by [load] when the requested
+  /// model matches.
+  static Pyannote? _instance;
+
+  /// Path of the model backing this instance.
+  ///
+  /// Expected to equal the `path` given to [load]; [load] compares against it
+  /// to decide whether the cached instance can be reused.
+  String get modelPath;
+
+  /// Name of the model backing this instance.
+  ///
+  /// Expected to equal the `modelName` given to [load]; [load] compares
+  /// against it to decide whether the cached instance can be reused.
+  String get modelName;
+
+  /// Returns a [Pyannote] for the model at [path] named [modelName].
+  ///
+  /// The last created instance is cached and returned again when both [path]
+  /// and [modelName] match it. A request for a different model creates a new
+  /// instance and replaces the cache, rather than silently handing back the
+  /// previously loaded model.
+  static Pyannote load(String path, String modelName) {
+    final cached = _instance;
+    if (cached != null &&
+        cached.modelPath == path &&
+        cached.modelName == modelName) {
+      return cached;
+    }
+    // NOTE(review): the replaced instance is only dropped here; confirm
+    // whether platform implementations hold resources that need releasing.
+    _instance = getPyannote(path, modelName);
+    return _instance!;
+  }
+
+  /// Processes [audioData] and returns the detected speaker segments.
+  ///
+  /// Returns a list of segments. For regular segmentation models:
+  /// ```dart
+  /// {
+  ///   'speaker': int,    // Speaker index
+  ///   'start': double,   // Start time in seconds
+  ///   'stop': double,    // End time in seconds
+  /// }
+  /// ```
+  ///
+  /// For short_scd_bigdata model:
+  /// ```dart
+  /// {
+  ///   'timestamp': double,  // Change point time in seconds
+  /// }
+  /// ```
+  ///
+  /// The meaning and default of [step] are defined by the platform
+  /// implementation.
+  Future<List<Map<String, dynamic>>> process(
+    Float32List audioData, {
+    double? step,
+  });
+}