Skip to content

Commit

Permalink
isolate
Browse files Browse the repository at this point in the history
  • Loading branch information
danemadsen committed Feb 10, 2025
1 parent e95846f commit a5b19d2
Show file tree
Hide file tree
Showing 10 changed files with 179 additions and 114 deletions.
4 changes: 2 additions & 2 deletions example/lib/main.dart
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ class LlamaApp extends StatefulWidget {
class _LlamaAppState extends State<LlamaApp> {
final TextEditingController _controller = TextEditingController();
final List<ChatMessage> _messages = [];
LlamaNative? _model;
LlamaCPP? _model;
String? _modelPath;

void _test() {
Expand All @@ -48,7 +48,7 @@ class _LlamaAppState extends State<LlamaApp> {
throw Exception('File does not exist');
}

final llamaCpp = LlamaNative(
final llamaCpp = LlamaCPP(
modelParams: ModelParams(
path: result.files.single.path!
),
Expand Down
2 changes: 1 addition & 1 deletion example/macos/Podfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ EXTERNAL SOURCES:

SPEC CHECKSUMS:
FlutterMacOS: 8f6f14fa908a6fb3fba0cd85dbd81ec4b251fb24
llama: 8d80067a92079951df69f9344e697b213f06c1d1
llama: c81650b42b6d7dc2086fe0c920bd86eb8e74ad67

PODFILE CHECKSUM: 9ebaf0ce3d369aaa26a9ea0e159195ed94724cf3

Expand Down
57 changes: 16 additions & 41 deletions ios/llama.podspec
Original file line number Diff line number Diff line change
Expand Up @@ -55,63 +55,38 @@ A new Flutter FFI plugin project.
echo "Copy completed successfully."
CMD

s.source_files = 'build-info.c',
s.source_files = 'build-info.c',
'llama_cpp/src/*.cpp',
'llama_cpp/common/*.cpp',
'llama_cpp/ggml/src/*.c',
'llama_cpp/ggml/src/*.cpp',
'llama_cpp/ggml/src/ggml-cpu/*.cpp',
'llama_cpp/ggml/src/ggml-cpu/*.c',
'llama_cpp/ggml/src/ggml-metal/*.cpp',
'llama_cpp/ggml/src/ggml-cpu/*.cpp',
'llama_cpp/ggml/src/ggml-cpu/**/*.cpp',
'llama_cpp/ggml/src/ggml-metal/*.m',
'llama_cpp/src/*.c',
'llama_cpp/src/llama.cpp',
'llama_cpp/src/llama-sampling.cpp',
'llama_cpp/src/llama-grammar.cpp',
'llama_cpp/ggml/src/ggml.c',
'llama_cpp/ggml/src/ggml-alloc.c',
'llama_cpp/ggml/src/ggml-backend.c',
'llama_cpp/ggml/src/ggml-metal.m',
'llama_cpp/ggml/src/ggml-quants.c',
'llama_cpp/ggml/src/ggml-aarch64.c',
'llama_cpp/src/llama-vocab.cpp',
'llama_cpp/src/unicode.cpp',
'llama_cpp/src/unicode-data.cpp',
'llama_cpp/common/common.cpp',
'llama_cpp/common/build-info.cpp',
'llama_cpp/common/grammar-parser.cpp',
'llama_cpp/common/json-schema-to-grammar.cpp',
'llama_cpp/common/sampling.cpp',
'llama_cpp/common/stb_image.h',
s.frameworks = 'Foundation', 'Metal', 'MetalKit'
s.pod_target_xcconfig = {
'DEFINES_MODULE' => 'YES',
'USER_HEADER_SEARCH_PATHS' => [
'$(PODS_TARGET_SRCROOT)/llama_cpp/ggml/include/*.h',
'$(PODS_TARGET_SRCROOT)/llama_cpp/ggml/include',
'$(PODS_TARGET_SRCROOT)/llama_cpp/include',
'$(PODS_TARGET_SRCROOT)/llama_cpp/include/llama.h',
'$(PODS_TARGET_SRCROOT)/llama_cpp/src',
'$(PODS_TARGET_SRCROOT)/llama_cpp/**/*.h',
'$(PODS_TARGET_SRCROOT)/llama_cpp/common/**/*.h',
'$(PODS_TARGET_SRCROOT)/llama_cpp/common',
'$(PODS_TARGET_SRCROOT)/llama_cpp/ggml/include',
'$(PODS_TARGET_SRCROOT)/llama_cpp/ggml/src',
'$(PODS_TARGET_SRCROOT)/llama_cpp/ggml/src/ggml-cpu',
'$(PODS_TARGET_SRCROOT)/llama_cpp/include',
'$(PODS_TARGET_SRCROOT)/llama_cpp/ggml/src/ggml-cpu/**',
'$(PODS_TARGET_SRCROOT)/llama_cpp/ggml/src/ggml-metal',
],
'HEADER_SEARCH_PATHS' => [
'$(PODS_TARGET_SRCROOT)/llama_cpp/include/llama.h',
'$(PODS_TARGET_SRCROOT)/llama_cpp/src',
'$(PODS_TARGET_SRCROOT)/llama_cpp/common',
'$(PODS_TARGET_SRCROOT)/llama_cpp/ggml/include',
'$(PODS_TARGET_SRCROOT)/llama_cpp/ggml/src',
'$(PODS_TARGET_SRCROOT)/llama_cpp/common',
'$(PODS_TARGET_SRCROOT)/llama_cpp/common'],
'HEADER_SEARCH_PATHS' => [
'$(PODS_TARGET_SRCROOT)/llama_cpp/ggml/include',
'$(PODS_TARGET_SRCROOT)/llama_cpp/ggml/include/*.h',
'$(PODS_TARGET_SRCROOT)/llama_cpp/include',
'$(PODS_TARGET_SRCROOT)/llama_cpp/src',
'$(PODS_TARGET_SRCROOT)/llama_cpp/**/*.h',
'$(PODS_TARGET_SRCROOT)/llama_cpp/common/**/*.h',
'$(PODS_TARGET_SRCROOT)/llama_cpp/common',
'$(PODS_TARGET_SRCROOT)/llama_cpp/common',
'$(SRCROOT)/../ios/llama_cpp/common'],
# -w is to suppress warnings from llama_cpp, there's tons of them
'$(PODS_TARGET_SRCROOT)/llama_cpp/ggml/src/ggml-cpu',
'$(PODS_TARGET_SRCROOT)/llama_cpp/ggml/src/ggml-cpu/**',
'$(PODS_TARGET_SRCROOT)/llama_cpp/ggml/src/ggml-metal',
],
'OTHER_CFLAGS' => ['$(inherited)', '-O3', '-flto', '-fno-objc-arc', '-w', '-I$(PODS_TARGET_SRCROOT)/llama_cpp/include', '-I$(PODS_TARGET_SRCROOT)/llama_cpp/ggml/include', '-I$(PODS_TARGET_SRCROOT)/llama_cpp/common', '-DGGML_LLAMAFILE=OFF', '-DGGML_USE_CPU'],
'OTHER_CPLUSPLUSFLAGS' => ['$(inherited)', '-O3', '-flto', '-fno-objc-arc', '-w', '-std=c++17', '-fpermissive', '-I$(PODS_TARGET_SRCROOT)/llama_cpp/include', '-I$(PODS_TARGET_SRCROOT)/llama_cpp/ggml/include', '-I$(PODS_TARGET_SRCROOT)/llama_cpp/common', '-DGGML_LLAMAFILE=OFF', '-DGGML_USE_CPU'],
'GCC_PREPROCESSOR_DEFINITIONS' => ['$(inherited)', 'GGML_USE_METAL=1'],
Expand Down
1 change: 1 addition & 0 deletions lib/llama.dart
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ part 'src/llama_cpp.dart';
part 'src/llama_exception.dart';
part 'src/library.dart';
part 'src/llama_native.dart';
part 'src/llama_isolate_entry.dart';
part 'src/model_params.dart';
part 'src/chat_message.dart';
part 'src/context_params.dart';
Expand Down
2 changes: 1 addition & 1 deletion lib/src/library.dart
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ llama get lib {
_lib = llama(ffi.DynamicLibrary.open('libllama.so'));
}
else if (Platform.isMacOS || Platform.isIOS) {
_lib = llama(ffi.DynamicLibrary.open('libllama.dylib'));
_lib = llama(ffi.DynamicLibrary.open('llama.framework/llama'));
}
else {
throw Exception('Unsupported platform');
Expand Down
64 changes: 9 additions & 55 deletions lib/src/llama_cpp.dart
Original file line number Diff line number Diff line change
Expand Up @@ -5,35 +5,26 @@ typedef StringResponse = (
String message
);

typedef IsolateArguments = (
String modelPath,
String modelParams,
String contextParams,
String samplingParams,
SendPort sendPort
);

class LlamaCPP {
final Completer _initialized = Completer();
StreamController<String> _responseController = StreamController<String>()..close();
SendPort? _sendPort;

LlamaCPP(String modelPath, ModelParams modelParams, ContextParams contextParams, SamplingParams samplingParams) {
_listener(modelPath, modelParams, contextParams, samplingParams);
LlamaCPP({required ModelParams modelParams, required ContextParams contextParams, required SamplingParams samplingParams}) {
_listener(modelParams, contextParams, samplingParams);
}

void _listener(String modelPath, ModelParams modelParams, ContextParams contextParams, SamplingParams samplingParams) async {
void _listener(ModelParams modelParams, ContextParams contextParams, SamplingParams samplingParams) async {
final receivePort = ReceivePort();

final isolateParams = (
modelPath,
modelParams.toJson(),
contextParams.toJson(),
samplingParams.toJson(),
receivePort.sendPort
modelParams: modelParams,
contextParams: contextParams,
samplingParams: samplingParams,
sendPort: receivePort.sendPort
);

await Isolate.spawn(entryPoint, isolateParams);
await Isolate.spawn(LlamaIsolateEntry.entry, isolateParams.toSerializable);

await for (var data in receivePort) {
if (data is StringResponse) {
Expand All @@ -49,7 +40,7 @@ class LlamaCPP {
_initialized.complete();
}
else if (data is bool) {
print('Isolate stopped');
log('Isolate stopped');
}
else if (data == null) {
_responseController.close();
Expand Down Expand Up @@ -78,41 +69,4 @@ class LlamaCPP {

_sendPort!.send(true);
}
}

void entryPoint(IsolateArguments args) async {
final SendPort sendPort = args.$5;
final LlamaNative llamaCppNative;

try {
final receivePort = ReceivePort();
sendPort.send(receivePort.sendPort);

llamaCppNative = LlamaNative(
modelParams: ModelParams.fromJson(args.$2),
contextParams: ContextParams.fromJson(args.$3),
samplingParams: SamplingParams.fromJson(args.$4)
);

await for (var data in receivePort) {
if (data is List<ChatMessageRecord>) {

final messages = ChatMessages.fromRecords(data);

final response = llamaCppNative.prompt(messages);

await for (var message in response) {
sendPort.send((false, message));
}

sendPort.send(null);
}
else if (data is bool) {
sendPort.send(data);
}
}
}
catch (e) {
sendPort.send((message: e.toString()));
}
}
79 changes: 79 additions & 0 deletions lib/src/llama_isolate_entry.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
part of '../llama.dart';

/// Typed arguments handed to the llama isolate when it is spawned.
///
/// Bundles the model, context and sampling configuration together with the
/// [SendPort] the isolate uses to report results back to the main isolate.
/// Not directly sendable across isolates — convert with
/// [IsolateArgumentsExtension.toSerializable] first.
typedef IsolateArguments = ({
  ModelParams modelParams,
  ContextParams contextParams,
  SamplingParams samplingParams,
  SendPort sendPort
});

/// Conversion of [IsolateArguments] into an isolate-sendable form.
extension IsolateArgumentsExtension on IsolateArguments {
  /// JSON-encodes each configuration object so the resulting record contains
  /// only sendable values: three strings plus the [SendPort].
  SerializableIsolateArguments get toSerializable {
    return (
      modelParams.toJson(),
      contextParams.toJson(),
      samplingParams.toJson(),
      sendPort,
    );
  }
}

/// Isolate-sendable counterpart of [IsolateArguments].
///
/// Positional fields, in order: model params JSON, context params JSON,
/// sampling params JSON, and the reply [SendPort].
typedef SerializableIsolateArguments = (
  String,
  String,
  String,
  SendPort
);

/// Accessors that rebuild the typed configuration objects from a
/// [SerializableIsolateArguments] record received inside the isolate.
extension SerializableIsolateArgumentsExtension on SerializableIsolateArguments {
  /// Model configuration decoded from the first JSON field.
  ModelParams get modelParams => ModelParams.fromJson(this.$1);

  /// Context configuration decoded from the second JSON field.
  ContextParams get contextParams => ContextParams.fromJson(this.$2);

  /// Sampling configuration decoded from the third JSON field.
  SamplingParams get samplingParams => SamplingParams.fromJson(this.$3);

  /// Port used to send results back to the spawning isolate.
  SendPort get sendPort => this.$4;
}

/// Hosts the static entry point executed inside the spawned llama isolate.
///
/// The isolate owns its own [LlamaNative] instance and communicates with the
/// main isolate exclusively via [SendPort]/[ReceivePort] messages.
class LlamaIsolateEntry {
  /// Entry point passed to [Isolate.spawn].
  ///
  /// Message protocol, as implemented here:
  ///  - first sends this isolate's own [SendPort] so the caller can issue
  ///    commands;
  ///  - a `List<ChatMessageRecord>` command runs a prompt: each generated
  ///    chunk is sent as a `(false, message)` record, then `null` marks the
  ///    end of the stream;
  ///  - a `bool` command is echoed straight back (used as a stop/control
  ///    acknowledgement by the caller).
  static void entry(SerializableIsolateArguments args) async {
    final SendPort sendPort = args.sendPort;
    final LlamaNative llamaCppNative;

    // Route llama.cpp's native logging through _logCallback instead of the
    // default native log sink.
    lib.llama_log_set(ffi.Pointer.fromFunction(_logCallback), ffi.nullptr);

    try {
      final receivePort = ReceivePort();
      sendPort.send(receivePort.sendPort);

      // Rebuild typed params from their JSON-encoded record fields.
      llamaCppNative = LlamaNative(
        modelParams: args.modelParams,
        contextParams: args.contextParams,
        samplingParams: args.samplingParams
      );

      await for (var data in receivePort) {
        if (data is List<ChatMessageRecord>) {

          final messages = ChatMessages.fromRecords(data);

          final response = llamaCppNative.prompt(messages);

          // Stream each generated chunk back; `false` flags a non-final chunk.
          await for (var message in response) {
            sendPort.send((false, message));
          }

          // `null` signals that this prompt's response stream is complete.
          sendPort.send(null);
        }
        else if (data is bool) {
          // Echo the control flag back to the main isolate.
          sendPort.send(data);
        }
      }
    }
    catch (e) {
      // NOTE(review): this error record's shape ((message: ...)) differs from
      // the (bool, String) success records sent above — confirm the main
      // isolate's listener actually matches this shape.
      sendPort.send((message: e.toString()));
    }
  }

  /// Native log callback: converts the C string to Dart and forwards it to
  /// [log]. The `level` and `userData` arguments are ignored.
  static void _logCallback(int level, ffi.Pointer<ffi.Char> text, ffi.Pointer<ffi.Void> userData) {
    log(text.cast<Utf8>().toDartString());
  }
}
4 changes: 4 additions & 0 deletions macos/build-info.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
/* Build metadata globals referenced by llama.cpp's common library.
 * Placeholder values: this build path apparently does not run llama.cpp's
 * build-info generation step, so real commit/compiler data is unavailable
 * here (TODO confirm). */
int LLAMA_BUILD_NUMBER = -1;
char const *LLAMA_COMMIT = "undefined";
char const *LLAMA_COMPILER = "undefined";
char const *LLAMA_BUILD_TARGET = "MacOS";
Empty file removed macos/lib/.gitkeep
Empty file.
Loading

0 comments on commit a5b19d2

Please sign in to comment.