Skip to content

Commit

Permalink
bytes
Browse files Browse the repository at this point in the history
  • Loading branch information
danemadsen committed Feb 4, 2025
1 parent c752cca commit 4501dab
Show file tree
Hide file tree
Showing 4 changed files with 729 additions and 0 deletions.
2 changes: 2 additions & 0 deletions lib/llama.dart
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
library;

import 'dart:async';
import 'dart:convert';
import 'dart:ffi' as ffi;
import 'dart:io';
import 'dart:isolate';
import 'dart:typed_data';

import 'package:ffi/ffi.dart';

Expand Down
281 changes: 281 additions & 0 deletions lib/src/context_params.dart
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,118 @@ class ContextParams {
this.noPerformance,
});

/// Decodes a [ContextParams] instance from a list of bytes that was produced by
/// [toBytes()]. Throws an exception if the bytes are malformed.
factory ContextParams.fromBytes(List<int> bytes) {
final buffer = Uint8List.fromList(bytes);
final byteData = ByteData.sublistView(buffer);
int offset = 0;
// Read the header (first 4 bytes)
final int header = byteData.getUint32(offset, Endian.little);
offset += 4;

final cp = ContextParams();

if ((header & (1 << 0)) != 0) {
cp.nCtx = byteData.getInt64(offset, Endian.little);
offset += 8;
}
if ((header & (1 << 1)) != 0) {
cp.nBatch = byteData.getInt64(offset, Endian.little);
offset += 8;
}
if ((header & (1 << 2)) != 0) {
cp.nUBatch = byteData.getInt64(offset, Endian.little);
offset += 8;
}
if ((header & (1 << 3)) != 0) {
cp.nSeqMax = byteData.getInt64(offset, Endian.little);
offset += 8;
}
if ((header & (1 << 4)) != 0) {
cp.nThreads = byteData.getInt64(offset, Endian.little);
offset += 8;
}
if ((header & (1 << 5)) != 0) {
cp.nThreadsBatch = byteData.getInt64(offset, Endian.little);
offset += 8;
}
if ((header & (1 << 6)) != 0) {
int index = byteData.getUint32(offset, Endian.little);
cp.ropeScalingType = RopeScalingType.values[index];
offset += 4;
}
if ((header & (1 << 7)) != 0) {
int index = byteData.getUint32(offset, Endian.little);
cp.poolingType = PoolingType.values[index];
offset += 4;
}
if ((header & (1 << 8)) != 0) {
int index = byteData.getUint32(offset, Endian.little);
cp.attentionType = AttentionType.values[index];
offset += 4;
}
if ((header & (1 << 9)) != 0) {
cp.ropeFrequencyBase = byteData.getFloat64(offset, Endian.little);
offset += 8;
}
if ((header & (1 << 10)) != 0) {
cp.ropeFrequencyScale = byteData.getFloat64(offset, Endian.little);
offset += 8;
}
if ((header & (1 << 11)) != 0) {
cp.yarnExtrapolationFactor = byteData.getFloat64(offset, Endian.little);
offset += 8;
}
if ((header & (1 << 12)) != 0) {
cp.yarnAttenuationFactor = byteData.getFloat64(offset, Endian.little);
offset += 8;
}
if ((header & (1 << 13)) != 0) {
cp.yarnBetaFast = byteData.getFloat64(offset, Endian.little);
offset += 8;
}
if ((header & (1 << 14)) != 0) {
cp.yarnBetaSlow = byteData.getFloat64(offset, Endian.little);
offset += 8;
}
if ((header & (1 << 15)) != 0) {
cp.yarnOriginalContext = byteData.getInt64(offset, Endian.little);
offset += 8;
}
if ((header & (1 << 16)) != 0) {
cp.defragmentationThreshold = byteData.getFloat64(offset, Endian.little);
offset += 8;
}
if ((header & (1 << 17)) != 0) {
int index = byteData.getUint32(offset, Endian.little);
cp.typeK = GgmlType.values[index];
offset += 4;
}
if ((header & (1 << 18)) != 0) {
int index = byteData.getUint32(offset, Endian.little);
cp.typeV = GgmlType.values[index];
offset += 4;
}
if ((header & (1 << 19)) != 0) {
cp.embeddings = buffer[offset] != 0;
offset += 1;
}
if ((header & (1 << 20)) != 0) {
cp.offloadKqv = buffer[offset] != 0;
offset += 1;
}
if ((header & (1 << 21)) != 0) {
cp.flashAttention = buffer[offset] != 0;
offset += 1;
}
if ((header & (1 << 22)) != 0) {
cp.noPerformance = buffer[offset] != 0;
offset += 1;
}
return cp;
}

llama_context_params toNative() {
final llama_context_params contextParams = LlamaCppNative.lib.llama_context_default_params();

Expand Down Expand Up @@ -194,6 +306,175 @@ class ContextParams {

return contextParams;
}

/// Encodes this instance into a list of bytes (List<int>) without losing
/// double precision or null information.
///
/// The encoding begins with a 4‑byte header (a bitmask indicating which of the
/// 23 fields are non‑null) and then, in a fixed order, each present field is written:
///
/// - int fields as 64‑bit integers (8 bytes)
/// - double fields as 64‑bit IEEE‑754 (8 bytes)
/// - enum fields as 32‑bit unsigned integers (4 bytes)
/// - booleans as 1 byte (0/1)
List<int> toBytes() {
// We define the order (indices 0..22) for our 23 fields:
// 0: nCtx (int)
// 1: nBatch (int)
// 2: nUBatch (int)
// 3: nSeqMax (int)
// 4: nThreads (int)
// 5: nThreadsBatch (int)
// 6: ropeScalingType (enum)
// 7: poolingType (enum)
// 8: attentionType (enum)
// 9: ropeFrequencyBase (double)
// 10: ropeFrequencyScale (double)
// 11: yarnExtrapolationFactor (double)
// 12: yarnAttenuationFactor (double)
// 13: yarnBetaFast (double)
// 14: yarnBetaSlow (double)
// 15: yarnOriginalContext (int)
// 16: defragmentationThreshold (double)
// 17: typeK (enum)
// 18: typeV (enum)
// 19: embeddings (bool)
// 20: offloadKqv (bool)
// 21: flashAttention (bool)
// 22: noPerformance (bool)
//
// First, we calculate the total length needed.
int length = 4; // 4 bytes for the header
int header = 0;

// For each field, if it is non-null, set the corresponding bit and add the size.
if (nCtx != null) { header |= (1 << 0); length += 8; }
if (nBatch != null) { header |= (1 << 1); length += 8; }
if (nUBatch != null) { header |= (1 << 2); length += 8; }
if (nSeqMax != null) { header |= (1 << 3); length += 8; }
if (nThreads != null) { header |= (1 << 4); length += 8; }
if (nThreadsBatch != null) { header |= (1 << 5); length += 8; }
if (ropeScalingType != null) { header |= (1 << 6); length += 4; }
if (poolingType != null) { header |= (1 << 7); length += 4; }
if (attentionType != null) { header |= (1 << 8); length += 4; }
if (ropeFrequencyBase != null) { header |= (1 << 9); length += 8; }
if (ropeFrequencyScale != null) { header |= (1 << 10); length += 8; }
if (yarnExtrapolationFactor != null) { header |= (1 << 11); length += 8; }
if (yarnAttenuationFactor != null) { header |= (1 << 12); length += 8; }
if (yarnBetaFast != null) { header |= (1 << 13); length += 8; }
if (yarnBetaSlow != null) { header |= (1 << 14); length += 8; }
if (yarnOriginalContext != null){ header |= (1 << 15); length += 8; }
if (defragmentationThreshold != null){ header |= (1 << 16); length += 8; }
if (typeK != null) { header |= (1 << 17); length += 4; }
if (typeV != null) { header |= (1 << 18); length += 4; }
if (embeddings != null) { header |= (1 << 19); length += 1; }
if (offloadKqv != null) { header |= (1 << 20); length += 1; }
if (flashAttention != null) { header |= (1 << 21); length += 1; }
if (noPerformance != null) { header |= (1 << 22); length += 1; }

final buffer = Uint8List(length);
final byteData = ByteData.sublistView(buffer);
int offset = 0;
// Write the 4‑byte header (the bitmask)
byteData.setUint32(offset, header, Endian.little);
offset += 4;

// Now write each field if it was non-null.
if (nCtx != null) {
byteData.setInt64(offset, nCtx!, Endian.little);
offset += 8;
}
if (nBatch != null) {
byteData.setInt64(offset, nBatch!, Endian.little);
offset += 8;
}
if (nUBatch != null) {
byteData.setInt64(offset, nUBatch!, Endian.little);
offset += 8;
}
if (nSeqMax != null) {
byteData.setInt64(offset, nSeqMax!, Endian.little);
offset += 8;
}
if (nThreads != null) {
byteData.setInt64(offset, nThreads!, Endian.little);
offset += 8;
}
if (nThreadsBatch != null) {
byteData.setInt64(offset, nThreadsBatch!, Endian.little);
offset += 8;
}
if (ropeScalingType != null) {
byteData.setUint32(offset, ropeScalingType!.index, Endian.little);
offset += 4;
}
if (poolingType != null) {
byteData.setUint32(offset, poolingType!.index, Endian.little);
offset += 4;
}
if (attentionType != null) {
byteData.setUint32(offset, attentionType!.index, Endian.little);
offset += 4;
}
if (ropeFrequencyBase != null) {
byteData.setFloat64(offset, ropeFrequencyBase!, Endian.little);
offset += 8;
}
if (ropeFrequencyScale != null) {
byteData.setFloat64(offset, ropeFrequencyScale!, Endian.little);
offset += 8;
}
if (yarnExtrapolationFactor != null) {
byteData.setFloat64(offset, yarnExtrapolationFactor!, Endian.little);
offset += 8;
}
if (yarnAttenuationFactor != null) {
byteData.setFloat64(offset, yarnAttenuationFactor!, Endian.little);
offset += 8;
}
if (yarnBetaFast != null) {
byteData.setFloat64(offset, yarnBetaFast!, Endian.little);
offset += 8;
}
if (yarnBetaSlow != null) {
byteData.setFloat64(offset, yarnBetaSlow!, Endian.little);
offset += 8;
}
if (yarnOriginalContext != null) {
byteData.setInt64(offset, yarnOriginalContext!, Endian.little);
offset += 8;
}
if (defragmentationThreshold != null) {
byteData.setFloat64(offset, defragmentationThreshold!, Endian.little);
offset += 8;
}
if (typeK != null) {
byteData.setUint32(offset, typeK!.index, Endian.little);
offset += 4;
}
if (typeV != null) {
byteData.setUint32(offset, typeV!.index, Endian.little);
offset += 4;
}
if (embeddings != null) {
buffer[offset] = embeddings! ? 1 : 0;
offset += 1;
}
if (offloadKqv != null) {
buffer[offset] = offloadKqv! ? 1 : 0;
offset += 1;
}
if (flashAttention != null) {
buffer[offset] = flashAttention! ? 1 : 0;
offset += 1;
}
if (noPerformance != null) {
buffer[offset] = noPerformance! ? 1 : 0;
offset += 1;
}

return buffer;
}
}

enum RopeScalingType {
Expand Down
48 changes: 48 additions & 0 deletions lib/src/model_params.dart
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,28 @@ class ModelParams {
this.checkTensors,
});

factory ModelParams.fromInt(int buffer) {
final modelParams = ModelParams();

if ((buffer & 1 << 0) != 0) {
modelParams.vocabOnly = (buffer & 1 << 1) != 0;
}

if ((buffer & 1 << 2) != 0) {
modelParams.useMmap = (buffer & 1 << 3) != 0;
}

if ((buffer & 1 << 4) != 0) {
modelParams.useMlock = (buffer & 1 << 5) != 0;
}

if ((buffer & 1 << 6) != 0) {
modelParams.checkTensors = (buffer & 1 << 7) != 0;
}

return modelParams;
}

llama_model_params toNative() {
final llama_model_params modelParams = LlamaCppNative.lib.llama_model_default_params();

Expand All @@ -36,4 +58,30 @@ class ModelParams {

return modelParams;
}

int toInt() {
int buffer = 0;

if (vocabOnly != null) {
buffer |= 1 << 0;
buffer |= (vocabOnly! ? 1 : 0) << 1;
}

if (useMmap != null) {
buffer |= 1 << 2;
buffer |= (useMmap! ? 1 : 0) << 3;
}

if (useMlock != null) {
buffer |= 1 << 4;
buffer |= (useMlock! ? 1 : 0) << 5;
}

if (checkTensors != null) {
buffer |= 1 << 6;
buffer |= (checkTensors! ? 1 : 0) << 7;
}

return buffer;
}
}
Loading

0 comments on commit 4501dab

Please sign in to comment.