forked from ollama/ollama
-
Notifications
You must be signed in to change notification settings - Fork 32
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: add new Ollama engine using ggml through cgo

This change introduces a new way to run pretrained models. It introduces 3 high level interfaces and a bunch of smaller helper interfaces to facilitate this.

- `model.Model` defines the interface for a model architecture. Models such as `llama` and `mllama`, which are provided as examples, can implement the model's forward propagation in the `Forward` method. This method will be called to generate completions. This interface can be found in `model/model.go`.
- `ml.Backend` defines the interface for a backend tensor library, in this case `ggml`. Among other things, a Backend is responsible for loading a pretrained model into hardware (GPU, CPU, etc.) and providing an interface for Models to access loaded tensors. This interface can be found in `ml/backend.go`.
- `ml.Tensor` defines the interface for a tensor and tensor operations.

This is the first implementation of the new engine. Follow-up PRs will implement more features:

- non-greedy sampling (ollama#8410)
- integration with Ollama and KV caching (ollama#8301)
- more model support (ollama#9080), with more coming soon

Co-authored-by: Bruce MacDonald <[email protected]>
- Loading branch information
Showing
57 changed files
with
475,426 additions
and
493 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
package cache | ||
|
||
import ( | ||
"github.com/ollama/ollama/ml" | ||
) | ||
|
||
// Options carries the per-call parameters passed to Cache.Put.
type Options struct {
	// Position selects where Put starts writing: Simple.Put multiplies it
	// by the incoming tensor's dimension-2 stride to obtain the write
	// offset, and adds it to the incoming dimension-2 length to size the
	// views it returns.
	Position int
}
|
||
type Cache interface { | ||
Sub(i int) Cache | ||
Put(ctx ml.Context, key, value ml.Tensor, opts Options) (ml.Tensor, ml.Tensor) | ||
} | ||
|
||
type Simple struct { | ||
DType ml.DType | ||
Capacity int | ||
|
||
keys, values []ml.Tensor | ||
} | ||
|
||
func (c *Simple) Sub(i int) Cache { | ||
if i >= len(c.keys) { | ||
c.keys = append(c.keys, make([]ml.Tensor, i-len(c.keys)+1)...) | ||
c.values = append(c.values, make([]ml.Tensor, i-len(c.values)+1)...) | ||
} | ||
|
||
return &Simple{ | ||
keys: c.keys[i : i+1], | ||
values: c.values[i : i+1], | ||
Capacity: c.Capacity, | ||
DType: c.DType, | ||
} | ||
} | ||
|
||
func (c *Simple) Put(ctx ml.Context, key, value ml.Tensor, opts Options) (ml.Tensor, ml.Tensor) { | ||
if c.keys[0] == nil || c.values[0] == nil { | ||
c.keys[0] = ctx.Zeros(c.DType, int(key.Dim(0)*key.Dim(1))*c.Capacity) | ||
c.values[0] = ctx.Zeros(c.DType, int(value.Dim(0)*value.Dim(1))*c.Capacity) | ||
} | ||
|
||
ctx.Forward(key.Copy(ctx, c.keys[0].View(ctx, int(key.Stride(2))*opts.Position, int(key.Dim(0)*key.Dim(1)*key.Dim(2))))) | ||
ctx.Forward(value.Copy(ctx, c.values[0].View(ctx, int(value.Stride(2))*opts.Position, int(value.Dim(0)*value.Dim(1)*value.Dim(2))))) | ||
|
||
n := min(c.Capacity, int(key.Dim(2))+opts.Position) | ||
|
||
key = c.keys[0].View(ctx, 0, | ||
int(key.Dim(0)), int(key.Stride(1)), | ||
int(key.Dim(1)), int(key.Stride(2)), | ||
n, | ||
) | ||
|
||
value = c.values[0].View(ctx, 0, | ||
int(value.Dim(0)), int(value.Stride(1)), | ||
int(value.Dim(1)), int(value.Stride(2)), | ||
n, | ||
) | ||
|
||
// TODO shift context if necessary | ||
|
||
return key, value | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.