Skip to content

Commit

Permalink
Merge branch 'ollama:main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
likelovewant authored Feb 14, 2025
2 parents 2629a7a + 5296f48 commit 51a157d
Show file tree
Hide file tree
Showing 92 changed files with 478,282 additions and 568 deletions.
4 changes: 4 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,10 @@ if(CMAKE_HIP_COMPILER)
if(AMDGPU_TARGETS)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-hip)

if (WIN32)
target_compile_definitions(ggml-hip PRIVATE GGML_CUDA_NO_PEER_COPY=1)
endif()

set(OLLAMA_HIP_INSTALL_DIR ${OLLAMA_INSTALL_DIR}/rocm)
install(TARGETS ggml-hip
RUNTIME_DEPENDENCIES
Expand Down
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [Chipper](https://github.com/TilmanGriesel/chipper) AI interface for tinkerers (Ollama, Haystack RAG, Python)
- [ChibiChat](https://github.com/CosmicEventHorizon/ChibiChat) (Kotlin-based Android app to chat with Ollama and Koboldcpp API endpoints)
- [LocalLLM](https://github.com/qusaismael/localllm) (Minimal Web-App to run ollama models on it with a GUI)
- [Ollamazing](https://github.com/buiducnhat/ollamazing) (Web extension to run Ollama models)

### Cloud

Expand Down Expand Up @@ -459,9 +460,10 @@ See the [API documentation](./docs/api.md) for all endpoints.

- [Pacman](https://archlinux.org/packages/extra/x86_64/ollama/)
- [Gentoo](https://github.com/gentoo/guru/tree/master/app-misc/ollama)
- [Homebrew](https://formulae.brew.sh/formula/ollama)
- [Helm Chart](https://artifacthub.io/packages/helm/ollama-helm/ollama)
- [Guix channel](https://codeberg.org/tusharhero/ollama-guix)
- [Nix package](https://search.nixos.org/packages?channel=24.05&show=ollama&from=0&size=50&sort=relevance&type=packages&query=ollama)
- [Nix package](https://search.nixos.org/packages?show=ollama&from=0&size=50&sort=relevance&type=packages&query=ollama)
- [Flox](https://flox.dev/blog/ollama-part-one)

### Libraries
Expand Down Expand Up @@ -516,7 +518,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [multi-llm-ts](https://github.com/nbonamy/multi-llm-ts) (A Typescript/JavaScript library allowing access to different LLM in unified API)
- [LlmTornado](https://github.com/lofcz/llmtornado) (C# library providing a unified interface for major FOSS & Commercial inference APIs)
- [Ollama for Zig](https://github.com/dravenk/ollama-zig)
- [Abso](https://github.com/lunary-ai/abso/blob/main/README.md#ollama) (OpenAI-compatible TypeScript SDK for any LLM provider)
- [Abso](https://github.com/lunary-ai/abso) (OpenAI-compatible TypeScript SDK for any LLM provider)

### Mobile

Expand Down
7 changes: 5 additions & 2 deletions cmd/cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@ import (
"github.com/ollama/ollama/envconfig"
"github.com/ollama/ollama/format"
"github.com/ollama/ollama/llama"
"github.com/ollama/ollama/llama/runner"
"github.com/ollama/ollama/parser"
"github.com/ollama/ollama/progress"
"github.com/ollama/ollama/runner"
"github.com/ollama/ollama/server"
"github.com/ollama/ollama/types/model"
"github.com/ollama/ollama/version"
Expand Down Expand Up @@ -338,7 +338,10 @@ func RunHandler(cmd *cobra.Command, args []string) error {
return err
}

opts.MultiModal = len(info.ProjectorInfo) != 0
// TODO(jessegross): We should either find another way to know if this is
// a vision model or remove the logic. Also consider that other modalities will
// need different behavior anyways.
opts.MultiModal = len(info.ProjectorInfo) != 0 || envconfig.NewEngine()
opts.ParentModel = info.Details.ParentModel

if interactive {
Expand Down
2 changes: 1 addition & 1 deletion cmd/runner/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import (
"fmt"
"os"

"github.com/ollama/ollama/llama/runner"
"github.com/ollama/ollama/runner"
)

func main() {
Expand Down
32 changes: 16 additions & 16 deletions convert/convert.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import (
"log/slog"
"strings"

"github.com/ollama/ollama/llm"
"github.com/ollama/ollama/fs/ggml"
)

type ModelParameters struct {
Expand All @@ -27,8 +27,8 @@ type AdapterParameters struct {
} `json:"lora_parameters"`
}

func (ModelParameters) KV(t *Tokenizer) llm.KV {
kv := llm.KV{
func (ModelParameters) KV(t *Tokenizer) ggml.KV {
kv := ggml.KV{
"general.file_type": uint32(1),
"general.quantization_version": uint32(2),
"tokenizer.ggml.pre": t.Pre,
Expand All @@ -54,15 +54,15 @@ func (ModelParameters) KV(t *Tokenizer) llm.KV {
return kv
}

func (p AdapterParameters) KV() llm.KV {
func (p AdapterParameters) KV() ggml.KV {
var alpha float32
if p.LoraParameters.Alpha == 0 {
alpha = float32(p.Alpha)
} else {
alpha = p.LoraParameters.Alpha
}

kv := llm.KV{
kv := ggml.KV{
"adapter.lora.alpha": alpha,
"adapter.type": "lora",
"general.file_type": uint32(1),
Expand All @@ -79,27 +79,27 @@ func (ModelParameters) specialTokenTypes() []string {
}
}

func (ModelParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
return llm.WriteGGUF(ws, kv, ts)
func (ModelParameters) writeFile(ws io.WriteSeeker, kv ggml.KV, ts []ggml.Tensor) error {
return ggml.WriteGGUF(ws, kv, ts)
}

func (AdapterParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
return llm.WriteGGUF(ws, kv, ts)
func (AdapterParameters) writeFile(ws io.WriteSeeker, kv ggml.KV, ts []ggml.Tensor) error {
return ggml.WriteGGUF(ws, kv, ts)
}

type ModelConverter interface {
// KV maps parameters to LLM key-values
KV(*Tokenizer) llm.KV
KV(*Tokenizer) ggml.KV
// Tensors maps input tensors to LLM tensors. Model specific modifications can be done here.
Tensors([]Tensor) []llm.Tensor
Tensors([]Tensor) []ggml.Tensor
// Replacements returns a list of string pairs to replace in tensor names.
// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
Replacements() []string

// specialTokenTypes returns any special token types the model uses
specialTokenTypes() []string
// writeFile writes the model to the provided io.WriteSeeker
writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error
writeFile(io.WriteSeeker, ggml.KV, []ggml.Tensor) error
}

type moreParser interface {
Expand All @@ -108,17 +108,17 @@ type moreParser interface {

type AdapterConverter interface {
// KV maps parameters to LLM key-values
KV(llm.KV) llm.KV
KV(ggml.KV) ggml.KV
// Tensors maps input tensors to LLM tensors. Adapter specific modifications can be done here.
Tensors([]Tensor) []llm.Tensor
Tensors([]Tensor) []ggml.Tensor
// Replacements returns a list of string pairs to replace in tensor names.
// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
Replacements() []string

writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error
writeFile(io.WriteSeeker, ggml.KV, []ggml.Tensor) error
}

func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV llm.KV) error {
func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV ggml.KV) error {
bts, err := fs.ReadFile(fsys, "adapter_config.json")
if err != nil {
return err
Expand Down
10 changes: 5 additions & 5 deletions convert/convert_bert.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import (
"slices"
"strings"

"github.com/ollama/ollama/llm"
"github.com/ollama/ollama/fs/ggml"
)

type bertModel struct {
Expand Down Expand Up @@ -85,7 +85,7 @@ func (p *bertModel) parseMore(fsys fs.FS) error {
return nil
}

func (p *bertModel) KV(t *Tokenizer) llm.KV {
func (p *bertModel) KV(t *Tokenizer) ggml.KV {
kv := p.ModelParameters.KV(t)
kv["general.architecture"] = "bert"
kv["bert.attention.causal"] = false
Expand Down Expand Up @@ -132,8 +132,8 @@ func (p *bertModel) KV(t *Tokenizer) llm.KV {
return kv
}

func (p *bertModel) Tensors(ts []Tensor) []llm.Tensor {
var out []llm.Tensor
func (p *bertModel) Tensors(ts []Tensor) []ggml.Tensor {
var out []ggml.Tensor
for _, t := range ts {
if slices.Contains([]string{
"embeddings.position_ids",
Expand All @@ -143,7 +143,7 @@ func (p *bertModel) Tensors(ts []Tensor) []llm.Tensor {
continue
}

out = append(out, llm.Tensor{
out = append(out, ggml.Tensor{
Name: t.Name(),
Kind: t.Kind(),
Shape: t.Shape(),
Expand Down
10 changes: 5 additions & 5 deletions convert/convert_commandr.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ package convert
import (
"cmp"

"github.com/ollama/ollama/llm"
"github.com/ollama/ollama/fs/ggml"
)

type commandrModel struct {
Expand All @@ -24,7 +24,7 @@ type commandrModel struct {

var _ ModelConverter = (*commandrModel)(nil)

func (p *commandrModel) KV(t *Tokenizer) llm.KV {
func (p *commandrModel) KV(t *Tokenizer) ggml.KV {
kv := p.ModelParameters.KV(t)
kv["general.architecture"] = "command-r"
kv["general.name"] = "command-r"
Expand All @@ -43,10 +43,10 @@ func (p *commandrModel) KV(t *Tokenizer) llm.KV {
return kv
}

func (p *commandrModel) Tensors(ts []Tensor) []llm.Tensor {
var out []llm.Tensor
func (p *commandrModel) Tensors(ts []Tensor) []ggml.Tensor {
var out []ggml.Tensor
for _, t := range ts {
out = append(out, llm.Tensor{
out = append(out, ggml.Tensor{
Name: t.Name(),
Kind: t.Kind(),
Shape: t.Shape(),
Expand Down
10 changes: 5 additions & 5 deletions convert/convert_gemma.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import (
"github.com/pdevine/tensor"
"github.com/pdevine/tensor/native"

"github.com/ollama/ollama/llm"
"github.com/ollama/ollama/fs/ggml"
)

type gemmaModel struct {
Expand All @@ -23,7 +23,7 @@ type gemmaModel struct {

var _ ModelConverter = (*gemmaModel)(nil)

func (p *gemmaModel) KV(t *Tokenizer) llm.KV {
func (p *gemmaModel) KV(t *Tokenizer) ggml.KV {
kv := p.ModelParameters.KV(t)
kv["general.architecture"] = "gemma"
kv["gemma.context_length"] = p.MaxPositionEmbeddings
Expand All @@ -42,14 +42,14 @@ func (p *gemmaModel) KV(t *Tokenizer) llm.KV {
return kv
}

func (p *gemmaModel) Tensors(ts []Tensor) []llm.Tensor {
var out []llm.Tensor
func (p *gemmaModel) Tensors(ts []Tensor) []ggml.Tensor {
var out []ggml.Tensor
for _, t := range ts {
if strings.HasSuffix(t.Name(), "_norm.weight") {
t.SetRepacker(p.addOne)
}

out = append(out, llm.Tensor{
out = append(out, ggml.Tensor{
Name: t.Name(),
Kind: t.Kind(),
Shape: t.Shape(),
Expand Down
6 changes: 2 additions & 4 deletions convert/convert_gemma2.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
package convert

import (
"github.com/ollama/ollama/llm"
)
import "github.com/ollama/ollama/fs/ggml"

type gemma2Model struct {
gemmaModel
Expand All @@ -11,7 +9,7 @@ type gemma2Model struct {
FinalLogitSoftcap float32 `json:"final_logit_softcapping"`
}

func (p *gemma2Model) KV(t *Tokenizer) llm.KV {
func (p *gemma2Model) KV(t *Tokenizer) ggml.KV {
kv := p.ModelParameters.KV(t)
kv["general.architecture"] = "gemma2"
kv["gemma2.context_length"] = p.MaxPositionEmbeddings
Expand Down
10 changes: 5 additions & 5 deletions convert/convert_gemma2_adapter.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import (
"github.com/pdevine/tensor"
"github.com/pdevine/tensor/native"

"github.com/ollama/ollama/llm"
"github.com/ollama/ollama/fs/ggml"
)

type gemma2Adapter struct {
Expand All @@ -15,14 +15,14 @@ type gemma2Adapter struct {

var _ AdapterConverter = (*gemma2Adapter)(nil)

func (p *gemma2Adapter) KV(baseKV llm.KV) llm.KV {
func (p *gemma2Adapter) KV(baseKV ggml.KV) ggml.KV {
kv := p.AdapterParameters.KV()
kv["general.architecture"] = "gemma2"
return kv
}

func (p *gemma2Adapter) Tensors(ts []Tensor) []llm.Tensor {
var out []llm.Tensor
func (p *gemma2Adapter) Tensors(ts []Tensor) []ggml.Tensor {
var out []ggml.Tensor
for _, t := range ts {
shape := t.Shape()
if (strings.HasSuffix(t.Name(), "weight.lora_a") && shape[0] > shape[1]) ||
Expand All @@ -31,7 +31,7 @@ func (p *gemma2Adapter) Tensors(ts []Tensor) []llm.Tensor {
t.SetRepacker(p.repack)
}

out = append(out, llm.Tensor{
out = append(out, ggml.Tensor{
Name: t.Name(),
Kind: t.Kind(),
Shape: t.Shape(),
Expand Down
12 changes: 6 additions & 6 deletions convert/convert_llama.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import (
"github.com/pdevine/tensor"
"github.com/pdevine/tensor/native"

"github.com/ollama/ollama/llm"
"github.com/ollama/ollama/fs/ggml"
)

type llamaModel struct {
Expand Down Expand Up @@ -46,7 +46,7 @@ type llamaModel struct {

var _ ModelConverter = (*llamaModel)(nil)

func (p *llamaModel) KV(t *Tokenizer) llm.KV {
func (p *llamaModel) KV(t *Tokenizer) ggml.KV {
kv := p.ModelParameters.KV(t)
kv["general.architecture"] = "llama"
kv["llama.vocab_size"] = p.VocabSize
Expand Down Expand Up @@ -120,11 +120,11 @@ func (p *llamaModel) KV(t *Tokenizer) llm.KV {
return kv
}

func (p *llamaModel) Tensors(ts []Tensor) []llm.Tensor {
var out []llm.Tensor
func (p *llamaModel) Tensors(ts []Tensor) []ggml.Tensor {
var out []ggml.Tensor

if p.RopeScaling.factors != nil {
out = append(out, llm.Tensor{
out = append(out, ggml.Tensor{
Name: "rope_freqs.weight",
Kind: 0,
Shape: []uint64{uint64(len(p.RopeScaling.factors))},
Expand All @@ -138,7 +138,7 @@ func (p *llamaModel) Tensors(ts []Tensor) []llm.Tensor {
t.SetRepacker(p.repack)
}

out = append(out, llm.Tensor{
out = append(out, ggml.Tensor{
Name: t.Name(),
Kind: t.Kind(),
Shape: t.Shape(),
Expand Down
Loading

0 comments on commit 51a157d

Please sign in to comment.