Merge pull request #3 from mutablelogic/dev

Updated Ollama and Mistral implementations

djthorpe authored Feb 2, 2025
2 parents 11ec895 + 9e30bd3 commit de4bbfc

Showing 47 changed files with 2,657 additions and 899 deletions.
184 changes: 171 additions & 13 deletions README.md
# go-llm

Large Language Model API interface. This is a simple API interface for large language models
which run on [Ollama](https://github.com/ollama/ollama/blob/main/docs/api.md),
[Anthropic](https://docs.anthropic.com/en/api/getting-started) and [Mistral](https://docs.mistral.ai/)
(OpenAI might be added later).

The module includes support for:

* Maintaining a session of messages
* Tool calling, including using your own tools (aka Tool plugins)
* Creating embedding vectors from text
* Streaming responses
* Multi-modal support (images and attachments)

There is a command-line tool included in the module which can be used to interact with the API.
If you have Docker installed, you can run the tool without installation using the following
command:

```bash
# Display help
docker run ghcr.io/mutablelogic/go-llm:latest --help

# Interact with Claude to retrieve news headlines, assuming
# you have an API key for Anthropic and NewsAPI
docker run \
  --interactive -e ANTHROPIC_API_KEY -e NEWSAPI_KEY \
ghcr.io/mutablelogic/go-llm:latest \
  chat claude-3-5-haiku-20241022 --prompt "What is the latest news?"
```

See below for more information on how to use the command-line tool (or how to install it
if you have a `go` compiler).

## Programmatic Usage

See the documentation [here](https://pkg.go.dev/github.com/mutablelogic/go-llm)
for integration into your own Go programs.

### Agent Instantiation

For each LLM provider, you create an agent which can be used to interact with the API.
To create an
[Ollama](https://pkg.go.dev/github.com/mutablelogic/go-llm/pkg/ollama)
agent,

```go
import (
	"github.com/mutablelogic/go-llm/pkg/ollama"
)

func main() {
	// Create a new agent - replace the URL with the one to your Ollama instance
	agent, err := ollama.New("https://ollama.com/api/v1/")
	if err != nil {
		panic(err)
	}
	// ...
}
```

To create an
[Anthropic](https://pkg.go.dev/github.com/mutablelogic/go-llm/pkg/anthropic)
agent with an API key stored as an environment variable,

```go
import (
	"os"

	"github.com/mutablelogic/go-llm/pkg/anthropic"
)

func main() {
// Create a new agent
agent, err := anthropic.New(os.Getenv("ANTHROPIC_API_KEY"))
if err != nil {
panic(err)
}
// ...
}
```

For [Mistral](https://pkg.go.dev/github.com/mutablelogic/go-llm/pkg/mistral) models, you can use:

```go
import (
"github.com/mutablelogic/go-llm/pkg/mistral"
)

func main() {
// Create a new agent
agent, err := mistral.New(os.Getenv("MISTRAL_API_KEY"))
if err != nil {
panic(err)
}
// ...
}
```

You can append options when creating an agent to set client/server communication options
such as the user agent string, timeouts, debugging, rate limiting and custom headers. See [here](https://pkg.go.dev/github.com/mutablelogic/go-client#readme-basic-usage) for more information.
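
For example, here is a minimal sketch of passing a go-client option through to an agent constructor. The `client` package alias and the `client.OptTrace` option are assumptions based on the go-client documentation; check that documentation for the exact option names.

```go
import (
	"os"

	client "github.com/mutablelogic/go-client"
	"github.com/mutablelogic/go-llm/pkg/ollama"
)

func main() {
	// Create an agent with request/response tracing written to stderr
	// (client.OptTrace is an assumption - see the go-client documentation)
	agent, err := ollama.New("https://ollama.com/api/v1/", client.OptTrace(os.Stderr, false))
	if err != nil {
		panic(err)
	}
	// ...
}
```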

There is also an _aggregated_ agent which can be used to interact with multiple providers at once. This is useful if you want
to use models from different providers simultaneously.

```go
import (
"github.com/mutablelogic/go-llm/pkg/agent"
)

func main() {
// Create a new agent which aggregates multiple providers
agent, err := agent.New(
agent.WithAnthropic(os.Getenv("ANTHROPIC_API_KEY")),
agent.WithMistral(os.Getenv("MISTRAL_API_KEY")),
agent.WithOllama(os.Getenv("OLLAMA_URL")),
)
if err != nil {
panic(err)
}
// ...
}
```
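
Once the aggregated agent has been created, you can enumerate the models available across all of the configured providers. A minimal sketch, printing each model with `%v` since the printable form of a `Model` is provider-specific:

```go
// List the models available from every configured provider
models, err := agent.Models(context.TODO())
if err != nil {
	panic(err)
}
for _, model := range models {
	fmt.Printf("%v\n", model)
}
```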

### Chat Sessions

You create a **chat session** with a model as follows,

```go
import (
	"context"
	"fmt"

	"github.com/mutablelogic/go-llm"
)

func session(ctx context.Context, agent llm.Agent) error {
	// Create a new chat session
	session := agent.Model(context.TODO(), "claude-3-5-haiku-20241022").Context()

	// Repeat forever
	for {
		// Send the next user prompt to the model (the prompt text here is illustrative)
		if err := session.FromUser(ctx, "Why is the sky blue?"); err != nil {
			return err
		}

		// Print the response for the zero'th completion
		fmt.Println(session.Text(0))
	}
}
```

The `Context` object will continue to store the current session and options, and will
ensure the session is maintained across multiple calls.

### Embedding Generation

TODO
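
In the meantime, here is a minimal sketch based on the `Embedding` method of the `Model` interface shown in the Options section below. The model name is illustrative; substitute any embedding-capable model offered by your provider.

```go
// Create an embedding vector for a piece of text.
// The model name "mxbai-embed-large" is illustrative only.
vector, err := agent.Model(context.TODO(), "mxbai-embed-large").Embedding(context.TODO(), "the quick brown fox")
if err != nil {
	panic(err)
}
fmt.Println("embedding dimensions:", len(vector))
```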

### Attachments & Image Caption Generation

TODO
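
In the meantime, here is a minimal sketch which attaches an image to a user prompt with the `llm.WithAttachment` option from the Options section below. The file name and model are illustrative, and it is the caller's responsibility to close the reader.

```go
// Ask a model to describe an attached image
f, err := os.Open("picture.png")
if err != nil {
	panic(err)
}
defer f.Close()

session := agent.Model(context.TODO(), "claude-3-5-haiku-20241022").Context()
if err := session.FromUser(context.TODO(), "Describe this image", llm.WithAttachment(f)); err != nil {
	panic(err)
}
fmt.Println(session.Text(0))
```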

### Streaming

TODO
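
In the meantime, here is a minimal sketch using the `llm.WithStream` option from the Options section below. It assumes the `llm.Completion` value passed to the callback exposes the same `Text(int)` accessor as the session.

```go
// Stream a completion as it is generated; the callback is invoked
// repeatedly as the completion grows
session := agent.Model(context.TODO(), "claude-3-5-haiku-20241022").Context(
	llm.WithStream(func(completion llm.Completion) {
		fmt.Println(completion.Text(0))
	}),
)
if err := session.FromUser(context.TODO(), "Tell me a short story"); err != nil {
	panic(err)
}
```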

### Tool Support

TODO

## Options

You can add options to sessions, or to prompts. Different providers and models support
different options.

```go
type Model interface {
// Set session-wide options
Context(...Opt) Context

// Add attachments (images, PDFs) to a user prompt for completion
UserPrompt(string, ...Opt) Context

// Create an embedding vector with embedding options
Embedding(context.Context, string, ...Opt) ([]float64, error)
}

type Context interface {
// Add single-use options when calling the model, which override
// session options. You can attach files to a user prompt.
FromUser(context.Context, string, ...Opt) error
}
```

The options are as follows; a combined usage example is shown after the table.

| Option | Ollama | Anthropic | Mistral | OpenAI | Description |
|--------|--------|-----------|---------|--------|-------------|
| `llm.WithTemperature(float64)` | Yes | Yes | Yes | - | What sampling temperature to use, between 0.0 and 1.0. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. |
| `llm.WithTopP(float64)` | Yes | Yes | Yes | - | Nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. |
| `llm.WithTopK(uint64)` | Yes | Yes | No | - | Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. |
| `llm.WithMaxTokens(uint64)` | No | Yes | Yes | - | The maximum number of tokens to generate in the response. |
| `llm.WithStream(func(llm.Completion))` | Can be enabled when tools are not used | Yes | Yes | - | Stream the response to a function. |
| `llm.WithToolChoice(string, string, ...)` | No | Yes | Use `auto`, `any`, `none`, `required` or a function name. Only the first argument is used. | - | The tool to use for the model. |
| `llm.WithToolKit(llm.ToolKit)` | Cannot be combined with streaming | Yes | Yes | - | The set of tools to use. |
| `llm.WithStopSequence(string, string, ...)` | Yes | Yes | Yes | - | Stop generation if one of these tokens is detected. |
| `llm.WithSystemPrompt(string)` | No | Yes | Yes | - | Set the system prompt for the model. |
| `llm.WithSeed(uint64)` | Yes | Yes | Yes | - | The seed to use for random sampling. If set, different calls will generate deterministic results. |
| `llm.WithFormat(string)` | Use `json` | Yes | Use `json_format` or `text` | - | The format of the response. For Mistral, you must also instruct the model to produce JSON yourself with a system or a user message. |
| `llm.WithPresencePenalty(float64)` | Yes | No | Yes | - | Determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative. |
| `llm.WithFrequencyPenalty(float64)` | Yes | No | Yes | - | Penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition. |
| `mistral.WithPrediction(string)` | No | No | Yes | - | Enable users to specify expected results, optimizing response times by leveraging known or predictable content. This approach is especially effective for updating text documents or code files with minimal changes, reducing latency while maintaining high-quality results. |
| `llm.WithSafePrompt()` | No | No | Yes | - | Whether to inject a safety prompt before all conversations. |
| `llm.WithNumCompletions(uint64)` | No | No | Yes | - | Number of completions to return for each request. |
| `llm.WithAttachment(io.Reader)` | Yes | Yes | Yes | - | Attach a file to a user prompt. It is the responsibility of the caller to close the reader. |
| `anthropic.WithEphemeral()` | No | Yes | No | - | Attachments should be cached server-side |
| `anthropic.WithCitations()` | No | Yes | No | - | Attachments should be used in citations |
| `anthropic.WithUser(string)` | No | Yes | No | - | Indicate the user name for the request, for debugging |
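
For example, options can be set for a whole session and then overridden for a single prompt. A minimal sketch (the model name and option values are illustrative):

```go
// Session-wide options are set when the context is created...
session := agent.Model(context.TODO(), "mistral-small-latest").Context(
	llm.WithTemperature(0.2),
	llm.WithSystemPrompt("You are a concise assistant."),
)

// ...and can be overridden for a single call
if err := session.FromUser(context.TODO(), "Summarise the history of Go in one paragraph",
	llm.WithMaxTokens(512),
	llm.WithTemperature(0.7),
); err != nil {
	panic(err)
}
fmt.Println(session.Text(0))
```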

## The Command Line Tool

You can use the command-line tool to interact with the API. To install the tool, you can use the following command:

```bash
go install github.com/mutablelogic/go-llm/cmd/llm@latest
llm --help
```

The output is something like:

```text
Usage: llm <command> [flags]

LLM agent command line interface

Flags:
  -h, --help                     Show context-sensitive help.
      --debug                    Enable debug output
      --verbose                  Enable verbose output
      --ollama-endpoint=STRING   Ollama endpoint ($OLLAMA_URL)
      --anthropic-key=STRING     Anthropic API Key ($ANTHROPIC_API_KEY)
      --news-key=STRING          News API Key ($NEWSAPI_KEY)

Commands:
  agents      Return a list of agents
  models      Return a list of models
  tools       Return a list of tools
  download    Download a model
  chat        Start a chat session

Run "llm <command> --help" for more information on a command.
```

## Contributing & Distribution

*This module is currently in development and subject to change*. Please do file
4 changes: 4 additions & 0 deletions agent.go
type Agent interface {

// Return the models
Models(context.Context) ([]Model, error)

// Return a model by name, or nil if not found.
// Panics on error.
Model(context.Context, string) Model
}
38 changes: 38 additions & 0 deletions attachment.go
package llm

import (
"encoding/base64"
"encoding/json"
"io"
"mime"
"net/http"
"os"
"path/filepath"
)

///////////////////////////////////////////////////////////////////////////////
func ReadAttachment(r io.Reader) (*Attachment, error) {
return &Attachment{filename: filename, data: data}, nil
}

////////////////////////////////////////////////////////////////////////////////
// STRINGIFY

func (a *Attachment) String() string {
var j struct {
Filename string `json:"filename"`
Type string `json:"type"`
Bytes uint64 `json:"bytes"`
}
j.Filename = a.filename
j.Type = a.Type()
j.Bytes = uint64(len(a.data))
data, err := json.MarshalIndent(j, "", " ")
if err != nil {
return err.Error()
}
return string(data)
}

////////////////////////////////////////////////////////////////////////////////
// PUBLIC METHODS

func (a *Attachment) Filename() string {
	return a.filename
}

func (a *Attachment) Data() []byte {
return a.data
}

func (a *Attachment) Type() string {
// Mimetype based on content
mimetype := http.DetectContentType(a.data)
if mimetype == "application/octet-stream" && a.filename != "" {
// Detect mimetype from extension
mimetype = mime.TypeByExtension(filepath.Ext(a.filename))
}
return mimetype
}

func (a *Attachment) Url() string {
return "data:" + a.Type() + ";base64," + base64.StdEncoding.EncodeToString(a.data)
}
