Add JSON mode for the providers that support it
This completes feature request #47.
ahyatt committed Nov 24, 2024
1 parent 0cea385 commit 84a7215
Showing 6 changed files with 26 additions and 7 deletions.
4 changes: 3 additions & 1 deletion NEWS.org
@@ -1,4 +1,6 @@
* Veresion 0.18.1
* Version 0.19.0
- Add JSON mode, for most providers with the exception of Claude.
* Version 0.18.1
- Fix extra argument in ~llm-batch-embeddings-async~.
* Version 0.18.0
- Add media handling, for images, videos, and audio.
2 changes: 1 addition & 1 deletion README.org
@@ -138,7 +138,7 @@ For all callbacks, the callback will be executed in the buffer the function was
- ~llm-chat-token-limit~. Gets the token limit for the chat model. This isn't possible for some backends like =llama.cpp=, in which the model isn't selected or known by this library.

And the following helper functions:
- ~llm-make-chat-prompt text &keys context examples functions temperature max-tokens~: This is how you make prompts. ~text~ can be a string (the user input to the llm chatbot), or a list representing a series of back-and-forth exchanges, of odd number, with the last element of the list representing the user's latest input. This supports inputting context (also commonly called a system prompt, although it isn't guaranteed to replace the actual system prompt), examples, and other important elements, all detailed in the docstring for this function. The ~non-standard-params~ let you specify other options that might vary per-provider. The correctness is up to the client.
- ~llm-make-chat-prompt text &keys context examples functions temperature max-tokens response-format non-standard-params~: This is how you make prompts. ~text~ can be a string (the user input to the llm chatbot), or a list representing a series of back-and-forth exchanges, of odd number, with the last element of the list representing the user's latest input. This supports inputting context (also commonly called a system prompt, although it isn't guaranteed to replace the actual system prompt), examples, and other important elements, all detailed in the docstring for this function. ~response-format~ can be ~'json~, to force JSON output, but the prompt also needs to mention and ideally go into detail about what kind of JSON response is desired. Providers with the ~json-response~ capability support JSON output, and it will be ignored if unsupported. The ~non-standard-params~ let you specify other options that might vary per-provider, and for this, the correctness is up to the client.
- ~llm-chat-prompt-to-text prompt~: From a prompt, return a string representation. This is not usually suitable for passing to LLMs, but for debugging purposes.
- ~llm-chat-streaming-to-point provider prompt buffer point finish-callback~: Same basic arguments as ~llm-chat-streaming~, but will stream to ~point~ in ~buffer~.
- ~llm-chat-prompt-append-response prompt response role~: Append a new response (from the user, usually) to the prompt. The ~role~ is optional, and defaults to ~'user~.
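A rough usage sketch of the new ~response-format~ argument (the provider is a placeholder for any object with the ~json-response~ capability, e.g. one built with ~make-llm-openai~ or ~make-llm-ollama~; the prompt text is illustrative):

(let ((prompt (llm-make-chat-prompt
               ;; Describe the desired JSON shape in the prompt itself...
               "List three Emacs major modes as JSON: {\"modes\": [\"...\"]}"
               ;; ...and ask the provider to constrain output to JSON.
               :response-format 'json)))
  (llm-chat my-provider prompt))
;; The returned text can then be parsed, e.g. with
;; (json-parse-string response :object-type 'alist).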
4 changes: 3 additions & 1 deletion llm-ollama.el
@@ -144,6 +144,8 @@ PROVIDER is the llm-ollama provider."
(when (llm-chat-prompt-functions prompt)
(push `("tools" . ,(mapcar #'llm-provider-utils-openai-function-spec
(llm-chat-prompt-functions prompt))) request-alist))
(when (eq 'json (llm-chat-prompt-response-format prompt))
(push `("format" . ,(llm-chat-prompt-response-format prompt)) request-alist))
(push `("stream" . ,(if streaming t :json-false)) request-alist)
(when (llm-chat-prompt-temperature prompt)
(push `("temperature" . ,(llm-chat-prompt-temperature prompt)) options))
@@ -188,7 +190,7 @@ PROVIDER is the llm-ollama provider."
2048))

(cl-defmethod llm-capabilities ((provider llm-ollama))
(append '(streaming)
(append '(streaming json-response)
(when (and (llm-ollama-embedding-model provider)
(let ((embedding-model (llm-models-match
(llm-ollama-embedding-model provider))))
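For reference, Ollama's chat endpoint takes a top-level "format" field. Assuming a ~json-encode~-style serialization of ~request-alist~ (the library's actual encoder may differ), the entry added above comes out roughly as:

(require 'json)
;; Sketch only: two of the request-alist entries built above.
(json-encode '(("format" . json)
               ("stream" . :json-false)))
;; => {"format":"json","stream":false}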
4 changes: 3 additions & 1 deletion llm-openai.el
@@ -195,6 +195,8 @@ STREAMING if non-nil, turn on response streaming."
(llm-chat-prompt-interactions prompt)))
request-alist)
(push `("model" . ,(llm-openai-chat-model provider)) request-alist)
(when (eq 'json (llm-chat-prompt-response-format prompt))
(push '("response_format" . (("type" . "json_object"))) request-alist))
(when (llm-chat-prompt-temperature prompt)
(push `("temperature" . ,(* (llm-chat-prompt-temperature prompt) 2.0)) request-alist))
(when (llm-chat-prompt-max-tokens prompt)
@@ -294,7 +296,7 @@ RESPONSE can be nil if the response is complete."
(llm-provider-utils-model-token-limit (llm-openai-chat-model provider)))

(cl-defmethod llm-capabilities ((provider llm-openai))
(append '(streaming embeddings function-calls)
(append '(streaming embeddings function-calls json-response)
(when-let ((model (llm-models-match (llm-openai-chat-model provider))))
(seq-intersection (llm-model-capabilities model)
'(image-input)))))
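The nested alist added above corresponds to OpenAI's ~response_format~ request parameter. Under the same ~json-encode~-style assumption as the Ollama sketch, it maps to a nested JSON object:

(require 'json)
;; Sketch only: the quoted nested alist from the hunk above.
(json-encode '(("response_format" . (("type" . "json_object")))))
;; => {"response_format":{"type":"json_object"}}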
4 changes: 3 additions & 1 deletion llm-vertex.el
@@ -245,6 +245,8 @@ nothing to add, in which case it is nil."
params-alist))
(when (llm-chat-prompt-max-tokens prompt)
(push `(maxOutputTokens . ,(llm-chat-prompt-max-tokens prompt)) params-alist))
(pcase (llm-chat-prompt-response-format prompt)
('json (push '("response_mime_type" . "application/json") params-alist)))
(when params-alist
`((generation_config . ,params-alist)))))

@@ -300,7 +302,7 @@ If STREAMING is non-nil, use the URL for the streaming API."

(cl-defmethod llm-capabilities ((provider llm-vertex))
(append
(list 'streaming 'embeddings)
   (list 'streaming 'embeddings 'json-response)
(when-let ((model (llm-models-match (llm-vertex-chat-model provider)))
(capabilities (llm-model-capabilities model)))
(append
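Here the new entry lands in ~params-alist~, which this function then wraps under ~generation_config~. A sketch of that fragment (the ~maxOutputTokens~ value is an arbitrary example; same serialization assumption as above):

(require 'json)
;; Sketch only: params-alist wrapped under generation_config.
(json-encode '((generation_config . (("response_mime_type" . "application/json")
                                     (maxOutputTokens . 500)))))
;; => {"generation_config":{"response_mime_type":"application/json","maxOutputTokens":500}}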
15 changes: 13 additions & 2 deletions llm.el
@@ -55,6 +55,7 @@ for debugging, because the log buffer will grow without bound."
:type 'boolean)

(defun llm--warn-on-nonfree (name tos)

"Issue a warning if `llm-warn-on-nonfree' is non-nil.
NAME is the human readable name of the LLM (e.g \"Open AI\").
@@ -70,7 +71,7 @@ See %s for the details on the restrictions on use." name tos)))
"This stores all the information needed for a structured chat prompt.
Use of this directly is deprecated, instead use `llm-make-chat-prompt'."
context examples interactions functions temperature max-tokens non-standard-params)
context examples interactions functions temperature max-tokens response-format non-standard-params)

(cl-defstruct llm-chat-prompt-interaction
"This defines a single interaction given as part of a chat prompt.
@@ -229,7 +230,7 @@ instead."
(llm-make-chat-prompt text))

(cl-defun llm-make-chat-prompt (content &key context examples functions
temperature max-tokens
temperature max-tokens response-format
non-standard-params)
"Create a `llm-chat-prompt' with CONTENT sent to the LLM provider.
@@ -276,6 +277,12 @@ This is not required.
MAX-TOKENS is the maximum number of tokens to generate. This is optional.
If RESPONSE-FORMAT is `json' (currently the only accepted value), we
will attempt to force output to fit the format. This should not be
used with function calling. If this is set, the instructions to the
LLM should tell the model about the expected format, for example by
including example output or describing the desired JSON schema.
CONTEXT, EXAMPLES, FUNCTIONS, TEMPERATURE, and MAX-TOKENS are
usually turned into part of the interaction, and if so, they will
be put in the first interaction of the prompt (before anything in
@@ -302,6 +309,7 @@ cdrs can be strings or numbers. This is optional."
:functions functions
:temperature temperature
:max-tokens max-tokens
:response-format response-format
:non-standard-params non-standard-params))

(defun llm-chat-prompt-append-response (prompt response &optional role)
@@ -536,6 +544,9 @@ won't have any partial responses, so basically just operates like
`image-input': the LLM can accept images as input.
`json-response': the LLM can be requested to return responses only in
JSON format.
`video-input': the LLM can accept video as input.
`audio-input': the LLM can accept audio as input."
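Putting the pieces together, a small sketch of gating JSON mode on the new capability (the guard is optional, since providers without ~json-response~ simply ignore ~response-format~; the helper name is hypothetical):

(defun my/make-json-prompt (provider text)
  "Hypothetical helper: request JSON for TEXT only when PROVIDER supports it."
  (llm-make-chat-prompt
   text
   :response-format (when (member 'json-response (llm-capabilities provider))
                      'json)))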
