Skip to content

Commit

Permalink
Merge pull request #109 from alan-turing-institute/anthropic-multimodal
Browse files Browse the repository at this point in the history
Add anthropic multimodal option
  • Loading branch information
rchan26 authored Oct 29, 2024
2 parents c6ee30c + b7209cc commit 490d33c
Show file tree
Hide file tree
Showing 12 changed files with 622 additions and 13 deletions.
391 changes: 391 additions & 0 deletions examples/anthropic/anthropic-multimodal.ipynb

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{"id": 0, "api": "anthropic", "model_name": "claude-3-5-sonnet-20241022", "prompt": [{"role": "user", "content": ["describe what is happening in this image", {"type": "image", "source": {"media": "pantani_giro.jpg", "media_type": "image/jpeg"}}]}], "parameters": {"temperature": 1, "max_tokens": 100}}
{"id": 1, "api": "anthropic", "model_name": "claude-3-5-sonnet-20241022", "prompt": [{"role": "user", "content": [{"type": "image", "source": {"media": "mortadella.jpg", "media_type": "image/jpeg"}}, "what is this?"]}], "parameters": {"temperature": 1, "max_tokens": 100}}
{"id": 2, "api": "anthropic", "model_name": "claude-3-5-sonnet-20241022", "prompt": [{"role": "user", "content": ["what is in this image?", {"type": "image", "source": {"media": "pantani_giro.jpg", "media_type": "image/jpeg"}}]}, {"role": "assistant", "content": "This is image shows a group of cyclists."}, {"role": "user", "content": "are there any notable cyclists in this image? what are their names?"}], "parameters": {"temperature": 1, "max_tokens": 100}}
Binary file added examples/anthropic/data/media/mortadella.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added examples/anthropic/data/media/pantani_giro.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{"id": 0, "api": "anthropic", "model_name": "claude-3-5-sonnet-20241022", "prompt": [{"role": "user", "content": ["describe what is happening in this image", {"type": "image", "source": {"media": "pantani_giro.jpg", "media_type": "image/jpeg"}}]}], "parameters": {"temperature": 1, "max_tokens": 100}, "timestamp_sent": "29-10-2024-15-36-29", "response": "This image shows professional cyclists competing in what appears to be a cycling race, likely from the 1990s based on the equipment and team jerseys visible. There are several riders in the frame, including one wearing the distinctive pink jersey (known as the maglia rosa in the Giro d'Italia). The cyclists are riding along a stone wall with an iron fence on top, and they're using classic road racing bikes with distinctive team color schemes - notably some turquoise Bian"}
{"id": 1, "api": "anthropic", "model_name": "claude-3-5-sonnet-20241022", "prompt": [{"role": "user", "content": [{"type": "image", "source": {"media": "mortadella.jpg", "media_type": "image/jpeg"}}, "what is this?"]}], "parameters": {"temperature": 1, "max_tokens": 100}, "timestamp_sent": "29-10-2024-15-36-31", "response": "These appear to be mortadella and other Italian cold cuts or processed meats. The larger ones with the string/twine pattern wrapped around them are likely mortadella (a type of Italian bologna), while the pink spotted ones appear to be a different variety of cold cut or processed meat product. The spotted pattern is characteristic of certain Italian deli meats where small pieces of fat or other ingredients are distributed throughout the meat. These are commonly sliced and served in sandwiches or on"}
{"id": 2, "api": "anthropic", "model_name": "claude-3-5-sonnet-20241022", "prompt": [{"role": "user", "content": ["what is in this image?", {"type": "image", "source": {"media": "pantani_giro.jpg", "media_type": "image/jpeg"}}]}, {"role": "assistant", "content": "This is image shows a group of cyclists."}, {"role": "user", "content": "are there any notable cyclists in this image? what are their names?"}], "parameters": {"temperature": 1, "max_tokens": 100}, "timestamp_sent": "29-10-2024-15-36-33", "response": "From the image, I can see this appears to be from a professional cycling race, likely from the 1990s based on the equipment and jerseys. While there are professional cyclists in the image, including one wearing the pink jersey (which is the leader's jersey in the Giro d'Italia), I should refrain from identifying specific individuals by name. The image shows a group of riders from various teams including Mercatone Uno and what appears to be racing in a major"}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{"id": 0, "api": "anthropic", "model_name": "claude-3-5-sonnet-20241022", "prompt": [{"role": "user", "content": ["describe what is happening in this image", {"type": "image", "source": {"media": "pantani_giro.jpg", "media_type": "image/jpeg"}}]}], "parameters": {"temperature": 1, "max_tokens": 100}}
{"id": 1, "api": "anthropic", "model_name": "claude-3-5-sonnet-20241022", "prompt": [{"role": "user", "content": [{"type": "image", "source": {"media": "mortadella.jpg", "media_type": "image/jpeg"}}, "what is this?"]}], "parameters": {"temperature": 1, "max_tokens": 100}}
{"id": 2, "api": "anthropic", "model_name": "claude-3-5-sonnet-20241022", "prompt": [{"role": "user", "content": ["what is in this image?", {"type": "image", "source": {"media": "pantani_giro.jpg", "media_type": "image/jpeg"}}]}, {"role": "assistant", "content": "This is image shows a group of cyclists."}, {"role": "user", "content": "are there any notable cyclists in this image? what are their names?"}], "parameters": {"temperature": 1, "max_tokens": 100}}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
29-10-2024, 15:36: Completed experiment: anthropic-multimodal-example.jsonl! Experiment processing time: 9.878 seconds, Average time per query: 3.293 seconds
1 change: 1 addition & 0 deletions mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ nav:
- Anthropic:
- Example: examples/anthropic/README.md
- Notebook: examples/anthropic/anthropic.ipynb
- Multimodal: examples/anthropic/anthropic-multimodal.ipynb
- Gemini:
- Example: examples/gemini/README.md
- Notebook: examples/gemini/gemini.ipynb
Expand Down
10 changes: 8 additions & 2 deletions src/prompto/apis/anthropic/anthropic.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from prompto.apis.anthropic.anthropic_utils import (
anthropic_chat_roles,
convert_dict_to_input,
process_response,
)
from prompto.apis.base import AsyncAPI
Expand Down Expand Up @@ -331,7 +332,7 @@ async def _query_history(self, prompt_dict: dict, index: int | str) -> dict:

# if system message is present, then it must be the only one
if len(system) == 0:
system = None
system = ""
elif len(system) == 1:
system = system[0]
else:
Expand All @@ -342,7 +343,12 @@ async def _query_history(self, prompt_dict: dict, index: int | str) -> dict:
try:
response = await client.messages.create(
model=model_name,
messages=messages,
messages=[
convert_dict_to_input(
content_dict=x, media_folder=self.settings.media_folder
)
for x in messages
],
system=system,
**generation_config,
)
Expand Down
155 changes: 155 additions & 0 deletions src/prompto/apis/anthropic/anthropic_utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,163 @@
import base64
import os

from anthropic.types.message import Message

anthropic_chat_roles = set(["user", "assistant"])


def encode_image(image_path):
    """
    Read a file from disk and return its contents as a base64 string.

    Parameters
    ----------
    image_path : str
        Path to the file to encode. The file is opened in binary mode.

    Returns
    -------
    str
        Base64 encoding of the file's bytes, decoded to a UTF-8 string.
    """
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()

    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode("utf-8")


def parse_content_value(content: dict | str, media_folder: str) -> dict:
"""
Parse content dictionary and create a dictionary input for Anthropic API.
If content is a string, a dictionary to represent a text object is returned.
If content is a dictionary, expected keys are:
- type: str, multimedia type, one of ["text", "image"]
If type is "text", expect a key "text" with the text content.
If type is "image", expect a key "source" which is a dictionary with keys:
- url: str, URL of the image (can be a local path or a URL starting with "https://")
- detail: str, optional detail parameter (default is "auto)
Parameters
----------
content : dict | str
Either a dictionary or a string which defines a multimodal object.
media_folder : str
Folder where media files are stored ({data_folder}/media).
Returns
-------
dict
Dictionary which defines a text or image object
"""
if isinstance(content, str):
return {"type": "text", "text": content}

# read multimedia type
type = content.get("type")
if type is None:
raise ValueError("Multimedia type is not specified")

# create dictionary based on multimedia type
if type == "text":
# read file location
text = content.get("text")
if text is None:
raise ValueError(
"Got type == 'text', but 'text' is not a key in the content dictionary"
)

return {"type": "text", "text": text}
else:
if type == "image":
# read file location
source = content.get("source")
if source is None:
raise ValueError(
"Got type == 'image', but 'source' is not a key in the content dictionary"
)

if not isinstance(source, dict):
raise ValueError(
"Got type == 'image', but 'source' is not a dictionary"
)

# get media type
media_type = source.get("media_type")
if media_type is None:
raise ValueError(
"Got type == 'image', but 'media_type' is not a key in the content['source'] dictionary"
)

# get image source
media = source.get("media")
if media is None:
raise ValueError(
"Got type == 'image', but 'media' is not a key in the content['source'] dictionary"
)

# url is a local path and needs to be encoded to base64
image_path = os.path.join(media_folder, media)
base64_image = encode_image(image_path)
return {
"type": "image",
"source": {
"type": "base64",
"media_type": media_type,
"data": base64_image,
},
}
else:
raise ValueError(f"Unsupported multimedia type: {type}")


def parse_content(
contents: list[dict | str] | dict | str, media_folder: str
) -> list[dict]:
"""
Parse contents data and create a list of multimedia data objects.
If contents is a single dictionary, a list with a single multimedia data object is returned.
Parameters
----------
contents : list[dict | str] | dict | str
Contents data to parse and create Part object(s).
Can be a list of dictionaries and strings, or a single dictionary or string.
media_folder : str
Folder where media files are stored ({data_folder}/media).
Returns
-------
list[dict]
List of dictionaries each defining a text or image object
"""
# convert to list[dict | str]
if isinstance(contents, dict) or isinstance(contents, str):
contents = [contents]

return [parse_content_value(p, media_folder=media_folder) for p in contents]


def convert_dict_to_input(content_dict: dict, media_folder: str) -> dict:
    """
    Convert a message dictionary to an input that can be used by the Anthropic API.

    The output is a dictionary with keys "role" and "content".

    Parameters
    ----------
    content_dict : dict
        Message dictionary with keys "role" and "content". The value of
        "content" can be a string, a dictionary, or a list of strings
        and dictionaries describing text or image inputs.
    media_folder : str
        Folder where media files are stored ({data_folder}/media).

    Returns
    -------
    dict
        dict with keys "role" and "content" where the value of
        role is copied unchanged from content_dict (it is not validated
        here; the module's anthropic_chat_roles are "user" and "assistant")
        and the value of content is the list of text or image
        dictionaries returned by parse_content.

    Raises
    ------
    KeyError
        If "role" or "content" is not a key of content_dict.
    """
    # "role" is checked first so that it is reported first when both are missing
    for required_key in ("role", "content"):
        if required_key not in content_dict:
            raise KeyError(f"{required_key} key is missing in content dictionary")

    return {
        "role": content_dict["role"],
        "content": parse_content(
            content_dict["content"],
            media_folder=media_folder,
        ),
    }


def process_response(response: Message) -> str | list[str]:
"""
Helper function to process the response from the Anthropic API.
Expand Down
2 changes: 1 addition & 1 deletion src/prompto/apis/openai/openai_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def parse_content_value(content: dict | str, media_folder: str) -> dict:
if type is None:
raise ValueError("Multimedia type is not specified")

# create Part object based on multimedia type
# create dictionary based on multimedia type
if type == "text":
# read file location
text = content.get("text")
Expand Down
66 changes: 56 additions & 10 deletions tests/apis/anthropic/test_anthropic_history_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,15 @@ async def test_anthropic_query_history(
mock_anthropic.assert_awaited_once_with(
model=prompt_dict_history["model_name"],
messages=[
{"role": "user", "content": prompt_dict_history["prompt"][1]["content"]}
{
"role": "user",
"content": [
{
"type": "text",
"text": prompt_dict_history["prompt"][1]["content"],
}
],
}
],
system=prompt_dict_history["prompt"][0]["content"],
**prompt_dict_history["parameters"],
Expand Down Expand Up @@ -162,7 +170,15 @@ async def test_anthropic_query_history_error(
mock_anthropic.assert_awaited_once_with(
model=prompt_dict_history["model_name"],
messages=[
{"role": "user", "content": prompt_dict_history["prompt"][1]["content"]}
{
"role": "user",
"content": [
{
"type": "text",
"text": prompt_dict_history["prompt"][1]["content"],
}
],
}
],
system=prompt_dict_history["prompt"][0]["content"],
**prompt_dict_history["parameters"],
Expand Down Expand Up @@ -221,18 +237,33 @@ async def test_anthropic_query_history_no_system(
messages=[
{
"role": "user",
"content": prompt_dict_history_no_system["prompt"][0]["content"],
"content": [
{
"type": "text",
"text": prompt_dict_history_no_system["prompt"][0]["content"],
}
],
},
{
"role": "assistant",
"content": prompt_dict_history_no_system["prompt"][1]["content"],
"content": [
{
"type": "text",
"text": prompt_dict_history_no_system["prompt"][1]["content"],
}
],
},
{
"role": "user",
"content": prompt_dict_history_no_system["prompt"][2]["content"],
"content": [
{
"type": "text",
"text": prompt_dict_history_no_system["prompt"][2]["content"],
}
],
},
],
system=None,
system="",
**prompt_dict_history_no_system["parameters"],
)

Expand Down Expand Up @@ -278,18 +309,33 @@ async def test_anthropic_query_history_error_no_system(
messages=[
{
"role": "user",
"content": prompt_dict_history_no_system["prompt"][0]["content"],
"content": [
{
"type": "text",
"text": prompt_dict_history_no_system["prompt"][0]["content"],
}
],
},
{
"role": "assistant",
"content": prompt_dict_history_no_system["prompt"][1]["content"],
"content": [
{
"type": "text",
"text": prompt_dict_history_no_system["prompt"][1]["content"],
}
],
},
{
"role": "user",
"content": prompt_dict_history_no_system["prompt"][2]["content"],
"content": [
{
"type": "text",
"text": prompt_dict_history_no_system["prompt"][2]["content"],
}
],
},
],
system=None,
system="",
**prompt_dict_history_no_system["parameters"],
)

Expand Down

0 comments on commit 490d33c

Please sign in to comment.