feat: Multimodal Node (#16962)

* clean gitignore
* first draft
* fix python versioning and typing
* fix typing
* make embedding storage more flexible
* test mimetype guessing
* add pants build file to the new package
* add unit tests
* add unit tests
* use the right config value
* better get/set_content
* try
* revert
* pants explicit dependency
* fix tests
* try
* fix one more test
* do not load path, same as url
* oops..
* try
run-llama · Nov 20, 2024 · 795bebc · 795bebc
1 parent eccda3a
commit 795bebc
Show file tree

Hide file tree

Showing 18 changed files with 600 additions and 77 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,13 +1,22 @@
+# Build
 .pants.d/
 dist/
 migration_scripts/
-venv/
+
+# IDEs
 .idea
+.vscode
+.zed
+
+# Local development
+venv/
 .venv/
 .ipynb_checkpoints
 .__pycache__
 __pycache__
 dev_notebooks/
+
+# Other
 llamaindex_registry.txt
 packages_to_bump_deduped.txt
 .env

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -22,20 +22,22 @@ repos:
         exclude: llama-index-core/llama_index/core/_static
       - id: trailing-whitespace
         exclude: llama-index-core/llama_index/core/_static
+
   - repo: https://github.com/charliermarsh/ruff-pre-commit
     rev: v0.1.5
-
     hooks:
       - id: ruff
         args: [--fix, --exit-non-zero-on-fix]
         exclude: ".*poetry.lock|.*_static"
+
   - repo: https://github.com/psf/black-pre-commit-mirror
     rev: 23.10.1
     hooks:
       - id: black-jupyter
         name: black-src
         alias: black
         exclude: "^docs|.*poetry.lock|.*_static"
+
   - repo: https://github.com/pre-commit/mirrors-mypy
     rev: v1.0.1
     hooks:
@@ -56,9 +58,10 @@ repos:
             --explicit-package-bases,
             --disallow-untyped-defs,
             --ignore-missing-imports,
-            --python-version=3.8,
+            --python-version=3.9,
           ]
         entry: bash -c "export MYPYPATH=llama_index"
+
   - repo: https://github.com/psf/black-pre-commit-mirror
     rev: 23.10.1
     hooks:
@@ -68,6 +71,7 @@ repos:
         files: ^(docs/|examples/)
         # Using PEP 8's line length in docs prevents excess left/right scrolling
         args: [--line-length=79]
+
   - repo: https://github.com/adamchainz/blacken-docs
     rev: 1.16.0
     hooks:
@@ -78,11 +82,13 @@ repos:
         additional_dependencies: [black==23.10.1]
         # Using PEP 8's line length in docs prevents excess left/right scrolling
         args: [--line-length=79]
+
   - repo: https://github.com/pre-commit/mirrors-prettier
     rev: v3.0.3
     hooks:
       - id: prettier
         exclude: llama-index-core/llama_index/core/_static|poetry.lock|llama-index-legacy/llama_index/legacy/_static|docs/docs
+
   - repo: https://github.com/codespell-project/codespell
     rev: v2.2.6
     hooks:
@@ -98,13 +104,15 @@ repos:
           [
             "--skip=*/algolia.js",
             "--ignore-words-list",
-            "astroid,gallary,momento,narl,ot,rouge,nin,gere,asend",
+            "astroid,gallary,momento,narl,ot,rouge,nin,gere,asend,seperator",
           ]
+
   - repo: https://github.com/srstevenson/nb-clean
     rev: 3.1.0
     hooks:
       - id: nb-clean
         args: [--preserve-cell-outputs, --remove-empty-cells]
+
   - repo: https://github.com/pappasam/toml-sort
     rev: v0.23.1
     hooks:

diff --git a/llama-index-core/llama_index/core/__init__.py b/llama-index-core/llama_index/core/__init__.py
@@ -6,6 +6,12 @@
 from logging import NullHandler
 from typing import Callable, Optional
 
+try:
+    # Force pants to install eval_type_backport on 3.9
+    import eval_type_backport  # noqa  # type: ignore
+except ImportError:
+    pass
+
 # response
 from llama_index.core.base.response.schema import Response
 
@@ -28,8 +34,8 @@
     GPTVectorStoreIndex,
     KeywordTableIndex,
     KnowledgeGraphIndex,
-    PropertyGraphIndex,
     ListIndex,
+    PropertyGraphIndex,
     RAKEKeywordTableIndex,
     SimpleKeywordTableIndex,
     SummaryIndex,
@@ -67,6 +73,9 @@
     set_global_service_context,
 )
 
+# global settings
+from llama_index.core.settings import Settings
+
 # storage
 from llama_index.core.storage.storage_context import StorageContext
 
@@ -76,9 +85,6 @@
 # global tokenizer
 from llama_index.core.utils import get_tokenizer, set_global_tokenizer
 
-# global settings
-from llama_index.core.settings import Settings
-
 # best practices for library logging:
 # https://docs.python.org/3/howto/logging.html#configuring-logging-for-a-library
 logging.getLogger(__name__).addHandler(NullHandler())

diff --git a/llama-index-core/llama_index/core/bridge/pydantic.py b/llama-index-core/llama_index/core/bridge/pydantic.py
@@ -1,29 +1,30 @@
 import pydantic
 from pydantic import (
-    ConfigDict,
+    AnyUrl,
     BaseModel,
-    GetJsonSchemaHandler,
-    GetCoreSchemaHandler,
+    BeforeValidator,
+    ConfigDict,
     Field,
+    GetCoreSchemaHandler,
+    GetJsonSchemaHandler,
     PlainSerializer,
     PrivateAttr,
+    Secret,
+    SecretStr,
+    SerializeAsAny,
     StrictFloat,
     StrictInt,
     StrictStr,
-    create_model,
-    model_validator,
-    field_validator,
-    ValidationInfo,
-    ValidationError,
     TypeAdapter,
+    ValidationError,
+    ValidationInfo,
     WithJsonSchema,
-    BeforeValidator,
-    SerializeAsAny,
     WrapSerializer,
+    create_model,
     field_serializer,
-    Secret,
-    SecretStr,
+    field_validator,
     model_serializer,
+    model_validator,
 )
 from pydantic.fields import FieldInfo
 from pydantic.json_schema import JsonSchemaValue
@@ -58,4 +59,5 @@
     "Secret",
     "SecretStr",
     "model_serializer",
+    "AnyUrl",
 ]