Skip to content

Commit

Permalink
Release 0.9.15 (#977)
Browse files Browse the repository at this point in the history
* Set default predict_concurrency when using trt-llm to 512 (#954)

* Set default predict_concurrency when using trt-llm to 512

* update tests

* Truss changes to support lazy data that reads bptr secret and fetches from remote (#963)

* lazy data resolution support

* add support for lazy data resolver in truss

* remove lazy loader reference from template

* fetch in model wrapper

* duplicate download util for shared template

* concurrent download

* fix path reference

* use updated expiration_timestamp type

---------

Co-authored-by: Pankaj Gupta <[email protected]>

* Update push docs. (#965)

* Adding initial code to implement build commands (#961)

* Adding initial code to implement build commands

* Adding some tests

* Adding docker integration tests

* making build command an empty list by default

* removing unnecessary build_commands list for loop thing

* correct secrets str in docs (#968)

* Fix lazy data resolver error handling (#967)

* [chains] Add external_package_dirs option. Usage in Whisper model chainlet. (#966)

* add truss chains init (#973)

* [BT-10657] Wire up truss chains deploy (#969)

* Wire up the new chains mutations to truss chains deploy.

* Add comment.

* Respond to PR feedback.

* * Prune docker build cache in integration tests. (#976)

* Show requirement file content before pip install.
* For all tests running docker containers, show container logs if an exception was raised.
* Update control requirements to truss 0.9.14 (required also incrementing httpx version).

* Bump version to 0.9.15

---------

Co-authored-by: Bryce Dubayah <[email protected]>
Co-authored-by: joostinyi <[email protected]>
Co-authored-by: Pankaj Gupta <[email protected]>
Co-authored-by: Sidharth Shanker <[email protected]>
Co-authored-by: Het Trivedi <[email protected]>
Co-authored-by: rcano-baseten <[email protected]>
Co-authored-by: Marius Killinger <[email protected]>
  • Loading branch information
8 people authored Jun 14, 2024
1 parent 7e17bf7 commit 713f74d
Show file tree
Hide file tree
Showing 39 changed files with 867 additions and 41 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/integration-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ jobs:
matrix:
split_group: ["1", "2", "3", "4", "5"]
steps:
- name: Purge Docker cache
run: docker builder prune -af
- uses: actions/checkout@v3
- uses: ./.github/actions/setup-python/
- run: poetry install
Expand All @@ -77,6 +79,8 @@ jobs:
strategy:
fail-fast: false
steps:
- name: Purge Docker cache
run: docker builder prune -af
- uses: actions/checkout@v3
- uses: ./.github/actions/setup-python/
- run: poetry install
Expand Down
1 change: 1 addition & 0 deletions docs/chains/getting-started.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ More details are in the [concepts section](/chains/concepts).
Create a Chain project directory with a python file in it. You can choose a name
and location; in this example we assume the file is named `hello.py`.

-- Note: If you are changing this snippet, please update the example code in example_chainlet.py accordingly
```python
import random
import truss_chains as chains
Expand Down
8 changes: 5 additions & 3 deletions docs/chains/guide.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,8 @@ MISTRAL_HF_MODEL = "mistralai/Mistral-7B-Instruct-v0.2"
MISTRAL_CACHE = truss_config.ModelRepo(
repo_id=MISTRAL_HF_MODEL, allow_patterns=["*.json", "*.safetensors", ".model"]
)
# This name should correspond to a secret "name" in https://app.baseten.co/settings/secrets
HF_ACCESS_TOKEN_NAME = "hf_access_token"

class MistralLLM(chains.ChainletBase):
# The RemoteConfig object defines the resources required for this chainlet.
Expand All @@ -127,7 +129,7 @@ class MistralLLM(chains.ChainletBase):
compute=chains.Compute(cpu_count=2, gpu="A10G"),
# Cache the model weights in the image and make the huggingface
# access token secret available to the model.
assets=chains.Assets(cached=[MISTRAL_CACHE], secret_keys=["hf_access_token"]),
assets=chains.Assets(cached=[MISTRAL_CACHE], secret_keys=[HF_ACCESS_TOKEN_NAME]),
)

def __init__(
Expand All @@ -147,14 +149,14 @@ class MistralLLM(chains.ChainletBase):
MISTRAL_HF_MODEL,
torch_dtype=torch.float16,
device_map="auto",
use_auth_token=context.secrets["HF_ACCESS_TOKEN"],
use_auth_token=context.secrets[HF_ACCESS_TOKEN_NAME],
)

self._tokenizer = transformers.AutoTokenizer.from_pretrained(
MISTRAL_HF_MODEL,
device_map="auto",
torch_dtype=torch.float16,
use_auth_token=context.secrets["HF_ACCESS_TOKEN"],
use_auth_token=context.secrets[HF_ACCESS_TOKEN_NAME],
)

self._generate_args = {
Expand Down
7 changes: 7 additions & 0 deletions docs/reference/cli/push.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ Name of the remote in .trussrc to patch changes to.
<ParamField body="--publish" type="BOOL">
Push the truss as a published deployment. If no production deployment exists, promote the truss to production after deploy completes.
</ParamField>
<ParamField body="--model-name" type="TEXT">
Name of the model
</ParamField>
<ParamField body="--promote" type="BOOL">
Push the truss as a published deployment. Even if a production deployment exists, promote the truss to production after deploy completes.
</ParamField>
Expand All @@ -30,6 +33,10 @@ Name of the deployment created by the push. Can only be used in combination with
<ParamField body="--wait" type="BOOL">
Whether to wait for deployment to complete before returning. If the deploy or build fails, will return with a non-zero exit code.
</ParamField>
<ParamField body="--timeout-seconds" type="INTEGER">
Maximum time to wait for deployment to complete in seconds. If not specified, the command waits until the deployment is complete.
</ParamField>

<ParamField body="--help">
Show help message and exit.
</ParamField>
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "truss"
version = "0.9.14"
version = "0.9.15"
description = "A seamless bridge from model development to model delivery"
license = "MIT"
readme = "README.md"
Expand Down
89 changes: 89 additions & 0 deletions truss-chains/examples/whisper/whisper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
from typing import Optional

# flake8: noqa F402
# This location assumes `fde`-repo is checked out at the same level as `truss`-repo.
_LOCAL_WHISPER_LIB = "../../../../fde/whisper-trt/src"
import sys

sys.path.append(_LOCAL_WHISPER_LIB)

import base64

import pydantic
import truss_chains as chains
from huggingface_hub import snapshot_download


# TODO: The I/O types below should actually be taken from `whisper_trt.types`.
# But that cannot be imported without having `tensorrt_llm` installed.
# It could be fixed, by making that module importable without any special requirements.
class Segment(pydantic.BaseModel):
    # One entry of `WhisperResult.segments`: a piece of text together with
    # its start/end timing values.
    #
    # NOTE: documented with `#` comments rather than a class docstring on
    # purpose, so the model's generated schema stays unchanged.

    # Unit-suffixed timing fields; per the TODOs below these are the ones
    # meant to replace `start` / `end`.
    start_time_sec: float
    end_time_sec: float
    # The text belonging to this segment.
    text: str
    # Unit-less timing fields -- presumably the same values as the `*_sec`
    # fields above (TODO confirm against `whisper_trt.types`).
    start: float  # TODO: deprecate, use field with unit (seconds).
    end: float  # TODO: deprecate, use field with unit (seconds).


class WhisperResult(pydantic.BaseModel):
    # Return type of `WhisperModel.run_remote`: the segments plus language
    # information.

    # The individual segments, see `Segment`.
    segments: list[Segment]
    # Language value; may be None.
    language: Optional[str]
    # `...` (Ellipsis) as the default marks this field as required in
    # pydantic, even though None is an accepted value.
    language_code: Optional[str] = pydantic.Field(
        ...,
        description="IETF language tag, e.g. 'en', see. "
        "https://en.wikipedia.org/wiki/IETF_language_tag.",
    )


class WhisperInput(pydantic.BaseModel):
    # Request payload for `WhisperModel.run_remote`.

    # Base64-encoded audio; it is UTF-8 encoded and passed through
    # `base64.b64decode` before preprocessing.
    audio_b64: str


@chains.mark_entrypoint
class WhisperModel(chains.ChainletBase):
    # Entrypoint chainlet: takes base64-encoded audio and returns the
    # transcription produced by the `whisper_trt` model.

    remote_config = chains.RemoteConfig(
        docker_image=chains.DockerImage(
            base_image="baseten/truss-server-base:3.10-gpu-v0.9.0",
            apt_requirements=["python3.10-venv", "openmpi-bin", "libopenmpi-dev"],
            pip_requirements=[
                "--extra-index-url https://pypi.nvidia.com",
                "tensorrt_llm==0.10.0.dev2024042300",
                "hf_transfer",
                "janus",
                "kaldialign",
                "librosa",
                "mpi4py==3.1.4",
                "safetensors",
                "soundfile",
                "tiktoken",
                "torchaudio",
                "async-batcher>=0.2.0",
                "pydantic>=2.7.1",
            ],
            # Bundle the locally checked-out whisper library with the chainlet.
            external_package_dirs=[chains.make_abs_path_here(_LOCAL_WHISPER_LIB)],
        ),
        compute=chains.Compute(gpu="A10G", predict_concurrency=128),
        assets=chains.Assets(secret_keys=["hf_access_token"]),
    )

    def __init__(
        self,
        context: chains.DeploymentContext = chains.depends_context(),
    ) -> None:
        """Download the model repo into the data dir and load the model.

        Args:
            context: Deployment context; supplies `data_dir` (download
                target) and `secrets` (Hugging Face access token).
        """
        weights_dir = context.data_dir
        hf_token = context.secrets["hf_access_token"]
        snapshot_download(
            repo_id="baseten/whisper_trt_large-v3_A10G_i224_o512_bs8_bw5",
            local_dir=weights_dir,
            allow_patterns=["**"],
            token=hf_token,
        )

        # Imported lazily inside the constructor -- presumably because
        # `whisper_trt` needs `tensorrt_llm` installed (TODO confirm). Aliased
        # so that it does not shadow this class's own name locally.
        from whisper_trt import WhisperModel as TrtWhisperModel

        self._model = TrtWhisperModel(str(weights_dir), max_queue_time=0.050)

    async def run_remote(self, request: WhisperInput) -> WhisperResult:
        """Decode the base64 audio in `request` and transcribe it.

        Args:
            request: Input carrying the base64-encoded audio.

        Returns:
            Whatever the model's `transcribe` coroutine yields for the
            preprocessed waveform.
        """
        encoded_audio = request.audio_b64.encode("utf-8")
        audio_bytes = base64.b64decode(encoded_audio)
        waveform = self._model.preprocess_audio(audio_bytes)
        transcription = await self._model.transcribe(
            waveform, timestamps=True, raise_when_trimmed=True
        )
        return transcription
3 changes: 3 additions & 0 deletions truss-chains/truss_chains/code_gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -585,6 +585,9 @@ def _make_truss_config(
# Absolute paths don't work with remote build.
config.requirements_file = _REQUIREMENTS_FILENAME
config.system_packages = image.apt_requirements
if image.external_package_dirs:
for ext_dir in image.external_package_dirs:
config.external_package_dirs.append(ext_dir.abs_path)
# Assets.
assets = chains_config.get_asset_spec()
config.secrets = assets.secrets
Expand Down
5 changes: 3 additions & 2 deletions truss-chains/truss_chains/definitions.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# TODO: this file contains too much implementation -> restructure.
import abc
import logging
import os
import pathlib
import traceback
from types import GenericAlias
Expand Down Expand Up @@ -103,7 +102,8 @@ def __init__(
self._original_path = original_path

def _raise_if_not_exists(self, abs_path: str) -> None:
if not os.path.isfile(abs_path):
path = pathlib.Path(abs_path)
if not (path.is_file() or (path.is_dir() and any(path.iterdir()))):
raise MissingDependencyError(
f"With the file path `{self._original_path}` an absolute path relative "
f"to the calling module `{self._creating_module}` was created, "
Expand All @@ -129,6 +129,7 @@ class DockerImage(SafeModelNonSerializable):
pip_requirements: list[str] = []
apt_requirements: list[str] = []
data_dir: Optional[AbsPath] = None
external_package_dirs: Optional[list[AbsPath]] = None


class ComputeSpec(pydantic.BaseModel):
Expand Down
53 changes: 51 additions & 2 deletions truss-chains/truss_chains/deploy.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,22 @@
import inspect
import logging
import pathlib
from typing import Any, Dict, Iterable, Iterator, MutableMapping, Optional, Type, cast
import uuid
from typing import (
Any,
Dict,
Iterable,
Iterator,
List,
MutableMapping,
Optional,
Type,
cast,
)

import truss
from truss.remote.baseten import service as b10_service
from truss.remote.baseten import types as b10_types
from truss_chains import code_gen, definitions, framework, utils


Expand All @@ -19,10 +31,18 @@ def _deploy_to_baseten(
f"Deploying chainlet `{model_name}` as truss model on Baseten "
f"(publish={options.publish}, promote={options.promote})."
)

# Since we are deploying a model independently of the chain, we add a random suffix to
# prevent us from running into issues with existing models with the same name.
#
# This is a bit of a hack for now. Once we support model_origin for Chains models, we
# can drop the requirement for names on models.
model_suffix = str(uuid.uuid4()).split("-")[0]

# Models must be trusted to use the API KEY secret.
service = options.remote_provider.push(
truss_handle,
model_name=model_name,
model_name=model_name + model_suffix,
trusted=True,
publish=options.publish,
promote=options.promote,
Expand Down Expand Up @@ -158,6 +178,14 @@ def get_entrypoint(self) -> b10_service.TrussService:
)
return service

@property
def services(self) -> MutableMapping[str, b10_service.TrussService]:
return self._services

@property
def entrypoint_name(self) -> str:
return self._entrypoint

@property
def run_url(self) -> str:
return self.get_entrypoint.predict_url
Expand Down Expand Up @@ -221,4 +249,25 @@ def deploy_remotely(
chainlet_name_to_url[chainlet_descriptor.name] = service.predict_url
else:
chainlet_name_to_url[chainlet_descriptor.name] = "http://dummy"

if isinstance(options, definitions.DeploymentOptionsBaseten):
chainlets: List[b10_types.ChainletData] = []
entrypoint_name = chain_service.entrypoint_name

for chainlet_name, truss_service in chain_service.services.items():
baseten_service = cast(b10_service.BasetenService, truss_service)
chainlets.append(
b10_types.ChainletData(
name=chainlet_name,
oracle_version_id=baseten_service.model_version_id,
is_entrypoint=chainlet_name == entrypoint_name,
)
)

chain_id = options.remote_provider.create_chain(
chain_name=chain_service.name, chainlets=chainlets, publish=options.publish
)

print(f"Newly Created Chain: {chain_id}")

return chain_service
50 changes: 27 additions & 23 deletions truss-chains/truss_chains/example_chainlet.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,35 @@
import random

# For more on chains, check out https://truss.baseten.co/chains/intro.
import truss_chains as chains


class DummyGenerateData(chains.ChainletBase):
def run_remote(self) -> str:
return "abc"
# By inheriting chains.ChainletBase, the chains framework will know to create a chainlet that hosts the RandInt class.
class RandInt(chains.ChainletBase):

# run_remote must be implemented by all chainlets. This is the code that will be executed at inference time.
def run_remote(self, max_value: int) -> int:
return random.randint(1, max_value)


# The @chains.mark_entrypoint decorator indicates that this Chainlet is the entrypoint.
# Each chain must have exactly one entrypoint.
@chains.mark_entrypoint
class HelloWorld(chains.ChainletBase):
# chains.depends indicates that the HelloWorld chainlet depends on the RandInt Chainlet
# this enables the HelloWorld chainlet to call the RandInt chainlet
def __init__(self, rand_int=chains.depends(RandInt, retries=3)) -> None:
self._rand_int = rand_int

# Nesting the classes is a hack to make it *appear* like SplitText is from a different
# module.
class shared_chainlet:
class DummySplitText(chains.ChainletBase):
def run_remote(self, data: str) -> list[str]:
return [data[:2], data[2:]]
def run_remote(self, max_value: int) -> str:
num_repetitions = self._rand_int.run_remote(max_value)
return "Hello World! " * num_repetitions


class DummyExample(chains.ChainletBase):
def __init__(
self,
data_generator: DummyGenerateData = chains.depends(DummyGenerateData),
splitter: shared_chainlet.DummySplitText = chains.depends(
shared_chainlet.DummySplitText
),
context: chains.DeploymentContext = chains.depends_context(),
) -> None:
self._data_generator = data_generator
self._data_splitter = splitter
self._context = context
if __name__ == "__main__":
with chains.run_local():
hello_world_chain = HelloWorld()
result = hello_world_chain.run_remote(max_value=5)

def run_remote(self) -> list[str]:
return self._data_splitter.run_remote(self._data_generator.run_remote())
print(result)
# Hello World! Hello World! Hello World!
6 changes: 5 additions & 1 deletion truss-chains/truss_chains/framework.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def _example_chainlet_code() -> str:
logging.error("example_chainlet` is broken.", exc_info=True, stack_info=True)
return "<EXAMPLE CODE MISSING/BROKEN>"

example_name = example_chainlet.DummyExample.__name__
example_name = example_chainlet.HelloWorld.__name__
source = pathlib.Path(example_chainlet.__file__).read_text()
tree = ast.parse(source)
class_code = ""
Expand Down Expand Up @@ -720,6 +720,10 @@ def import_target(
) -> Iterator[Type[definitions.ABCChainlet]]:
module_path = pathlib.Path(module_path).resolve()
module_name = module_path.stem # Use the file's name as the module name
if not os.path.isfile(module_path):
raise ImportError(
f"`{module_path}` is not a file. You must point to a file where the entrypoint is defined."
)

error_msg = f"Could not import `{target_name}` from `{module_path}`. Check path."
spec = importlib.util.spec_from_file_location(module_name, module_path)
Expand Down
Loading

0 comments on commit 713f74d

Please sign in to comment.