diff --git a/pyproject.toml b/pyproject.toml
index d458074e..9d0acfa9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "elevenlabs"
-version = "1.50.1"
+version = "1.50.2"
description = ""
readme = "README.md"
authors = []
diff --git a/reference.md b/reference.md
index 6823081c..428a6b03 100644
--- a/reference.md
+++ b/reference.md
@@ -580,21 +580,16 @@ Converts text into speech using a voice of your choice and returns audio.
```python
-from elevenlabs import ElevenLabs, VoiceSettings
+from elevenlabs import ElevenLabs
client = ElevenLabs(
api_key="YOUR_API_KEY",
)
client.text_to_speech.convert(
- voice_id="pMsXgVXv3BLzUgSXRplE",
- optimize_streaming_latency="0",
- output_format="mp3_22050_32",
- text="It sure does, Jackie… My mama always said: “In Carolina, the air's so thick you can wear it!”",
- voice_settings=VoiceSettings(
- stability=0.5,
- similarity_boost=0.75,
- style=0.0,
- ),
+ voice_id="JBFqnCBsd6RMkjVDRZzb",
+ output_format="mp3_44100_128",
+ text="Hello! 你好! Hola! नमस्ते! Bonjour! こんにちは! مرحبا! 안녕하세요! Ciao! Cześć! Привіт! வணக்கம்!",
+ model_id="eleven_multilingual_v2",
)
```
@@ -635,7 +630,16 @@ client.text_to_speech.convert(
-
-**optimize_streaming_latency:** `typing.Optional[OptimizeStreamingLatency]` — You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model.
+**optimize_streaming_latency:** `typing.Optional[int]`
+
+You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model. Possible values:
+0 - default mode (no latency optimizations)
+1 - normal latency optimizations (about 50% of possible latency improvement of option 3)
+2 - strong latency optimizations (about 75% of possible latency improvement of option 3)
+3 - max latency optimizations
+4 - max latency optimizations, but also with the text normalizer turned off for even more latency savings (best latency, but can mispronounce e.g. numbers and dates).
+
+Defaults to None.
@@ -826,7 +830,16 @@ client.text_to_speech.convert_with_timestamps(
-
-**optimize_streaming_latency:** `typing.Optional[OptimizeStreamingLatency]` — You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model.
+**optimize_streaming_latency:** `typing.Optional[int]`
+
+You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model. Possible values:
+0 - default mode (no latency optimizations)
+1 - normal latency optimizations (about 50% of possible latency improvement of option 3)
+2 - strong latency optimizations (about 75% of possible latency improvement of option 3)
+3 - max latency optimizations
+4 - max latency optimizations, but also with the text normalizer turned off for even more latency savings (best latency, but can mispronounce e.g. numbers and dates).
+
+Defaults to None.
@@ -971,21 +984,16 @@ Converts text into speech using a voice of your choice and returns audio as an a
```python
-from elevenlabs import ElevenLabs, VoiceSettings
+from elevenlabs import ElevenLabs
client = ElevenLabs(
api_key="YOUR_API_KEY",
)
client.text_to_speech.convert_as_stream(
- voice_id="pMsXgVXv3BLzUgSXRplE",
- optimize_streaming_latency="0",
- output_format="mp3_22050_32",
- text="It sure does, Jackie… My mama always said: “In Carolina, the air's so thick you can wear it!”",
- voice_settings=VoiceSettings(
- stability=0.1,
- similarity_boost=0.3,
- style=0.2,
- ),
+ voice_id="JBFqnCBsd6RMkjVDRZzb",
+ output_format="mp3_44100_128",
+ text="Hello! 你好! Hola! नमस्ते! Bonjour! こんにちは! مرحبا! 안녕하세요! Ciao! Cześć! Привіт! வணக்கம்!",
+ model_id="eleven_multilingual_v2",
)
```
@@ -1026,7 +1034,16 @@ client.text_to_speech.convert_as_stream(
-
-**optimize_streaming_latency:** `typing.Optional[OptimizeStreamingLatency]` — You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model.
+**optimize_streaming_latency:** `typing.Optional[int]`
+
+You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model. Possible values:
+0 - default mode (no latency optimizations)
+1 - normal latency optimizations (about 50% of possible latency improvement of option 3)
+2 - strong latency optimizations (about 75% of possible latency improvement of option 3)
+3 - max latency optimizations
+4 - max latency optimizations, but also with the text normalizer turned off for even more latency savings (best latency, but can mispronounce e.g. numbers and dates).
+
+Defaults to None.
@@ -1221,7 +1238,16 @@ for chunk in response:
-
-**optimize_streaming_latency:** `typing.Optional[OptimizeStreamingLatency]` — You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model.
+**optimize_streaming_latency:** `typing.Optional[int]`
+
+You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model. Possible values:
+0 - default mode (no latency optimizations)
+1 - normal latency optimizations (about 50% of possible latency improvement of option 3)
+2 - strong latency optimizations (about 75% of possible latency improvement of option 3)
+3 - max latency optimizations
+4 - max latency optimizations, but also with the text normalizer turned off for even more latency savings (best latency, but can mispronounce e.g. numbers and dates).
+
+Defaults to None.
diff --git a/src/elevenlabs/__init__.py b/src/elevenlabs/__init__.py
index 1b45287f..8339bfa4 100644
--- a/src/elevenlabs/__init__.py
+++ b/src/elevenlabs/__init__.py
@@ -126,7 +126,6 @@
ModerationStatusResponseModelWarningStatus,
ObjectJsonSchemaProperty,
ObjectJsonSchemaPropertyPropertiesValue,
- OptimizeStreamingLatency,
OrbAvatar,
OutputFormat,
PhoneNumberAgentInfo,
@@ -410,7 +409,6 @@
"ModerationStatusResponseModelWarningStatus",
"ObjectJsonSchemaProperty",
"ObjectJsonSchemaPropertyPropertiesValue",
- "OptimizeStreamingLatency",
"OrbAvatar",
"OutputFormat",
"PhoneNumberAgentInfo",
diff --git a/src/elevenlabs/core/client_wrapper.py b/src/elevenlabs/core/client_wrapper.py
index 81697d1e..dfbcbfcc 100644
--- a/src/elevenlabs/core/client_wrapper.py
+++ b/src/elevenlabs/core/client_wrapper.py
@@ -16,7 +16,7 @@ def get_headers(self) -> typing.Dict[str, str]:
headers: typing.Dict[str, str] = {
"X-Fern-Language": "Python",
"X-Fern-SDK-Name": "elevenlabs",
- "X-Fern-SDK-Version": "1.50.1",
+ "X-Fern-SDK-Version": "1.50.2",
}
if self._api_key is not None:
headers["xi-api-key"] = self._api_key
diff --git a/src/elevenlabs/speech_to_speech/client.py b/src/elevenlabs/speech_to_speech/client.py
index d4e661b7..0d027b43 100644
--- a/src/elevenlabs/speech_to_speech/client.py
+++ b/src/elevenlabs/speech_to_speech/client.py
@@ -3,7 +3,6 @@
import typing
from ..core.client_wrapper import SyncClientWrapper
from .. import core
-from ..types.optimize_streaming_latency import OptimizeStreamingLatency
from ..types.output_format import OutputFormat
from ..core.request_options import RequestOptions
from ..core.jsonable_encoder import jsonable_encoder
@@ -28,7 +27,7 @@ def convert(
*,
audio: core.File,
enable_logging: typing.Optional[bool] = None,
- optimize_streaming_latency: typing.Optional[OptimizeStreamingLatency] = None,
+ optimize_streaming_latency: typing.Optional[int] = None,
output_format: typing.Optional[OutputFormat] = None,
model_id: typing.Optional[str] = OMIT,
voice_settings: typing.Optional[str] = OMIT,
@@ -50,8 +49,15 @@ def convert(
enable_logging : typing.Optional[bool]
When enable_logging is set to false full privacy mode will be used for the request. This will mean history features are unavailable for this request, including request stitching. Full privacy mode may only be used by enterprise customers.
- optimize_streaming_latency : typing.Optional[OptimizeStreamingLatency]
- You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model.
+ optimize_streaming_latency : typing.Optional[int]
+ You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model. Possible values:
+ 0 - default mode (no latency optimizations)
+ 1 - normal latency optimizations (about 50% of possible latency improvement of option 3)
+ 2 - strong latency optimizations (about 75% of possible latency improvement of option 3)
+ 3 - max latency optimizations
+ 4 - max latency optimizations, but also with the text normalizer turned off for even more latency savings (best latency, but can mispronounce e.g. numbers and dates).
+
+ Defaults to None.
output_format : typing.Optional[OutputFormat]
The output format of the generated audio.
@@ -123,9 +129,9 @@ def convert_as_stream(
voice_id: str,
*,
audio: core.File,
- enable_logging: typing.Optional[OptimizeStreamingLatency] = None,
- optimize_streaming_latency: typing.Optional[OutputFormat] = None,
- output_format: typing.Optional[str] = None,
+ enable_logging: typing.Optional[bool] = None,
+ optimize_streaming_latency: typing.Optional[int] = None,
+ output_format: typing.Optional[OutputFormat] = None,
model_id: typing.Optional[str] = OMIT,
voice_settings: typing.Optional[str] = OMIT,
seed: typing.Optional[int] = OMIT,
@@ -143,25 +149,21 @@ def convert_as_stream(
audio : core.File
See core.File for more documentation
- enable_logging : typing.Optional[OptimizeStreamingLatency]
- You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model.
+ enable_logging : typing.Optional[bool]
+ When enable_logging is set to false full privacy mode will be used for the request. This will mean history features are unavailable for this request, including request stitching. Full privacy mode may only be used by enterprise customers.
- optimize_streaming_latency : typing.Optional[OutputFormat]
- The output format of the generated audio.
+ optimize_streaming_latency : typing.Optional[int]
+ You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model. Possible values:
+ 0 - default mode (no latency optimizations)
+ 1 - normal latency optimizations (about 50% of possible latency improvement of option 3)
+ 2 - strong latency optimizations (about 75% of possible latency improvement of option 3)
+ 3 - max latency optimizations
+ 4 - max latency optimizations, but also with the text normalizer turned off for even more latency savings (best latency, but can mispronounce e.g. numbers and dates).
+
+ Defaults to None.
- output_format : typing.Optional[str]
- Output format of the generated audio. Must be one of:
- mp3_22050_32 - output format, mp3 with 22.05kHz sample rate at 32kbps.
- mp3_44100_32 - output format, mp3 with 44.1kHz sample rate at 32kbps.
- mp3_44100_64 - output format, mp3 with 44.1kHz sample rate at 64kbps.
- mp3_44100_96 - output format, mp3 with 44.1kHz sample rate at 96kbps.
- mp3_44100_128 - default output format, mp3 with 44.1kHz sample rate at 128kbps.
- mp3_44100_192 - output format, mp3 with 44.1kHz sample rate at 192kbps. Requires you to be subscribed to Creator tier or above.
- pcm_16000 - PCM format (S16LE) with 16kHz sample rate.
- pcm_22050 - PCM format (S16LE) with 22.05kHz sample rate.
- pcm_24000 - PCM format (S16LE) with 24kHz sample rate.
- pcm_44100 - PCM format (S16LE) with 44.1kHz sample rate. Requires you to be subscribed to Pro tier or above.
- ulaw_8000 - μ-law format (sometimes written mu-law, often approximated as u-law) with 8kHz sample rate. Note that this format is commonly used for Twilio audio inputs.
+ output_format : typing.Optional[OutputFormat]
+ The output format of the generated audio.
model_id : typing.Optional[str]
Identifier of the model that will be used, you can query them using GET /v1/models. The model needs to have support for speech to speech, you can check this using the can_do_voice_conversion property.
@@ -236,7 +238,7 @@ async def convert(
*,
audio: core.File,
enable_logging: typing.Optional[bool] = None,
- optimize_streaming_latency: typing.Optional[OptimizeStreamingLatency] = None,
+ optimize_streaming_latency: typing.Optional[int] = None,
output_format: typing.Optional[OutputFormat] = None,
model_id: typing.Optional[str] = OMIT,
voice_settings: typing.Optional[str] = OMIT,
@@ -258,8 +260,15 @@ async def convert(
enable_logging : typing.Optional[bool]
When enable_logging is set to false full privacy mode will be used for the request. This will mean history features are unavailable for this request, including request stitching. Full privacy mode may only be used by enterprise customers.
- optimize_streaming_latency : typing.Optional[OptimizeStreamingLatency]
- You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model.
+ optimize_streaming_latency : typing.Optional[int]
+ You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model. Possible values:
+ 0 - default mode (no latency optimizations)
+ 1 - normal latency optimizations (about 50% of possible latency improvement of option 3)
+ 2 - strong latency optimizations (about 75% of possible latency improvement of option 3)
+ 3 - max latency optimizations
+ 4 - max latency optimizations, but also with the text normalizer turned off for even more latency savings (best latency, but can mispronounce e.g. numbers and dates).
+
+ Defaults to None.
output_format : typing.Optional[OutputFormat]
The output format of the generated audio.
@@ -331,9 +340,9 @@ async def convert_as_stream(
voice_id: str,
*,
audio: core.File,
- enable_logging: typing.Optional[OptimizeStreamingLatency] = None,
- optimize_streaming_latency: typing.Optional[OutputFormat] = None,
- output_format: typing.Optional[str] = None,
+ enable_logging: typing.Optional[bool] = None,
+ optimize_streaming_latency: typing.Optional[int] = None,
+ output_format: typing.Optional[OutputFormat] = None,
model_id: typing.Optional[str] = OMIT,
voice_settings: typing.Optional[str] = OMIT,
seed: typing.Optional[int] = OMIT,
@@ -351,25 +360,21 @@ async def convert_as_stream(
audio : core.File
See core.File for more documentation
- enable_logging : typing.Optional[OptimizeStreamingLatency]
- You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model.
+ enable_logging : typing.Optional[bool]
+ When enable_logging is set to false full privacy mode will be used for the request. This will mean history features are unavailable for this request, including request stitching. Full privacy mode may only be used by enterprise customers.
- optimize_streaming_latency : typing.Optional[OutputFormat]
- The output format of the generated audio.
+ optimize_streaming_latency : typing.Optional[int]
+ You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model. Possible values:
+ 0 - default mode (no latency optimizations)
+ 1 - normal latency optimizations (about 50% of possible latency improvement of option 3)
+ 2 - strong latency optimizations (about 75% of possible latency improvement of option 3)
+ 3 - max latency optimizations
+ 4 - max latency optimizations, but also with the text normalizer turned off for even more latency savings (best latency, but can mispronounce e.g. numbers and dates).
+
+ Defaults to None.
- output_format : typing.Optional[str]
- Output format of the generated audio. Must be one of:
- mp3_22050_32 - output format, mp3 with 22.05kHz sample rate at 32kbps.
- mp3_44100_32 - output format, mp3 with 44.1kHz sample rate at 32kbps.
- mp3_44100_64 - output format, mp3 with 44.1kHz sample rate at 64kbps.
- mp3_44100_96 - output format, mp3 with 44.1kHz sample rate at 96kbps.
- mp3_44100_128 - default output format, mp3 with 44.1kHz sample rate at 128kbps.
- mp3_44100_192 - output format, mp3 with 44.1kHz sample rate at 192kbps. Requires you to be subscribed to Creator tier or above.
- pcm_16000 - PCM format (S16LE) with 16kHz sample rate.
- pcm_22050 - PCM format (S16LE) with 22.05kHz sample rate.
- pcm_24000 - PCM format (S16LE) with 24kHz sample rate.
- pcm_44100 - PCM format (S16LE) with 44.1kHz sample rate. Requires you to be subscribed to Pro tier or above.
- ulaw_8000 - μ-law format (sometimes written mu-law, often approximated as u-law) with 8kHz sample rate. Note that this format is commonly used for Twilio audio inputs.
+ output_format : typing.Optional[OutputFormat]
+ The output format of the generated audio.
model_id : typing.Optional[str]
Identifier of the model that will be used, you can query them using GET /v1/models. The model needs to have support for speech to speech, you can check this using the can_do_voice_conversion property.
diff --git a/src/elevenlabs/text_to_speech/client.py b/src/elevenlabs/text_to_speech/client.py
index 865cef6b..3352e96b 100644
--- a/src/elevenlabs/text_to_speech/client.py
+++ b/src/elevenlabs/text_to_speech/client.py
@@ -2,7 +2,6 @@
import typing
from ..core.client_wrapper import SyncClientWrapper
-from ..types.optimize_streaming_latency import OptimizeStreamingLatency
from ..types.output_format import OutputFormat
from ..types.voice_settings import VoiceSettings
from ..types.pronunciation_dictionary_version_locator import PronunciationDictionaryVersionLocator
@@ -44,7 +43,7 @@ def convert(
*,
text: str,
enable_logging: typing.Optional[bool] = None,
- optimize_streaming_latency: typing.Optional[OptimizeStreamingLatency] = None,
+ optimize_streaming_latency: typing.Optional[int] = None,
output_format: typing.Optional[OutputFormat] = None,
model_id: typing.Optional[str] = OMIT,
language_code: typing.Optional[str] = OMIT,
@@ -77,8 +76,15 @@ def convert(
enable_logging : typing.Optional[bool]
When enable_logging is set to false full privacy mode will be used for the request. This will mean history features are unavailable for this request, including request stitching. Full privacy mode may only be used by enterprise customers.
- optimize_streaming_latency : typing.Optional[OptimizeStreamingLatency]
- You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model.
+ optimize_streaming_latency : typing.Optional[int]
+ You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model. Possible values:
+ 0 - default mode (no latency optimizations)
+ 1 - normal latency optimizations (about 50% of possible latency improvement of option 3)
+ 2 - strong latency optimizations (about 75% of possible latency improvement of option 3)
+ 3 - max latency optimizations
+ 4 - max latency optimizations, but also with the text normalizer turned off for even more latency savings (best latency, but can mispronounce e.g. numbers and dates).
+
+ Defaults to None.
output_format : typing.Optional[OutputFormat]
The output format of the generated audio.
@@ -126,21 +132,16 @@ def convert(
Examples
--------
- from elevenlabs import ElevenLabs, VoiceSettings
+ from elevenlabs import ElevenLabs
client = ElevenLabs(
api_key="YOUR_API_KEY",
)
client.text_to_speech.convert(
- voice_id="pMsXgVXv3BLzUgSXRplE",
- optimize_streaming_latency="0",
- output_format="mp3_22050_32",
- text="It sure does, Jackie… My mama always said: “In Carolina, the air's so thick you can wear it!”",
- voice_settings=VoiceSettings(
- stability=0.5,
- similarity_boost=0.75,
- style=0.0,
- ),
+ voice_id="JBFqnCBsd6RMkjVDRZzb",
+ output_format="mp3_44100_128",
+ text="Hello! 你好! Hola! नमस्ते! Bonjour! こんにちは! مرحبا! 안녕하세요! Ciao! Cześć! Привіт! வணக்கம்!",
+ model_id="eleven_multilingual_v2",
)
"""
with self._client_wrapper.httpx_client.stream(
@@ -205,7 +206,7 @@ def convert_with_timestamps(
*,
text: str,
enable_logging: typing.Optional[bool] = None,
- optimize_streaming_latency: typing.Optional[OptimizeStreamingLatency] = None,
+ optimize_streaming_latency: typing.Optional[int] = None,
output_format: typing.Optional[OutputFormat] = None,
model_id: typing.Optional[str] = OMIT,
language_code: typing.Optional[str] = OMIT,
@@ -238,8 +239,15 @@ def convert_with_timestamps(
enable_logging : typing.Optional[bool]
When enable_logging is set to false full privacy mode will be used for the request. This will mean history features are unavailable for this request, including request stitching. Full privacy mode may only be used by enterprise customers.
- optimize_streaming_latency : typing.Optional[OptimizeStreamingLatency]
- You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model.
+ optimize_streaming_latency : typing.Optional[int]
+ You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model. Possible values:
+ 0 - default mode (no latency optimizations)
+ 1 - normal latency optimizations (about 50% of possible latency improvement of option 3)
+ 2 - strong latency optimizations (about 75% of possible latency improvement of option 3)
+ 3 - max latency optimizations
+ 4 - max latency optimizations, but also with the text normalizer turned off for even more latency savings (best latency, but can mispronounce e.g. numbers and dates).
+
+ Defaults to None.
output_format : typing.Optional[OutputFormat]
The output format of the generated audio.
@@ -361,7 +369,7 @@ def convert_as_stream(
*,
text: str,
enable_logging: typing.Optional[bool] = None,
- optimize_streaming_latency: typing.Optional[OptimizeStreamingLatency] = None,
+ optimize_streaming_latency: typing.Optional[int] = None,
output_format: typing.Optional[OutputFormat] = None,
model_id: typing.Optional[str] = OMIT,
language_code: typing.Optional[str] = OMIT,
@@ -394,8 +402,15 @@ def convert_as_stream(
enable_logging : typing.Optional[bool]
When enable_logging is set to false full privacy mode will be used for the request. This will mean history features are unavailable for this request, including request stitching. Full privacy mode may only be used by enterprise customers.
- optimize_streaming_latency : typing.Optional[OptimizeStreamingLatency]
- You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model.
+ optimize_streaming_latency : typing.Optional[int]
+ You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model. Possible values:
+ 0 - default mode (no latency optimizations)
+ 1 - normal latency optimizations (about 50% of possible latency improvement of option 3)
+ 2 - strong latency optimizations (about 75% of possible latency improvement of option 3)
+ 3 - max latency optimizations
+ 4 - max latency optimizations, but also with the text normalizer turned off for even more latency savings (best latency, but can mispronounce e.g. numbers and dates).
+
+ Defaults to None.
output_format : typing.Optional[OutputFormat]
The output format of the generated audio.
@@ -443,21 +458,16 @@ def convert_as_stream(
Examples
--------
- from elevenlabs import ElevenLabs, VoiceSettings
+ from elevenlabs import ElevenLabs
client = ElevenLabs(
api_key="YOUR_API_KEY",
)
client.text_to_speech.convert_as_stream(
- voice_id="pMsXgVXv3BLzUgSXRplE",
- optimize_streaming_latency="0",
- output_format="mp3_22050_32",
- text="It sure does, Jackie… My mama always said: “In Carolina, the air's so thick you can wear it!”",
- voice_settings=VoiceSettings(
- stability=0.1,
- similarity_boost=0.3,
- style=0.2,
- ),
+ voice_id="JBFqnCBsd6RMkjVDRZzb",
+ output_format="mp3_44100_128",
+ text="Hello! 你好! Hola! नमस्ते! Bonjour! こんにちは! مرحبا! 안녕하세요! Ciao! Cześć! Привіт! வணக்கம்!",
+ model_id="eleven_multilingual_v2",
)
"""
with self._client_wrapper.httpx_client.stream(
@@ -522,7 +532,7 @@ def stream_with_timestamps(
*,
text: str,
enable_logging: typing.Optional[bool] = None,
- optimize_streaming_latency: typing.Optional[OptimizeStreamingLatency] = None,
+ optimize_streaming_latency: typing.Optional[int] = None,
output_format: typing.Optional[OutputFormat] = None,
model_id: typing.Optional[str] = OMIT,
language_code: typing.Optional[str] = OMIT,
@@ -555,8 +565,15 @@ def stream_with_timestamps(
enable_logging : typing.Optional[bool]
When enable_logging is set to false full privacy mode will be used for the request. This will mean history features are unavailable for this request, including request stitching. Full privacy mode may only be used by enterprise customers.
- optimize_streaming_latency : typing.Optional[OptimizeStreamingLatency]
- You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model.
+ optimize_streaming_latency : typing.Optional[int]
+ You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model. Possible values:
+ 0 - default mode (no latency optimizations)
+ 1 - normal latency optimizations (about 50% of possible latency improvement of option 3)
+ 2 - strong latency optimizations (about 75% of possible latency improvement of option 3)
+ 3 - max latency optimizations
+ 4 - max latency optimizations, but also with the text normalizer turned off for even more latency savings (best latency, but can mispronounce e.g. numbers and dates).
+
+ Defaults to None.
output_format : typing.Optional[OutputFormat]
The output format of the generated audio.
@@ -693,7 +710,7 @@ async def convert(
*,
text: str,
enable_logging: typing.Optional[bool] = None,
- optimize_streaming_latency: typing.Optional[OptimizeStreamingLatency] = None,
+ optimize_streaming_latency: typing.Optional[int] = None,
output_format: typing.Optional[OutputFormat] = None,
model_id: typing.Optional[str] = OMIT,
language_code: typing.Optional[str] = OMIT,
@@ -726,8 +743,15 @@ async def convert(
enable_logging : typing.Optional[bool]
When enable_logging is set to false full privacy mode will be used for the request. This will mean history features are unavailable for this request, including request stitching. Full privacy mode may only be used by enterprise customers.
- optimize_streaming_latency : typing.Optional[OptimizeStreamingLatency]
- You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model.
+ optimize_streaming_latency : typing.Optional[int]
+ You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model. Possible values:
+ 0 - default mode (no latency optimizations)
+ 1 - normal latency optimizations (about 50% of possible latency improvement of option 3)
+ 2 - strong latency optimizations (about 75% of possible latency improvement of option 3)
+ 3 - max latency optimizations
+ 4 - max latency optimizations, but also with the text normalizer turned off for even more latency savings (best latency, but can mispronounce e.g. numbers and dates).
+
+ Defaults to None.
output_format : typing.Optional[OutputFormat]
The output format of the generated audio.
@@ -777,7 +801,7 @@ async def convert(
--------
import asyncio
- from elevenlabs import AsyncElevenLabs, VoiceSettings
+ from elevenlabs import AsyncElevenLabs
client = AsyncElevenLabs(
api_key="YOUR_API_KEY",
@@ -786,15 +810,10 @@ async def convert(
async def main() -> None:
await client.text_to_speech.convert(
- voice_id="pMsXgVXv3BLzUgSXRplE",
- optimize_streaming_latency="0",
- output_format="mp3_22050_32",
- text="It sure does, Jackie… My mama always said: “In Carolina, the air's so thick you can wear it!”",
- voice_settings=VoiceSettings(
- stability=0.5,
- similarity_boost=0.75,
- style=0.0,
- ),
+ voice_id="JBFqnCBsd6RMkjVDRZzb",
+ output_format="mp3_44100_128",
+ text="Hello! 你好! Hola! नमस्ते! Bonjour! こんにちは! مرحبا! 안녕하세요! Ciao! Cześć! Привіт! வணக்கம்!",
+ model_id="eleven_multilingual_v2",
)
@@ -862,7 +881,7 @@ async def convert_with_timestamps(
*,
text: str,
enable_logging: typing.Optional[bool] = None,
- optimize_streaming_latency: typing.Optional[OptimizeStreamingLatency] = None,
+ optimize_streaming_latency: typing.Optional[int] = None,
output_format: typing.Optional[OutputFormat] = None,
model_id: typing.Optional[str] = OMIT,
language_code: typing.Optional[str] = OMIT,
@@ -895,8 +914,15 @@ async def convert_with_timestamps(
enable_logging : typing.Optional[bool]
When enable_logging is set to false full privacy mode will be used for the request. This will mean history features are unavailable for this request, including request stitching. Full privacy mode may only be used by enterprise customers.
- optimize_streaming_latency : typing.Optional[OptimizeStreamingLatency]
- You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model.
+ optimize_streaming_latency : typing.Optional[int]
+ You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model. Possible values:
+ 0 - default mode (no latency optimizations)
+ 1 - normal latency optimizations (about 50% of possible latency improvement of option 3)
+ 2 - strong latency optimizations (about 75% of possible latency improvement of option 3)
+ 3 - max latency optimizations
+ 4 - max latency optimizations, but also with the text normalizer turned off for even more latency savings (best latency, but can mispronounce e.g. numbers and dates).
+
+ Defaults to None.
output_format : typing.Optional[OutputFormat]
The output format of the generated audio.
@@ -1026,7 +1052,7 @@ async def convert_as_stream(
*,
text: str,
enable_logging: typing.Optional[bool] = None,
- optimize_streaming_latency: typing.Optional[OptimizeStreamingLatency] = None,
+ optimize_streaming_latency: typing.Optional[int] = None,
output_format: typing.Optional[OutputFormat] = None,
model_id: typing.Optional[str] = OMIT,
language_code: typing.Optional[str] = OMIT,
@@ -1059,8 +1085,15 @@ async def convert_as_stream(
enable_logging : typing.Optional[bool]
When enable_logging is set to false full privacy mode will be used for the request. This will mean history features are unavailable for this request, including request stitching. Full privacy mode may only be used by enterprise customers.
- optimize_streaming_latency : typing.Optional[OptimizeStreamingLatency]
- You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model.
+ optimize_streaming_latency : typing.Optional[int]
+ You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model. Possible values:
+ 0 - default mode (no latency optimizations)
+ 1 - normal latency optimizations (about 50% of possible latency improvement of option 3)
+ 2 - strong latency optimizations (about 75% of possible latency improvement of option 3)
+ 3 - max latency optimizations
+ 4 - max latency optimizations, but also with the text normalizer turned off for even more latency savings (best latency, but can mispronounce e.g. numbers and dates).
+
+ Defaults to None.
output_format : typing.Optional[OutputFormat]
The output format of the generated audio.
@@ -1110,7 +1143,7 @@ async def convert_as_stream(
--------
import asyncio
- from elevenlabs import AsyncElevenLabs, VoiceSettings
+ from elevenlabs import AsyncElevenLabs
client = AsyncElevenLabs(
api_key="YOUR_API_KEY",
@@ -1119,15 +1152,10 @@ async def convert_as_stream(
async def main() -> None:
await client.text_to_speech.convert_as_stream(
- voice_id="pMsXgVXv3BLzUgSXRplE",
- optimize_streaming_latency="0",
- output_format="mp3_22050_32",
- text="It sure does, Jackie… My mama always said: “In Carolina, the air's so thick you can wear it!”",
- voice_settings=VoiceSettings(
- stability=0.1,
- similarity_boost=0.3,
- style=0.2,
- ),
+ voice_id="JBFqnCBsd6RMkjVDRZzb",
+ output_format="mp3_44100_128",
+ text="Hello! 你好! Hola! नमस्ते! Bonjour! こんにちは! مرحبا! 안녕하세요! Ciao! Cześć! Привіт! வணக்கம்!",
+ model_id="eleven_multilingual_v2",
)
@@ -1195,7 +1223,7 @@ async def stream_with_timestamps(
*,
text: str,
enable_logging: typing.Optional[bool] = None,
- optimize_streaming_latency: typing.Optional[OptimizeStreamingLatency] = None,
+ optimize_streaming_latency: typing.Optional[int] = None,
output_format: typing.Optional[OutputFormat] = None,
model_id: typing.Optional[str] = OMIT,
language_code: typing.Optional[str] = OMIT,
@@ -1228,8 +1256,15 @@ async def stream_with_timestamps(
enable_logging : typing.Optional[bool]
When enable_logging is set to false full privacy mode will be used for the request. This will mean history features are unavailable for this request, including request stitching. Full privacy mode may only be used by enterprise customers.
- optimize_streaming_latency : typing.Optional[OptimizeStreamingLatency]
- You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model.
+ optimize_streaming_latency : typing.Optional[int]
+ You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model. Possible values:
+ 0 - default mode (no latency optimizations)
+ 1 - normal latency optimizations (about 50% of possible latency improvement of option 3)
+ 2 - strong latency optimizations (about 75% of possible latency improvement of option 3)
+ 3 - max latency optimizations
+ 4 - max latency optimizations, but also with the text normalizer turned off for even more latency savings (best latency, but can mispronounce e.g. numbers and dates).
+
+ Defaults to None.
output_format : typing.Optional[OutputFormat]
The output format of the generated audio.
diff --git a/src/elevenlabs/types/__init__.py b/src/elevenlabs/types/__init__.py
index 86ca5244..6139eef5 100644
--- a/src/elevenlabs/types/__init__.py
+++ b/src/elevenlabs/types/__init__.py
@@ -131,7 +131,6 @@
from .moderation_status_response_model_warning_status import ModerationStatusResponseModelWarningStatus
from .object_json_schema_property import ObjectJsonSchemaProperty
from .object_json_schema_property_properties_value import ObjectJsonSchemaPropertyPropertiesValue
-from .optimize_streaming_latency import OptimizeStreamingLatency
from .orb_avatar import OrbAvatar
from .output_format import OutputFormat
from .phone_number_agent_info import PhoneNumberAgentInfo
@@ -348,7 +347,6 @@
"ModerationStatusResponseModelWarningStatus",
"ObjectJsonSchemaProperty",
"ObjectJsonSchemaPropertyPropertiesValue",
- "OptimizeStreamingLatency",
"OrbAvatar",
"OutputFormat",
"PhoneNumberAgentInfo",
diff --git a/src/elevenlabs/types/optimize_streaming_latency.py b/src/elevenlabs/types/optimize_streaming_latency.py
deleted file mode 100644
index 1b9a4dec..00000000
--- a/src/elevenlabs/types/optimize_streaming_latency.py
+++ /dev/null
@@ -1,5 +0,0 @@
-# This file was auto-generated by Fern from our API Definition.
-
-import typing
-
-OptimizeStreamingLatency = typing.Union[typing.Literal["0", "1", "2", "3", "4"], typing.Any]