SDK regeneration

elevenlabs · Dec 14, 2024 · c17e159
1 parent 8b3be4a
commit c17e159
Show file tree

Hide file tree

Showing 8 changed files with 204 additions and 147 deletions.
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "elevenlabs"
-version = "1.50.1"
+version = "1.50.2"
 description = ""
 readme = "README.md"
 authors = []

diff --git a/reference.md b/reference.md
@@ -580,21 +580,16 @@ Converts text into speech using a voice of your choice and returns audio.
 <dd>
 
 ```python
-from elevenlabs import ElevenLabs, VoiceSettings
+from elevenlabs import ElevenLabs
 
 client = ElevenLabs(
     api_key="YOUR_API_KEY",
 )
 client.text_to_speech.convert(
-    voice_id="pMsXgVXv3BLzUgSXRplE",
-    optimize_streaming_latency="0",
-    output_format="mp3_22050_32",
-    text="It sure does, Jackie… My mama always said: “In Carolina, the air's so thick you can wear it!”",
-    voice_settings=VoiceSettings(
-        stability=0.5,
-        similarity_boost=0.75,
-        style=0.0,
-    ),
+    voice_id="JBFqnCBsd6RMkjVDRZzb",
+    output_format="mp3_44100_128",
+    text="Hello! 你好! Hola! नमस्ते! Bonjour! こんにちは! مرحبا! 안녕하세요! Ciao! Cześć! Привіт! வணக்கம்!",
+    model_id="eleven_multilingual_v2",
 )
 
 ```
@@ -635,7 +630,16 @@ client.text_to_speech.convert(
 <dl>
 <dd>
 
-**optimize_streaming_latency:** `typing.Optional[OptimizeStreamingLatency]` — You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model.
+**optimize_streaming_latency:** `typing.Optional[int]` 
+
+You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model. Possible values:
+0 - default mode (no latency optimizations)
+1 - normal latency optimizations (about 50% of possible latency improvement of option 3)
+2 - strong latency optimizations (about 75% of possible latency improvement of option 3)
+3 - max latency optimizations
+4 - max latency optimizations, but also with text normalizer turned off for even more latency savings (best latency, but can mispronounce eg numbers and dates).
+
+Defaults to None.
 
 </dd>
 </dl>
@@ -826,7 +830,16 @@ client.text_to_speech.convert_with_timestamps(
 <dl>
 <dd>
 
-**optimize_streaming_latency:** `typing.Optional[OptimizeStreamingLatency]` — You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model.
+**optimize_streaming_latency:** `typing.Optional[int]` 
+
+You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model. Possible values:
+0 - default mode (no latency optimizations)
+1 - normal latency optimizations (about 50% of possible latency improvement of option 3)
+2 - strong latency optimizations (about 75% of possible latency improvement of option 3)
+3 - max latency optimizations
+4 - max latency optimizations, but also with text normalizer turned off for even more latency savings (best latency, but can mispronounce eg numbers and dates).
+
+Defaults to None.
 
 </dd>
 </dl>
@@ -971,21 +984,16 @@ Converts text into speech using a voice of your choice and returns audio as an a
 <dd>
 
 ```python
-from elevenlabs import ElevenLabs, VoiceSettings
+from elevenlabs import ElevenLabs
 
 client = ElevenLabs(
     api_key="YOUR_API_KEY",
 )
 client.text_to_speech.convert_as_stream(
-    voice_id="pMsXgVXv3BLzUgSXRplE",
-    optimize_streaming_latency="0",
-    output_format="mp3_22050_32",
-    text="It sure does, Jackie… My mama always said: “In Carolina, the air's so thick you can wear it!”",
-    voice_settings=VoiceSettings(
-        stability=0.1,
-        similarity_boost=0.3,
-        style=0.2,
-    ),
+    voice_id="JBFqnCBsd6RMkjVDRZzb",
+    output_format="mp3_44100_128",
+    text="Hello! 你好! Hola! नमस्ते! Bonjour! こんにちは! مرحبا! 안녕하세요! Ciao! Cześć! Привіт! வணக்கம்!",
+    model_id="eleven_multilingual_v2",
 )
 
 ```
@@ -1026,7 +1034,16 @@ client.text_to_speech.convert_as_stream(
 <dl>
 <dd>
 
-**optimize_streaming_latency:** `typing.Optional[OptimizeStreamingLatency]` — You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model.
+**optimize_streaming_latency:** `typing.Optional[int]` 
+
+You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model. Possible values:
+0 - default mode (no latency optimizations)
+1 - normal latency optimizations (about 50% of possible latency improvement of option 3)
+2 - strong latency optimizations (about 75% of possible latency improvement of option 3)
+3 - max latency optimizations
+4 - max latency optimizations, but also with text normalizer turned off for even more latency savings (best latency, but can mispronounce eg numbers and dates).
+
+Defaults to None.
 
 </dd>
 </dl>
@@ -1221,7 +1238,16 @@ for chunk in response:
 <dl>
 <dd>
 
-**optimize_streaming_latency:** `typing.Optional[OptimizeStreamingLatency]` — You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model.
+**optimize_streaming_latency:** `typing.Optional[int]` 
+
+You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model. Possible values:
+0 - default mode (no latency optimizations)
+1 - normal latency optimizations (about 50% of possible latency improvement of option 3)
+2 - strong latency optimizations (about 75% of possible latency improvement of option 3)
+3 - max latency optimizations
+4 - max latency optimizations, but also with text normalizer turned off for even more latency savings (best latency, but can mispronounce eg numbers and dates).
+
+Defaults to None.
 
 </dd>
 </dl>

diff --git a/src/elevenlabs/__init__.py b/src/elevenlabs/__init__.py
@@ -126,7 +126,6 @@
     ModerationStatusResponseModelWarningStatus,
     ObjectJsonSchemaProperty,
     ObjectJsonSchemaPropertyPropertiesValue,
-    OptimizeStreamingLatency,
     OrbAvatar,
     OutputFormat,
     PhoneNumberAgentInfo,
@@ -410,7 +409,6 @@
     "ModerationStatusResponseModelWarningStatus",
     "ObjectJsonSchemaProperty",
     "ObjectJsonSchemaPropertyPropertiesValue",
-    "OptimizeStreamingLatency",
     "OrbAvatar",
     "OutputFormat",
     "PhoneNumberAgentInfo",

diff --git a/src/elevenlabs/core/client_wrapper.py b/src/elevenlabs/core/client_wrapper.py
@@ -16,7 +16,7 @@ def get_headers(self) -> typing.Dict[str, str]:
         headers: typing.Dict[str, str] = {
             "X-Fern-Language": "Python",
             "X-Fern-SDK-Name": "elevenlabs",
-            "X-Fern-SDK-Version": "1.50.1",
+            "X-Fern-SDK-Version": "1.50.2",
         }
         if self._api_key is not None:
             headers["xi-api-key"] = self._api_key

diff --git a/src/elevenlabs/speech_to_speech/client.py b/src/elevenlabs/speech_to_speech/client.py
@@ -3,7 +3,6 @@
 import typing
 from ..core.client_wrapper import SyncClientWrapper
 from .. import core
-from ..types.optimize_streaming_latency import OptimizeStreamingLatency
 from ..types.output_format import OutputFormat
 from ..core.request_options import RequestOptions
 from ..core.jsonable_encoder import jsonable_encoder
@@ -28,7 +27,7 @@ def convert(
         *,
         audio: core.File,
         enable_logging: typing.Optional[bool] = None,
-        optimize_streaming_latency: typing.Optional[OptimizeStreamingLatency] = None,
+        optimize_streaming_latency: typing.Optional[int] = None,
         output_format: typing.Optional[OutputFormat] = None,
         model_id: typing.Optional[str] = OMIT,
         voice_settings: typing.Optional[str] = OMIT,
@@ -50,8 +49,15 @@ def convert(
         enable_logging : typing.Optional[bool]
             When enable_logging is set to false full privacy mode will be used for the request. This will mean history features are unavailable for this request, including request stitching. Full privacy mode may only be used by enterprise customers.
 
-        optimize_streaming_latency : typing.Optional[OptimizeStreamingLatency]
-            You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model.
+        optimize_streaming_latency : typing.Optional[int]
+            You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model. Possible values:
+            0 - default mode (no latency optimizations)
+            1 - normal latency optimizations (about 50% of possible latency improvement of option 3)
+            2 - strong latency optimizations (about 75% of possible latency improvement of option 3)
+            3 - max latency optimizations
+            4 - max latency optimizations, but also with text normalizer turned off for even more latency savings (best latency, but can mispronounce eg numbers and dates).
+
+            Defaults to None.
 
         output_format : typing.Optional[OutputFormat]
             The output format of the generated audio.
@@ -123,9 +129,9 @@ def convert_as_stream(
         voice_id: str,
         *,
         audio: core.File,
-        enable_logging: typing.Optional[OptimizeStreamingLatency] = None,
-        optimize_streaming_latency: typing.Optional[OutputFormat] = None,
-        output_format: typing.Optional[str] = None,
+        enable_logging: typing.Optional[bool] = None,
+        optimize_streaming_latency: typing.Optional[int] = None,
+        output_format: typing.Optional[OutputFormat] = None,
         model_id: typing.Optional[str] = OMIT,
         voice_settings: typing.Optional[str] = OMIT,
         seed: typing.Optional[int] = OMIT,
@@ -143,25 +149,21 @@ def convert_as_stream(
         audio : core.File
             See core.File for more documentation
 
-        enable_logging : typing.Optional[OptimizeStreamingLatency]
-            You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model.
+        enable_logging : typing.Optional[bool]
+            When enable_logging is set to false full privacy mode will be used for the request. This will mean history features are unavailable for this request, including request stitching. Full privacy mode may only be used by enterprise customers.
 
-        optimize_streaming_latency : typing.Optional[OutputFormat]
-            The output format of the generated audio.
+        optimize_streaming_latency : typing.Optional[int]
+            You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model. Possible values:
+            0 - default mode (no latency optimizations)
+            1 - normal latency optimizations (about 50% of possible latency improvement of option 3)
+            2 - strong latency optimizations (about 75% of possible latency improvement of option 3)
+            3 - max latency optimizations
+            4 - max latency optimizations, but also with text normalizer turned off for even more latency savings (best latency, but can mispronounce eg numbers and dates).
+
+            Defaults to None.
 
-        output_format : typing.Optional[str]
-            Output format of the generated audio. Must be one of:
-            mp3_22050_32 - output format, mp3 with 22.05kHz sample rate at 32kbps.
-            mp3_44100_32 - output format, mp3 with 44.1kHz sample rate at 32kbps.
-            mp3_44100_64 - output format, mp3 with 44.1kHz sample rate at 64kbps.
-            mp3_44100_96 - output format, mp3 with 44.1kHz sample rate at 96kbps.
-            mp3_44100_128 - default output format, mp3 with 44.1kHz sample rate at 128kbps.
-            mp3_44100_192 - output format, mp3 with 44.1kHz sample rate at 192kbps. Requires you to be subscribed to Creator tier or above.
-            pcm_16000 - PCM format (S16LE) with 16kHz sample rate.
-            pcm_22050 - PCM format (S16LE) with 22.05kHz sample rate.
-            pcm_24000 - PCM format (S16LE) with 24kHz sample rate.
-            pcm_44100 - PCM format (S16LE) with 44.1kHz sample rate. Requires you to be subscribed to Pro tier or above.
-            ulaw_8000 - μ-law format (sometimes written mu-law, often approximated as u-law) with 8kHz sample rate. Note that this format is commonly used for Twilio audio inputs.
+        output_format : typing.Optional[OutputFormat]
+            The output format of the generated audio.
 
         model_id : typing.Optional[str]
             Identifier of the model that will be used, you can query them using GET /v1/models. The model needs to have support for speech to speech, you can check this using the can_do_voice_conversion property.
@@ -236,7 +238,7 @@ async def convert(
         *,
         audio: core.File,
         enable_logging: typing.Optional[bool] = None,
-        optimize_streaming_latency: typing.Optional[OptimizeStreamingLatency] = None,
+        optimize_streaming_latency: typing.Optional[int] = None,
         output_format: typing.Optional[OutputFormat] = None,
         model_id: typing.Optional[str] = OMIT,
         voice_settings: typing.Optional[str] = OMIT,
@@ -258,8 +260,15 @@ async def convert(
         enable_logging : typing.Optional[bool]
             When enable_logging is set to false full privacy mode will be used for the request. This will mean history features are unavailable for this request, including request stitching. Full privacy mode may only be used by enterprise customers.
 
-        optimize_streaming_latency : typing.Optional[OptimizeStreamingLatency]
-            You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model.
+        optimize_streaming_latency : typing.Optional[int]
+            You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model. Possible values:
+            0 - default mode (no latency optimizations)
+            1 - normal latency optimizations (about 50% of possible latency improvement of option 3)
+            2 - strong latency optimizations (about 75% of possible latency improvement of option 3)
+            3 - max latency optimizations
+            4 - max latency optimizations, but also with text normalizer turned off for even more latency savings (best latency, but can mispronounce eg numbers and dates).
+
+            Defaults to None.
 
         output_format : typing.Optional[OutputFormat]
             The output format of the generated audio.
@@ -331,9 +340,9 @@ async def convert_as_stream(
         voice_id: str,
         *,
         audio: core.File,
-        enable_logging: typing.Optional[OptimizeStreamingLatency] = None,
-        optimize_streaming_latency: typing.Optional[OutputFormat] = None,
-        output_format: typing.Optional[str] = None,
+        enable_logging: typing.Optional[bool] = None,
+        optimize_streaming_latency: typing.Optional[int] = None,
+        output_format: typing.Optional[OutputFormat] = None,
         model_id: typing.Optional[str] = OMIT,
         voice_settings: typing.Optional[str] = OMIT,
         seed: typing.Optional[int] = OMIT,
@@ -351,25 +360,21 @@ async def convert_as_stream(
         audio : core.File
             See core.File for more documentation
 
-        enable_logging : typing.Optional[OptimizeStreamingLatency]
-            You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model.
+        enable_logging : typing.Optional[bool]
+            When enable_logging is set to false full privacy mode will be used for the request. This will mean history features are unavailable for this request, including request stitching. Full privacy mode may only be used by enterprise customers.
 
-        optimize_streaming_latency : typing.Optional[OutputFormat]
-            The output format of the generated audio.
+        optimize_streaming_latency : typing.Optional[int]
+            You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model. Possible values:
+            0 - default mode (no latency optimizations)
+            1 - normal latency optimizations (about 50% of possible latency improvement of option 3)
+            2 - strong latency optimizations (about 75% of possible latency improvement of option 3)
+            3 - max latency optimizations
+            4 - max latency optimizations, but also with text normalizer turned off for even more latency savings (best latency, but can mispronounce eg numbers and dates).
+
+            Defaults to None.
 
-        output_format : typing.Optional[str]
-            Output format of the generated audio. Must be one of:
-            mp3_22050_32 - output format, mp3 with 22.05kHz sample rate at 32kbps.
-            mp3_44100_32 - output format, mp3 with 44.1kHz sample rate at 32kbps.
-            mp3_44100_64 - output format, mp3 with 44.1kHz sample rate at 64kbps.
-            mp3_44100_96 - output format, mp3 with 44.1kHz sample rate at 96kbps.
-            mp3_44100_128 - default output format, mp3 with 44.1kHz sample rate at 128kbps.
-            mp3_44100_192 - output format, mp3 with 44.1kHz sample rate at 192kbps. Requires you to be subscribed to Creator tier or above.
-            pcm_16000 - PCM format (S16LE) with 16kHz sample rate.
-            pcm_22050 - PCM format (S16LE) with 22.05kHz sample rate.
-            pcm_24000 - PCM format (S16LE) with 24kHz sample rate.
-            pcm_44100 - PCM format (S16LE) with 44.1kHz sample rate. Requires you to be subscribed to Pro tier or above.
-            ulaw_8000 - μ-law format (sometimes written mu-law, often approximated as u-law) with 8kHz sample rate. Note that this format is commonly used for Twilio audio inputs.
+        output_format : typing.Optional[OutputFormat]
+            The output format of the generated audio.
 
         model_id : typing.Optional[str]
             Identifier of the model that will be used, you can query them using GET /v1/models. The model needs to have support for speech to speech, you can check this using the can_do_voice_conversion property.