-
Notifications
You must be signed in to change notification settings - Fork 53
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
6 changed files
with
93 additions
and
0 deletions.
There are no files selected for viewing
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
# Import necessary libraries | ||
import requests # Used for making HTTP requests | ||
import os # Used for reading the API key from the environment | ||
|
||
# Script-wide configuration values.
CHUNK_SIZE = 1024  # Number of bytes handled per read/write while streaming
XI_API_KEY = os.environ.get("ELEVEN_API_KEY")  # API key taken from the environment (None when unset)
VOICE_ID = "GBv7mTt0atIp3Br8iCZE"  # Identifier of the voice model used for synthesis
OUTPUT_PATH = "output.wav"  # File the generated audio is written to

# Streaming Text-to-Speech endpoint for the selected voice.
tts_url = "https://api.elevenlabs.io/v1/text-to-speech/" + VOICE_ID + "/stream"

# Headers sent with the API request; the API key authenticates the caller.
headers = {"Accept": "application/json", "xi-api-key": XI_API_KEY}
def generate_audio(text, timeout=60):
    """Convert *text* to speech via the streaming TTS endpoint and save it.

    The request is sent to ``tts_url`` with the module-level ``headers``; on
    success the audio bytes are streamed to ``OUTPUT_PATH`` in pieces of
    ``CHUNK_SIZE`` bytes. On an unsuccessful HTTP status the response body is
    printed and the output file is left untouched.

    Args:
        text: The text to synthesize.
        timeout: Timeout in seconds handed to ``requests.post`` so that a
            stalled connection cannot block the caller forever.

    Returns:
        True if the audio was written to ``OUTPUT_PATH``, False if the API
        answered with an error status.

    Raises:
        requests.exceptions.RequestException: On network failure or timeout.
    """
    # Payload for the API request: the text plus model and voice settings.
    payload = {
        "text": text,
        "model_id": "eleven_multilingual_v2",
        "voice_settings": {
            "stability": 1,
            "similarity_boost": 0.8,
            "style": 0.0,
            "use_speaker_boost": True,
        },
    }

    # NOTE(review): OUTPUT_PATH ends in ".wav" but no output format is
    # requested here; confirm the API's default encoding matches the extension.

    # The context manager releases the streamed connection back to the pool
    # even when writing the file fails part-way through.
    with requests.post(
        tts_url, headers=headers, json=payload, stream=True, timeout=timeout
    ) as response:
        if not response.ok:
            # Surface the API's error message and tell the caller it failed.
            print(response.text)
            return False

        with open(OUTPUT_PATH, "wb") as f:
            for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
                # Skip empty keep-alive chunks.
                if chunk:
                    f.write(chunk)

    # Inform the user of success.
    print("Audio stream saved successfully.")
    return True
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
# The 'requests', 'json' and 'os' libraries are imported. | ||
# 'requests' sends the HTTP request and parses the JSON response through its built-in .json() method; 'os' reads the API key from the environment. The 'json' module is imported but not used directly. | ||
import requests | ||
import json,os | ||
|
||
# The API key is read from the environment so that no secret lives in the
# source file. It authenticates the request below.
XI_API_KEY = os.getenv("ELEVEN_API_KEY")

# URL of the API endpoint the GET request is sent to.
url = "https://api.elevenlabs.io/v1/voices"

# Headers for the HTTP request: the accepted/sent content type and the API
# key used for authentication.
headers = {
    "Accept": "application/json",
    "xi-api-key": XI_API_KEY,
    "Content-Type": "application/json",
}

# Send the GET request. The timeout keeps the script from hanging forever
# when the server does not answer.
response = requests.get(url, headers=headers, timeout=30)

# Stop with a clear HTTP error (e.g. a missing or invalid API key) instead of
# failing later with a KeyError on the missing 'voices' entry.
response.raise_for_status()

# Parse the JSON response into a Python dictionary.
data = response.json()

# Print the name and id of every entry in the 'voices' list.
for voice in data['voices']:
    print(f"{voice['name']}; {voice['voice_id']}")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
import streamlit as st | ||
from streamlit_mic_recorder import speech_to_text | ||
from langchain_openai import ChatOpenAI | ||
from audio_gen import generate_audio | ||
|
||
# Chat model that answers the transcribed question.
llm = ChatOpenAI(model="gpt-4o")

# Page header.
st.title("VOICE ENABLED CHAT APP")
st.write("ask anything")

# Microphone speech-to-text widget; its result is used as the prompt.
text = speech_to_text(
    key="STT",
    language="en",
    just_once=True,
    use_container_width=True,
)

# Only query the model once the widget has produced a non-empty result.
if text:
    answer = llm.invoke(text).content
    st.write(answer)

    # Synthesize the answer, then play the audio file in the page.
    generate_audio(answer)
    st.audio(data="output.wav", format="audio/wav", autoplay=True)
Binary file not shown.
Binary file not shown.