Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add TTS FastAPI Endpoint and run-api.sh Script #950

Closed
wants to merge 19 commits into from
93 changes: 93 additions & 0 deletions api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
from fastapi import FastAPI, HTTPException
from fastapi.responses import Response
from pydantic import BaseModel
import os
from io import BytesIO
from core import run_tts_script
from tabs.inference.inference import (
extract_model_and_epoch,
names,
match_index,
get_speakers_id,
)

# Initialize FastAPI app
app = FastAPI()
print(names)

# Default values, resolved once at import time and reused by every request.
default_voice_name = "ur-PK-UzmaNeural"

# Sort the discovered model files with the key computed by
# extract_model_and_epoch so the selection is deterministic.
_sorted_model_files = sorted(names, key=extract_model_and_epoch)
if not _sorted_model_files:
    # Fail with an actionable message instead of a bare IndexError.
    raise RuntimeError(
        "No voice model files were found; add at least one model before starting the API."
    )
# The second entry is preferred (unchanged behavior); when only one model is
# available, fall back to it rather than crashing at import time.
default_model_file = _sorted_model_files[min(1, len(_sorted_model_files) - 1)]
print(f"Using default model: {default_model_file}")
default_index_file = match_index(default_model_file)

# Query the speaker ids once; use the first id, or 0 when none are reported.
_speaker_ids = get_speakers_id(default_model_file)
default_sid = _speaker_ids[0] if _speaker_ids else 0


# Request schema
class TTSRequest(BaseModel):
    """JSON body accepted by the TTS endpoint."""

    # Text to synthesize; this is the only field a client has to send.
    tts_text: str


def gen_random_string(length=5):
    """Return ``length`` random characters from lowercase ASCII letters and digits."""
    from random import choices
    from string import ascii_lowercase, digits

    alphabet = ascii_lowercase + digits
    picked = choices(alphabet, k=length)
    return "".join(picked)

def _remove_if_exists(path):
    """Delete ``path``; a missing file is fine, other OS errors are only reported."""
    try:
        os.remove(path)
    except FileNotFoundError:
        # Already gone (or never created) - nothing to clean up.
        pass
    except OSError as cleanup_error:
        # Cleanup is best-effort: never let it mask the response or the real error.
        print(f"Error during cleanup: {cleanup_error}")


@app.post("/tts")
async def tts_endpoint(request: TTSRequest):
    """Synthesize ``request.tts_text`` with the default voice/model and return WAV bytes.

    Args:
        request: Request body carrying the text to synthesize.

    Returns:
        A ``Response`` with media type ``audio/wav`` whose content is the
        generated audio file.

    Raises:
        HTTPException: With status 500 when the audio file is not produced or
            when any other error occurs during conversion.
    """
    # Every file that may have been written for this request; removed in
    # ``finally`` so nothing is left behind on either success or failure.
    generated_paths = set()
    try:
        ran_file_name = gen_random_string(5)
        audio_dir = os.path.join(os.getcwd(), "assets", "audios")
        output_tts_path = os.path.join(audio_dir, f"{ran_file_name}_tts_output.wav")
        output_rvc_path = os.path.join(audio_dir, f"{ran_file_name}_tts_rvc_output.wav")
        generated_paths.update((output_tts_path, output_rvc_path))

        # Run the TTS script with default parameters.
        # NOTE(review): this call is synchronous inside an async handler, so
        # the event loop is blocked while it runs - confirm this serialization
        # of requests is intended before moving it to a worker thread.
        _, audio_file_path = run_tts_script(
            tts_file=None,
            tts_text=request.tts_text,
            tts_voice=default_voice_name,
            tts_rate=1,  # Default TTS speed
            pitch=5,  # Default pitch
            filter_radius=5,
            index_rate=0.65,
            volume_envelope=1,
            protect=0.5,
            hop_length=128,
            f0_method="rmvpe",
            output_tts_path=output_tts_path,
            output_rvc_path=output_rvc_path,
            pth_path=default_model_file,
            index_path=default_index_file,
            split_audio=False,
            f0_autotune=False,
            f0_autotune_strength=1.0,
            clean_audio=True,
            clean_strength=0.5,
            export_format="WAV",
            f0_file=None,
            embedder_model="contentvec",
            embedder_model_custom=None,
            sid=default_sid,
        )

        # Check if the audio file exists
        if not os.path.exists(audio_file_path):
            raise HTTPException(
                status_code=500, detail="Audio file was not generated successfully."
            )
        # The returned path may or may not be one of the two paths above
        # (presumably it is the converted output - verify against
        # run_tts_script); the set de-duplicates it either way.
        generated_paths.add(audio_file_path)

        # Read the audio file as bytes
        with open(audio_file_path, "rb") as audio_file:
            audio_bytes = audio_file.read()

        # Return audio bytes
        return Response(content=audio_bytes, media_type="audio/wav")
    except HTTPException:
        # Already a well-formed API error; do not re-wrap its message.
        raise
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Error during TTS conversion: {str(e)}"
        ) from e
    finally:
        # Cleanup: delete every generated file, each one independently.
        for path in generated_paths:
            _remove_if_exists(path)

1 change: 1 addition & 0 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ def launch_gradio(port):
share="--share" in sys.argv,
inbrowser="--open" in sys.argv,
server_port=port,
server_name="0.0.0.0"
)


Expand Down
61 changes: 61 additions & 0 deletions client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import requests
import sounddevice as sd
import wave
from io import BytesIO
import numpy as np

def tts_client(api_url: str, text: str, timeout: float = 600.0):
    """
    Sends text to the TTS API, receives audio, and plays it.

    Errors are reported on stdout rather than raised.

    Args:
        api_url (str): The URL of the TTS API endpoint.
        text (str): The text to synthesize into speech.
        timeout (float): Seconds to wait for the server before giving up.
            Pass None to wait indefinitely (the previous behavior).
    """
    # WAV sample width in bytes -> NumPy dtype. 8-bit PCM WAV is unsigned,
    # wider PCM samples are signed.
    sample_dtypes = {1: np.uint8, 2: np.int16, 4: np.int32}

    try:
        # Send text to the API; the timeout keeps a dead server from
        # hanging the client forever.
        response = requests.post(api_url, json={"tts_text": text}, timeout=timeout)
        response.raise_for_status()  # Raise exception for HTTP errors

        # Convert response bytes to a WAV file object
        audio_bytes = BytesIO(response.content)
        with wave.open(audio_bytes, "rb") as wav_file:
            # Extract audio parameters
            sample_rate = wav_file.getframerate()
            n_channels = wav_file.getnchannels()
            sample_width = wav_file.getsampwidth()
            audio_data = wav_file.readframes(wav_file.getnframes())

        if sample_width not in sample_dtypes:
            raise ValueError(f"Unsupported WAV sample width: {sample_width} bytes")

        # Convert audio data to NumPy array for playback. Samples of the
        # different channels are interleaved in the frame data, so reshape to
        # (frames, channels); otherwise multi-channel audio would be played
        # as a single, longer mono stream.
        audio_array = np.frombuffer(audio_data, dtype=sample_dtypes[sample_width])
        audio_array = audio_array.reshape(-1, n_channels)

        # Play audio using sounddevice
        print("Playing audio...")
        sd.play(audio_array, samplerate=sample_rate)
        sd.wait()  # Wait until playback is finished
        print("Audio playback finished.")

    except requests.exceptions.RequestException as e:
        print(f"Error communicating with the TTS API: {e}")
    except Exception as e:
        # Deliberately broad: this is a command-line demo client that
        # reports any failure instead of crashing with a traceback.
        print(f"An error occurred: {e}")

if __name__ == "__main__":
    # Sample text that is sent to the server for synthesis.
    text = """
ایک دور دراز گاؤں میں ایک ننھا شہزادہ رہتا تھا جس کا نام ارحم تھا۔ ارحم نہایت ذہین، بہادر اور دل کا نرم تھا۔ گاؤں کے لوگ اس سے بہت محبت کرتے تھے کیونکہ وہ ہمیشہ دوسروں کی مدد کے لئے تیار رہتا تھا۔

ایک دن گاؤں کے قریب ایک خوفناک دیو آگیا جو گاؤں کے کھیتوں اور گھروں کو تباہ کر رہا تھا۔ گاؤں کے لوگ خوف زدہ تھے اور کوئی بھی دیو کا سامنا کرنے کو تیار نہ تھا۔ بزرگوں نے کہا کہ صرف کسی بہادر شخص کی قربانی ہی دیو کو روک سکتی ہے۔

ارحم نے یہ سنا تو اس کے دل میں گاؤں کو بچانے کا عزم پیدا ہوا۔ وہ اپنے والدین کے پاس گیا اور ان سے کہا، "مجھے دیو کا سامنا کرنا ہے تاکہ گاؤں محفوظ ہو جائے۔" والدین کے آنکھوں میں آنسو تھے لیکن انہوں نے اپنے بیٹے کے عزم کو سلام کیا۔

ارحم نے اپنی تلوار اٹھائی اور دیو کے غار کی طرف روانہ ہو گیا۔ راستے میں اس نے خوب دعائیں کیں اور اپنے اللہ پر بھروسہ رکھا۔ جب وہ دیو کے سامنے پہنچا تو دیو ہنسا اور کہا، "ایک ننھا لڑکا میری طاقت کا مقابلہ کرے گا؟"

لیکن ارحم نے ہمت نہیں ہاری۔ اس نے اپنے دل کی طاقت سے دیو کا مقابلہ کیا۔ آخر کار دیو کو شکست ہوئی اور وہ غائب ہو گیا۔ ارحم نے گاؤں کو بچا لیا۔

جب وہ واپس آیا تو گاؤں کے لوگ خوشی سے جھوم اٹھے۔ ارحم کی بہادری کی کہانی ہر طرف مشہور ہو گئی اور وہ گاؤں کے ہیرو بن گیا۔
"""

    # Address of the TTS endpoint to query.
    api_url = "http://5.9.81.185:9033/tts"

    # Request the audio and play it back.
    tts_client(api_url=api_url, text=text)
44 changes: 20 additions & 24 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,45 +1,41 @@
# Core dependencies
pip>=23.3; sys_platform == 'darwin'
wheel; sys_platform == 'darwin'
PyYAML; sys_platform == 'darwin'
numpy==1.23.5
requests>=2.31.0,<2.32.0
PyYAML
numpy
requests
tqdm
wget

# Audio processing
ffmpeg-python>=0.2.0
faiss-cpu==1.7.3
librosa==0.9.2
scipy==1.11.1
soundfile==0.12.1
ffmpeg-python
faiss-cpu
librosa
scipy
soundfile
noisereduce
pedalboard
stftpitchshift
soxr

# Machine learning and deep learning
omegaconf>=2.0.6; sys_platform == 'darwin'
numba; sys_platform == 'linux'
numba==0.57.0; sys_platform == 'darwin' or sys_platform == 'win32'
torch==2.3.1
torchaudio==2.3.1
torchvision==0.18.1
torchcrepe==0.0.23
omegaconf
numba
torch
torchaudio
torchvision
torchcrepe
torchfcpe
einops
libf0
transformers==4.44.2
transformers

# Visualization and UI
matplotlib==3.7.2
matplotlib
tensorboard
gradio==5.5.0
gradio

# Miscellaneous utilities
certifi>=2023.07.22; sys_platform == 'darwin'
antlr4-python3-runtime==4.8; sys_platform == 'darwin'
tensorboardX
edge-tts==6.1.9
certifi
antlr4-python3-runtime
edge-tts
pypresence
beautifulsoup4
9 changes: 9 additions & 0 deletions run-api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/bin/sh
# Launch the TTS FastAPI service defined in api.py from the project's
# virtual environment.

# Set the terminal window title.
printf "\033]0;Applio\007"

# Activate the virtual environment in the current shell.
. .venv/bin/activate

# PyTorch MPS settings, exported for the server process.
export PYTORCH_ENABLE_MPS_FALLBACK=1
export PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0

clear
# api.py only defines the ASGI application object `app`; it has no
# `__main__` entry point and does not parse `--open`, so running it with
# `python api.py --open` would import the module and exit without serving.
# Serve it with uvicorn, using the same host/port as run-api.sh.
uvicorn api:app --host 0.0.0.0 --port 9033
8 changes: 8 additions & 0 deletions run-api.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/bin/sh
# Start the TTS API with uvicorn inside the project's virtual environment.

# Set the terminal window title.
printf "\033]0;Applio\007"

# Activate the virtual environment in the current shell.
. .venv/bin/activate

# PyTorch MPS settings, exported for the server process.
export PYTORCH_ENABLE_MPS_FALLBACK=1 PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0

# Serve the `app` object from api.py on all interfaces, port 9033.
uvicorn api:app --host 0.0.0.0 --port 9033
13 changes: 0 additions & 13 deletions run-applio.bat

This file was deleted.

Empty file modified run-applio.sh
100644 → 100755
Empty file.
88 changes: 0 additions & 88 deletions run-install.bat

This file was deleted.

Empty file modified run-install.sh
100644 → 100755
Empty file.
13 changes: 0 additions & 13 deletions run-tensorboard.bat

This file was deleted.

Loading