You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Hi!
A friend of mine and I are trying to code a sort of "Voice AI LLM" in Discord. We need to read the voice input, store it to a file, and, once it has been completely stored (finished appending data), feed it into the AI. However, I'm having trouble detecting when the listening ends. Any help?
def callback(user, data: voice_recv.VoiceData):
    """Buffer received voice PCM and flush it to ``voice/recoding_2.wav``.

    This function is handed to ``voice_recv.BasicSink`` as its callback. The
    PCM bytes of each call are accumulated in the module-level
    ``history_audio`` buffer; once the buffer is larger than ``CHUNK_SIZE``
    bytes it is appended to the WAV file and reset.

    Args:
        user: The member the audio came from. It is not used here, so audio
            from every speaker ends up in the same buffer and the same file.
        data: Voice data whose ``pcm`` attribute holds the decoded audio bytes.
    """
    global history_audio
    history_audio += data.pcm
    if len(history_audio) <= CHUNK_SIZE:
        # Not enough audio buffered yet; wait for more packets.
        return

    pending = history_audio
    history_audio = b''

    file_name = 'voice/recoding_2.wav'
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs('voice', exist_ok=True)

    if os.path.exists(file_name):
        # The wave module cannot append in place, so read the existing frames
        # back and rewrite the file with the new audio added at the end. The
        # existing header parameters are reused so old and new frames agree.
        with wave.open(file_name, 'rb') as wav_file:
            params = wav_file.getparams()
            existing_frames = wav_file.readframes(wav_file.getnframes())
        with wave.open(file_name, 'wb') as wav_file:
            wav_file.setparams(params)
            wav_file.writeframes(existing_frames + pending)
    else:
        # Discord voice PCM is 48 kHz, 16-bit, stereo. Declaring it as mono
        # 32-bit (as before) keeps the duration right but plays back as noise.
        with wave.open(file_name, 'wb') as wav_file:
            wav_file.setnchannels(2)
            wav_file.setsampwidth(2)
            wav_file.setframerate(48000)
            wav_file.writeframes(pending)

    ## voice power level, how loud the user is speaking
    # ext_data = packet.extension_data.get(voice_recv.ExtensionID.audio_power)
    # value = int.from_bytes(ext_data, 'big')
    # power = 127-(value & 127)
    # print('#' * int(power * (79/128)))
    ## instead of 79 you can use shutil.get_terminal_size().columns-1
vc = await ctx.author.voice.channel.connect(cls=voice_recv.VoiceRecvClient)
try:
# Start listening to the audio
vc.listen(voice_recv.BasicSink(callback))
# Loop until the voice client stops listening
finally:
print("SEND TO AI")`
P.S. Sorry for the bad English (I've currently been awake for 38 hours and it's not my first language).
The text was updated successfully, but these errors were encountered:
Update:
def on_voice_member_speaking_start(member: discord.Member)
def on_voice_member_speaking_stop(member: discord.Member)
I found these two in the README, but I don't understand how to implement them.
Hi!
A friend of mine and I are trying to code a sort of "Voice AI LLM" in Discord. We need to read the voice input, store it to a file, and, once it has been completely stored (finished appending data), feed it into the AI. However, I'm having trouble detecting when the listening ends. Any help?
`def callback(user, data: voice_recv.VoiceData):
global history_audio
audio = data.pcm
history_audio += audio
The text was updated successfully, but these errors were encountered: