forked from mastra-ai/mastra
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request mastra-ai#1338 from mastra-ai/audio
TTS primitives in Mastra
- Loading branch information
Showing
28 changed files
with
988 additions
and
66 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
--- | ||
'@mastra/core': patch | ||
'@mastra/tts': patch | ||
'docs': patch | ||
--- | ||
|
||
TTS module |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,7 @@ | ||
// Key → label table for this docs folder; exported as the module default below. | ||
// NOTE(review): presumably read by the docs site to title sidebar entries — confirm. | ||
const meta = { | ||
"providers-and-models": "Providers and Models", | ||
generate: "generate", | ||
stream: "stream", | ||
}; | ||
|
||
export default meta; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
// Key → label table for this docs folder; exported as the module default below. | ||
// NOTE(review): presumably read by the docs site to title sidebar entries — confirm. | ||
const meta = { | ||
"providers-and-models": "Providers and Models", | ||
generate: "generate", | ||
stream: "stream", | ||
}; | ||
|
||
export default meta; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
# `generate()` | ||
|
||
The `generate()` method is used to interact with the TTS model to produce an audio response. This method accepts `text` and `voice` as parameters. | ||
|
||
## Parameters | ||
|
||
|
||
<PropertiesTable | ||
content={[ | ||
{ | ||
name: 'text', | ||
type: 'string', | ||
description: 'The text to be converted to speech.', | ||
}, | ||
{ | ||
name: 'voice', | ||
type: 'string', | ||
description: 'Voice ID to be used with generation.', | ||
}, | ||
]} | ||
/> | ||
|
||
## Returns | ||
|
||
<PropertiesTable | ||
content={[ | ||
{ | ||
name: 'audioResult', | ||
type: 'Readable', | ||
isOptional: true, | ||
description: 'The generated audio stream', | ||
}, | ||
]} | ||
/> | ||
|
||
## Examples | ||
|
||
### Basic Audio Generation (ElevenLabs) | ||
|
||
```typescript | ||
import { ElevenLabsTTS } from '@mastra/tts' | ||
|
||
const tts = new ElevenLabsTTS({ | ||
model: { | ||
name: 'eleven_multilingual_v2', | ||
apiKey: process.env.ELEVENLABS_API_KEY!, | ||
}, | ||
}); | ||
|
||
const voices = await tts.voices(); | ||
const voiceId = voices?.[0]?.voice_id!; | ||
|
||
const { audioResult } = await tts.generate({ text: "What is AI?", voice: voiceId }); | ||
|
||
await writeFile(path.join(process.cwd(), '/test-outputs/generate-output.mp3'), audioResult); | ||
``` | ||
|
||
### Basic Audio Generation (OpenAI) | ||
|
||
```typescript | ||
import { OpenAITTS } from '@mastra/tts' | ||
|
||
const tts = new OpenAITTS({ | ||
model: { | ||
name: 'tts-1', | ||
apiKey: process.env.OPENAI_API_KEY!, | ||
}, | ||
}); | ||
|
||
const voices = await tts.voices(); | ||
const voiceId = voices?.[0]?.voice_id!; | ||
|
||
const { audioResult } = await tts.generate({ text: "What is AI?", voice: voiceId }); | ||
|
||
const outputPath = path.join(process.cwd(), 'test-outputs/open-aigenerate-test.mp3'); | ||
writeFileSync(outputPath, audioResult); | ||
``` | ||
|
||
## Related Methods | ||
|
||
For streaming audio responses, see the [`stream()`](./stream.mdx) method documentation. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# Providers and Models | ||
|
||
## Most popular providers | ||
|
||
| Provider | Supported Models | | ||
| ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | | ||
| ElevenLabs | `eleven_multilingual_v2`, `eleven_flash_v2_5`, `eleven_flash_v2`, `eleven_multilingual_sts_v2`, `eleven_english_sts_v2` | | ||
| OpenAI | `tts-1`, `tts-1-hd` | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,165 @@ | ||
# `stream()` | ||
|
||
The `stream()` method is used to interact with the TTS model to produce an audio response stream. This method accepts `text` and `voice` as parameters. | ||
|
||
## Parameters | ||
|
||
|
||
<PropertiesTable | ||
content={[ | ||
{ | ||
name: 'text', | ||
type: 'string', | ||
description: 'The text to be converted to speech.', | ||
}, | ||
{ | ||
name: 'voice', | ||
type: 'string', | ||
description: 'Voice ID to be used with generation.', | ||
}, | ||
]} | ||
/> | ||
|
||
## Returns | ||
|
||
<PropertiesTable | ||
content={[ | ||
{ | ||
name: 'audioResult', | ||
type: 'Readable', | ||
isOptional: true, | ||
description: 'The generated audio stream', | ||
}, | ||
]} | ||
/> | ||
|
||
## Examples | ||
|
||
### Basic Audio Generation (ElevenLabs) | ||
|
||
```typescript | ||
import { ElevenLabsTTS } from '@mastra/tts' | ||
|
||
const tts = new ElevenLabsTTS({ | ||
model: { | ||
name: 'eleven_multilingual_v2', | ||
apiKey: process.env.ELEVENLABS_API_KEY!, | ||
}, | ||
}); | ||
|
||
const voices = await tts.voices(); | ||
const voiceId = voices?.[0]?.voice_id!; | ||
|
||
const { audioResult } = await tts.stream({ text: "What is AI?", voice: voiceId }); | ||
|
||
// Create a write stream to simulate real-time playback | ||
const outputPath = path.join(process.cwd(), '/test-outputs/streaming-output.mp3'); | ||
const writeStream = createWriteStream(outputPath); | ||
|
||
let firstChunkTime: number | null = null; | ||
let lastChunkTime: number | null = null; | ||
let totalChunks = 0; | ||
|
||
// Process chunks as they arrive | ||
for await (const chunk of audioResult) { | ||
if (!firstChunkTime) { | ||
firstChunkTime = Date.now(); | ||
} | ||
lastChunkTime = Date.now(); | ||
totalChunks++; | ||
|
||
// Write chunk immediately as it arrives | ||
writeStream.write(chunk); | ||
|
||
// Log timing of chunk arrival | ||
console.log(`Received chunk ${totalChunks} at ${lastChunkTime - firstChunkTime!}ms`); | ||
} | ||
|
||
writeStream.end() | ||
``` | ||
|
||
### Basic Audio Stream (ElevenLabs) | ||
|
||
```typescript | ||
import { ElevenLabsTTS } from '@mastra/tts' | ||
|
||
const tts = new ElevenLabsTTS({ | ||
model: { | ||
name: 'eleven_multilingual_v2', | ||
apiKey: process.env.ELEVENLABS_API_KEY!, | ||
}, | ||
}); | ||
|
||
const voices = await tts.voices(); | ||
const voiceId = voices?.[0]?.voice_id!; | ||
|
||
const { audioResult } = await tts.stream({ text: "What is AI?", voice: voiceId }); | ||
|
||
// Create a write stream to simulate real-time playback | ||
const outputPath = path.join(process.cwd(), '/test-outputs/streaming-output.mp3'); | ||
const writeStream = createWriteStream(outputPath); | ||
|
||
let firstChunkTime: number | null = null; | ||
let lastChunkTime: number | null = null; | ||
let totalChunks = 0; | ||
|
||
// Process chunks as they arrive | ||
for await (const chunk of audioResult) { | ||
if (!firstChunkTime) { | ||
firstChunkTime = Date.now(); | ||
} | ||
lastChunkTime = Date.now(); | ||
totalChunks++; | ||
|
||
// Write chunk immediately as it arrives | ||
writeStream.write(chunk); | ||
|
||
// Log timing of chunk arrival | ||
console.log(`Received chunk ${totalChunks} at ${lastChunkTime - firstChunkTime!}ms`); | ||
} | ||
|
||
writeStream.end() | ||
``` | ||
|
||
### Basic Audio Stream (OpenAI) | ||
|
||
```typescript | ||
import { OpenAITTS } from '@mastra/tts' | ||
|
||
const tts = new OpenAITTS({ | ||
model: { | ||
name: 'tts-1', | ||
apiKey: process.env.OPENAI_API_KEY!, | ||
}, | ||
}); | ||
|
||
const voices = await tts.voices(); | ||
const voiceId = voices?.[0]?.voice_id!; | ||
|
||
const { audioResult } = await tts.stream({ text: "What is AI?", voice: voiceId }); | ||
|
||
// Create a write stream to simulate real-time playback | ||
const outputPath = path.join(process.cwd(), '/test-outputs/streaming-output.mp3'); | ||
const writeStream = createWriteStream(outputPath); | ||
|
||
let firstChunkTime: number | null = null; | ||
let lastChunkTime: number | null = null; | ||
let totalChunks = 0; | ||
|
||
// Process chunks as they arrive | ||
for await (const chunk of audioResult) { | ||
if (!firstChunkTime) { | ||
firstChunkTime = Date.now(); | ||
} | ||
lastChunkTime = Date.now(); | ||
totalChunks++; | ||
|
||
// Write chunk immediately as it arrives | ||
writeStream.write(chunk); | ||
|
||
// Log timing of chunk arrival | ||
console.log(`Received chunk ${totalChunks} at ${lastChunkTime - firstChunkTime!}ms`); | ||
} | ||
|
||
writeStream.end() | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
import { MastraBase } from '../base'; | ||
import { InstrumentClass } from '../telemetry'; | ||
|
||
// Model configuration accepted by the MastraTTS constructor and stored on the instance. | ||
interface BuiltInModelConfig { | ||
// Provider identifier. NOTE(review): accepted values are not visible here — confirm against subclasses. | ||
provider: string; | ||
// Model name; MastraTTS.traced uses it in the span name and the 'tts.type' attribute. | ||
name: string; | ||
// Optional API key. NOTE(review): presumably the provider credential — confirm. | ||
apiKey?: string; | ||
} | ||
|
||
// Abstract base class for text-to-speech implementations: it stores the model | ||
// configuration and requires subclasses to implement generate() and stream(). | ||
// The class is decorated with InstrumentClass using the 'tts' prefix and an explicit | ||
// list of excluded method names. | ||
// NOTE(review): InstrumentClass is defined in '../telemetry'; its exact behavior is not visible here. | ||
@InstrumentClass({ | ||
prefix: 'tts', | ||
excludeMethods: ['__setTools', '__setLogger', '__setTelemetry', '#log'], | ||
}) | ||
export abstract class MastraTTS extends MastraBase { | ||
// Model configuration passed to the constructor. | ||
model: BuiltInModelConfig; | ||
// Calls the MastraBase constructor with component 'TTS', then stores the model config. | ||
constructor({ model }: { model: BuiltInModelConfig }) { | ||
super({ | ||
component: 'TTS', | ||
}); | ||
this.model = model; | ||
} | ||
|
||
// Passes `method` to this.telemetry.traceMethod with the span name | ||
// `<model name>-tts.<methodName>` and the attribute 'tts.type' = model name. | ||
// Returns `method` itself when telemetry is absent or traceMethod yields null/undefined. | ||
traced<T extends Function>(method: T, methodName: string): T { | ||
return ( | ||
this.telemetry?.traceMethod(method, { | ||
spanName: `${this.model.name}-tts.${methodName}`, | ||
attributes: { | ||
'tts.type': `${this.model.name}`, | ||
}, | ||
}) ?? method | ||
); | ||
} | ||
|
||
// Entry points that each subclass must implement; the resolved value is typed `any` here. | ||
// NOTE(review): only `text` is declared in these signatures — confirm whether subclasses accept further options. | ||
abstract generate({ text }: { text: string }): Promise<any>; | ||
abstract stream({ text }: { text: string }): Promise<any>; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
.env |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
import { config } from 'dotenv'; | ||
|
||
// Run dotenv's config() at module load, before the Jest configuration object is exported. | ||
config(); | ||
|
||
// Jest configuration: ts-jest preset with `.ts` files treated as ES modules. | ||
export default { | ||
preset: 'ts-jest', | ||
extensionsToTreatAsEsm: ['.ts'], | ||
// Map relative imports ('./' or '../') that end in '.js' to the same path without the extension. | ||
moduleNameMapper: { | ||
'^(\\.{1,2}/.*)\\.js$': '$1', | ||
}, | ||
// Transform .ts/.tsx files with ts-jest, with its ESM mode enabled. | ||
transform: { | ||
'^.+\\.tsx?$': [ | ||
'ts-jest', | ||
{ | ||
useESM: true, | ||
}, | ||
], | ||
}, | ||
}; |
Oops, something went wrong.