Go SDK for the ElevenLabs API.
- 🗣️ Text-to-Speech: Convert text to realistic speech with multiple voices and models
- 📝 Speech-to-Text: Transcribe audio with speaker diarization support
- 🎙️ Speech-to-Speech: Voice conversion - transform speech to a different voice
- 🔊 Sound Effects: Generate sound effects from text descriptions
- 🎨 Voice Design: Create custom AI voices with specific characteristics
- 🎵 Music Composition: Generate music from text prompts
- 🎙️ Audio Isolation: Extract vocals/speech from audio
- ⏱️ Forced Alignment: Get word-level timestamps for audio
- đź’¬ Text-to-Dialogue: Generate multi-speaker conversations
- 🌍 Dubbing: Translate and dub video/audio content
- 📚 Projects: Manage long-form audio content (audiobooks, podcasts)
- đź“– Pronunciation Dictionaries: Control pronunciation of specific terms
- ⚡ WebSocket TTS: Low-latency text-to-speech streaming for real-time voice synthesis
- ⚡ WebSocket STT: Real-time speech-to-text with partial results
- 📞 Twilio Integration: Phone call integration for conversational AI agents
- 📱 Phone Numbers: Manage phone numbers for voice agents
go get github.com/agentplexus/go-elevenlabspackage main
import (
"context"
"io"
"log"
"os"
elevenlabs "github.com/agentplexus/go-elevenlabs"
)
func main() {
// Create client (uses ELEVENLABS_API_KEY env var)
client, err := elevenlabs.NewClient()
if err != nil {
log.Fatal(err)
}
ctx := context.Background()
// List available voices
voices, err := client.Voices().List(ctx)
if err != nil {
log.Fatal(err)
}
log.Printf("Found %d voices", len(voices))
// Generate speech
if len(voices) > 0 {
audio, err := client.TextToSpeech().Simple(ctx,
voices[0].VoiceID,
"Hello from the ElevenLabs Go SDK!")
if err != nil {
log.Fatal(err)
}
// Save to file
f, _ := os.Create("hello.mp3")
defer f.Close()
io.Copy(f, audio)
}
}client, err := elevenlabs.NewClient(
elevenlabs.WithAPIKey("your-api-key"),
elevenlabs.WithTimeout(5 * time.Minute),
)// Simple generation
audio, err := client.TextToSpeech().Simple(ctx, voiceID, "Hello world")
// With full options
resp, err := client.TextToSpeech().Generate(ctx, &elevenlabs.TTSRequest{
VoiceID: "21m00Tcm4TlvDq8ikWAM",
Text: "Hello with custom settings!",
ModelID: "eleven_multilingual_v2",
VoiceSettings: &elevenlabs.VoiceSettings{
Stability: 0.6,
SimilarityBoost: 0.8,
Style: 0.1,
SpeakerBoost: true,
},
OutputFormat: "mp3_44100_192",
})// Transcribe from URL
result, err := client.SpeechToText().TranscribeURL(ctx, "https://example.com/audio.mp3")
fmt.Printf("Text: %s\n", result.Text)
fmt.Printf("Language: %s\n", result.LanguageCode)
// With speaker diarization
result, err := client.SpeechToText().TranscribeWithDiarization(ctx, audioURL)
for _, word := range result.Words {
fmt.Printf("[%s] %s (%.2fs - %.2fs)\n", word.Speaker, word.Text, word.Start, word.End)
}// Simple sound effect
audio, err := client.SoundEffects().Simple(ctx, "thunder and rain storm")
// With options
sfx, err := client.SoundEffects().Generate(ctx, &elevenlabs.SoundEffectRequest{
Text: "spaceship engine humming",
DurationSeconds: 10,
PromptInfluence: 0.5,
})// Generate music from prompt
resp, err := client.Music().Generate(ctx, &elevenlabs.MusicRequest{
Prompt: "upbeat electronic music for a tech video",
DurationMs: 30000,
})
// Instrumental only
audio, err := client.Music().GenerateInstrumental(ctx, "calm piano melody", 60000)
// Generate with composition plan for fine-grained control
plan, _ := client.Music().GeneratePlan(ctx, &elevenlabs.CompositionPlanRequest{
Prompt: "pop song about summer",
DurationMs: 180000,
})
resp, err := client.Music().GenerateDetailed(ctx, &elevenlabs.MusicDetailedRequest{
CompositionPlan: plan,
})
// Separate stems (vocals, drums, bass, etc.)
f, _ := os.Open("song.mp3")
stems, err := client.Music().SeparateStems(ctx, &elevenlabs.StemSeparationRequest{
File: f,
Filename: "song.mp3",
})// Extract vocals from audio file
f, _ := os.Open("mixed_audio.mp3")
isolated, err := client.AudioIsolation().IsolateFile(ctx, f, "mixed_audio.mp3")// Get word-level timestamps
f, _ := os.Open("speech.mp3")
result, err := client.ForcedAlignment().AlignFile(ctx, f, "speech.mp3",
"The text that was spoken in the audio")
for _, word := range result.Words {
fmt.Printf("%s: %.2fs - %.2fs\n", word.Text, word.Start, word.End)
}// Generate multi-speaker dialogue
audio, err := client.TextToDialogue().Simple(ctx, []elevenlabs.DialogueInput{
{Text: "Hello, how are you?", VoiceID: "voice1"},
{Text: "I'm doing great, thanks!", VoiceID: "voice2"},
})// Generate a custom voice
resp, err := client.VoiceDesign().GeneratePreview(ctx, &elevenlabs.VoiceDesignRequest{
Gender: elevenlabs.VoiceGenderFemale,
Age: elevenlabs.VoiceAgeYoung,
Accent: elevenlabs.VoiceAccentAmerican,
AccentStrength: 1.0,
Text: "This is a preview of the generated voice. It should be at least one hundred characters long for best results.",
})// Create from a map
dict, err := client.Pronunciation().CreateFromMap(ctx, "Tech Terms", map[string]string{
"API": "A P I",
"kubectl": "kube control",
"nginx": "engine X",
})
// Create from JSON file
dict, err := client.Pronunciation().CreateFromJSON(ctx, "Terms", "pronunciation.json")// Create dubbing job
dub, err := client.Dubbing().Create(ctx, &elevenlabs.DubbingRequest{
SourceURL: "https://example.com/video.mp4",
TargetLanguage: "es",
Name: "Video - Spanish",
})
// Check status
status, err := client.Dubbing().GetStatus(ctx, dub.DubbingID)// Create a project for long-form content
project, err := client.Projects().Create(ctx, &elevenlabs.CreateProjectRequest{
Name: "My Audiobook",
DefaultModelID: "eleven_multilingual_v2",
DefaultParagraphVoiceID: voiceID,
})
// Convert to audio
err = client.Projects().Convert(ctx, project.ProjectID)// Convert speech from one voice to another
f, _ := os.Open("input.mp3")
resp, err := client.SpeechToSpeech().Convert(ctx, &elevenlabs.SpeechToSpeechRequest{
VoiceID: targetVoiceID,
Audio: f,
})
// Simple conversion
output, err := client.SpeechToSpeech().Simple(ctx, targetVoiceID, audioReader)// Connect for low-latency TTS (ideal for LLM output)
conn, err := client.WebSocketTTS().Connect(ctx, voiceID, &elevenlabs.WebSocketTTSOptions{
ModelID: "eleven_turbo_v2_5",
OutputFormat: "pcm_16000",
OptimizeStreamingLatency: 3,
})
defer conn.Close()
// Stream text as it arrives (e.g., from LLM)
for text := range llmOutputStream {
conn.SendText(text)
}
conn.Flush()
// Receive audio chunks
for audio := range conn.Audio() {
// Play or save audio chunks
}// Connect for live transcription
conn, err := client.WebSocketSTT().Connect(ctx, &elevenlabs.WebSocketSTTOptions{
SampleRate: 16000,
EnablePartials: true,
})
defer conn.Close()
// Send audio chunks
go func() {
for audioChunk := range microphoneInput {
conn.SendAudio(audioChunk)
}
conn.EndStream()
}()
// Receive transcripts
for transcript := range conn.Transcripts() {
if transcript.IsFinal {
fmt.Println("Final:", transcript.Text)
} else {
fmt.Println("Partial:", transcript.Text)
}
}// Register incoming Twilio call with an ElevenLabs agent
resp, err := client.Twilio().RegisterCall(ctx, &elevenlabs.TwilioRegisterCallRequest{
AgentID: "your-agent-id",
})
// Return resp.TwiML to Twilio webhook
// Make outbound call
call, err := client.Twilio().OutboundCall(ctx, &elevenlabs.TwilioOutboundCallRequest{
AgentID: "your-agent-id",
AgentPhoneNumberID: "phone-number-id",
ToNumber: "+1234567890",
})
// List phone numbers
numbers, err := client.PhoneNumbers().List(ctx)See the examples/ directory for runnable examples:
| Example | Description |
|---|---|
basic/ |
Common SDK operations |
websocket-tts/ |
Real-time TTS streaming for LLM integration |
websocket-stt/ |
Live transcription with partial results |
speech-to-speech/ |
Voice conversion |
twilio/ |
Phone call integration with Twilio |
ttsscript/ |
Multi-voice script authoring |
retryhttp/ |
Retry-capable HTTP transport |
export ELEVENLABS_API_KEY="your-api-key"
go run examples/basic/main.goaudio, err := client.TextToSpeech().Simple(ctx, voiceID, text)
if err != nil {
if elevenlabs.IsRateLimitError(err) {
log.Println("Rate limited, waiting...")
time.Sleep(time.Minute)
} else if elevenlabs.IsUnauthorizedError(err) {
log.Fatal("Invalid API key")
} else if elevenlabs.IsNotFoundError(err) {
log.Fatal("Voice not found")
} else {
log.Fatalf("Error: %v", err)
}
}ELEVENLABS_API_KEY: Your ElevenLabs API key (used automatically if not provided viaWithAPIKey)
Contributions are welcome! Please feel free to submit a Pull Request.
MIT License