ElevenLabs Go SDK

Features

🗣️ Text-to-Speech: Convert text to realistic speech with multiple voices and models
📝 Speech-to-Text: Transcribe audio with speaker diarization support
🎙️ Speech-to-Speech: Convert speech from one voice to another
🔊 Sound Effects: Generate sound effects from text descriptions
🎨 Voice Design: Create custom AI voices with specific characteristics
🎵 Music Composition: Generate music from text prompts
🎙️ Audio Isolation: Extract vocals/speech from audio
⏱️ Forced Alignment: Get word-level timestamps for audio
💬 Text-to-Dialogue: Generate multi-speaker conversations
🌍 Dubbing: Translate and dub video/audio content
📚 Projects: Manage long-form audio content (audiobooks, podcasts)
📖 Pronunciation Dictionaries: Control pronunciation of specific terms

Real-Time Services

⚡ WebSocket TTS: Low-latency text-to-speech streaming for real-time voice synthesis
⚡ WebSocket STT: Real-time speech-to-text with partial results
📞 Twilio Integration: Phone call integration for conversational AI agents
📱 Phone Numbers: Manage phone numbers for voice agents

Installation

go get github.com/agentplexus/go-elevenlabs

Quick Start

Basic Text-to-Speech

package main

import (
    "context"
    "io"
    "log"
    "os"

    elevenlabs "github.com/agentplexus/go-elevenlabs"
)

// main lists the voices available to the account and, when at least one
// exists, synthesizes a short greeting with the first voice and writes the
// resulting audio to hello.mp3 in the working directory. Any failure is fatal.
func main() {
    // Create client (uses ELEVENLABS_API_KEY env var)
    client, err := elevenlabs.NewClient()
    if err != nil {
        log.Fatal(err)
    }

    ctx := context.Background()

    // List available voices
    voices, err := client.Voices().List(ctx)
    if err != nil {
        log.Fatal(err)
    }
    log.Printf("Found %d voices", len(voices))

    // Without a voice there is nothing to synthesize with.
    if len(voices) == 0 {
        return
    }

    // Generate speech
    audio, err := client.TextToSpeech().Simple(ctx,
        voices[0].VoiceID,
        "Hello from the ElevenLabs Go SDK!")
    if err != nil {
        log.Fatal(err)
    }

    // Save to file
    f, err := os.Create("hello.mp3")
    if err != nil {
        log.Fatal(err)
    }
    if _, err := io.Copy(f, audio); err != nil {
        // Best-effort close: the copy error is the one worth reporting.
        // Closed explicitly because log.Fatal exits without running defers.
        f.Close()
        log.Fatal(err)
    }
    // Close reports write errors that surface only on flush, so check it
    // rather than deferring and discarding the result.
    if err := f.Close(); err != nil {
        log.Fatal(err)
    }
}

With Custom Options

client, err := elevenlabs.NewClient(
    elevenlabs.WithAPIKey("your-api-key"),
    elevenlabs.WithTimeout(5 * time.Minute),
)

Services

Text-to-Speech

// Simple generation
audio, err := client.TextToSpeech().Simple(ctx, voiceID, "Hello world")

// With full options
resp, err := client.TextToSpeech().Generate(ctx, &elevenlabs.TTSRequest{
    VoiceID: "21m00Tcm4TlvDq8ikWAM",
    Text:    "Hello with custom settings!",
    ModelID: "eleven_multilingual_v2",
    VoiceSettings: &elevenlabs.VoiceSettings{
        Stability:       0.6,
        SimilarityBoost: 0.8,
        Style:           0.1,
        SpeakerBoost:    true,
    },
    OutputFormat: "mp3_44100_192",
})

Speech-to-Text

// Transcribe from URL
result, err := client.SpeechToText().TranscribeURL(ctx, "https://example.com/audio.mp3")
fmt.Printf("Text: %s\n", result.Text)
fmt.Printf("Language: %s\n", result.LanguageCode)

// With speaker diarization
result, err := client.SpeechToText().TranscribeWithDiarization(ctx, audioURL)
for _, word := range result.Words {
    fmt.Printf("[%s] %s (%.2fs - %.2fs)\n", word.Speaker, word.Text, word.Start, word.End)
}

Sound Effects

// Simple sound effect
audio, err := client.SoundEffects().Simple(ctx, "thunder and rain storm")

// With options
sfx, err := client.SoundEffects().Generate(ctx, &elevenlabs.SoundEffectRequest{
    Text:            "spaceship engine humming",
    DurationSeconds: 10,
    PromptInfluence: 0.5,
})

Music Composition

// Generate music from prompt
resp, err := client.Music().Generate(ctx, &elevenlabs.MusicRequest{
    Prompt:     "upbeat electronic music for a tech video",
    DurationMs: 30000,
})

// Instrumental only
audio, err := client.Music().GenerateInstrumental(ctx, "calm piano melody", 60000)

// Generate with composition plan for fine-grained control
plan, _ := client.Music().GeneratePlan(ctx, &elevenlabs.CompositionPlanRequest{
    Prompt:     "pop song about summer",
    DurationMs: 180000,
})
resp, err := client.Music().GenerateDetailed(ctx, &elevenlabs.MusicDetailedRequest{
    CompositionPlan: plan,
})

// Separate stems (vocals, drums, bass, etc.)
f, _ := os.Open("song.mp3")
stems, err := client.Music().SeparateStems(ctx, &elevenlabs.StemSeparationRequest{
    File:     f,
    Filename: "song.mp3",
})

Audio Isolation

// Extract vocals from audio file
f, _ := os.Open("mixed_audio.mp3")
isolated, err := client.AudioIsolation().IsolateFile(ctx, f, "mixed_audio.mp3")

Forced Alignment

// Get word-level timestamps
f, _ := os.Open("speech.mp3")
result, err := client.ForcedAlignment().AlignFile(ctx, f, "speech.mp3",
    "The text that was spoken in the audio")

for _, word := range result.Words {
    fmt.Printf("%s: %.2fs - %.2fs\n", word.Text, word.Start, word.End)
}

Text-to-Dialogue

// Generate multi-speaker dialogue
audio, err := client.TextToDialogue().Simple(ctx, []elevenlabs.DialogueInput{
    {Text: "Hello, how are you?", VoiceID: "voice1"},
    {Text: "I'm doing great, thanks!", VoiceID: "voice2"},
})

Voice Design

// Generate a custom voice
resp, err := client.VoiceDesign().GeneratePreview(ctx, &elevenlabs.VoiceDesignRequest{
    Gender:         elevenlabs.VoiceGenderFemale,
    Age:            elevenlabs.VoiceAgeYoung,
    Accent:         elevenlabs.VoiceAccentAmerican,
    AccentStrength: 1.0,
    Text:           "This is a preview of the generated voice. It should be at least one hundred characters long for best results.",
})

Pronunciation Dictionaries

// Create from a map
dict, err := client.Pronunciation().CreateFromMap(ctx, "Tech Terms", map[string]string{
    "API":     "A P I",
    "kubectl": "kube control",
    "nginx":   "engine X",
})

// Create from JSON file
dict, err := client.Pronunciation().CreateFromJSON(ctx, "Terms", "pronunciation.json")

Dubbing

// Create dubbing job
dub, err := client.Dubbing().Create(ctx, &elevenlabs.DubbingRequest{
    SourceURL:      "https://example.com/video.mp4",
    TargetLanguage: "es",
    Name:           "Video - Spanish",
})

// Check status
status, err := client.Dubbing().GetStatus(ctx, dub.DubbingID)

Projects (Studio)

// Create a project for long-form content
project, err := client.Projects().Create(ctx, &elevenlabs.CreateProjectRequest{
    Name:                    "My Audiobook",
    DefaultModelID:          "eleven_multilingual_v2",
    DefaultParagraphVoiceID: voiceID,
})

// Convert to audio
err = client.Projects().Convert(ctx, project.ProjectID)

Speech-to-Speech (Voice Conversion)

// Convert speech from one voice to another
f, _ := os.Open("input.mp3")
resp, err := client.SpeechToSpeech().Convert(ctx, &elevenlabs.SpeechToSpeechRequest{
    VoiceID: targetVoiceID,
    Audio:   f,
})

// Simple conversion
output, err := client.SpeechToSpeech().Simple(ctx, targetVoiceID, audioReader)

WebSocket TTS (Real-Time Streaming)

// Connect for low-latency TTS (ideal for LLM output)
conn, err := client.WebSocketTTS().Connect(ctx, voiceID, &elevenlabs.WebSocketTTSOptions{
    ModelID:                  "eleven_turbo_v2_5",
    OutputFormat:             "pcm_16000",
    OptimizeStreamingLatency: 3,
})
defer conn.Close()

// Stream text as it arrives (e.g., from LLM)
for text := range llmOutputStream {
    conn.SendText(text)
}
conn.Flush()

// Receive audio chunks
for audio := range conn.Audio() {
    // Play or save audio chunks
}

WebSocket STT (Real-Time Transcription)

// Connect for live transcription
conn, err := client.WebSocketSTT().Connect(ctx, &elevenlabs.WebSocketSTTOptions{
    SampleRate:     16000,
    EnablePartials: true,
})
defer conn.Close()

// Send audio chunks
go func() {
    for audioChunk := range microphoneInput {
        conn.SendAudio(audioChunk)
    }
    conn.EndStream()
}()

// Receive transcripts
for transcript := range conn.Transcripts() {
    if transcript.IsFinal {
        fmt.Println("Final:", transcript.Text)
    } else {
        fmt.Println("Partial:", transcript.Text)
    }
}

Twilio Integration (Phone Calls)

// Register incoming Twilio call with an ElevenLabs agent
resp, err := client.Twilio().RegisterCall(ctx, &elevenlabs.TwilioRegisterCallRequest{
    AgentID: "your-agent-id",
})
// Return resp.TwiML to Twilio webhook

// Make outbound call
call, err := client.Twilio().OutboundCall(ctx, &elevenlabs.TwilioOutboundCallRequest{
    AgentID:            "your-agent-id",
    AgentPhoneNumberID: "phone-number-id",
    ToNumber:           "+1234567890",
})

// List phone numbers
numbers, err := client.PhoneNumbers().List(ctx)

Examples

See the examples/ directory for runnable examples:

| Example | Description |
| --- | --- |
| `basic/` | Common SDK operations |
| `websocket-tts/` | Real-time TTS streaming for LLM integration |
| `websocket-stt/` | Live transcription with partial results |
| `speech-to-speech/` | Voice conversion |
| `twilio/` | Phone call integration with Twilio |
| `ttsscript/` | Multi-voice script authoring |
| `retryhttp/` | Retry-capable HTTP transport |

export ELEVENLABS_API_KEY="your-api-key"
go run examples/basic/main.go

Error Handling

audio, err := client.TextToSpeech().Simple(ctx, voiceID, text)
if err != nil {
    if elevenlabs.IsRateLimitError(err) {
        log.Println("Rate limited, waiting...")
        time.Sleep(time.Minute)
    } else if elevenlabs.IsUnauthorizedError(err) {
        log.Fatal("Invalid API key")
    } else if elevenlabs.IsNotFoundError(err) {
        log.Fatal("Voice not found")
    } else {
        log.Fatalf("Error: %v", err)
    }
}

Environment Variables

`ELEVENLABS_API_KEY`: Your ElevenLabs API key. The client reads it automatically when no key is passed via `WithAPIKey`.

Name		Name	Last commit message	Last commit date
Latest commit History 46 Commits
.github		.github
cmd		cmd
docs		docs
examples		examples
internal/api		internal/api
omnivoice		omnivoice
openapi		openapi
ttsscript		ttsscript
voices		voices
.gitignore		.gitignore
.golangci.yaml		.golangci.yaml
CHANGELOG.json		CHANGELOG.json
CHANGELOG.md		CHANGELOG.md
LICENSE		LICENSE
PRESENTATION.md		PRESENTATION.md
README.md		README.md
README_AGENT.md		README_AGENT.md
README_AGENT_ROADMAP.md		README_AGENT_ROADMAP.md
TRD_STT.md		TRD_STT.md
audio.go		audio.go
audio_test.go		audio_test.go
audioisolation.go		audioisolation.go
audioisolation_test.go		audioisolation_test.go
client.go		client.go
client_test.go		client_test.go
dubbing.go		dubbing.go
errors.go		errors.go
errors_test.go		errors_test.go
forcedalignment.go		forcedalignment.go
forcedalignment_test.go		forcedalignment_test.go
generate.sh		generate.sh
go.mod		go.mod
go.sum		go.sum
history.go		history.go
history_test.go		history_test.go
integration_test.go		integration_test.go
mkdocs.yml		mkdocs.yml
models.go		models.go
models_test.go		models_test.go
music.go		music.go
music_test.go		music_test.go
ogen-fixnull		ogen-fixnull
ogen.yml		ogen.yml
projects.go		projects.go
projects_test.go		projects_test.go
pronunciation.go		pronunciation.go
pronunciation_rules.go		pronunciation_rules.go
pronunciation_rules_test.go		pronunciation_rules_test.go
pronunciation_test.go		pronunciation_test.go
soundeffects.go		soundeffects.go
soundeffects_test.go		soundeffects_test.go
speechtospeech.go		speechtospeech.go
speechtotext.go		speechtotext.go
speechtotext_test.go		speechtotext_test.go
texttodialogue.go		texttodialogue.go
texttodialogue_test.go		texttodialogue_test.go
texttospeech.go		texttospeech.go
texttospeech_test.go		texttospeech_test.go
twilio.go		twilio.go
user.go		user.go
user_test.go		user_test.go
voicedesign.go		voicedesign.go
voicedesign_test.go		voicedesign_test.go
voices.go		voices.go
voices_test.go		voices_test.go
voicesettings.go		voicesettings.go
websocketstt.go		websocketstt.go
websockettts.go		websockettts.go

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Repository files navigation

ElevenLabs Go SDK

Features

Real-Time Services

Installation

Quick Start

Basic Text-to-Speech

With Custom Options

Services

Text-to-Speech

Speech-to-Text

Sound Effects

Music Composition

Audio Isolation

Forced Alignment

Text-to-Dialogue

Voice Design

Pronunciation Dictionaries

Dubbing

Projects (Studio)

Speech-to-Speech (Voice Conversion)

WebSocket TTS (Real-Time Streaming)

WebSocket STT (Real-Time Transcription)

Twilio Integration (Phone Calls)

Examples

Error Handling

Environment Variables

Documentation

Contributing

License

About

Uh oh!

Releases 7

Contributors 3

Uh oh!

Languages

License

agentplexus/go-elevenlabs

Folders and files

Latest commit

History

Repository files navigation

ElevenLabs Go SDK

Features

Real-Time Services

Installation

Quick Start

Basic Text-to-Speech

With Custom Options

Services

Text-to-Speech

Speech-to-Text

Sound Effects

Music Composition

Audio Isolation

Forced Alignment

Text-to-Dialogue

Voice Design

Pronunciation Dictionaries

Dubbing

Projects (Studio)

Speech-to-Speech (Voice Conversion)

WebSocket TTS (Real-Time Streaming)

WebSocket STT (Real-Time Transcription)

Twilio Integration (Phone Calls)

Examples

Error Handling

Environment Variables

Documentation

Contributing

License

About

Resources

License

Uh oh!

Stars

Watchers

Forks

Releases 7

Contributors 3

Uh oh!

Languages