TTS: configurable auth, Health check, Phoenix options; .env.example; Gitea CI workflow
Some checks failed
CI / build (push) Has been cancelled
Some checks failed
CI / build (push) Has been cancelled
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
21
.env.example
Normal file
21
.env.example
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
# Virtual Banker — example environment (copy to .env and set values)
|
||||||
|
# Do not commit .env; use secrets in CI/production.
|
||||||
|
|
||||||
|
# Database and Redis
|
||||||
|
DATABASE_URL=postgres://user:pass@localhost:5432/virtual_banker?sslmode=disable
|
||||||
|
REDIS_URL=redis://localhost:6379
|
||||||
|
PORT=8081
|
||||||
|
|
||||||
|
# TTS: ElevenLabs (default) or Phoenix
|
||||||
|
# Leave unset to use mock TTS. Set TTS_VOICE_ID + one of the keys for real TTS.
|
||||||
|
TTS_VOICE_ID=
|
||||||
|
TTS_API_KEY=
|
||||||
|
# ELEVENLABS_API_KEY= # alternative to TTS_API_KEY
|
||||||
|
# ELEVENLABS_VOICE_ID= # alternative to TTS_VOICE_ID
|
||||||
|
|
||||||
|
# Phoenix / custom TTS endpoint (optional)
|
||||||
|
# TTS_BASE_URL=https://phoenix.example.com/tts/v1
|
||||||
|
# TTS_AUTH_HEADER_NAME=Authorization
|
||||||
|
# TTS_AUTH_HEADER_VALUE=Bearer your-token
|
||||||
|
# USE_PHOENIX_TTS=true
|
||||||
|
# PHOENIX_TTS_BASE_URL=https://phoenix.example.com/tts/v1
|
||||||
23
.gitea/workflows/ci.yml
Normal file
23
.gitea/workflows/ci.yml
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
# Gitea Actions: build and test the virtual-banker backend on pushes and pull requests to master/main
|
||||||
|
name: CI
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches: [master, main]
|
||||||
|
pull_request:
|
||||||
|
branches: [master, main]
|
||||||
|
jobs:
|
||||||
|
build:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- name: Checkout
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
- name: Set up Go
|
||||||
|
uses: actions/setup-go@v5
|
||||||
|
with:
|
||||||
|
go-version: '1.21'
|
||||||
|
- name: Build
|
||||||
|
run: go build ./...
|
||||||
|
working-directory: backend
|
||||||
|
- name: Test
|
||||||
|
run: go test ./...
|
||||||
|
working-directory: backend
|
||||||
@@ -33,6 +33,10 @@ virtual-banker/
|
|||||||
- PostgreSQL 16+ with pgvector extension
|
- PostgreSQL 16+ with pgvector extension
|
||||||
- Redis
|
- Redis
|
||||||
|
|
||||||
|
### Configuration
|
||||||
|
|
||||||
|
- Copy `.env.example` to `.env` and set `DATABASE_URL`, `REDIS_URL`, and optionally TTS vars (`TTS_BASE_URL`, `TTS_API_KEY`, `TTS_VOICE_ID`) for ElevenLabs or Phoenix. See `backend/tts/README.md` for TTS backend selection and Phoenix endpoint swap.
|
||||||
|
|
||||||
### Development Setup
|
### Development Setup
|
||||||
|
|
||||||
1. **Start infrastructure** (uses existing postgres/redis from main monorepo):
|
1. **Start infrastructure** (uses existing postgres/redis from main monorepo):
|
||||||
|
|||||||
@@ -55,9 +55,9 @@ func main() {
|
|||||||
// Initialize services
|
// Initialize services
|
||||||
sessionManager := session.NewManager(db, redisClient)
|
sessionManager := session.NewManager(db, redisClient)
|
||||||
|
|
||||||
// Initialize ASR/TTS (using mocks for now)
|
// Initialize ASR/TTS
|
||||||
asrService := asr.NewMockASRService()
|
asrService := asr.NewMockASRService()
|
||||||
ttsService := tts.NewMockTTSService()
|
ttsService := newTTSService()
|
||||||
|
|
||||||
// Initialize LLM (using mock for now)
|
// Initialize LLM (using mock for now)
|
||||||
llmGateway := llm.NewMockLLMGateway()
|
llmGateway := llm.NewMockLLMGateway()
|
||||||
@@ -128,6 +128,28 @@ func main() {
|
|||||||
log.Println("Server exited")
|
log.Println("Server exited")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// newTTSService returns a TTS service from env: use real API when TTS_API_KEY (or
|
||||||
|
// ELEVENLABS_API_KEY) and TTS_VOICE_ID are set. Optional: TTS_BASE_URL (Phoenix),
|
||||||
|
// TTS_AUTH_HEADER_NAME / TTS_AUTH_HEADER_VALUE (e.g. Authorization: Bearer),
|
||||||
|
// USE_PHOENIX_TTS=true to require TTS_BASE_URL.
|
||||||
|
func newTTSService() tts.Service {
|
||||||
|
apiKey := getEnv("TTS_API_KEY", os.Getenv("ELEVENLABS_API_KEY"))
|
||||||
|
voiceID := getEnv("TTS_VOICE_ID", os.Getenv("ELEVENLABS_VOICE_ID"))
|
||||||
|
baseURL := getEnv("TTS_BASE_URL", "")
|
||||||
|
authName := getEnv("TTS_AUTH_HEADER_NAME", "")
|
||||||
|
authValue := getEnv("TTS_AUTH_HEADER_VALUE", "")
|
||||||
|
usePhoenix := getEnv("USE_PHOENIX_TTS", "") == "true" || getEnv("USE_PHOENIX_TTS", "") == "1"
|
||||||
|
if usePhoenix && baseURL == "" {
|
||||||
|
baseURL = getEnv("PHOENIX_TTS_BASE_URL", "https://phoenix.example.com/tts/v1")
|
||||||
|
}
|
||||||
|
hasAuth := apiKey != "" || authValue != ""
|
||||||
|
if hasAuth && voiceID != "" {
|
||||||
|
opts := tts.TTSOptions{BaseURL: baseURL, AuthHeaderName: authName, AuthHeaderValue: authValue}
|
||||||
|
return tts.NewElevenLabsTTSServiceWithOptionsFull(apiKey, voiceID, opts)
|
||||||
|
}
|
||||||
|
return tts.NewMockTTSService()
|
||||||
|
}
|
||||||
|
|
||||||
func getEnv(key, defaultValue string) string {
|
func getEnv(key, defaultValue string) string {
|
||||||
if value := os.Getenv(key); value != "" {
|
if value := os.Getenv(key); value != "" {
|
||||||
return value
|
return value
|
||||||
|
|||||||
@@ -2,7 +2,6 @@ package observability
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// Tracer provides distributed tracing
|
// Tracer provides distributed tracing
|
||||||
|
|||||||
86
backend/tts/README.md
Normal file
86
backend/tts/README.md
Normal file
@@ -0,0 +1,86 @@
|
|||||||
|
# TTS package — ElevenLabs-compatible, Phoenix endpoint swap
|
||||||
|
|
||||||
|
This package provides a **text-to-speech client** that matches the [ElevenLabs TTS API](https://elevenlabs.io/docs/api-reference/text-to-speech) contract. You can point it at **ElevenLabs** or at a **Phoenix-hosted** TTS service that implements the same API shape; switching is a config change (base URL), no code change.
|
||||||
|
|
||||||
|
**Note:** The repo [eleven-labs/api-service](https://github.com/eleven-labs/api-service) on GitHub is a PHP OpenAPI consumer library, not the voice TTS API. This client targets the **REST TTS API** at `api.elevenlabs.io` (and compatible backends).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Parity with ElevenLabs TTS API
|
||||||
|
|
||||||
|
| Feature | ElevenLabs API | This client |
|
||||||
|
|--------|----------------|-------------|
|
||||||
|
| **Sync** `POST /v1/text-to-speech/:voice_id` | ✅ | ✅ `Synthesize` |
|
||||||
|
| **Stream** `POST /v1/text-to-speech/:voice_id/stream` | ✅ | ✅ `SynthesizeStream` |
|
||||||
|
| **Voice settings** (stability, similarity_boost, style, speaker_boost) | ✅ | ✅ `VoiceConfig` |
|
||||||
|
| **Model** (`model_id`) | ✅ | ✅ `SetModelID` / default `eleven_multilingual_v2` |
|
||||||
|
| **Auth** `xi-api-key` header | ✅ | ✅ |
|
||||||
|
| **Output** `Accept: audio/mpeg` (mp3) | ✅ | ✅ |
|
||||||
|
| **Retries** (5xx, backoff) | — | ✅ on sync |
|
||||||
|
| **Visemes** (lip sync) | ❌ (no phoneme API) | ✅ client-side approximation |
|
||||||
|
|
||||||
|
Optional ElevenLabs features not used here: `output_format` query, `optimize_streaming_latency`, WebSocket streaming. For “just change endpoint” to Phoenix, the host only needs to implement the same **sync + stream** JSON body and return **audio/mpeg**.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Which TTS backend? (decision table)
|
||||||
|
|
||||||
|
| Env / condition | Backend used |
|
||||||
|
|----------------|--------------|
|
||||||
|
| `TTS_VOICE_ID` unset (or no auth) | **Mock** (no real synthesis) |
|
||||||
|
| `TTS_VOICE_ID` + `TTS_API_KEY` or `ELEVENLABS_*` set, `TTS_BASE_URL` unset | **ElevenLabs** (api.elevenlabs.io) |
|
||||||
|
| `TTS_BASE_URL` set (e.g. Phoenix) + auth + voice | **Phoenix** (or other compatible host) |
|
||||||
|
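| `USE_PHOENIX_TTS=true` (or `1`) | Prefer Phoenix: uses `TTS_BASE_URL` if set, otherwise `PHOENIX_TTS_BASE_URL`. Auth and a voice ID are still required; without them the **Mock** is used. |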
|
||||||
|
|
||||||
|
Auth: default header is `xi-api-key` (ElevenLabs). For Phoenix with Bearer token set `TTS_AUTH_HEADER_NAME=Authorization` and `TTS_AUTH_HEADER_VALUE=Bearer <token>`.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Using with Phoenix (swap endpoint)
|
||||||
|
|
||||||
|
1. **Phoenix TTS service** must expose the same contract:
|
||||||
|
- `POST /v1/text-to-speech/:voice_id` — body: `{"text","model_id","voice_settings"}` → response: raw mp3
|
||||||
|
- `POST /v1/text-to-speech/:voice_id/stream` — same body → response: streaming mp3
|
||||||
|
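- **Health:** `GET {baseURL}/../health`, i.e. `health` next to the last path segment of the base URL (for `https://phoenix.example.com/tts/v1` that is `https://phoenix.example.com/tts/health`), returning 2xx so `tts.Service.Health(ctx)` can be used for readiness.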
|
||||||
|
|
||||||
|
2. **Configure the app** with the Phoenix base URL (and optional auth):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export TTS_BASE_URL="https://phoenix.example.com/tts/v1"
|
||||||
|
export TTS_VOICE_ID="default-voice-id"
|
||||||
|
# Optional: Phoenix uses Bearer token
|
||||||
|
export TTS_AUTH_HEADER_NAME="Authorization"
|
||||||
|
export TTS_AUTH_HEADER_VALUE="Bearer your-token"
|
||||||
|
# Or feature flag to force Phoenix
|
||||||
|
export USE_PHOENIX_TTS=true
|
||||||
|
export PHOENIX_TTS_BASE_URL="https://phoenix.example.com/tts/v1"
|
||||||
|
```
|
||||||
|
|
||||||
|
3. **Health check:** The client’s `Health(ctx)` calls `GET {baseURL}/../health` when base URL is not ElevenLabs. Wire this into your readiness probe or a `/ready` endpoint if you need TTS to be up before accepting traffic.
|
||||||
|
|
||||||
|
4. **In code** (e.g. for reuse in another project):
|
||||||
|
|
||||||
|
```go
|
||||||
|
opts := tts.TTSOptions{
|
||||||
|
BaseURL: "https://phoenix.example.com/tts/v1",
|
||||||
|
AuthHeaderName: "Authorization",
|
||||||
|
AuthHeaderValue: "Bearer token",
|
||||||
|
}
|
||||||
|
svc := tts.NewElevenLabsTTSServiceWithOptionsFull(apiKey, voiceID, opts)
|
||||||
|
if err := svc.Health(ctx); err != nil { /* not ready */ }
|
||||||
|
audio, err := svc.Synthesize(ctx, "Hello world")
|
||||||
|
```
|
||||||
|
|
||||||
|
No code change beyond config: same interface, different base URL and optional auth header.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Reuse across projects
|
||||||
|
|
||||||
|
This package lives in **virtual-banker** and can be depended on as a Go module path (e.g. `github.com/your-org/virtual-banker/backend/tts` or via a shared repo). Any project that needs TTS can:
|
||||||
|
|
||||||
|
- Depend on this package.
|
||||||
|
- Use `tts.Service` and either `NewMockTTSService()` or `NewElevenLabsTTSServiceWithOptions(apiKey, voiceID, baseURL)` / `NewElevenLabsTTSServiceWithOptionsFull(apiKey, voiceID, opts)` for custom auth.
|
||||||
|
- Set `baseURL` to ElevenLabs (`""` or `https://api.elevenlabs.io/v1`) or to the Phoenix TTS base URL.
|
||||||
|
|
||||||
|
The **interface** (`Synthesize`, `SynthesizeStream`, `GetVisemes`, `Health`) stays the same regardless of backend.
|
||||||
@@ -7,20 +7,31 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
"net/url"
|
||||||
|
"path"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
// ElevenLabsTTSService integrates with ElevenLabs TTS API
|
// ElevenLabsTTSService integrates with ElevenLabs TTS API or a Phoenix-compatible endpoint
|
||||||
type ElevenLabsTTSService struct {
|
type ElevenLabsTTSService struct {
|
||||||
apiKey string
|
apiKey string
|
||||||
voiceID string
|
voiceID string
|
||||||
modelID string
|
modelID string
|
||||||
baseURL string
|
baseURL string
|
||||||
|
authHeaderName string // default "xi-api-key" when empty
|
||||||
|
authHeaderValue string
|
||||||
httpClient *http.Client
|
httpClient *http.Client
|
||||||
defaultVoiceConfig *VoiceConfig
|
defaultVoiceConfig *VoiceConfig
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TTSOptions allows optional overrides when creating the TTS service (e.g. Phoenix auth)
|
||||||
|
type TTSOptions struct {
|
||||||
|
BaseURL string // e.g. "https://phoenix.example.com/tts/v1"
|
||||||
|
AuthHeaderName string // e.g. "Authorization"; empty = "xi-api-key"
|
||||||
|
AuthHeaderValue string // e.g. "Bearer token"; empty = apiKey
|
||||||
|
}
|
||||||
|
|
||||||
// VoiceConfig holds ElevenLabs voice configuration
|
// VoiceConfig holds ElevenLabs voice configuration
|
||||||
type VoiceConfig struct {
|
type VoiceConfig struct {
|
||||||
Stability float64 `json:"stability"`
|
Stability float64 `json:"stability"`
|
||||||
@@ -36,13 +47,45 @@ type ElevenLabsRequest struct {
|
|||||||
VoiceSettings VoiceConfig `json:"voice_settings,omitempty"`
|
VoiceSettings VoiceConfig `json:"voice_settings,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewElevenLabsTTSService creates a new ElevenLabs TTS service
|
// DefaultElevenLabsBaseURL is the ElevenLabs API base URL, used when no base URL override is configured.
|
||||||
|
const DefaultElevenLabsBaseURL = "https://api.elevenlabs.io/v1"
|
||||||
|
|
||||||
|
// NewElevenLabsTTSService creates a TTS service that talks to the default ElevenLabs
// endpoint (DefaultElevenLabsBaseURL). To target a Phoenix-hosted, ElevenLabs-compatible
// API instead, use NewElevenLabsTTSServiceWithOptions and pass its base URL
// (e.g. https://phoenix.example.com/tts/v1).
|
||||||
func NewElevenLabsTTSService(apiKey, voiceID string) *ElevenLabsTTSService {
|
func NewElevenLabsTTSService(apiKey, voiceID string) *ElevenLabsTTSService {
|
||||||
|
return NewElevenLabsTTSServiceWithOptions(apiKey, voiceID, "")
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewElevenLabsTTSServiceWithOptions creates a TTS service with a configurable base URL.
|
||||||
|
// baseURL: if empty, uses DefaultElevenLabsBaseURL (ElevenLabs). For Phoenix, use e.g.
|
||||||
|
// "https://phoenix.example.com/tts/v1" so that /text-to-speech/:id and /stream are used.
|
||||||
|
func NewElevenLabsTTSServiceWithOptions(apiKey, voiceID, baseURL string) *ElevenLabsTTSService {
|
||||||
|
return NewElevenLabsTTSServiceWithOptionsFull(apiKey, voiceID, TTSOptions{BaseURL: baseURL})
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewElevenLabsTTSServiceWithOptionsFull creates a TTS service with full options (base URL, auth header).
|
||||||
|
// Use for Phoenix when auth differs from ElevenLabs (e.g. Authorization: Bearer <token>).
|
||||||
|
func NewElevenLabsTTSServiceWithOptionsFull(apiKey, voiceID string, opts TTSOptions) *ElevenLabsTTSService {
|
||||||
|
baseURL := strings.TrimSuffix(opts.BaseURL, "/")
|
||||||
|
if baseURL == "" {
|
||||||
|
baseURL = DefaultElevenLabsBaseURL
|
||||||
|
}
|
||||||
|
authName := opts.AuthHeaderName
|
||||||
|
if authName == "" {
|
||||||
|
authName = "xi-api-key"
|
||||||
|
}
|
||||||
|
authVal := opts.AuthHeaderValue
|
||||||
|
if authVal == "" {
|
||||||
|
authVal = apiKey
|
||||||
|
}
|
||||||
return &ElevenLabsTTSService{
|
return &ElevenLabsTTSService{
|
||||||
apiKey: apiKey,
|
apiKey: apiKey,
|
||||||
voiceID: voiceID,
|
voiceID: voiceID,
|
||||||
modelID: "eleven_multilingual_v2", // Default model
|
modelID: "eleven_multilingual_v2",
|
||||||
baseURL: "https://api.elevenlabs.io/v1",
|
baseURL: baseURL,
|
||||||
|
authHeaderName: authName,
|
||||||
|
authHeaderValue: authVal,
|
||||||
httpClient: &http.Client{
|
httpClient: &http.Client{
|
||||||
Timeout: 30 * time.Second,
|
Timeout: 30 * time.Second,
|
||||||
},
|
},
|
||||||
@@ -71,8 +114,8 @@ func (s *ElevenLabsTTSService) Synthesize(ctx context.Context, text string) ([]b
|
|||||||
|
|
||||||
// SynthesizeWithConfig synthesizes text to audio with custom voice configuration
|
// SynthesizeWithConfig synthesizes text to audio with custom voice configuration
|
||||||
func (s *ElevenLabsTTSService) SynthesizeWithConfig(ctx context.Context, text string, config *VoiceConfig) ([]byte, error) {
|
func (s *ElevenLabsTTSService) SynthesizeWithConfig(ctx context.Context, text string, config *VoiceConfig) ([]byte, error) {
|
||||||
if s.apiKey == "" {
|
if s.authHeaderValue == "" && s.apiKey == "" {
|
||||||
return nil, fmt.Errorf("ElevenLabs API key not configured")
|
return nil, fmt.Errorf("TTS API key or auth not configured")
|
||||||
}
|
}
|
||||||
if s.voiceID == "" {
|
if s.voiceID == "" {
|
||||||
return nil, fmt.Errorf("ElevenLabs voice ID not configured")
|
return nil, fmt.Errorf("ElevenLabs voice ID not configured")
|
||||||
@@ -109,8 +152,9 @@ func (s *ElevenLabsTTSService) SynthesizeWithConfig(ctx context.Context, text st
|
|||||||
|
|
||||||
req.Header.Set("Accept", "audio/mpeg")
|
req.Header.Set("Accept", "audio/mpeg")
|
||||||
req.Header.Set("Content-Type", "application/json")
|
req.Header.Set("Content-Type", "application/json")
|
||||||
req.Header.Set("xi-api-key", s.apiKey)
|
if s.authHeaderValue != "" {
|
||||||
|
req.Header.Set(s.authHeaderName, s.authHeaderValue)
|
||||||
|
}
|
||||||
// Execute request with retry logic
|
// Execute request with retry logic
|
||||||
var resp *http.Response
|
var resp *http.Response
|
||||||
maxRetries := 3
|
maxRetries := 3
|
||||||
@@ -131,12 +175,8 @@ func (s *ElevenLabsTTSService) SynthesizeWithConfig(ctx context.Context, text st
|
|||||||
}
|
}
|
||||||
|
|
||||||
if resp.StatusCode != http.StatusOK {
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
bodyBytes, _ := io.ReadAll(resp.Body)
|
||||||
resp.Body.Close()
|
resp.Body.Close()
|
||||||
bodyBytes, _ := io.ReadAll(bytes.NewReader([]byte{}))
|
|
||||||
if resp.Body != nil {
|
|
||||||
bodyBytes, _ = io.ReadAll(resp.Body)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Retry on 5xx errors
|
// Retry on 5xx errors
|
||||||
if resp.StatusCode >= 500 && i < maxRetries-1 {
|
if resp.StatusCode >= 500 && i < maxRetries-1 {
|
||||||
backoff := time.Duration(i+1) * time.Second
|
backoff := time.Duration(i+1) * time.Second
|
||||||
@@ -165,8 +205,8 @@ func (s *ElevenLabsTTSService) SynthesizeStream(ctx context.Context, text string
|
|||||||
|
|
||||||
// SynthesizeStreamWithConfig synthesizes text to audio stream with custom voice configuration
|
// SynthesizeStreamWithConfig synthesizes text to audio stream with custom voice configuration
|
||||||
func (s *ElevenLabsTTSService) SynthesizeStreamWithConfig(ctx context.Context, text string, config *VoiceConfig) (io.Reader, error) {
|
func (s *ElevenLabsTTSService) SynthesizeStreamWithConfig(ctx context.Context, text string, config *VoiceConfig) (io.Reader, error) {
|
||||||
if s.apiKey == "" {
|
if s.authHeaderValue == "" && s.apiKey == "" {
|
||||||
return nil, fmt.Errorf("ElevenLabs API key not configured")
|
return nil, fmt.Errorf("TTS API key or auth not configured")
|
||||||
}
|
}
|
||||||
if s.voiceID == "" {
|
if s.voiceID == "" {
|
||||||
return nil, fmt.Errorf("ElevenLabs voice ID not configured")
|
return nil, fmt.Errorf("ElevenLabs voice ID not configured")
|
||||||
@@ -203,8 +243,9 @@ func (s *ElevenLabsTTSService) SynthesizeStreamWithConfig(ctx context.Context, t
|
|||||||
|
|
||||||
req.Header.Set("Accept", "audio/mpeg")
|
req.Header.Set("Accept", "audio/mpeg")
|
||||||
req.Header.Set("Content-Type", "application/json")
|
req.Header.Set("Content-Type", "application/json")
|
||||||
req.Header.Set("xi-api-key", s.apiKey)
|
if s.authHeaderValue != "" {
|
||||||
|
req.Header.Set(s.authHeaderName, s.authHeaderValue)
|
||||||
|
}
|
||||||
// Execute request
|
// Execute request
|
||||||
resp, err := s.httpClient.Do(req)
|
resp, err := s.httpClient.Do(req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -212,15 +253,44 @@ func (s *ElevenLabsTTSService) SynthesizeStreamWithConfig(ctx context.Context, t
|
|||||||
}
|
}
|
||||||
|
|
||||||
if resp.StatusCode != http.StatusOK {
|
if resp.StatusCode != http.StatusOK {
|
||||||
resp.Body.Close()
|
|
||||||
bodyBytes, _ := io.ReadAll(resp.Body)
|
bodyBytes, _ := io.ReadAll(resp.Body)
|
||||||
return nil, fmt.Errorf("ElevenLabs streaming API error: status %d, body: %s", resp.StatusCode, string(bodyBytes))
|
resp.Body.Close()
|
||||||
|
return nil, fmt.Errorf("TTS streaming API error: status %d, body: %s", resp.StatusCode, string(bodyBytes))
|
||||||
}
|
}
|
||||||
|
|
||||||
// Return stream reader (caller is responsible for closing)
|
// Return stream reader (caller is responsible for closing)
|
||||||
return resp.Body, nil
|
return resp.Body, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Health checks connectivity to the TTS backend. For Phoenix, expects GET {baseURL}/../health (or /health).
|
||||||
|
// For ElevenLabs (default base URL), this is a no-op and returns nil (no public health endpoint).
|
||||||
|
func (s *ElevenLabsTTSService) Health(ctx context.Context) error {
|
||||||
|
if s.baseURL == DefaultElevenLabsBaseURL {
|
||||||
|
return nil // ElevenLabs has no public health; skip to avoid unnecessary calls
|
||||||
|
}
|
||||||
|
u, err := url.Parse(s.baseURL)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("TTS base URL invalid: %w", err)
|
||||||
|
}
|
||||||
|
u.Path = path.Join(path.Dir(u.Path), "health")
|
||||||
|
req, err := http.NewRequestWithContext(ctx, "GET", u.String(), nil)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if s.authHeaderValue != "" {
|
||||||
|
req.Header.Set(s.authHeaderName, s.authHeaderValue)
|
||||||
|
}
|
||||||
|
resp, err := s.httpClient.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("TTS health check failed: %w", err)
|
||||||
|
}
|
||||||
|
resp.Body.Close()
|
||||||
|
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
||||||
|
return fmt.Errorf("TTS health returned status %d", resp.StatusCode)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
// GetVisemes returns viseme events for lip sync
|
// GetVisemes returns viseme events for lip sync
|
||||||
// ElevenLabs doesn't provide viseme data directly, so we use phoneme-to-viseme mapping
|
// ElevenLabs doesn't provide viseme data directly, so we use phoneme-to-viseme mapping
|
||||||
func (s *ElevenLabsTTSService) GetVisemes(ctx context.Context, text string) ([]VisemeEvent, error) {
|
func (s *ElevenLabsTTSService) GetVisemes(ctx context.Context, text string) ([]VisemeEvent, error) {
|
||||||
|
|||||||
@@ -2,7 +2,6 @@ package tts
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
|
||||||
"io"
|
"io"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -11,6 +10,8 @@ type Service interface {
|
|||||||
SynthesizeStream(ctx context.Context, text string) (io.Reader, error)
|
SynthesizeStream(ctx context.Context, text string) (io.Reader, error)
|
||||||
Synthesize(ctx context.Context, text string) ([]byte, error)
|
Synthesize(ctx context.Context, text string) ([]byte, error)
|
||||||
GetVisemes(ctx context.Context, text string) ([]VisemeEvent, error)
|
GetVisemes(ctx context.Context, text string) ([]VisemeEvent, error)
|
||||||
|
// Health checks connectivity to the TTS backend (e.g. Phoenix /health). No-op for mocks.
|
||||||
|
Health(ctx context.Context) error
|
||||||
}
|
}
|
||||||
|
|
||||||
// VisemeEvent represents a viseme (lip shape) event for lip sync
|
// VisemeEvent represents a viseme (lip shape) event for lip sync
|
||||||
@@ -52,6 +53,9 @@ func (s *MockTTSService) GetVisemes(ctx context.Context, text string) ([]VisemeE
|
|||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Health is a no-op for the mock (no backend).
|
||||||
|
func (s *MockTTSService) Health(ctx context.Context) error { return nil }
|
||||||
|
|
||||||
// ElevenLabsTTSService integrates with ElevenLabs (implementation in elevenlabs-adapter.go)
|
// ElevenLabsTTSService integrates with ElevenLabs (implementation in elevenlabs-adapter.go)
|
||||||
// This interface definition is kept for backwards compatibility
|
// This interface definition is kept for backwards compatibility
|
||||||
// The actual implementation is in elevenlabs-adapter.go
|
// The actual implementation is in elevenlabs-adapter.go
|
||||||
|
|||||||
Reference in New Issue
Block a user