Files
virtual-banker/docs/openapi.yaml

1138 lines
32 KiB
YAML

# Document header: specification version plus the API's title, version,
# overview text, support contact and license.
openapi: '3.0.3'
info:
  title: Virtual Banker API
  version: '1.0.0'
  # Literal block scalar: the line breaks below are kept verbatim.
  description: |
    REST API for the Virtual Banker platform, including voice operations (TTS/ASR),
    session management, conversation orchestration, and avatar control.
    Features:
    - Voice session management
    - Text-to-speech (TTS) synthesis with viseme support
    - Speech-to-text (ASR) transcription
    - Conversation orchestration
    - WebRTC signaling for real-time communication
    - Avatar animation and lip sync
  contact:
    name: Virtual Banker Support
    email: support@d-bis.org
  license:
    name: MIT
    url: 'https://opensource.org/licenses/MIT'
# Base URLs a client may target. Every entry already includes the /v1 prefix,
# so the keys under `paths` are written without it.
servers:
  - url: 'https://virtual-banker.d-bis.org/v1'
    description: Production server
  - url: 'https://sandbox-virtual-banker.d-bis.org/v1'
    description: Sandbox server
  - url: 'http://localhost:8081/v1'
    description: Development server
# Document-level default: operations require the BearerAuth scheme unless they
# declare their own `security` (the health check opts out with an empty list).
security:
  - BearerAuth: []
# Tag catalogue used to group operations.
tags:
  - name: Sessions
    description: Session management operations
  - name: Voice
    description: Voice operations (TTS/ASR)
  - name: Conversation
    description: Conversation orchestration
  # NOTE(review): no operation in the visible paths uses this tag — confirm
  # whether avatar endpoints are still to be documented.
  - name: Avatar
    description: Avatar control and animation
  - name: Providers
    description: Voice provider management
  - name: Health
    description: Health check endpoints
paths:
# Health check. `security: []` overrides the document-level default, so this
# operation declares no authentication requirement.
/health:
  get:
    tags: [Health]
    summary: Health check
    description: Returns the health status of the Virtual Banker API and service connections
    operationId: getHealth
    security: []
    responses:
      '200':
        description: Service is healthy
        content:
          application/json:
            schema:
              type: object
              properties:
                status:
                  type: string
                  example: 'healthy'
                # NOTE(review): nesting was reconstructed from context —
                # `database`/`redis` are assumed to sit inside `services` and
                # `timestamp` at the top level; confirm against the server.
                services:
                  type: object
                  properties:
                    asr:
                      type: object
                      properties:
                        provider:
                          type: string
                          example: 'mock'
                        status:
                          type: string
                          example: 'available'
                    tts:
                      type: object
                      properties:
                        provider:
                          type: string
                          example: 'mock'
                        status:
                          type: string
                          example: 'available'
                    llm:
                      type: object
                      properties:
                        provider:
                          type: string
                          example: 'mock'
                        status:
                          type: string
                          example: 'available'
                    database:
                      type: string
                      example: 'connected'
                    redis:
                      type: string
                      example: 'connected'
                timestamp:
                  type: string
                  format: date-time
# Session collection: POST creates a session, GET lists the active sessions of
# the authenticated user.
/sessions:
  post:
    tags: [Sessions]
    summary: Create session
    description: Creates a new virtual banker session
    operationId: createSession
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/CreateSessionRequest'
          # Media-type-level example (siblings of a $ref inside `schema` would be ignored).
          example:
            tenant_id: 'tenant-123'
            user_id: 'user-456'
            auth_assertion: 'jwt-token'
            portal_context:
              route: '/account'
              account_id: 'acc-789'
    responses:
      '201':
        description: Session created successfully
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/SessionResponse'
      '400':
        $ref: '#/components/responses/BadRequest'
      '401':
        $ref: '#/components/responses/Unauthorized'
      '500':
        $ref: '#/components/responses/InternalServerError'
  get:
    tags: [Sessions]
    summary: List sessions
    description: Returns a list of active sessions for the authenticated user
    operationId: listSessions
    responses:
      '200':
        description: List of sessions
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/SessionListResponse'
      '401':
        $ref: '#/components/responses/Unauthorized'
      '500':
        $ref: '#/components/responses/InternalServerError'
# Single-session resource: GET returns its details, DELETE ends it.
# Both operations take the shared `SessionId` path parameter.
/sessions/{sessionId}:
  get:
    tags: [Sessions]
    summary: Get session
    description: Returns session details
    operationId: getSession
    parameters:
      - $ref: '#/components/parameters/SessionId'
    responses:
      '200':
        description: Session details
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/SessionResponse'
      '401':
        $ref: '#/components/responses/Unauthorized'
      '404':
        $ref: '#/components/responses/NotFound'
      '500':
        $ref: '#/components/responses/InternalServerError'
  delete:
    tags: [Sessions]
    summary: End session
    description: Ends a virtual banker session
    operationId: endSession
    parameters:
      - $ref: '#/components/parameters/SessionId'
    responses:
      '200':
        description: Session ended
        content:
          application/json:
            schema:
              # Only the generic envelope is returned; no `data` payload is declared.
              $ref: '#/components/schemas/BaseResponse'
      '401':
        $ref: '#/components/responses/Unauthorized'
      '404':
        $ref: '#/components/responses/NotFound'
      '500':
        $ref: '#/components/responses/InternalServerError'
# Issues a refreshed ephemeral token for an existing session.
/sessions/{sessionId}/refresh-token:
  post:
    tags: [Sessions]
    summary: Refresh ephemeral token
    description: Refreshes the ephemeral token for a session
    operationId: refreshToken
    parameters:
      - $ref: '#/components/parameters/SessionId'
    responses:
      '200':
        description: Token refreshed
        content:
          application/json:
            schema:
              # The `success`/`timestamp` envelope comes from BaseResponse, the
              # same composition the named *Response schemas use; only `data`
              # is specific to this operation.
              allOf:
                - $ref: '#/components/schemas/BaseResponse'
                - type: object
                  properties:
                    data:
                      type: object
                      properties:
                        ephemeral_token:
                          type: string
                        expires_at:
                          type: string
                          format: date-time
      '401':
        $ref: '#/components/responses/Unauthorized'
      '404':
        $ref: '#/components/responses/NotFound'
      '500':
        $ref: '#/components/responses/InternalServerError'
# Creates a voice session for real-time TTS/ASR; the request's `session_id`
# names the parent session.
/voice/sessions:
  post:
    tags: [Voice]
    summary: Create voice session
    description: Creates a voice session for real-time TTS/ASR operations
    operationId: createVoiceSession
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/CreateVoiceSessionRequest'
          example:
            session_id: 'sess-abc123'
            provider:
              asr: 'deepgram'
              tts: 'elevenlabs'
            voice_config:
              voice_id: '21m00Tcm4TlvDq8ikWAM'
              model_id: 'eleven_multilingual_v2'
              stability: 0.5
              similarity_boost: 0.75
    responses:
      '201':
        description: Voice session created
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/VoiceSessionResponse'
      '400':
        $ref: '#/components/responses/BadRequest'
      '401':
        $ref: '#/components/responses/Unauthorized'
      '404':
        $ref: '#/components/responses/NotFound'
      '500':
        $ref: '#/components/responses/InternalServerError'
# One-shot speech-to-text. The audio may be sent either as a multipart file
# upload or as Base64 inside a JSON body.
/voice/sessions/{sessionId}/transcribe:
  post:
    tags: [Voice]
    summary: Transcribe audio (ASR)
    description: Transcribes audio to text using speech-to-text service
    operationId: transcribeAudio
    parameters:
      - $ref: '#/components/parameters/SessionId'
    requestBody:
      required: true
      # Both media types must live under ONE `content` map: a second `content`
      # key is a duplicate mapping key, and most parsers silently keep only the
      # last one, which would drop the multipart variant.
      content:
        multipart/form-data:
          schema:
            type: object
            required:
              - audio
            properties:
              audio:
                type: string
                format: binary
                description: Audio file (WAV, MP3, OGG, etc.)
              language:
                type: string
                description: Language code (optional, auto-detect if not provided)
                example: 'en-US'
              format:
                type: string
                enum: [wav, mp3, ogg, webm]
                default: 'wav'
              include_words:
                type: boolean
                description: Include word-level timestamps
                default: false
        application/json:
          schema:
            type: object
            required:
              - audio_data
            properties:
              audio_data:
                type: string
                format: byte
                description: Base64-encoded audio data
              language:
                type: string
                example: 'en-US'
              include_words:
                type: boolean
                default: false
    responses:
      '200':
        description: Transcription result
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/TranscriptionResponse'
      '400':
        $ref: '#/components/responses/BadRequest'
      '401':
        $ref: '#/components/responses/Unauthorized'
      '404':
        $ref: '#/components/responses/NotFound'
      '500':
        $ref: '#/components/responses/InternalServerError'
# Streaming speech-to-text: audio is uploaded as multipart form data and the
# transcription comes back as a Server-Sent Events stream.
/voice/sessions/{sessionId}/transcribe-stream:
  post:
    tags: [Voice]
    summary: Stream transcription (ASR)
    description: Streams audio for real-time transcription (returns SSE stream)
    operationId: transcribeAudioStream
    parameters:
      - $ref: '#/components/parameters/SessionId'
    requestBody:
      required: true
      content:
        multipart/form-data:
          schema:
            type: object
            required:
              - audio_stream
            properties:
              audio_stream:
                type: string
                format: binary
                description: Audio stream
              language:
                type: string
                example: 'en-US'
    responses:
      '200':
        description: Transcription event stream (SSE)
        content:
          text/event-stream:
            schema:
              type: string
              # In the SSE wire format a blank line terminates each event;
              # without it the `partial` and `final` frames below would be
              # parsed as a single event.
              example: |
                event: partial
                data: {"type":"partial","text":"Hello, how can I","confidence":0.95,"timestamp":1704067200}

                event: final
                data: {"type":"final","text":"Hello, how can I help you today?","confidence":0.98,"timestamp":1704067210,"words":[{"word":"Hello","start_time":0.0,"end_time":0.5},{"word":"how","start_time":0.6,"end_time":0.8}]}
      '400':
        $ref: '#/components/responses/BadRequest'
      '401':
        $ref: '#/components/responses/Unauthorized'
      '404':
        $ref: '#/components/responses/NotFound'
      '500':
        $ref: '#/components/responses/InternalServerError'
# One-shot text-to-speech. The result is available as a JSON envelope
# (Base64 audio plus viseme events) or as raw audio bytes.
/voice/sessions/{sessionId}/synthesize:
  post:
    tags: [Voice]
    summary: Synthesize speech (TTS)
    description: Converts text to speech audio
    operationId: synthesizeSpeech
    parameters:
      - $ref: '#/components/parameters/SessionId'
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/SynthesizeRequest'
          example:
            text: 'Hello, how can I help you today?'
            voice_config:
              voice_id: '21m00Tcm4TlvDq8ikWAM'
              model_id: 'eleven_multilingual_v2'
              stability: 0.5
              similarity_boost: 0.75
            format: 'mp3'
            sample_rate: 44100
    responses:
      '200':
        description: Audio synthesis result
        # All response media types are kept in ONE `content` map: a repeated
        # `content` key is a duplicate mapping key, and last-wins parsing
        # would discard the JSON variant.
        content:
          application/json:
            schema:
              # Envelope fields (`success`, `timestamp`) come from BaseResponse,
              # as in the named *Response schemas.
              allOf:
                - $ref: '#/components/schemas/BaseResponse'
                - type: object
                  properties:
                    data:
                      type: object
                      properties:
                        audio_data:
                          type: string
                          format: byte
                          description: Base64-encoded audio data
                        format:
                          type: string
                          example: 'mp3'
                        sample_rate:
                          type: integer
                          example: 44100
                        duration:
                          type: number
                          format: float
                          example: 2.5
                        visemes:
                          type: array
                          items:
                            $ref: '#/components/schemas/VisemeEvent'
          audio/mpeg:
            schema:
              type: string
              format: binary
          audio/wav:
            schema:
              type: string
              format: binary
      '400':
        $ref: '#/components/responses/BadRequest'
      '401':
        $ref: '#/components/responses/Unauthorized'
      '404':
        $ref: '#/components/responses/NotFound'
      '500':
        $ref: '#/components/responses/InternalServerError'
# Streaming text-to-speech: accepts text and returns the audio as a binary stream.
/voice/sessions/{sessionId}/synthesize-stream:
  post:
    tags: [Voice]
    summary: Stream speech synthesis (TTS)
    description: Streams text for real-time speech synthesis (returns audio stream)
    operationId: synthesizeSpeechStream
    parameters:
      - $ref: '#/components/parameters/SessionId'
    requestBody:
      required: true
      content:
        application/json:
          schema:
            # The body has exactly the fields of SynthesizeRequest (text,
            # voice_config, format, sample_rate with the same enums/defaults),
            # so the shared schema is referenced to keep the two in step.
            $ref: '#/components/schemas/SynthesizeRequest'
    responses:
      '200':
        description: Audio stream
        # NOTE(review): the request `format` enum also allows `pcm`, but no
        # media type is declared for it here — confirm how PCM is returned.
        content:
          audio/mpeg:
            schema:
              type: string
              format: binary
          audio/wav:
            schema:
              type: string
              format: binary
      '400':
        $ref: '#/components/responses/BadRequest'
      '401':
        $ref: '#/components/responses/Unauthorized'
      '404':
        $ref: '#/components/responses/NotFound'
      '500':
        $ref: '#/components/responses/InternalServerError'
# Returns viseme (lip shape) events for a piece of text, for avatar lip sync.
/voice/sessions/{sessionId}/visemes:
  post:
    tags: [Voice]
    summary: Get viseme events
    description: Returns viseme (lip shape) events for text, used for avatar lip sync
    operationId: getVisemes
    parameters:
      - $ref: '#/components/parameters/SessionId'
    requestBody:
      required: true
      content:
        application/json:
          schema:
            type: object
            required:
              - text
            properties:
              text:
                type: string
                example: 'Hello, how can I help you today?'
    responses:
      '200':
        description: Viseme events
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/VisemeResponse'
      '400':
        $ref: '#/components/responses/BadRequest'
      '401':
        $ref: '#/components/responses/Unauthorized'
      '404':
        $ref: '#/components/responses/NotFound'
      '500':
        $ref: '#/components/responses/InternalServerError'
# Reports the current state of a voice session and of its ASR/TTS pipelines.
/voice/sessions/{sessionId}/status:
  get:
    tags: [Voice]
    summary: Get voice session status
    description: Returns the status of a voice session
    operationId: getVoiceSessionStatus
    parameters:
      - $ref: '#/components/parameters/SessionId'
    responses:
      '200':
        description: Voice session status
        content:
          application/json:
            schema:
              # Envelope fields (`success`, `timestamp`) come from BaseResponse,
              # matching the composition used by the named *Response schemas.
              allOf:
                - $ref: '#/components/schemas/BaseResponse'
                - type: object
                  properties:
                    data:
                      type: object
                      properties:
                        session_id:
                          type: string
                        state:
                          type: string
                          enum: [idle, listening, thinking, speaking]
                        asr_status:
                          type: string
                          enum: [active, inactive, error]
                        tts_status:
                          type: string
                          enum: [active, inactive, error]
                        provider:
                          type: object
                          properties:
                            asr:
                              type: string
                            tts:
                              type: string
      '401':
        $ref: '#/components/responses/Unauthorized'
      '404':
        $ref: '#/components/responses/NotFound'
      '500':
        $ref: '#/components/responses/InternalServerError'
# Lists the TTS and ASR providers that are available.
/voice/providers:
  get:
    tags: [Providers]
    summary: List voice providers
    description: Returns a list of available TTS and ASR providers
    operationId: listVoiceProviders
    responses:
      '200':
        description: List of providers
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ProviderListResponse'
      '401':
        $ref: '#/components/responses/Unauthorized'
      '500':
        $ref: '#/components/responses/InternalServerError'
# Registers a callback URL for voice session events. This is the only visible
# operation that documents a 403 response.
/voice/webhooks:
  post:
    tags: [Voice]
    summary: Register webhook
    description: Registers a webhook URL for voice session events
    operationId: registerVoiceWebhook
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/RegisterWebhookRequest'
          example:
            url: 'https://api.example.com/webhooks/voice'
            events:
              - 'transcription.complete'
              - 'synthesis.complete'
              - 'session.state_changed'
            secret: 'webhook_secret_token'
    responses:
      '201':
        description: Webhook registered
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/WebhookResponse'
      '400':
        $ref: '#/components/responses/BadRequest'
      '401':
        $ref: '#/components/responses/Unauthorized'
      '403':
        $ref: '#/components/responses/Forbidden'
      '500':
        $ref: '#/components/responses/InternalServerError'
# WebSocket upgrade used for WebRTC signaling (SDP exchange, ICE candidates).
# Success is the HTTP 101 protocol switch rather than a 2xx response.
/realtime/{sessionId}:
  get:
    tags: [Conversation]
    summary: WebRTC signaling endpoint
    description: WebSocket endpoint for WebRTC signaling (SDP exchange, ICE candidates)
    operationId: webrtcSignaling
    parameters:
      - $ref: '#/components/parameters/SessionId'
    responses:
      '101':
        description: Switching protocols to WebSocket
        headers:
          Upgrade:
            schema:
              type: string
              example: 'websocket'
          Connection:
            schema:
              type: string
              example: 'Upgrade'
      '400':
        $ref: '#/components/responses/BadRequest'
      '401':
        $ref: '#/components/responses/Unauthorized'
      '404':
        $ref: '#/components/responses/NotFound'
components:
# HTTP bearer authentication; the token is either a JWT or an ephemeral
# session token.
securitySchemes:
  BearerAuth:
    type: http
    scheme: bearer
    bearerFormat: JWT
    description: JWT token or ephemeral session token
# Reusable path parameter shared by every `{sessionId}` route.
parameters:
  SessionId:
    name: sessionId
    in: path
    required: true
    description: Session ID
    schema:
      type: string
      example: 'sess-abc123'
schemas:
# Request body for creating a session; only `portal_context` is optional.
CreateSessionRequest:
  type: object
  required:
    - tenant_id
    - user_id
    - auth_assertion
  properties:
    tenant_id:
      type: string
      description: Tenant identifier
      example: 'tenant-123'
    user_id:
      type: string
      description: User identifier
      example: 'user-456'
    auth_assertion:
      type: string
      description: JWT authentication token
      example: 'jwt-token'
    portal_context:
      type: object
      description: Portal context for session
      properties:
        route:
          type: string
          example: '/account'
        account_id:
          type: string
          example: 'acc-789'
# Request body for creating a voice session under an existing parent session.
# Both provider selectors default to `mock`.
CreateVoiceSessionRequest:
  type: object
  required:
    - session_id
  properties:
    session_id:
      type: string
      description: Parent session ID
      example: 'sess-abc123'
    provider:
      type: object
      properties:
        asr:
          type: string
          enum: [mock, deepgram, google]
          default: 'mock'
          example: 'deepgram'
        tts:
          type: string
          enum: [mock, elevenlabs, azure]
          default: 'mock'
          example: 'elevenlabs'
    voice_config:
      $ref: '#/components/schemas/VoiceConfig'
# Voice tuning options. All fields are optional; the numeric knobs are
# constrained to the closed range 0.0-1.0.
VoiceConfig:
  type: object
  properties:
    voice_id:
      type: string
      description: Voice ID (provider-specific)
      example: '21m00Tcm4TlvDq8ikWAM'
    model_id:
      type: string
      description: Model ID (provider-specific)
      example: 'eleven_multilingual_v2'
    stability:
      type: number
      format: float
      description: Stability parameter (0.0-1.0)
      minimum: 0.0
      maximum: 1.0
      example: 0.5
    similarity_boost:
      type: number
      format: float
      description: Similarity boost parameter (0.0-1.0)
      minimum: 0.0
      maximum: 1.0
      example: 0.75
    style:
      type: number
      format: float
      description: Style parameter (0.0-1.0, ElevenLabs)
      minimum: 0.0
      maximum: 1.0
    use_speaker_boost:
      type: boolean
      description: Enable speaker boost (ElevenLabs)
# Text-to-speech request. Only `text` is required; output defaults to MP3 at
# 44100 Hz.
SynthesizeRequest:
  type: object
  required:
    - text
  properties:
    text:
      type: string
      description: Text to synthesize
      example: 'Hello, how can I help you today?'
    voice_config:
      $ref: '#/components/schemas/VoiceConfig'
    format:
      type: string
      enum: [mp3, wav, pcm]
      default: 'mp3'
      example: 'mp3'
    sample_rate:
      type: integer
      enum: [16000, 22050, 44100]
      default: 44100
      example: 44100
# Webhook registration payload: a target URL plus the event names to subscribe to.
RegisterWebhookRequest:
  type: object
  required:
    - url
    - events
  properties:
    url:
      type: string
      format: uri
      description: Webhook URL
      example: 'https://api.example.com/webhooks/voice'
    events:
      type: array
      description: Events to subscribe to
      items:
        type: string
        # Closed set of subscribable event names.
        enum:
          - transcription.complete
          - transcription.partial
          - synthesis.complete
          - session.state_changed
          - session.created
          - session.ended
      example:
        - 'transcription.complete'
        - 'synthesis.complete'
    secret:
      type: string
      description: Webhook secret for signature verification
      example: 'webhook_secret_token'
    active:
      type: boolean
      default: true
# A virtual banker session: identifiers, the ephemeral token, per-session
# configuration and lifetime timestamps.
Session:
  type: object
  properties:
    session_id:
      type: string
    tenant_id:
      type: string
    user_id:
      type: string
    ephemeral_token:
      type: string
    # NOTE(review): nesting was reconstructed from context — `avatar_enabled`,
    # `greeting`, `allowed_tools` and `policy` are assumed to belong to
    # `config`, with `expires_at`/`created_at` on the session itself; confirm.
    config:
      type: object
      properties:
        theme:
          type: object
          properties:
            primaryColor:
              type: string
        avatar_enabled:
          type: boolean
        greeting:
          type: string
        allowed_tools:
          type: array
          items:
            type: string
        policy:
          type: object
          properties:
            max_session_duration_minutes:
              type: integer
            rate_limit_per_minute:
              type: integer
            require_consent:
              type: boolean
    expires_at:
      type: string
      format: date-time
    created_at:
      type: string
      format: date-time
# BaseResponse envelope whose `data` is a single Session.
SessionResponse:
  allOf:
    - $ref: '#/components/schemas/BaseResponse'
    - type: object
      properties:
        data:
          $ref: '#/components/schemas/Session'
# BaseResponse envelope whose `data.sessions` is an array of Session objects.
SessionListResponse:
  allOf:
    - $ref: '#/components/schemas/BaseResponse'
    - type: object
      properties:
        data:
          type: object
          properties:
            sessions:
              type: array
              items:
                $ref: '#/components/schemas/Session'
# A voice session: its state, the selected ASR/TTS providers and the voice
# configuration in effect.
VoiceSession:
  type: object
  properties:
    session_id:
      type: string
    state:
      type: string
      enum: [idle, listening, thinking, speaking]
    provider:
      type: object
      properties:
        asr:
          type: string
        tts:
          type: string
    voice_config:
      $ref: '#/components/schemas/VoiceConfig'
    created_at:
      type: string
      format: date-time
# BaseResponse envelope whose `data` is a VoiceSession.
VoiceSessionResponse:
  allOf:
    - $ref: '#/components/schemas/BaseResponse'
    - type: object
      properties:
        data:
          $ref: '#/components/schemas/VoiceSession'
# One transcription result, either `partial` or `final`, with an optional
# word-level breakdown.
TranscriptionEvent:
  type: object
  properties:
    type:
      type: string
      enum: [partial, final]
    text:
      type: string
    confidence:
      type: number
      format: float
      minimum: 0.0
      maximum: 1.0
    # Integer timestamp (int64), unlike the date-time strings used by the
    # response envelopes. NOTE(review): unit is not stated here — confirm.
    timestamp:
      type: integer
      format: int64
    words:
      type: array
      items:
        $ref: '#/components/schemas/Word'
# A single recognized word with its start/end times and a confidence in 0.0-1.0.
Word:
  type: object
  properties:
    word:
      type: string
    start_time:
      type: number
      format: float
    end_time:
      type: number
      format: float
    confidence:
      type: number
      format: float
      minimum: 0.0
      maximum: 1.0
# BaseResponse envelope whose `data` is a TranscriptionEvent.
TranscriptionResponse:
  allOf:
    - $ref: '#/components/schemas/BaseResponse'
    - type: object
      properties:
        data:
          $ref: '#/components/schemas/TranscriptionEvent'
# One lip-shape interval: the viseme identifier, its start/end time in seconds
# and, optionally, the phoneme.
VisemeEvent:
  type: object
  properties:
    viseme:
      type: string
      description: Viseme identifier (e.g., "sil", "aa", "ee", "oh", "ou")
      example: 'aa'
    start_time:
      type: number
      format: float
      description: Start time in seconds
      example: 0.1
    end_time:
      type: number
      format: float
      description: End time in seconds
      example: 0.3
    phoneme:
      type: string
      description: Phoneme identifier (optional)
      example: '/a/'
# BaseResponse envelope whose `data` holds the text, its viseme events and a
# duration.
VisemeResponse:
  allOf:
    - $ref: '#/components/schemas/BaseResponse'
    - type: object
      properties:
        data:
          type: object
          properties:
            text:
              type: string
            visemes:
              type: array
              items:
                $ref: '#/components/schemas/VisemeEvent'
            duration:
              type: number
              format: float
# Descriptor of a voice provider: its kind (`asr` or `tts`), availability and
# configuration flags, and feature names.
Provider:
  type: object
  properties:
    id:
      type: string
    name:
      type: string
    type:
      type: string
      enum: [asr, tts]
    available:
      type: boolean
    configured:
      type: boolean
    features:
      type: array
      items:
        type: string
      example:
        - 'streaming'
        - 'word_timestamps'
        - 'custom_voice'
# BaseResponse envelope whose `data.providers` is an array of Provider objects.
ProviderListResponse:
  allOf:
    - $ref: '#/components/schemas/BaseResponse'
    - type: object
      properties:
        data:
          type: object
          properties:
            providers:
              type: array
              items:
                $ref: '#/components/schemas/Provider'
# A registered webhook as returned by the API. The registration `secret` is
# not among its properties.
Webhook:
  type: object
  properties:
    webhook_id:
      type: string
    url:
      type: string
    events:
      type: array
      items:
        type: string
    active:
      type: boolean
    created_at:
      type: string
      format: date-time
# BaseResponse envelope whose `data` is a Webhook.
WebhookResponse:
  allOf:
    - $ref: '#/components/schemas/BaseResponse'
    - type: object
      properties:
        data:
          $ref: '#/components/schemas/Webhook'
# Common success envelope; the *Response schemas extend it through `allOf`
# and add their own `data` member.
BaseResponse:
  type: object
  properties:
    success:
      type: boolean
      example: true
    timestamp:
      type: string
      format: date-time
# Error envelope used by every shared error response: `success` (example
# false) plus an `error` object with code, message and free-form details.
ErrorResponse:
  type: object
  properties:
    success:
      type: boolean
      example: false
    error:
      type: object
      properties:
        code:
          type: string
          example: 'VALIDATION_ERROR'
        message:
          type: string
          example: 'Invalid request parameters'
        details:
          type: object
    timestamp:
      type: string
      format: date-time
# Reusable error responses; each one carries the ErrorResponse schema as
# application/json.
responses:
  # 400
  BadRequest:
    description: Bad request - validation error
    content:
      application/json:
        schema:
          $ref: '#/components/schemas/ErrorResponse'
  # 401
  Unauthorized:
    description: Unauthorized - missing or invalid authentication
    content:
      application/json:
        schema:
          $ref: '#/components/schemas/ErrorResponse'
  # 403
  Forbidden:
    description: Forbidden - insufficient permissions
    content:
      application/json:
        schema:
          $ref: '#/components/schemas/ErrorResponse'
  # 404
  NotFound:
    description: Resource not found
    content:
      application/json:
        schema:
          $ref: '#/components/schemas/ErrorResponse'
  # 500
  InternalServerError:
    description: Internal server error
    content:
      application/json:
        schema:
          $ref: '#/components/schemas/ErrorResponse'