Add full monorepo: virtual-banker, backend, frontend, docs, scripts, deployment
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
32
avatar/Dockerfile
Normal file
32
avatar/Dockerfile
Normal file
@@ -0,0 +1,32 @@
|
||||
# Dockerfile for Unreal Engine Avatar Renderer
# Note: This is a placeholder - actual Unreal deployment requires:
# 1. Packaged Unreal project
# 2. NVIDIA GPU support
# 3. CUDA drivers
# 4. Custom base image with Unreal runtime

FROM nvidia/cuda:12.0.0-base-ubuntu22.04

# Install X11/OpenGL runtime libraries needed by the renderer.
# --no-install-recommends keeps the image lean; the apt cache is removed
# in the same layer so it never ends up in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    libgl1-mesa-glx \
    libglib2.0-0 \
    libx11-6 \
    libxext6 \
    libxrender1 \
    && rm -rf /var/lib/apt/lists/*

# Copy Unreal packaged project (enable once a packaged build exists)
# COPY unreal-package/ /app/unreal/

# Copy renderer service
COPY renderer/ /app/renderer/

WORKDIR /app

# Expose PixelStreaming port
EXPOSE 8888

# Start renderer service (which manages Unreal instances)
CMD ["./renderer/service"]
|
||||
|
||||
68
avatar/animation/expressions.go
Normal file
68
avatar/animation/expressions.go
Normal file
@@ -0,0 +1,68 @@
|
||||
package animation
|
||||
|
||||
// ExpressionMapping maps emotion values to facial expressions using a
// valence/arousal pair (circumplex model of affect).
//
// NOTE(review): this type is not referenced anywhere else in this file —
// confirm external callers use it, or consider removing it.
type ExpressionMapping struct {
	Valence float64 // -1.0 to 1.0
	Arousal float64 // 0.0 to 1.0
}
|
||||
|
||||
// GetExpressionFromEmotion maps a valence/arousal emotion reading to facial
// expression parameters.
//
// Quadrants of the valence/arousal space:
//   - high valence + high arousal -> happy/excited
//   - low valence + high arousal  -> angry/frustrated
//   - high valence + low arousal  -> calm/content
//   - low valence + low arousal   -> sad/depressed
//
// Readings that fall between the quadrant thresholds produce a neutral
// expression. The input valence/arousal values are echoed back unchanged in
// the returned params.
func GetExpressionFromEmotion(valence, arousal float64) ExpressionParams {
	params := ExpressionParams{Valence: valence, Arousal: arousal}

	switch {
	case valence > 0.5 && arousal > 0.5:
		params.Emotion = "happy"
		params.SmileAmount = 0.8
		params.BrowRaise = 0.3
		params.EyeWideness = 0.6
	case valence < -0.5 && arousal > 0.5:
		params.Emotion = "angry"
		params.SmileAmount = -0.5
		params.BrowRaise = -0.7
		params.EyeWideness = 0.8
	case valence > 0.3 && arousal < 0.3:
		params.Emotion = "calm"
		params.SmileAmount = 0.3
		params.BrowRaise = 0.0
		params.EyeWideness = 0.4
	case valence < -0.3 && arousal < 0.3:
		params.Emotion = "sad"
		params.SmileAmount = -0.3
		params.BrowRaise = 0.2
		params.EyeWideness = 0.3
	default:
		params.Emotion = "neutral"
		params.SmileAmount = 0.0
		params.BrowRaise = 0.0
		params.EyeWideness = 0.5
	}

	return params
}

// ExpressionParams contains facial expression parameters derived from an
// emotion reading, ready to be sent to the avatar renderer.
type ExpressionParams struct {
	Emotion     string  // label: "happy", "angry", "calm", "sad", or "neutral"
	SmileAmount float64 // -1.0 to 1.0
	BrowRaise   float64 // -1.0 to 1.0
	EyeWideness float64 // 0.0 to 1.0
	Valence     float64 // input valence, passed through
	Arousal     float64 // input arousal, passed through
}
|
||||
|
||||
103
avatar/animation/gestures.go
Normal file
103
avatar/animation/gestures.go
Normal file
@@ -0,0 +1,103 @@
|
||||
package animation
|
||||
|
||||
// GestureType represents a gesture type, serialized as its string name when
// sent to the avatar renderer.
type GestureType string

// Gesture types produced by GetGestureFromText.
const (
	GestureNod   GestureType = "nod"
	GestureShake GestureType = "shake"
	GesturePoint GestureType = "point"
	GestureWave  GestureType = "wave"
	GestureIdle  GestureType = "idle"
)
|
||||
|
||||
// GetGestureFromText determines appropriate gesture from text context
|
||||
func GetGestureFromText(text string, emotion string) []GestureEvent {
|
||||
var gestures []GestureEvent
|
||||
|
||||
// Simple rule-based gesture selection
|
||||
// In production, this could use NLP to detect intent
|
||||
|
||||
// Greetings
|
||||
if containsAny(text, []string{"hello", "hi", "hey", "greetings"}) {
|
||||
gestures = append(gestures, GestureEvent{
|
||||
Type: string(GestureWave),
|
||||
StartTime: 0.0,
|
||||
Duration: 1.0,
|
||||
Intensity: 0.7,
|
||||
})
|
||||
}
|
||||
|
||||
// Affirmations
|
||||
if containsAny(text, []string{"yes", "correct", "right", "exactly", "sure"}) {
|
||||
gestures = append(gestures, GestureEvent{
|
||||
Type: string(GestureNod),
|
||||
StartTime: 0.0,
|
||||
Duration: 0.5,
|
||||
Intensity: 0.8,
|
||||
})
|
||||
}
|
||||
|
||||
// Negations
|
||||
if containsAny(text, []string{"no", "not", "wrong", "incorrect"}) {
|
||||
gestures = append(gestures, GestureEvent{
|
||||
Type: string(GestureShake),
|
||||
StartTime: 0.0,
|
||||
Duration: 0.5,
|
||||
Intensity: 0.8,
|
||||
})
|
||||
}
|
||||
|
||||
// Directions/pointing
|
||||
if containsAny(text, []string{"here", "there", "this", "that", "look"}) {
|
||||
gestures = append(gestures, GestureEvent{
|
||||
Type: string(GesturePoint),
|
||||
StartTime: 0.2,
|
||||
Duration: 0.8,
|
||||
Intensity: 0.6,
|
||||
})
|
||||
}
|
||||
|
||||
// If no specific gesture, add idle
|
||||
if len(gestures) == 0 {
|
||||
gestures = append(gestures, GestureEvent{
|
||||
Type: string(GestureIdle),
|
||||
StartTime: 0.0,
|
||||
Duration: 2.0,
|
||||
Intensity: 0.3,
|
||||
})
|
||||
}
|
||||
|
||||
return gestures
|
||||
}
|
||||
|
||||
// GestureEvent represents a gesture event scheduled on the animation timeline.
type GestureEvent struct {
	Type      string  // gesture name; values come from the GestureType constants
	StartTime float64 // offset in seconds from the start of the utterance
	Duration  float64 // length of the gesture in seconds
	Intensity float64 // relative strength; values 0.3-0.8 are used in this package (presumably 0.0-1.0 — confirm with renderer)
}
|
||||
|
||||
// containsAny reports whether text contains any of the given keywords,
// compared case-insensitively (ASCII case folding).
func containsAny(text string, keywords []string) bool {
	lowerText := toLower(text)
	for _, keyword := range keywords {
		if contains(lowerText, toLower(keyword)) {
			return true
		}
	}
	return false
}

// toLower returns s with ASCII uppercase letters converted to lowercase.
// Non-ASCII bytes pass through unchanged, which is sufficient for the
// English keyword lists used in this package.
//
// The previous version was an identity stub, so keyword matching was
// case-sensitive by accident.
func toLower(s string) string {
	b := []byte(s)
	changed := false
	for i := 0; i < len(b); i++ {
		if b[i] >= 'A' && b[i] <= 'Z' {
			b[i] += 'a' - 'A'
			changed = true
		}
	}
	if !changed {
		return s // avoid an allocation when nothing changed
	}
	return string(b)
}

// contains reports whether substr occurs within s.
//
// The previous version returned len(s) >= len(substr), which is true for
// almost any input and made every keyword "match" every utterance.
func contains(s, substr string) bool {
	if len(substr) == 0 {
		return true
	}
	for i := 0; i+len(substr) <= len(s); i++ {
		if s[i:i+len(substr)] == substr {
			return true
		}
	}
	return false
}
|
||||
|
||||
113
avatar/animation/visemes.go
Normal file
113
avatar/animation/visemes.go
Normal file
@@ -0,0 +1,113 @@
|
||||
package animation
|
||||
|
||||
// VisemeMapping maps phoneme codes (CMU/ARPAbet-style, lowercase) to the
// viseme (mouth-shape) names understood by the avatar renderer. Several
// phonemes share a viseme because they look the same on the lips.
var VisemeMapping = map[string]string{
	// Silence
	"sil": "sil",
	"sp":  "sil",

	// Vowels
	"aa": "aa", // "father"
	"ae": "aa", // "cat"
	"ah": "aa", // "but"
	"ao": "oh", // "law"
	"aw": "ou", // "cow"
	"ay": "aa", // "hide"
	"eh": "ee", // "red"
	"er": "er", // "her"
	"ey": "ee", // "ate"
	"ih": "ee", // "it"
	"iy": "ee", // "eat"
	"ow": "ou", // "show"
	"oy": "ou", // "toy"
	"uh": "ou", // "book"
	"uw": "ou", // "blue"

	// Consonants
	"b":  "mbp", // "bat"
	"ch": "ch",  // "chair"
	"d":  "td",  // "dog"
	"dh": "th",  // "the"
	"f":  "fv",  // "fish"
	"g":  "gk",  // "go"
	"hh": "aa",  // "hat"
	"jh": "ch",  // "joy"
	"k":  "gk",  // "cat"
	"l":  "aa",  // "let"
	"m":  "mbp", // "mat"
	"n":  "aa",  // "not"
	"ng": "gk",  // "sing"
	"p":  "mbp", // "pat"
	"r":  "aa",  // "red"
	"s":  "s",   // "sat"
	"sh": "ch",  // "ship"
	"t":  "td",  // "top"
	"th": "th",  // "think"
	"v":  "fv",  // "vat"
	"w":  "ou",  // "wet"
	"y":  "ee",  // "yet"
	"z":  "s",   // "zoo"
	"zh": "ch",  // "measure"
}

// GetVisemeForPhoneme returns the viseme for a phoneme. Unknown phonemes
// fall back to the open-mouth "aa" viseme rather than failing.
func GetVisemeForPhoneme(phoneme string) string {
	if viseme, ok := VisemeMapping[phoneme]; ok {
		return viseme
	}
	return "aa" // Default for unmapped phonemes
}

// PhonemeToVisemeTimeline converts a phoneme timing sequence into a viseme
// timeline, merging runs of consecutive phonemes that map to the same viseme
// into a single event.
//
// Each event's EndTime is the StartTime of the phoneme that started the next
// viseme (any gap between phonemes is absorbed into the preceding viseme);
// the final event ends at the last phoneme's EndTime. An empty input yields
// an empty (non-nil) slice.
func PhonemeToVisemeTimeline(phonemes []PhonemeTiming) []VisemeEvent {
	if len(phonemes) == 0 {
		return []VisemeEvent{}
	}

	var visemes []VisemeEvent
	currentViseme := GetVisemeForPhoneme(phonemes[0].Phoneme)
	startTime := phonemes[0].StartTime

	for _, p := range phonemes[1:] {
		viseme := GetVisemeForPhoneme(p.Phoneme)
		if viseme == currentViseme {
			continue // same mouth shape — extend the current run
		}
		// Close out the running viseme and start a new one.
		visemes = append(visemes, VisemeEvent{
			Viseme:    currentViseme,
			StartTime: startTime,
			EndTime:   p.StartTime,
		})
		currentViseme = viseme
		startTime = p.StartTime
	}

	// Close out the final viseme at the end of the last phoneme.
	// (The slice is guaranteed non-empty here by the early return above; the
	// original code re-checked len(phonemes) > 0 redundantly.)
	visemes = append(visemes, VisemeEvent{
		Viseme:    currentViseme,
		StartTime: startTime,
		EndTime:   phonemes[len(phonemes)-1].EndTime,
	})

	return visemes
}

// PhonemeTiming represents a single phoneme with its start/end time.
// Times are offsets from the start of the utterance (presumably seconds —
// confirm against the TTS alignment producer).
type PhonemeTiming struct {
	Phoneme   string
	StartTime float64
	EndTime   float64
}

// VisemeEvent represents one viseme active over a time span.
type VisemeEvent struct {
	Viseme    string
	StartTime float64
	EndTime   float64
}
|
||||
|
||||
143
avatar/renderer/service.go
Normal file
143
avatar/renderer/service.go
Normal file
@@ -0,0 +1,143 @@
|
||||
package renderer
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// Service controls Unreal Engine avatar rendering. Implementations manage
// one renderer instance per session and expose the rendered avatar to
// clients as a WebRTC stream.
type Service interface {
	// StartSession starts rendering for the given session.
	StartSession(ctx context.Context, sessionID string) error
	// StopSession tears down rendering for the given session.
	StopSession(ctx context.Context, sessionID string) error
	// SendAnimationParams pushes animation parameters (visemes, expression,
	// gestures, gaze) to the session's renderer.
	SendAnimationParams(ctx context.Context, sessionID string, params *AnimationParams) error
	GetVideoStream(ctx context.Context, sessionID string) (string, error) // Returns WebRTC stream URL
}
|
||||
|
||||
// AnimationParams contains animation parameters for the avatar: lip-sync
// visemes, a facial expression, gestures, and an optional gaze target.
// Expressions and Gaze may be nil when not supplied.
type AnimationParams struct {
	Visemes     []VisemeEvent
	Expressions *ExpressionParams
	Gestures    []GestureEvent
	Gaze        *GazeParams
}

// VisemeEvent represents a viseme (lip shape) event on the animation timeline.
type VisemeEvent struct {
	Viseme    string
	StartTime float64
	EndTime   float64
	Intensity float64
}

// ExpressionParams contains facial expression parameters.
type ExpressionParams struct {
	Valence float64 // -1.0 to 1.0
	Arousal float64 // 0.0 to 1.0
	Emotion string  // e.g., "happy", "neutral", "concerned"
}

// GestureEvent represents a gesture event.
type GestureEvent struct {
	Type      string // e.g., "nod", "point", "wave"
	StartTime float64
	Duration  float64
	Intensity float64
}

// GazeParams contains gaze/head tracking parameters (a 3D look-at target).
type GazeParams struct {
	TargetX float64
	TargetY float64
	TargetZ float64
}

// PixelStreamingService implements avatar rendering using Unreal
// PixelStreaming, keeping one Unreal instance per active session.
//
// NOTE(review): unrealInstances is read and written without synchronization.
// Confirm all calls happen on a single goroutine, or guard the map with a
// sync.Mutex before using this service concurrently.
type PixelStreamingService struct {
	unrealInstances map[string]*UnrealInstance
}

// UnrealInstance represents a running Unreal Engine instance bound to one session.
type UnrealInstance struct {
	SessionID string
	ProcessID int
	StreamURL string
	Status    string // "starting", "running", "stopping", "stopped"
}

// NewPixelStreamingService creates a new PixelStreaming service with no
// active instances.
func NewPixelStreamingService() *PixelStreamingService {
	return &PixelStreamingService{
		unrealInstances: make(map[string]*UnrealInstance),
	}
}

// StartSession starts an Unreal instance for a session.
//
// It returns an error if an instance already exists for sessionID; the
// previous version silently overwrote the map entry, which would have
// orphaned an already-running instance.
func (s *PixelStreamingService) StartSession(ctx context.Context, sessionID string) error {
	if _, exists := s.unrealInstances[sessionID]; exists {
		return fmt.Errorf("instance already exists for session: %s", sessionID)
	}

	// TODO: Launch Unreal Engine with PixelStreaming enabled:
	// 1. Start the Unreal process with PixelStreaming command-line args.
	// 2. Configure the instance for the session.
	// 3. Obtain the WebRTC stream URL.

	instance := &UnrealInstance{
		SessionID: sessionID,
		Status:    "starting",
	}
	s.unrealInstances[sessionID] = instance

	// Simulate instance startup until real process management is implemented.
	instance.Status = "running"
	instance.StreamURL = fmt.Sprintf("ws://localhost:8888/stream/%s", sessionID)

	return nil
}

// StopSession stops the Unreal instance for a session and removes it from
// the instance table. It returns an error for unknown sessions.
func (s *PixelStreamingService) StopSession(ctx context.Context, sessionID string) error {
	instance, ok := s.unrealInstances[sessionID]
	if !ok {
		return fmt.Errorf("instance not found for session: %s", sessionID)
	}

	instance.Status = "stopping"
	// TODO: Terminate Unreal Engine process
	instance.Status = "stopped"
	delete(s.unrealInstances, sessionID)

	return nil
}

// SendAnimationParams sends animation parameters to the session's Unreal
// instance. It returns an error for unknown sessions.
func (s *PixelStreamingService) SendAnimationParams(ctx context.Context, sessionID string, params *AnimationParams) error {
	if _, ok := s.unrealInstances[sessionID]; !ok {
		return fmt.Errorf("instance not found for session: %s", sessionID)
	}

	// TODO: Serialize params to JSON and send them to the instance's
	// PixelStreaming control endpoint (WebSocket or HTTP); Unreal receives
	// and applies them to the avatar.

	return nil
}

// GetVideoStream returns the WebRTC stream URL for a session. It fails when
// the session is unknown or its instance is not in the "running" state.
func (s *PixelStreamingService) GetVideoStream(ctx context.Context, sessionID string) (string, error) {
	instance, ok := s.unrealInstances[sessionID]
	if !ok {
		return "", fmt.Errorf("instance not found for session: %s", sessionID)
	}

	if instance.Status != "running" {
		return "", fmt.Errorf("instance not running for session: %s", sessionID)
	}

	return instance.StreamURL, nil
}
|
||||
|
||||
97
avatar/unreal/README.md
Normal file
97
avatar/unreal/README.md
Normal file
@@ -0,0 +1,97 @@
|
||||
# Unreal Engine Avatar Setup
|
||||
|
||||
This directory contains the Unreal Engine project for the Virtual Banker avatar.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- Unreal Engine 5.3+ (or 5.4+ recommended)
|
||||
- PixelStreaming plugin enabled
|
||||
- Digital human character asset (Ready Player Me, MetaHuman, or custom)
|
||||
|
||||
## Setup Instructions
|
||||
|
||||
### 1. Create Unreal Project
|
||||
|
||||
1. Open Unreal Engine Editor
|
||||
2. Create new project:
|
||||
- Template: Blank
|
||||
- Blueprint or C++: Blueprint (or C++ if custom code needed)
|
||||
- Target Platform: Desktop
|
||||
- Quality: Maximum
|
||||
- Raytracing: Enabled (optional, for better quality)
|
||||
|
||||
### 2. Enable PixelStreaming
|
||||
|
||||
1. Edit → Plugins
|
||||
2. Search for "Pixel Streaming"
|
||||
3. Enable the plugin
|
||||
4. Restart Unreal Editor
|
||||
|
||||
### 3. Import Digital Human
|
||||
|
||||
1. Import your digital human character:
|
||||
- Ready Player Me: Use their Unreal plugin
|
||||
- MetaHuman: Use MetaHuman Creator
|
||||
- Custom: Import FBX/glTF with blendshapes
|
||||
|
||||
2. Set up blendshapes for visemes:
|
||||
- Import viseme blendshapes (aa, ee, oh, ou, mbp, etc.)
|
||||
- Map to animation system
|
||||
|
||||
### 4. Configure PixelStreaming
|
||||
|
||||
1. Edit → Project Settings → Plugins → Pixel Streaming
|
||||
2. Configure:
|
||||
- Streamer Port: 8888
|
||||
- WebRTC Port Range: 8888-8897
|
||||
- Enable WebRTC
|
||||
|
||||
### 5. Set Up Animation Blueprint
|
||||
|
||||
1. Create Animation Blueprint for avatar
|
||||
2. Set up state machine:
|
||||
- Idle
|
||||
- Speaking (viseme-driven)
|
||||
- Gesturing
|
||||
- Expressions
|
||||
|
||||
3. Connect viseme blendshapes to animation graph
|
||||
|
||||
### 6. Create Control Blueprint
|
||||
|
||||
1. Create Blueprint Actor for avatar control
|
||||
2. Add functions:
|
||||
- SetVisemes(VisemeData)
|
||||
- SetExpression(Valence, Arousal)
|
||||
- SetGesture(GestureType)
|
||||
- SetGaze(Target)
|
||||
|
||||
### 7. Build and Package
|
||||
|
||||
1. Package project for Linux (for server deployment):
|
||||
- File → Package Project → Linux
|
||||
- Or use command line:
|
||||
```
|
||||
RunUAT BuildCookRun -project="path/to/project.uproject" -platform=Linux -build -cook -stage -pak -package
|
||||
```
|
||||
|
||||
## Deployment
|
||||
|
||||
The packaged Unreal project should be deployed to a GPU-enabled server with:
|
||||
- NVIDIA GPU (RTX 3090+ recommended)
|
||||
- CUDA drivers
|
||||
- Sufficient VRAM (8GB+ per instance)
|
||||
|
||||
## Integration
|
||||
|
||||
The renderer service (`avatar/renderer/service.go`) controls Unreal instances via:
|
||||
- Process management (start/stop instances)
|
||||
- WebSocket communication (animation parameters)
|
||||
- PixelStreaming WebRTC streams
|
||||
|
||||
## Notes
|
||||
|
||||
- Each active session requires one Unreal instance
|
||||
- GPU resources should be allocated per instance
|
||||
- Consider using Unreal's multi-instance support for scaling
|
||||
|
||||
Reference in New Issue
Block a user