# Performance Optimization Guide
This guide provides comprehensive strategies for optimizing NowYouSeeMe performance across all system components. Follow these guidelines to achieve the best possible performance for your specific use case.
## 🎯 Performance Targets
### Real-time Requirements
| Metric | Target | Acceptable Range | Critical |
|--------|--------|------------------|----------|
| **Latency** | <20ms | 15-25ms | >30ms |
| **Accuracy** | <10cm | 8-15cm | >20cm |
| **Frame Rate** | 30-60 FPS | 25-60 FPS | <20 FPS |
| **CSI Rate** | ≥100 pkt/s | 80-120 pkt/s | <50 pkt/s |
### Resource Utilization
| Component | CPU Target | GPU Target | Memory Target |
|-----------|------------|------------|---------------|
| **Camera Capture** | <10% | N/A | <500MB |
| **CSI Processing** | <15% | N/A | <1GB |
| **Vision SLAM** | <40% | <60% | <2GB |
| **RF SLAM** | <20% | N/A | <1GB |
| **Sensor Fusion** | <15% | <20% | <1GB |
| **Rendering** | <10% | <80% | <2GB |
## 🔧 Hardware Optimization
### GPU Configuration
#### NVIDIA GPU Setup
```bash
# Check GPU status
nvidia-smi
# Set GPU power management
sudo nvidia-smi -pm 1
# Set GPU memory allocation
export CUDA_VISIBLE_DEVICES=0
export CUDA_MEMORY_FRACTION=0.8
# Optimize GPU settings
nvidia-settings --assign GPUPowerMizerMode=1
```
#### GPU Memory Optimization
```python
# In your application
import torch
import cupy as cp
# Set memory fraction
torch.cuda.set_per_process_memory_fraction(0.8)
# Clear cache periodically
torch.cuda.empty_cache()
cp.get_default_memory_pool().free_all_blocks()
```
### CPU Optimization
#### Multi-threading Configuration
```python
# Configure thread pools
import multiprocessing as mp
# Set optimal thread count
optimal_threads = min(mp.cpu_count(), 8)
mp.set_start_method('spawn', force=True)
# Configure OpenMP
import os
os.environ['OMP_NUM_THREADS'] = str(optimal_threads)
os.environ['MKL_NUM_THREADS'] = str(optimal_threads)
```
#### CPU Affinity
```bash
# Set CPU affinity for critical processes
sudo taskset -cp 0-3 <process_id>
# Or in Python
import os
os.sched_setaffinity(0, {0, 1, 2, 3})
```
### Memory Optimization
#### Memory Management
```python
# Monitor memory usage
import psutil
import gc
def optimize_memory():
    """Free cached memory and report this process's resident set size.

    Runs a full garbage-collection pass, releases cached CUDA allocations
    when a GPU is actually usable, then prints and returns the process RSS
    in megabytes.

    Returns:
        float: current resident set size in MB.
    """
    # Reclaim unreachable Python objects first so the RSS reading below
    # reflects memory that is genuinely still in use.
    gc.collect()
    # Guard the CUDA call: empty_cache() raises on a CPU-only torch build
    # or a machine without a GPU.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    process = psutil.Process()
    memory_mb = process.memory_info().rss / 1024 / 1024
    print(f"Memory usage: {memory_mb:.1f} MB")
    return memory_mb
```
#### Memory Pooling
```python
# Use memory pools for frequent allocations
import numpy as np
from memory_profiler import profile
class MemoryPool:
    """Recycle NumPy arrays to avoid repeated allocations.

    Arrays handed back via :meth:`return_array` are kept (flattened) and
    reused by :meth:`get_array` when their element count and dtype match
    the request; otherwise a fresh zero-filled array is allocated.
    Note: reused arrays keep their old contents (they are not re-zeroed).
    """

    def __init__(self, size=1000):
        # `size` bounds how many arrays are retained; beyond that,
        # returned arrays are simply dropped for the GC to collect.
        self.pool = []
        self.size = size

    def get_array(self, shape, dtype=np.float32):
        """Return an array of the given shape/dtype, reusing a pooled one if possible."""
        wanted = int(np.prod(shape))
        # reshape() only works when element counts match, and reusing an
        # array of a different dtype would silently change the caller's
        # data type — the original unconditional pop()/reshape() crashed
        # whenever sizes differed.
        for i, candidate in enumerate(self.pool):
            if candidate.size == wanted and candidate.dtype == np.dtype(dtype):
                return self.pool.pop(i).reshape(shape)
        return np.zeros(shape, dtype=dtype)

    def return_array(self, array):
        """Hand an array back to the pool (dropped silently if the pool is full)."""
        if len(self.pool) < self.size:
            self.pool.append(array.flatten())
## 📊 Performance Monitoring
### Real-time Monitoring
```python
import time
import threading
from collections import deque
class PerformanceMonitor:
    """Sample system/application metrics on a background thread.

    The last 100 samples of each metric are kept in bounded deques;
    callers read averages, a pass/fail check against the real-time
    targets, and human-readable tuning recommendations.
    """

    def __init__(self):
        self.metrics = {
            'latency': deque(maxlen=100),
            'fps': deque(maxlen=100),
            'accuracy': deque(maxlen=100),
            'cpu_usage': deque(maxlen=100),
            'gpu_usage': deque(maxlen=100),
            'memory_usage': deque(maxlen=100)
        }
        self.running = False
        self.monitor_thread = None
        # NVML state cached so nvmlInit() runs once, not on every sample.
        self._pynvml = None
        self._nvml_handle = None
        self._nvml_failed = False

    def start_monitoring(self):
        """Start the sampling thread (no-op if it is already running)."""
        if self.running:
            return
        self.running = True
        # Daemon thread: a forgotten stop_monitoring() cannot hang shutdown.
        self.monitor_thread = threading.Thread(target=self._monitor_loop, daemon=True)
        self.monitor_thread.start()

    def stop_monitoring(self):
        """Stop the sampling thread and wait for it to exit."""
        self.running = False
        if self.monitor_thread:
            self.monitor_thread.join()
            self.monitor_thread = None

    def _monitor_loop(self):
        """Collect one sample of every metric at roughly 10 Hz."""
        while self.running:
            self._collect_metrics()
            time.sleep(0.1)

    def _collect_metrics(self):
        """Append the current CPU, memory, and GPU utilization samples."""
        self.metrics['cpu_usage'].append(psutil.cpu_percent())
        self.metrics['memory_usage'].append(psutil.virtual_memory().percent)
        self.metrics['gpu_usage'].append(self._sample_gpu_usage())

    def _sample_gpu_usage(self):
        """Return GPU utilization in percent, or 0 when NVML is unavailable."""
        if self._nvml_failed:
            return 0
        try:
            if self._nvml_handle is None:
                import pynvml
                # Initialize NVML exactly once and cache the device handle;
                # the original re-ran nvmlInit() on every 100 ms sample.
                pynvml.nvmlInit()
                self._pynvml = pynvml
                self._nvml_handle = pynvml.nvmlDeviceGetHandleByIndex(0)
            return self._pynvml.nvmlDeviceGetUtilizationRates(self._nvml_handle).gpu
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt is not
            # swallowed; remember the failure so we stop retrying forever.
            self._nvml_failed = True
            return 0

    def get_average_metrics(self):
        """Return the mean of each metric over its buffered samples (0 if empty)."""
        return {
            metric: sum(values) / len(values) if values else 0
            for metric, values in self.metrics.items()
        }

    def get_performance_report(self):
        """Summarize current averages, target compliance, and recommendations."""
        avg_metrics = self.get_average_metrics()
        return {
            'status': 'optimal' if self._check_targets(avg_metrics) else 'needs_optimization',
            'metrics': avg_metrics,
            'recommendations': self._generate_recommendations(avg_metrics)
        }

    def _check_targets(self, metrics):
        """True when latency (<20 ms), FPS (>30), and accuracy (<10 cm) all meet targets."""
        return (
            metrics.get('latency', 0) < 20 and
            metrics.get('fps', 0) > 30 and
            metrics.get('accuracy', 0) < 10
        )

    def _generate_recommendations(self, metrics):
        """Return one tuning suggestion for every metric that is over budget."""
        recommendations = []
        if metrics.get('latency', 0) > 20:
            recommendations.append("High latency detected - consider reducing processing load")
        if metrics.get('fps', 0) < 30:
            recommendations.append("Low frame rate - check GPU utilization and rendering settings")
        if metrics.get('cpu_usage', 0) > 80:
            recommendations.append("High CPU usage - consider reducing thread count or processing quality")
        if metrics.get('memory_usage', 0) > 80:
            recommendations.append("High memory usage - consider clearing caches or reducing buffer sizes")
        return recommendations
```
### Profiling Tools
#### CPU Profiling
```python
import cProfile
import pstats
import io
def profile_function(func, *args, **kwargs):
    """Run *func* under cProfile, print its top-20 cumulative hot spots,
    and return whatever the call produced."""
    profiler = cProfile.Profile()
    profiler.enable()
    result = func(*args, **kwargs)
    profiler.disable()
    # Render the stats into a string buffer so they come out as one print.
    buffer = io.StringIO()
    stats = pstats.Stats(profiler, stream=buffer)
    stats.sort_stats('cumulative').print_stats(20)
    print(buffer.getvalue())
    return result
```
#### Memory Profiling
```python
from memory_profiler import profile
@profile  # memory_profiler decorator: reports per-line memory deltas when run
def memory_intensive_function():
    """Placeholder demonstrating line-by-line memory profiling.

    Replace the body with the allocation-heavy code you want to inspect,
    then execute the script under ``python -m memory_profiler``.
    """
    # Your memory-intensive code here
    pass
```
#### GPU Profiling
```python
import torch
def profile_gpu_operations():
    """Profile CPU (and, when available, CUDA) operations with torch.profiler.

    Prints a table of the hottest operators sorted by total device time.
    Falls back to CPU-only profiling on machines without a CUDA device —
    the original requested CUDA activity and sorted by "cuda_time_total"
    unconditionally, which fails on CPU-only builds.
    """
    use_cuda = torch.cuda.is_available()
    activities = [torch.profiler.ProfilerActivity.CPU]
    if use_cuda:
        activities.append(torch.profiler.ProfilerActivity.CUDA)
    with torch.profiler.profile(
        activities=activities,
        record_shapes=True,
        with_stack=True
    ) as prof:
        # Your GPU operations here
        pass
    # Pick the sort column that matches what was actually recorded.
    sort_key = "cuda_time_total" if use_cuda else "cpu_time_total"
    print(prof.key_averages().table(sort_by=sort_key))
```
## ⚡ Algorithm Optimization
### Vision SLAM Optimization
#### Feature Detection Optimization
```python
import cv2
import numpy as np
class OptimizedFeatureDetector:
    """FAST-based keypoint detector capped at a fixed feature budget."""

    def __init__(self, max_features=1000, quality_level=0.01):
        self.max_features = max_features
        self.quality_level = quality_level
        self.detector = cv2.FastFeatureDetector_create(
            threshold=10,
            nonmaxSuppression=True
        )

    def detect_features(self, image):
        """Detect keypoints on *image*, keeping only the strongest responses."""
        # FAST needs a single-channel input; collapse color frames first.
        if len(image.shape) == 3:
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        else:
            gray = image
        keypoints = self.detector.detect(gray)
        # Enforce the feature budget, preferring the strongest responses.
        if len(keypoints) > self.max_features:
            by_strength = sorted(keypoints, key=lambda kp: kp.response, reverse=True)
            keypoints = by_strength[:self.max_features]
        return keypoints
```
#### Tracking Optimization
```python
class OptimizedTracker:
    """Frame-to-frame feature tracker using pyramidal Lucas-Kanade optical flow."""

    def __init__(self):
        # State from the previous frame; None until the first call seeds it.
        self.prev_frame = None
        self.prev_keypoints = None
        self.prev_descriptors = None

    def track_features(self, frame, keypoints, descriptors):
        """Track the previous frame's keypoints into *frame*.

        On the first call this records the detections and echoes them back.
        Afterwards it propagates the previous keypoints with optical flow,
        keeps only the points the flow solver marked as found, and returns
        those tracked keypoints with their surviving descriptors. Falls
        back to the fresh detections when there was nothing to track.
        """
        if self.prev_frame is None:
            self.prev_frame = frame
            self.prev_keypoints = keypoints
            self.prev_descriptors = descriptors
            return keypoints, descriptors
        if len(self.prev_keypoints) > 0:
            prev_pts = np.float32([kp.pt for kp in self.prev_keypoints]).reshape(-1, 1, 2)
            curr_pts, status, error = cv2.calcOpticalFlowPyrLK(
                self.prev_frame, frame, prev_pts, None
            )
            found = status.ravel() == 1
            # Fixes two bugs in the original: (1) Python lists of KeyPoint
            # objects cannot be boolean-mask indexed, and (2) the tracked
            # coordinates live in curr_pts, not in the fresh detections.
            good_new = [
                cv2.KeyPoint(float(pt[0][0]), float(pt[0][1]), kp.size)
                for pt, kp, ok in zip(curr_pts, self.prev_keypoints, found)
                if ok
            ]
            good_descriptors = (
                self.prev_descriptors[found]
                if self.prev_descriptors is not None else None
            )
            self.prev_frame = frame
            self.prev_keypoints = good_new
            self.prev_descriptors = good_descriptors
            return good_new, good_descriptors
        return keypoints, descriptors
```
### RF SLAM Optimization
#### CSI Processing Optimization
```python
import numpy as np
from scipy import signal
class OptimizedCSIProcessor:
    """Windowed-FFT CSI pre-processing plus a simplified MUSIC AoA estimator."""

    def __init__(self, sample_rate=1000, window_size=64):
        self.sample_rate = sample_rate
        self.window_size = window_size
        # Hann window precomputed once and reused for every packet.
        self.window = signal.windows.hann(window_size)

    def process_csi_packet(self, csi_data):
        """Window one CSI packet and return its positive-frequency spectrum.

        Assumes len(csi_data) == window_size (element-wise multiply with
        the precomputed window) — TODO confirm against the capture code.
        """
        windowed_data = csi_data * self.window
        # Round down to a power of two so the FFT takes its fast path.
        fft_size = 2 ** int(np.log2(len(windowed_data)))
        spectrum = np.fft.fft(windowed_data, fft_size)
        # Keep only the first half of the spectrum (positive frequencies).
        return spectrum[:fft_size // 2]

    def estimate_aoa(self, csi_packets):
        """Estimate the dominant angle-of-arrival index from several packets.

        NOTE(review): the 4-element steering vector in _music_spectrum
        implies the correlation matrix should be 4x4 (four packets /
        antennas) — verify against the caller.
        """
        processed_packets = [self.process_csi_packet(packet) for packet in csi_packets]
        correlation_matrix = np.corrcoef(processed_packets)
        # eigh returns eigenvalues in ascending order, so the leading
        # columns span the noise subspace (3 signal sources assumed).
        eigenvalues, eigenvectors = np.linalg.eigh(correlation_matrix)
        noise_subspace = eigenvectors[:, :-3]
        aoa_spectrum = self._music_spectrum(noise_subspace)
        return np.argmax(aoa_spectrum)

    def _music_spectrum(self, noise_subspace):
        """Evaluate the MUSIC pseudospectrum over -90..90 deg for a 4-element ULA."""
        angles = np.linspace(-np.pi / 2, np.pi / 2, 180)
        spectrum = np.zeros(len(angles))
        for i, angle in enumerate(angles):
            steering_vector = np.exp(1j * 2 * np.pi * np.arange(4) * np.sin(angle))
            # The noise-subspace projection is complex; use its magnitude.
            # The original assigned the raw complex value into this real
            # array, silently discarding the imaginary part.
            projection = (steering_vector.conj() @ noise_subspace
                          @ noise_subspace.conj().T @ steering_vector)
            spectrum[i] = 1.0 / np.abs(projection)
        return spectrum
```
### Sensor Fusion Optimization
#### EKF Optimization
```python
import numpy as np
from scipy.linalg import solve_discrete_lyapunov
class OptimizedEKF:
    """Kalman filter over a constant-velocity state [pos(3), vel(3)].

    (Named EKF in the docs; with the linear models below the predict and
    update steps reduce to the standard linear Kalman filter equations.)
    """

    def __init__(self, state_dim=6, measurement_dim=3):
        self.state_dim = state_dim
        self.measurement_dim = measurement_dim
        self.x = np.zeros(state_dim)            # state: position then velocity
        self.P = np.eye(state_dim) * 0.1        # state covariance
        self.Q = np.eye(state_dim) * 0.01       # process noise covariance
        self.R = np.eye(measurement_dim) * 0.1  # measurement noise covariance

    def predict(self, dt):
        """Propagate state and covariance *dt* seconds under constant velocity."""
        F = np.eye(self.state_dim)
        F[:3, 3:6] = np.eye(3) * dt  # position += velocity * dt
        self.x = F @ self.x
        self.P = F @ self.P @ F.T + self.Q

    def update(self, measurement):
        """Fuse a 3-D position measurement into the state estimate."""
        H = np.zeros((self.measurement_dim, self.state_dim))
        H[:3, :3] = np.eye(3)  # we observe position only
        S = H @ self.P @ H.T + self.R
        # Solve S K^T = H P instead of forming inv(S) explicitly: cheaper
        # and better conditioned (valid since S and P are symmetric).
        K = np.linalg.solve(S, H @ self.P).T
        y = measurement - H @ self.x  # innovation
        self.x = self.x + K @ y
        # Joseph-form covariance update: identical to (I - KH)P in exact
        # arithmetic, but keeps P symmetric PSD under floating-point
        # round-off.
        I_KH = np.eye(self.state_dim) - K @ H
        self.P = I_KH @ self.P @ I_KH.T + K @ self.R @ K.T

    def get_pose(self):
        """Return the current position/velocity estimate and its covariance."""
        return {
            'position': self.x[:3],
            'velocity': self.x[3:6],
            'covariance': self.P
        }
```
## 🎨 Rendering Optimization
### OpenGL Optimization
```python
import OpenGL.GL as gl
import numpy as np
class OptimizedRenderer:
    """Minimal modern-OpenGL renderer: one shader program, one indexed mesh."""

    def __init__(self):
        self.shader_program = None
        self.vao = None
        self.vbo = None
        self.ebo = None
        self.index_count = 0
        self.setup_gl()

    def setup_gl(self):
        """Enable depth testing, back-face culling, and alpha blending."""
        gl.glEnable(gl.GL_DEPTH_TEST)
        gl.glEnable(gl.GL_CULL_FACE)
        gl.glEnable(gl.GL_BLEND)
        gl.glBlendFunc(gl.GL_SRC_ALPHA, gl.GL_ONE_MINUS_SRC_ALPHA)
        gl.glClearColor(0.1, 0.1, 0.1, 1.0)

    def create_shader_program(self, vertex_source, fragment_source):
        """Compile and link a shader program; store and return its handle."""
        vertex_shader = gl.glCreateShader(gl.GL_VERTEX_SHADER)
        gl.glShaderSource(vertex_shader, vertex_source)
        gl.glCompileShader(vertex_shader)
        fragment_shader = gl.glCreateShader(gl.GL_FRAGMENT_SHADER)
        gl.glShaderSource(fragment_shader, fragment_source)
        gl.glCompileShader(fragment_shader)
        program = gl.glCreateProgram()
        gl.glAttachShader(program, vertex_shader)
        gl.glAttachShader(program, fragment_shader)
        gl.glLinkProgram(program)
        # Shader objects are owned by the linked program; drop our refs.
        gl.glDeleteShader(vertex_shader)
        gl.glDeleteShader(fragment_shader)
        # Remember the program — render_frame() binds self.shader_program,
        # which the original never assigned.
        self.shader_program = program
        return program

    def setup_buffers(self, vertices, indices):
        """Upload an interleaved (position + second vec3 attribute) mesh to the GPU."""
        import ctypes  # fixes NameError: original used ctypes without importing it
        self.vao = gl.glGenVertexArrays(1)
        gl.glBindVertexArray(self.vao)
        self.vbo = gl.glGenBuffers(1)
        gl.glBindBuffer(gl.GL_ARRAY_BUFFER, self.vbo)
        gl.glBufferData(gl.GL_ARRAY_BUFFER, vertices.nbytes, vertices, gl.GL_STATIC_DRAW)
        self.ebo = gl.glGenBuffers(1)
        gl.glBindBuffer(gl.GL_ELEMENT_ARRAY_BUFFER, self.ebo)
        gl.glBufferData(gl.GL_ELEMENT_ARRAY_BUFFER, indices.nbytes, indices, gl.GL_STATIC_DRAW)
        # Layout: 6 floats per vertex (24-byte stride) — vec3 at offset 0
        # plus a second vec3 attribute at byte offset 12.
        gl.glVertexAttribPointer(0, 3, gl.GL_FLOAT, gl.GL_FALSE, 24, None)
        gl.glEnableVertexAttribArray(0)
        gl.glVertexAttribPointer(1, 3, gl.GL_FLOAT, gl.GL_FALSE, 24, ctypes.c_void_p(12))
        gl.glEnableVertexAttribArray(1)
        # Remember the draw count — render_frame() reads self.index_count,
        # which the original never assigned.
        self.index_count = len(indices)

    def render_frame(self, pose_data):
        """Draw one frame of the uploaded mesh using the given pose."""
        gl.glClear(gl.GL_COLOR_BUFFER_BIT | gl.GL_DEPTH_BUFFER_BIT)
        gl.glUseProgram(self.shader_program)
        self.update_matrices(pose_data)
        gl.glBindVertexArray(self.vao)
        gl.glDrawElements(gl.GL_TRIANGLES, self.index_count, gl.GL_UNSIGNED_INT, None)

    def update_matrices(self, pose_data):
        """Recompute and upload the view/projection uniforms for this pose."""
        # NOTE(review): calculate_view_matrix / calculate_projection_matrix
        # and the *_location uniform handles are expected to be provided
        # elsewhere; they are not defined in this snippet.
        view_matrix = self.calculate_view_matrix(pose_data)
        projection_matrix = self.calculate_projection_matrix()
        gl.glUniformMatrix4fv(self.view_location, 1, gl.GL_FALSE, view_matrix)
        gl.glUniformMatrix4fv(self.projection_location, 1, gl.GL_FALSE, projection_matrix)
```
### NeRF Rendering Optimization
```python
import torch
import torch.nn as nn
class OptimizedNeRFRenderer:
    """Chunked NeRF ray renderer: stratified depth sampling + volume integration."""

    def __init__(self, model_path, device='cuda'):
        self.device = device
        self.model = self.load_model(model_path)
        self.model.to(device)
        self.model.eval()
        # Rays are processed chunk_size at a time to bound peak memory;
        # num_samples depth points are taken along every ray.
        self.chunk_size = 4096
        self.num_samples = 64

    def load_model(self, model_path):
        """Deserialize the pre-trained NeRF network from disk.

        NOTE(review): torch.load unpickles arbitrary objects — only load
        checkpoints from trusted sources.
        """
        return torch.load(model_path, map_location=self.device)

    @torch.no_grad()
    def render_rays(self, rays_o, rays_d, near, far):
        """Render every ray, chunked to bound memory; returns RGB per ray."""
        rendered = [
            self._render_chunk(
                rays_o[start:start + self.chunk_size],
                rays_d[start:start + self.chunk_size],
                near,
                far,
            )
            for start in range(0, rays_o.shape[0], self.chunk_size)
        ]
        return torch.cat(rendered, dim=0)

    def _render_chunk(self, rays_o, rays_d, near, far):
        """Sample depths, query the network, and integrate one chunk of rays."""
        # Evenly spaced depths between near and far, shared across the chunk.
        t_vals = torch.linspace(0., 1., self.num_samples, device=self.device)
        z_vals = near * (1. - t_vals) + far * t_vals
        z_vals = z_vals.unsqueeze(0).expand(rays_o.shape[0], -1)
        # 3-D sample positions: origin + depth * direction.
        pts = rays_o.unsqueeze(1) + rays_d.unsqueeze(1) * z_vals.unsqueeze(-1)
        rgb, sigma = self.model(pts, rays_d)
        return self._volume_render(rgb, sigma, z_vals)

    def _volume_render(self, rgb, sigma, z_vals):
        """Alpha-composite per-sample colors into one RGB value per ray."""
        # Distance between consecutive samples; the final segment is
        # treated as effectively infinite.
        dists = z_vals[..., 1:] - z_vals[..., :-1]
        tail = torch.tensor([1e10], device=self.device).expand(dists[..., :1].shape)
        dists = torch.cat([dists, tail], -1)
        # Per-segment opacity from density.
        alpha = 1. - torch.exp(-sigma * dists)
        # Exclusive cumulative product of (1 - alpha) gives transmittance;
        # the leading ones column performs the shift-by-one.
        ones = torch.ones((alpha.shape[0], 1), device=self.device)
        transmittance = torch.cumprod(
            torch.cat([ones, 1. - alpha + 1e-10], -1), -1
        )[:, :-1]
        weights = alpha * transmittance
        return torch.sum(weights.unsqueeze(-1) * rgb, -2)
```
## 🔧 Configuration Optimization
### Performance Configuration
```json
{
"performance": {
"target_latency": 20,
"target_fps": 30,
"target_accuracy": 10,
"max_cpu_usage": 80,
"max_gpu_usage": 90,
"max_memory_usage": 80
},
"processing": {
"vision_slam": {
"max_features": 1000,
"min_features": 100,
"update_rate": 30,
"quality_level": 0.01
},
"rf_slam": {
"packet_rate": 100,
"aoa_estimation": "music",
"filter_window": 10
},
"sensor_fusion": {
"fusion_method": "ekf",
"vision_weight": 0.7,
"rf_weight": 0.3,
"process_noise": 0.01
}
},
"rendering": {
"quality": "high",
"resolution": [1280, 720],
"vsync": true,
"antialiasing": true,
"shadow_quality": "medium"
},
"optimization": {
"use_gpu": true,
"use_multithreading": true,
"memory_pooling": true,
"chunk_processing": true
}
}
```
### Dynamic Configuration
```python
class DynamicConfigManager:
    """Tune the runtime configuration in response to live performance metrics."""

    def __init__(self, base_config):
        # Deep copy, NOT dict.copy(): the _reduce_* methods below mutate
        # nested dicts, and a shallow copy would silently corrupt
        # base_config as well.
        from copy import deepcopy
        self.base_config = base_config
        self.current_config = deepcopy(base_config)
        self.performance_monitor = PerformanceMonitor()

    def optimize_config(self):
        """Lower quality/load settings whenever a metric exceeds its budget."""
        metrics = self.performance_monitor.get_average_metrics()
        if metrics.get('latency', 0) > 25:
            self._reduce_processing_load()
        if metrics.get('fps', 0) < 25:
            self._reduce_rendering_quality()
        if metrics.get('cpu_usage', 0) > 85:
            self._reduce_thread_count()
        if metrics.get('memory_usage', 0) > 85:
            self._reduce_buffer_sizes()

    def _reduce_processing_load(self):
        """Step vision-SLAM features down by 100 (floor 500) and rate by 5 Hz (floor 20)."""
        vision = self.current_config['processing']['vision_slam']
        vision['max_features'] = max(500, vision['max_features'] - 100)
        vision['update_rate'] = max(20, vision['update_rate'] - 5)

    def _reduce_rendering_quality(self):
        """Drop rendering quality one notch: high -> medium -> low."""
        quality_levels = ['high', 'medium', 'low']
        current_index = quality_levels.index(self.current_config['rendering']['quality'])
        if current_index < len(quality_levels) - 1:
            self.current_config['rendering']['quality'] = quality_levels[current_index + 1]

    def _reduce_thread_count(self):
        """Reduce worker thread count (not yet implemented)."""
        # TODO: shrink the thread pool once the executor is configurable.
        pass

    def _reduce_buffer_sizes(self):
        """Reduce internal buffer sizes (not yet implemented)."""
        # TODO: trim frame/CSI buffer capacities.
        pass
```
## 📊 Performance Testing
### Benchmark Suite
```python
import time
import statistics
class PerformanceBenchmark:
    """Micro-benchmark helpers: latency distribution, throughput, memory delta."""

    def __init__(self):
        self.results = {}

    def benchmark_latency(self, func, *args, iterations=100):
        """Time ``func(*args)`` *iterations* times; return latency stats in ms."""
        times = []
        for _ in range(iterations):
            start_time = time.perf_counter()
            func(*args)
            end_time = time.perf_counter()
            times.append((end_time - start_time) * 1000)  # ms
        # statistics.stdev/quantiles raise on fewer than two samples;
        # degrade gracefully for single-shot runs instead of crashing.
        if len(times) >= 2:
            std = statistics.stdev(times)
            p95 = statistics.quantiles(times, n=20)[18]    # 95th percentile
            p99 = statistics.quantiles(times, n=100)[98]   # 99th percentile
        else:
            std = 0.0
            p95 = p99 = times[0]
        return {
            'mean': statistics.mean(times),
            'median': statistics.median(times),
            'std': std,
            'min': min(times),
            'max': max(times),
            'p95': p95,
            'p99': p99
        }

    def benchmark_throughput(self, func, *args, duration=10):
        """Call ``func(*args)`` repeatedly for *duration* seconds; return ops/sec."""
        start_time = time.perf_counter()
        count = 0
        while time.perf_counter() - start_time < duration:
            func(*args)
            count += 1
        return count / duration

    def benchmark_memory(self, func, *args):
        """Return the RSS growth (bytes) caused by one call to ``func(*args)``."""
        import psutil
        import gc
        # Collect first so pre-existing garbage is not attributed to func.
        gc.collect()
        process = psutil.Process()
        initial_memory = process.memory_info().rss
        func(*args)
        final_memory = process.memory_info().rss
        return final_memory - initial_memory

    def run_full_benchmark(self):
        """Benchmark every pipeline stage plus the end-to-end path.

        NOTE(review): the *_test callables below are not defined in this
        snippet — a subclass or the surrounding application must supply
        them.
        """
        benchmark_results = {}
        benchmark_results['camera_capture'] = self.benchmark_latency(self.camera_capture_test)
        benchmark_results['csi_processing'] = self.benchmark_latency(self.csi_processing_test)
        benchmark_results['slam_processing'] = self.benchmark_latency(self.slam_processing_test)
        benchmark_results['rendering'] = self.benchmark_latency(self.rendering_test)
        benchmark_results['end_to_end'] = self.benchmark_latency(self.end_to_end_test)
        return benchmark_results

    def generate_report(self, results):
        """Summarize benchmark results: total latency, bottleneck, grade, advice."""
        return {
            'summary': {
                'total_latency': sum(r['mean'] for r in results.values()),
                'bottleneck': max(results.items(), key=lambda x: x[1]['mean'])[0],
                'performance_grade': self._calculate_grade(results)
            },
            'details': results,
            'recommendations': self._generate_recommendations(results)
        }

    def _calculate_grade(self, results):
        """Map the summed mean latency (ms) onto an A-D letter grade."""
        total_latency = sum(r['mean'] for r in results.values())
        if total_latency < 20:
            return 'A'
        elif total_latency < 30:
            return 'B'
        elif total_latency < 40:
            return 'C'
        else:
            return 'D'

    def _generate_recommendations(self, results):
        """Flag components with high mean latency or high tail variance."""
        recommendations = []
        for component, metrics in results.items():
            if metrics['mean'] > 10:  # high-latency threshold (ms)
                recommendations.append(f"Optimize {component} - current latency: {metrics['mean']:.2f}ms")
            if metrics['p99'] > metrics['mean'] * 2:  # heavy tail
                recommendations.append(f"Reduce variance in {component} - p99: {metrics['p99']:.2f}ms")
        return recommendations
```
## 🚀 Deployment Optimization
### Production Configuration
```yaml
# docker-compose.prod.yml
version: '3.8'
services:
nowyouseeme:
build:
context: .
dockerfile: Dockerfile
target: production
container_name: nowyouseeme-prod
ports:
- "8080:8080"
volumes:
- ./config:/app/config:ro
- ./data:/app/data
- ./logs:/app/logs
environment:
- PYTHONPATH=/app/src
- NOWYOUSEE_DEBUG=0
- CUDA_VISIBLE_DEVICES=0
- OMP_NUM_THREADS=4
- MKL_NUM_THREADS=4
devices:
- /dev/video0:/dev/video0
- /dev/bus/usb:/dev/bus/usb
network_mode: host
restart: unless-stopped
deploy:
resources:
limits:
cpus: '4.0'
memory: 8G
reservations:
cpus: '2.0'
memory: 4G
healthcheck:
test: ["CMD", "python3", "-c", "import sys; sys.exit(0)"]
interval: 30s
timeout: 10s
retries: 3
start_period: 40s
```
### Monitoring Setup
```python
# monitoring.py
import prometheus_client
from prometheus_client import Counter, Histogram, Gauge
class PerformanceMetrics:
    """Prometheus instrumentation for the NowYouSeeMe pipeline."""

    def __init__(self):
        # Latency is a histogram so percentiles can be derived from the
        # bucketed counts; everything else is a point-in-time gauge.
        self.latency_histogram = Histogram(
            'nowyouseeme_latency_seconds',
            'End-to-end latency in seconds',
            buckets=[0.01, 0.02, 0.03, 0.05, 0.1, 0.2, 0.5]
        )
        self.fps_gauge = Gauge('nowyouseeme_fps', 'Current frame rate')
        self.accuracy_gauge = Gauge(
            'nowyouseeme_accuracy_cm', 'Current tracking accuracy in cm'
        )
        self.cpu_usage_gauge = Gauge(
            'nowyouseeme_cpu_usage_percent', 'CPU usage percentage'
        )
        self.gpu_usage_gauge = Gauge(
            'nowyouseeme_gpu_usage_percent', 'GPU usage percentage'
        )
        self.memory_usage_gauge = Gauge(
            'nowyouseeme_memory_usage_percent', 'Memory usage percentage'
        )

    def record_latency(self, latency_ms):
        """Observe one end-to-end latency sample (milliseconds in, seconds stored)."""
        self.latency_histogram.observe(latency_ms / 1000.0)

    def record_fps(self, fps):
        """Publish the current frame rate."""
        self.fps_gauge.set(fps)

    def record_accuracy(self, accuracy_cm):
        """Publish the current tracking accuracy in centimeters."""
        self.accuracy_gauge.set(accuracy_cm)

    def record_system_metrics(self, cpu_percent, gpu_percent, memory_percent):
        """Publish CPU, GPU, and memory utilization in a single call."""
        self.cpu_usage_gauge.set(cpu_percent)
        self.gpu_usage_gauge.set(gpu_percent)
        self.memory_usage_gauge.set(memory_percent)
# Start metrics server
if __name__ == '__main__':
prometheus_client.start_http_server(8000)
```
---
For more detailed optimization strategies, see:
- [Architecture Guide](architecture.md) - System design and optimization
- [Troubleshooting Guide](troubleshooting.md) - Performance issue resolution
- [API Reference](API_REFERENCE.md) - Performance-related API calls