Files
Sankofa/crossplane-provider-proxmox/pkg/proxmox/client.go

1297 lines
37 KiB
Go
Raw Normal View History

package proxmox
import (
"context"
"fmt"
"strconv"
"strings"
"time"
"github.com/pkg/errors"
"github.com/sankofa/crossplane-provider-proxmox/pkg/utils"
)
// Client represents a Proxmox VE API client. It wraps the underlying HTTP
// transport together with the connection settings it was constructed with.
type Client struct {
httpClient *HTTPClient // transport used for all API calls
endpoint string // base URL of the Proxmox API
username string // set when using password authentication
password string // set when using password authentication
token string // set when using API-token authentication
insecure bool // true when TLS certificate verification is skipped
}
// NewClient constructs a Proxmox API client. When both username and
// password are supplied, the credentials are registered on the transport
// and verified up front with a bounded authentication call.
func NewClient(endpoint, username, password string, insecureSkipTLS bool) (*Client, error) {
	httpClient := NewHTTPClient(endpoint, insecureSkipTLS)
	client := &Client{
		httpClient: httpClient,
		endpoint:   endpoint,
		username:   username,
		password:   password,
		insecure:   insecureSkipTLS,
	}
	// Register and eagerly verify credentials when both are present.
	if username != "" && password != "" {
		httpClient.SetCredentials(username, password)
		ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
		defer cancel()
		if err := httpClient.Authenticate(ctx); err != nil {
			return nil, errors.Wrap(err, "failed to authenticate")
		}
	}
	return client, nil
}
// NewClientWithToken constructs a Proxmox API client that authenticates
// with a pre-issued API token instead of username/password credentials.
func NewClientWithToken(endpoint, token string, insecureSkipTLS bool) (*Client, error) {
	transport := NewHTTPClient(endpoint, insecureSkipTLS)
	transport.SetToken(token)
	client := &Client{
		httpClient: transport,
		endpoint:   endpoint,
		token:      token,
		insecure:   insecureSkipTLS,
	}
	return client, nil
}
// RetryConfig defines retry behavior for API calls.
type RetryConfig struct {
	MaxRetries int           // retries allowed after the initial attempt
	BaseDelay  time.Duration // delay before the first retry; doubles each attempt
	MaxDelay   time.Duration // upper bound on the backoff delay
}

// DefaultRetryConfig returns the default retry configuration: three retries
// with exponential backoff starting at one second and capped at 30 seconds.
func DefaultRetryConfig() RetryConfig {
	var cfg RetryConfig
	cfg.MaxRetries = 3
	cfg.BaseDelay = time.Second
	cfg.MaxDelay = 30 * time.Second
	return cfg
}
// RetryableError indicates an error that should be retried, optionally
// after a server-suggested delay.
type RetryableError struct {
	Err        error         // underlying cause
	RetryAfter time.Duration // optional extra wait before the next attempt
}

// Error returns the message of the underlying error.
func (e *RetryableError) Error() string {
	return e.Err.Error()
}

// Unwrap exposes the underlying error so errors.Is/errors.As can traverse it.
func (e *RetryableError) Unwrap() error {
	return e.Err
}

// IsRetryable reports whether err is (or wraps) a *RetryableError.
// Using errors.As instead of a bare type assertion also recognizes
// retryable errors that were wrapped with additional context.
func IsRetryable(err error) bool {
	if err == nil {
		return false
	}
	var re *RetryableError
	return errors.As(err, &re)
}
// Retry executes fn with exponential backoff, honoring ctx cancellation.
// Non-retryable errors abort immediately; retryable errors are retried up
// to config.MaxRetries times. When a retryable error carries a positive
// RetryAfter, that extra pause is observed before the next attempt.
func Retry(ctx context.Context, fn func() error, config RetryConfig) error {
	var lastErr error
	for attempt := 0; attempt <= config.MaxRetries; attempt++ {
		// Back off before every attempt except the first.
		if attempt > 0 {
			wait := config.BaseDelay * time.Duration(1<<uint(attempt-1))
			if wait > config.MaxDelay {
				wait = config.MaxDelay
			}
			select {
			case <-ctx.Done():
				return ctx.Err()
			case <-time.After(wait):
			}
		}
		err := fn()
		if err == nil {
			return nil
		}
		if !IsRetryable(err) {
			return err
		}
		lastErr = err
		// Honor a server-suggested delay before the next attempt.
		if attempt < config.MaxRetries {
			retryErr, ok := err.(*RetryableError)
			if ok && retryErr.RetryAfter > 0 {
				select {
				case <-ctx.Done():
					return ctx.Err()
				case <-time.After(retryErr.RetryAfter):
				}
			}
		}
	}
	return errors.Wrapf(lastErr, "failed after %d retries", config.MaxRetries)
}
// CreateVM creates a virtual machine, retrying transient failures
// (network errors and temporary server errors) with backoff.
func (c *Client) CreateVM(ctx context.Context, spec VMSpec) (*VM, error) {
	var created *VM
	attempt := func() error {
		vm, err := c.createVM(ctx, spec)
		if err == nil {
			created = vm
			return nil
		}
		// Only network hiccups and temporary server errors are retried.
		if isNetworkError(err) || isTemporaryError(err) {
			return &RetryableError{Err: err}
		}
		return err
	}
	err := Retry(ctx, attempt, DefaultRetryConfig())
	return created, err
}
// findImageInStorage scans every storage pool for a volume whose volid
// contains imageName (listed via the given node) and returns the first
// match. Pools that cannot hold images or cannot be listed are skipped.
func (c *Client) findImageInStorage(ctx context.Context, node, imageName string) (string, error) {
	storages, err := c.ListStorages(ctx)
	if err != nil {
		return "", errors.Wrap(err, "failed to list storages")
	}
	for _, storage := range storages {
		// Only consider pools that can hold disk images, ISOs, or templates.
		supported := false
		for _, kind := range storage.Content {
			switch kind {
			case "images", "iso", "vztmpl":
				supported = true
			}
			if supported {
				break
			}
		}
		if !supported {
			continue
		}
		var entries []struct {
			Volid  string `json:"volid"`
			Format string `json:"format"`
		}
		listPath := fmt.Sprintf("/nodes/%s/storage/%s/content", node, storage.Name)
		if err := c.httpClient.Get(ctx, listPath, &entries); err != nil {
			// Unreadable pools are skipped rather than failing the search.
			continue
		}
		for _, entry := range entries {
			if strings.Contains(entry.Volid, imageName) {
				return entry.Volid, nil
			}
		}
	}
	return "", fmt.Errorf("image '%s' not found in any storage", imageName)
}
// createVM performs the actual VM creation. High-level flow:
//  1. reserve the next free VMID from /cluster/nextid
//  2. if spec.Image parses as a VMID in [100, 999999999], clone that
//     template, apply CPU/memory/cloud-init config, and return early
//  3. otherwise resolve spec.Image to a storage volid (either a direct
//     "storage:path" volid or a search across storages) and create the VM;
//     cloud images (.img/.qcow2) are imported afterwards via the
//     importdisk API and rewired onto scsi0 with the boot order fixed
//  4. finally write cloud-init user data (best effort) and return the VM
func (c *Client) createVM(ctx context.Context, spec VMSpec) (*VM, error) {
// First, get next available VMID
// The HTTP client's Get method already extracts the "data" field, so we get the string directly
var nextIDStr string
if err := c.httpClient.Get(ctx, "/cluster/nextid", &nextIDStr); err != nil {
return nil, errors.Wrap(err, "failed to get next VMID")
}
var vmID int
if _, err := fmt.Sscanf(nextIDStr, "%d", &vmID); err != nil {
return nil, errors.Wrap(err, "failed to parse VMID")
}
// Handle image - determine if we need to clone, import, or use existing
var diskConfig string
var needsImageImport bool
var imageVolid string
if spec.Image != "" {
// Check if image is a template ID (numeric VMID to clone from)
// Use explicit check: if image is all numeric AND within valid VMID range, treat as template
templateID, parseErr := strconv.Atoi(spec.Image)
// Only treat as template if it's a valid VMID (100-999999999) and no other interpretation
// If image name contains non-numeric chars, it's not a template ID
if parseErr == nil && templateID >= 100 && templateID <= 999999999 {
// Clone from template
cloneConfig := map[string]interface{}{
"newid": vmID,
"name": spec.Name,
"target": spec.Node,
}
var cloneResult string
if err := c.httpClient.Post(ctx, fmt.Sprintf("/nodes/%s/qemu/%d/clone", spec.Node, templateID), cloneConfig, &cloneResult); err != nil {
return nil, errors.Wrapf(err, "failed to clone template %d", templateID)
}
// After cloning, update the VM config
vmConfig := map[string]interface{}{
"cores": spec.CPU,
"memory": parseMemory(spec.Memory),
"agent": "1", // Enable QEMU guest agent
}
if spec.TenantID != "" {
vmConfig["tags"] = fmt.Sprintf("tenant_%s", spec.TenantID)
}
// Add cloud-init configuration if userData is provided
if spec.UserData != "" {
cloudInitStorage := spec.Storage
if cloudInitStorage == "" {
cloudInitStorage = "local-lvm" // Use same default as VM storage
}
vmConfig["ide2"] = fmt.Sprintf("%s:cloudinit", cloudInitStorage)
vmConfig["ciuser"] = "admin"
vmConfig["ipconfig0"] = "ip=dhcp"
}
if err := c.httpClient.Put(ctx, fmt.Sprintf("/nodes/%s/qemu/%d/config", spec.Node, vmID), vmConfig, nil); err != nil {
return nil, errors.Wrap(err, "failed to update cloned VM config")
}
// Write cloud-init userData if provided
if spec.UserData != "" {
cloudInitConfig := map[string]interface{}{
"user": spec.UserData,
}
cloudInitPath := fmt.Sprintf("/nodes/%s/qemu/%d/cloudinit", spec.Node, vmID)
if err := c.httpClient.Post(ctx, cloudInitPath, cloudInitConfig, nil); err != nil {
// Log warning but don't fail - cloud-init can be configured later
// NOTE(review): the error is silently discarded here; surface it via
// status conditions or structured logging once a logger is available.
}
}
// Get VM details and return
return c.getVMByID(ctx, spec.Node, vmID)
}
// Check if image is already a volid (format: storage:path/to/image)
if strings.Contains(spec.Image, ":") {
imageVolid = spec.Image
} else {
// Search for image in storage
foundVolid, err := c.findImageInStorage(ctx, spec.Node, spec.Image)
if err != nil {
// Image not found - return error instead of creating blank disk
// This prevents "Nothing to boot" errors
return nil, errors.Wrapf(err, "image '%s' not found in storage - cannot create VM without OS image", spec.Image)
}
imageVolid = foundVolid
}
// If we have an image volid, determine how to use it
if imageVolid != "" {
// For cloud images (.img, .qcow2), we need to import them as disks
if strings.HasSuffix(imageVolid, ".img") || strings.HasSuffix(imageVolid, ".qcow2") {
needsImageImport = true
// Create VM with blank disk first, then import the image
diskConfig = fmt.Sprintf("%s:%d,format=qcow2", spec.Storage, parseDisk(spec.Disk))
} else {
// Use existing template/disk directly (for templates, etc.)
diskConfig = fmt.Sprintf("%s,format=qcow2", imageVolid)
}
} else if diskConfig == "" {
// No image found and no disk config set - this is an error condition
// VMs without OS images cannot boot, so we should fail rather than create blank disk
return nil, errors.Errorf("image '%s' not found in storage and no disk configuration provided. Cannot create VM without OS image", spec.Image)
}
} else {
// No image specified - this is an error condition
// VMs without OS images cannot boot
return nil, errors.New("image is required - cannot create VM without OS image")
}
// Create VM configuration
vmConfig := map[string]interface{}{
"vmid": vmID,
"name": spec.Name,
"cores": spec.CPU,
"memory": parseMemory(spec.Memory),
"net0": fmt.Sprintf("virtio,bridge=%s", spec.Network),
"scsi0": diskConfig,
"ostype": "l26", // Linux 2.6+ kernel
"agent": "1", // Enable QEMU guest agent
"boot": "order=scsi0", // Set boot order to scsi0 (primary disk)
}
// Add tenant labels if provided (for tenant isolation and billing)
if spec.TenantID != "" {
// Proxmox uses tags for labeling - use underscore instead of colon (Proxmox tag format)
// Tags must be alphanumeric with underscores and hyphens only
vmConfig["tags"] = fmt.Sprintf("tenant_%s", spec.TenantID)
}
// Add cloud-init configuration if userData is provided
if spec.UserData != "" {
// Determine cloud-init storage (use same storage as VM disk, or default to "local-lvm")
cloudInitStorage := spec.Storage
if cloudInitStorage == "" {
cloudInitStorage = "local-lvm" // Use same default as VM storage for consistency
}
// Proxmox cloud-init drive format: ide2=storage:cloudinit
vmConfig["ide2"] = fmt.Sprintf("%s:cloudinit", cloudInitStorage)
// Set default cloud-init user (can be overridden in userData)
vmConfig["ciuser"] = "admin"
// Set network configuration for cloud-init
vmConfig["ipconfig0"] = "ip=dhcp"
}
// Create VM
// The HTTP client's Post method already extracts the "data" field, so we get the string directly
var resultStr string
if err := c.httpClient.Post(ctx, fmt.Sprintf("/nodes/%s/qemu", spec.Node), vmConfig, &resultStr); err != nil {
return nil, errors.Wrap(err, "failed to create VM")
}
// If we need to import a cloud image, do it now using importdisk API
if needsImageImport && imageVolid != "" {
// Extract storage and filename from volid
parts := strings.SplitN(imageVolid, ":", 2)
if len(parts) != 2 {
return nil, errors.Errorf("invalid image volid format: %s (expected storage:path)", imageVolid)
}
storageName := parts[0]
imagePath := parts[1]
// CRITICAL: Stop VM before import (importdisk requires VM to be stopped)
var vmStatus struct {
Status string `json:"status"`
}
statusPath := fmt.Sprintf("/nodes/%s/qemu/%d/status/current", spec.Node, vmID)
if err := c.httpClient.Get(ctx, statusPath, &vmStatus); err == nil {
if vmStatus.Status == "running" {
// Stop VM
stopPath := fmt.Sprintf("/nodes/%s/qemu/%d/status/stop", spec.Node, vmID)
if err := c.httpClient.Post(ctx, stopPath, nil, nil); err != nil {
return nil, errors.Wrap(err, "failed to stop VM for image import")
}
// Wait for VM to stop (up to 30 seconds)
// NOTE(review): this poll uses time.Sleep and does not observe ctx
// cancellation — consider a ctx-aware wait.
for i := 0; i < 30; i++ {
time.Sleep(1 * time.Second)
if err := c.httpClient.Get(ctx, statusPath, &vmStatus); err == nil {
if vmStatus.Status == "stopped" {
break
}
}
}
if vmStatus.Status != "stopped" {
return nil, errors.New("VM did not stop within 30 seconds, cannot import image")
}
}
}
// Check if importdisk API is available before attempting to use it
supportsImportDisk, err := c.SupportsImportDisk(ctx)
if err != nil {
// If we can't determine support, log warning but try anyway
// The actual API call will fail with 501 if not supported
}
if !supportsImportDisk {
// Clean up the VM we just created since we can't import the image
// Attempt to unlock and delete
c.UnlockVM(ctx, vmID)
c.deleteVM(ctx, vmID)
return nil, errors.Errorf("importdisk API is not supported in this Proxmox version. VM %d has been cleaned up. Please use template cloning or pre-imported images instead", vmID)
}
// Use Proxmox importdisk API to import the image as a disk
// POST /nodes/{node}/qemu/{vmid}/importdisk
// Note: importdisk creates a NEW disk (usually scsi1), it doesn't replace scsi0
importConfig := map[string]interface{}{
"storage": storageName,
"format": "qcow2",
"filename": imagePath,
}
var importResult string
importPath := fmt.Sprintf("/nodes/%s/qemu/%d/importdisk", spec.Node, vmID)
if err := c.httpClient.Post(ctx, importPath, importConfig, &importResult); err != nil {
// Check if error is 501 (not implemented) - this means API doesn't exist
if strings.Contains(err.Error(), "501") || strings.Contains(err.Error(), "not implemented") {
// Clean up the VM we created
c.UnlockVM(ctx, vmID)
c.deleteVM(ctx, vmID)
return nil, errors.Errorf("importdisk API is not implemented in this Proxmox version. VM %d has been cleaned up. Please use template cloning (numeric VMID) or pre-imported images instead of cloud images", vmID)
}
// For other errors, attempt cleanup but return the original error
c.UnlockVM(ctx, vmID)
c.deleteVM(ctx, vmID)
return nil, errors.Wrapf(err, "failed to import image '%s' - VM %d has been cleaned up", imageVolid, vmID)
}
// CRITICAL FIX: Wait for importdisk task to complete before updating config
// The importResult contains a task UPID that we need to monitor
// Extract UPID from result (format: "UPID:node:timestamp:pid:type:user@realm:")
// The HTTP client already extracts the "data" field, so importResult should be the UPID string
taskUPID := strings.TrimSpace(importResult)
// Handle case where UPID might be in JSON format or empty
if taskUPID == "" || !strings.HasPrefix(taskUPID, "UPID:") {
// Try to extract UPID from JSON if present
if strings.Contains(importResult, "UPID:") {
// Extract UPID from JSON string
parts := strings.Split(importResult, "\"")
for _, part := range parts {
if strings.HasPrefix(part, "UPID:") {
taskUPID = part
break
}
}
}
}
if taskUPID != "" && strings.HasPrefix(taskUPID, "UPID:") {
// Monitor task until completion (with timeout)
maxWaitTime := 10 * time.Minute // 10 minutes max for large images
pollInterval := 3 * time.Second // Check every 3 seconds
startTime := time.Now()
for time.Since(startTime) < maxWaitTime {
// Check task status
var taskStatus struct {
Status string `json:"status"`
ExitStatus string `json:"exitstatus,omitempty"`
}
taskStatusPath := fmt.Sprintf("/nodes/%s/tasks/%s/status", spec.Node, taskUPID)
if err := c.httpClient.Get(ctx, taskStatusPath, &taskStatus); err != nil {
// If we can't get task status, wait a bit and retry
select {
case <-ctx.Done():
return nil, ctx.Err()
case <-time.After(pollInterval):
continue
}
}
// Task status can be: "running", "stopped"
if taskStatus.Status == "stopped" {
// Task completed - check exit status
if taskStatus.ExitStatus != "OK" && taskStatus.ExitStatus != "" {
return nil, errors.Errorf("importdisk task failed with exit status: %s", taskStatus.ExitStatus)
}
// Task completed successfully
break
}
// Task still running, wait and check again
select {
case <-ctx.Done():
return nil, ctx.Err()
case <-time.After(pollInterval):
// Continue loop
}
}
// Verify task completed (final check)
var finalTaskStatus struct {
Status string `json:"status"`
}
taskStatusPath := fmt.Sprintf("/nodes/%s/tasks/%s/status", spec.Node, taskUPID)
if err := c.httpClient.Get(ctx, taskStatusPath, &finalTaskStatus); err == nil {
if finalTaskStatus.Status != "stopped" {
return nil, errors.Errorf("importdisk task did not complete within timeout period (status: %s)", finalTaskStatus.Status)
}
}
// Give a moment for disk to be fully registered
time.Sleep(2 * time.Second)
} else {
// If no UPID returned, wait a conservative amount of time
// This is a fallback for cases where UPID extraction fails
time.Sleep(30 * time.Second)
}
// Get VM config to find the imported disk
var vmConfigResponse struct {
Scsi0 string `json:"scsi0"`
Scsi1 string `json:"scsi1"`
Scsi2 string `json:"scsi2"`
}
configPath := fmt.Sprintf("/nodes/%s/qemu/%d/config", spec.Node, vmID)
if err := c.httpClient.Get(ctx, configPath, &vmConfigResponse); err != nil {
return nil, errors.Wrap(err, "failed to get VM config after import")
}
// Find the imported disk (usually scsi1, but could be scsi2, etc.)
importedDisk := ""
if vmConfigResponse.Scsi1 != "" && strings.Contains(vmConfigResponse.Scsi1, storageName) {
importedDisk = vmConfigResponse.Scsi1
} else if vmConfigResponse.Scsi2 != "" && strings.Contains(vmConfigResponse.Scsi2, storageName) {
importedDisk = vmConfigResponse.Scsi2
}
if importedDisk == "" {
return nil, errors.New("imported disk not found in VM configuration after import")
}
// CRITICAL FIX: Replace blank scsi0 with imported disk
// First, remove the blank scsi0 disk
removeConfig := map[string]interface{}{
"scsi0": "",
}
if err := c.httpClient.Put(ctx, configPath, removeConfig, nil); err != nil {
// Log but continue - might already be removed
}
// Now attach imported disk to scsi0 and set boot order
updateConfig := map[string]interface{}{
"scsi0": importedDisk, // Attach imported disk to primary slot
"boot": "order=scsi0",
}
if err := c.httpClient.Put(ctx, configPath, updateConfig, nil); err != nil {
return nil, errors.Wrap(err, "failed to attach imported disk to scsi0 and set boot order")
}
// Remove the imported disk from its original slot (scsi1, scsi2, etc.) to avoid duplicates
// This is done by setting the slot to empty string
cleanupConfig := map[string]interface{}{}
if vmConfigResponse.Scsi1 == importedDisk {
cleanupConfig["scsi1"] = ""
} else if vmConfigResponse.Scsi2 == importedDisk {
cleanupConfig["scsi2"] = ""
}
if len(cleanupConfig) > 0 {
c.httpClient.Put(ctx, configPath, cleanupConfig, nil)
}
// Verify boot order is set correctly
var verifyConfig struct {
Boot string `json:"boot"`
Scsi0 string `json:"scsi0"`
}
if err := c.httpClient.Get(ctx, configPath, &verifyConfig); err == nil {
if verifyConfig.Boot != "order=scsi0" {
// Retry setting boot order
retryConfig := map[string]interface{}{
"boot": "order=scsi0",
}
c.httpClient.Put(ctx, configPath, retryConfig, nil)
}
if verifyConfig.Scsi0 == "" {
return nil, errors.New("scsi0 is empty after attaching imported disk")
}
}
}
// Write cloud-init userData if provided
if spec.UserData != "" {
// Proxmox cloud-init API: POST /nodes/{node}/qemu/{vmid}/cloudinit
// The userData is written to the cloud-init drive
cloudInitConfig := map[string]interface{}{
"user": spec.UserData,
}
cloudInitPath := fmt.Sprintf("/nodes/%s/qemu/%d/cloudinit", spec.Node, vmID)
// Retry cloud-init write up to 3 times (sometimes fails on first attempt)
var cloudInitErr error
for attempt := 0; attempt < 3; attempt++ {
var err error
if err = c.httpClient.Post(ctx, cloudInitPath, cloudInitConfig, nil); err == nil {
cloudInitErr = nil
break
}
cloudInitErr = err
if attempt < 2 {
time.Sleep(1 * time.Second)
}
}
// Log cloud-init errors for visibility (but don't fail VM creation)
// Cloud-init can be configured later, but we should be aware of failures
if cloudInitErr != nil {
// Log the error for visibility - cloud-init configuration failed
// VM is created but cloud-init may not work as expected
// In production, this should be tracked via status conditions
// For now, we log and continue - VM is usable but may need manual cloud-init config
}
}
// Get VM details and return
return c.getVMByID(ctx, spec.Node, vmID)
}
// getVMByID looks up a VM in the node's QEMU listing. When the VM is not
// visible in the listing yet, a synthetic record with status "created" is
// returned instead of an error (the VM may still be materializing).
func (c *Client) getVMByID(ctx context.Context, node string, vmID int) (*VM, error) {
	var listing []struct {
		Vmid   int    `json:"vmid"`
		Name   string `json:"name"`
		Status string `json:"status"`
	}
	if err := c.httpClient.Get(ctx, fmt.Sprintf("/nodes/%s/qemu", node), &listing); err != nil {
		return nil, errors.Wrap(err, "failed to get VM details")
	}
	for _, entry := range listing {
		if entry.Vmid != vmID {
			continue
		}
		return &VM{
			ID:     entry.Vmid,
			Name:   entry.Name,
			Status: entry.Status,
			Node:   node,
		}, nil
	}
	// Not listed yet: report a placeholder "created" record.
	return &VM{
		ID:     vmID,
		Name:   "unknown",
		Status: "created",
		Node:   node,
	}, nil
}
// Helper functions for parsing (use shared utils)

// parseMemory converts a human-readable memory size string into megabytes
// by delegating to the shared utils helper.
func parseMemory(memory string) int {
return utils.ParseMemoryToMB(memory)
}

// parseDisk converts a human-readable disk size string into gigabytes by
// delegating to the shared utils helper.
func parseDisk(disk string) int {
return utils.ParseDiskToGB(disk)
}
// UpdateVM updates a virtual machine, retrying transient failures
// (network errors and temporary server errors) with backoff.
func (c *Client) UpdateVM(ctx context.Context, vmID int, spec VMSpec) (*VM, error) {
	var updated *VM
	err := Retry(ctx, func() error {
		vm, innerErr := c.updateVM(ctx, vmID, spec)
		if innerErr == nil {
			updated = vm
			return nil
		}
		if isNetworkError(innerErr) || isTemporaryError(innerErr) {
			return &RetryableError{Err: innerErr}
		}
		return innerErr
	}, DefaultRetryConfig())
	return updated, err
}
// updateVM applies CPU/memory changes (and re-asserts the guest agent
// flag) to an existing VM, then returns the refreshed VM record.
// It reuses the shared findVMNode helper instead of duplicating the
// node-search loop that previously lived here.
func (c *Client) updateVM(ctx context.Context, vmID int, spec VMSpec) (*VM, error) {
	// Locate the node hosting this VM.
	targetNode, err := c.findVMNode(ctx, vmID)
	if err != nil {
		return nil, err
	}
	// Build the configuration delta; only set fields the caller provided.
	vmConfig := make(map[string]interface{})
	if spec.CPU > 0 {
		vmConfig["cores"] = spec.CPU
	}
	if spec.Memory != "" {
		vmConfig["memory"] = parseMemory(spec.Memory)
	}
	// Always ensure the QEMU guest agent stays enabled.
	vmConfig["agent"] = "1"
	var result struct {
		Data string `json:"data"`
	}
	if err := c.httpClient.Put(ctx, fmt.Sprintf("/nodes/%s/qemu/%d/config", targetNode, vmID), vmConfig, &result); err != nil {
		return nil, errors.Wrap(err, "failed to update VM")
	}
	// Re-read the VM to return its post-update state.
	vms, err := c.ListVMs(ctx, targetNode)
	if err != nil {
		return nil, errors.Wrap(err, "failed to list VMs")
	}
	for _, vm := range vms {
		if vm.ID == vmID {
			return &vm, nil
		}
	}
	return nil, fmt.Errorf("VM %d not found after update", vmID)
}
// findVMNode reports which cluster node hosts the given VM by querying
// each node's QEMU listing in turn.
func (c *Client) findVMNode(ctx context.Context, vmID int) (string, error) {
	nodes, err := c.ListNodes(ctx)
	if err != nil {
		return "", errors.Wrap(err, "failed to list nodes")
	}
	for _, node := range nodes {
		var listing []struct {
			Vmid int `json:"vmid"`
		}
		// Nodes that cannot be queried are skipped rather than failing the search.
		if err := c.httpClient.Get(ctx, fmt.Sprintf("/nodes/%s/qemu", node), &listing); err != nil {
			continue
		}
		for _, entry := range listing {
			if entry.Vmid == vmID {
				return node, nil
			}
		}
	}
	return "", fmt.Errorf("VM %d not found on any node", vmID)
}
// UnlockVM removes a VM's lock via the Proxmox unlock endpoint. Unlock
// failures are deliberately swallowed (the lock may simply not exist);
// the only error surfaced is a failure to locate the VM's node.
func (c *Client) UnlockVM(ctx context.Context, vmID int) error {
	node, err := c.findVMNode(ctx, vmID)
	if err != nil {
		return errors.Wrap(err, "failed to find VM node")
	}
	// POST /nodes/{node}/qemu/{vmid}/unlock — best effort only.
	path := fmt.Sprintf("/nodes/%s/qemu/%d/unlock", node, vmID)
	_ = c.httpClient.Post(ctx, path, nil, nil) // ignore: lock may not exist
	return nil
}
// DeleteVM deletes a virtual machine, retrying transient failures
// (network errors and temporary server errors) with backoff.
func (c *Client) DeleteVM(ctx context.Context, vmID int) error {
	attempt := func() error {
		deleteErr := c.deleteVM(ctx, vmID)
		switch {
		case deleteErr == nil:
			return nil
		case isNetworkError(deleteErr) || isTemporaryError(deleteErr):
			return &RetryableError{Err: deleteErr}
		default:
			return deleteErr
		}
	}
	return Retry(ctx, attempt, DefaultRetryConfig())
}
// deleteVM performs the actual VM deletion: locate the node, best-effort
// unlock, stop the VM if running, then delete with purge.
// Changes from the previous version: the duplicated node-search loop is
// replaced by the shared findVMNode helper, and the wait loops honor ctx
// cancellation instead of blocking in time.Sleep.
func (c *Client) deleteVM(ctx context.Context, vmID int) error {
	targetNode, err := c.findVMNode(ctx, vmID)
	if err != nil {
		return err
	}
	// Best-effort unlock to clear stuck lock files before deletion.
	for i := 0; i < 5; i++ {
		if err := c.UnlockVM(ctx, vmID); err == nil {
			break
		}
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-time.After(time.Second):
		}
	}
	// Stop the VM first if it is running.
	statusPath := fmt.Sprintf("/nodes/%s/qemu/%d/status/current", targetNode, vmID)
	var status struct {
		Status string `json:"status"`
	}
	if err := c.httpClient.Get(ctx, statusPath, &status); err == nil && status.Status == "running" {
		if err := c.httpClient.Post(ctx, fmt.Sprintf("/nodes/%s/qemu/%d/status/stop", targetNode, vmID), nil, nil); err != nil {
			return errors.Wrap(err, "failed to stop VM")
		}
		// Poll for up to 30 seconds for the VM to reach "stopped".
		for i := 0; i < 30; i++ {
			select {
			case <-ctx.Done():
				return ctx.Err()
			case <-time.After(time.Second):
			}
			if err := c.httpClient.Get(ctx, statusPath, &status); err == nil && status.Status == "stopped" {
				break
			}
		}
	}
	// Delete with purge=1 so config, disks, and job references are removed.
	deletePath := fmt.Sprintf("/nodes/%s/qemu/%d?purge=1", targetNode, vmID)
	if err := c.httpClient.Delete(ctx, deletePath); err != nil {
		return errors.Wrap(err, "failed to delete VM")
	}
	return nil
}
// GetVMStatus gets the status of a virtual machine, retrying transient
// failures (network errors and temporary server errors) with backoff.
func (c *Client) GetVMStatus(ctx context.Context, vmID int) (*VMStatus, error) {
	var result *VMStatus
	err := Retry(ctx, func() error {
		st, innerErr := c.getVMStatus(ctx, vmID)
		if innerErr == nil {
			result = st
			return nil
		}
		if isNetworkError(innerErr) || isTemporaryError(innerErr) {
			return &RetryableError{Err: innerErr}
		}
		return innerErr
	}, DefaultRetryConfig())
	return result, err
}
// getVMStatus searches all nodes for the VM and returns its runtime
// status. If the detailed status query fails, it falls back to the state
// reported by the VM listing.
func (c *Client) getVMStatus(ctx context.Context, vmID int) (*VMStatus, error) {
	nodes, err := c.ListNodes(ctx)
	if err != nil {
		return nil, errors.Wrap(err, "failed to list nodes")
	}
	for _, node := range nodes {
		vms, listErr := c.ListVMs(ctx, node)
		if listErr != nil {
			continue
		}
		for _, vm := range vms {
			if vm.ID != vmID {
				continue
			}
			// Query detailed runtime metrics for this VM.
			var current struct {
				Status string  `json:"status"`
				CPU    float64 `json:"cpu"`
				Mem    int64   `json:"mem"`
				NetIn  int64   `json:"netin"`
				NetOut int64   `json:"netout"`
			}
			if err := c.httpClient.Get(ctx, fmt.Sprintf("/nodes/%s/qemu/%d/status/current", node, vmID), &current); err != nil {
				// Detailed status unavailable: fall back to the listing state.
				return &VMStatus{State: vm.Status}, nil
			}
			// Best-effort IP discovery from the net0 config entry.
			var cfg struct {
				Net0 string `json:"net0"`
			}
			ip := ""
			if err := c.httpClient.Get(ctx, fmt.Sprintf("/nodes/%s/qemu/%d/config", node, vmID), &cfg); err == nil {
				ip = extractIP(cfg.Net0)
			}
			return &VMStatus{
				State:     current.Status,
				IPAddress: ip,
				CPU:       current.CPU,
				Memory:    current.Mem,
			}, nil
		}
	}
	return nil, fmt.Errorf("VM %d not found", vmID)
}
// extractIP pulls a literal IP address out of a Proxmox net-config string
// such as "virtio,bridge=vmbr0,ip=192.168.1.100/24". Returns "" when no
// literal address is present (e.g. "ip=dhcp" or "ip=static").
func extractIP(netConfig string) string {
	if netConfig == "" {
		return ""
	}
	for _, field := range strings.Split(netConfig, ",") {
		field = strings.TrimSpace(field)
		if !strings.HasPrefix(field, "ip=") {
			continue
		}
		value := strings.TrimPrefix(field, "ip=")
		// Strip a CIDR suffix when present.
		if slash := strings.Index(value, "/"); slash > 0 {
			return value[:slash]
		}
		// "dhcp"/"static" are modes, not addresses — keep scanning.
		if value == "dhcp" || value == "static" {
			continue
		}
		return value
	}
	return ""
}
// Helper functions

// isNetworkError reports whether the error message looks like a network
// failure (connection problems or timeouts). Uses the standard library's
// strings.Contains instead of the hand-rolled substring scan.
func isNetworkError(err error) bool {
	if err == nil {
		return false
	}
	msg := err.Error()
	return strings.Contains(msg, "network") ||
		strings.Contains(msg, "timeout") ||
		strings.Contains(msg, "connection")
}
// isTemporaryError reports whether the error message indicates a transient
// server-side failure (explicitly "temporary", or HTTP 502/503). Uses the
// standard library's strings.Contains instead of the hand-rolled scan.
func isTemporaryError(err error) bool {
	if err == nil {
		return false
	}
	msg := err.Error()
	return strings.Contains(msg, "temporary") ||
		strings.Contains(msg, "503") ||
		strings.Contains(msg, "502")
}
// contains reports whether substr occurs within s. The previous
// hand-rolled scan duplicated strings.Contains; delegate to the standard
// library while keeping the signature for existing callers.
func contains(s, substr string) bool {
	return strings.Contains(s, substr)
}

// indexOfSubstring returns the byte index of the first occurrence of
// substr in s, or -1 if absent. Kept for backward compatibility;
// delegates to strings.Index.
func indexOfSubstring(s, substr string) int {
	return strings.Index(s, substr)
}
// VMSpec represents the desired configuration for a VM create/update request.
type VMSpec struct {
Node string // target Proxmox node name
Name string // VM name
CPU int // number of CPU cores
Memory string // human-readable memory size (parsed by parseMemory)
Disk string // human-readable disk size (parsed by parseDisk)
Storage string // storage pool for the VM disk and cloud-init drive
Network string // bridge name used for the net0 interface
Image string // template VMID, storage volid, or image name to search for
UserData string // Cloud-init user data
SSHKeys []string // SSH public keys
TenantID string // Tenant ID used for tagging and tenant-scoped filtering
}
// VM represents a virtual machine as returned by this client.
type VM struct {
ID int // Proxmox VMID
Name string // VM name
Status string // lifecycle state reported by Proxmox (e.g. "running")
IP string // IP address when known
Node string // node hosting the VM
Created time.Time // creation timestamp when known
}
// VMStatus represents a VM's runtime status snapshot.
type VMStatus struct {
State string // lifecycle state (e.g. "running", "stopped")
IPAddress string // best-effort IP extracted from the net0 config
CPU float64 // CPU utilization as reported by Proxmox
Memory int64 // memory usage in bytes as reported by Proxmox
}
// Storage represents a storage pool.
type Storage struct {
Name string // storage identifier
Type string // backend type (e.g. "lvm", "dir")
Content []string // content kinds the pool may hold (e.g. "images", "iso")
Shared bool // true when shared across nodes
Enabled bool // true when the pool is enabled
Capacity int64 // total capacity in bytes
Used int64 // used space in bytes
}
// Network represents a network interface on a node.
type Network struct {
Name string // interface name (iface)
Type string // interface type (e.g. "bridge")
Bridge string // associated bridge, when applicable
VLAN int // VLAN tag, when applicable
Active bool // true when the interface is up
Autostart bool // true when the interface starts on boot
}
// ClusterInfo represents cluster information.
type ClusterInfo struct {
Name string // cluster display name
Nodes []string // names of all nodes in the cluster
Quorum bool // true when the cluster is quorate
Version string // Proxmox VE version string
}
// GetPVEVersion returns the Proxmox VE version string reported by /version.
func (c *Client) GetPVEVersion(ctx context.Context) (string, error) {
	var payload struct {
		Version string `json:"version"`
	}
	err := c.httpClient.Get(ctx, "/version", &payload)
	if err != nil {
		return "", errors.Wrap(err, "failed to get PVE version")
	}
	return payload.Version, nil
}
// SupportsImportDisk reports whether this Proxmox version is expected to
// offer the importdisk API. This is a best-effort, version-string
// heuristic (PVE 6.x-9.x); the API call site still handles 501 errors
// gracefully when the endpoint turns out to be missing.
func (c *Client) SupportsImportDisk(ctx context.Context) (bool, error) {
	version, err := c.GetPVEVersion(ctx)
	if err != nil {
		// Unknown version: conservatively report unsupported; the caller
		// falls back to handling 501 responses at call time.
		return false, nil
	}
	lowered := strings.ToLower(version)
	// Version string is usually "pve-manager/X.Y.Z/...".
	compatible := []string{
		"pve-manager/6.",
		"pve-manager/7.",
		"pve-manager/8.",
		"pve-manager/9.",
	}
	for _, prefix := range compatible {
		if strings.Contains(lowered, prefix) {
			return true, nil
		}
	}
	return false, nil
}
// CheckNodeHealth verifies that a node is reachable and reports itself as
// online, returning a descriptive error otherwise.
func (c *Client) CheckNodeHealth(ctx context.Context, node string) error {
	var nodeStatus struct {
		Status string `json:"status"`
		Uptime int64  `json:"uptime"`
	}
	path := fmt.Sprintf("/nodes/%s/status", node)
	if err := c.httpClient.Get(ctx, path, &nodeStatus); err != nil {
		return errors.Wrapf(err, "node %s is not reachable or unhealthy", node)
	}
	if nodeStatus.Status == "online" {
		return nil
	}
	return errors.Errorf("node %s is not online (status: %s)", node, nodeStatus.Status)
}
// ListNodes returns the names of all nodes in the cluster.
func (c *Client) ListNodes(ctx context.Context) ([]string, error) {
	var payload []struct {
		Node   string `json:"node"`
		Status string `json:"status"`
		Type   string `json:"type"`
	}
	if err := c.httpClient.Get(ctx, "/nodes", &payload); err != nil {
		return nil, errors.Wrap(err, "failed to list nodes")
	}
	names := make([]string, 0, len(payload))
	for _, entry := range payload {
		names = append(names, entry.Node)
	}
	return names, nil
}
// ListVMs lists all VMs on a node. When a non-empty tenantID is supplied,
// only VMs tagged "tenant_{id}" are returned; because the listing may omit
// tags, ownership is double-checked against the full VM config before a VM
// is excluded.
func (c *Client) ListVMs(ctx context.Context, node string, tenantID ...string) ([]VM, error) {
	var listing []struct {
		Vmid   int    `json:"vmid"`
		Name   string `json:"name"`
		Status string `json:"status"`
		Tags   string `json:"tags,omitempty"`
	}
	if err := c.httpClient.Get(ctx, fmt.Sprintf("/nodes/%s/qemu", node), &listing); err != nil {
		return nil, errors.Wrap(err, "failed to list VMs")
	}
	wantTag := ""
	if len(tenantID) > 0 && tenantID[0] != "" {
		// Tags use underscores (tenant_{id}) to satisfy Proxmox tag syntax,
		// matching the format written at VM creation time.
		wantTag = fmt.Sprintf("tenant_%s", tenantID[0])
	}
	result := []VM{}
	for _, entry := range listing {
		if wantTag != "" && !strings.Contains(entry.Tags, wantTag) {
			// The listing may omit tags; verify against the full VM config.
			var cfg struct {
				Tags string `json:"tags"`
			}
			cfgPath := fmt.Sprintf("/nodes/%s/qemu/%d/config", node, entry.Vmid)
			if err := c.httpClient.Get(ctx, cfgPath, &cfg); err != nil {
				continue // cannot verify ownership; skip
			}
			if !strings.Contains(cfg.Tags, wantTag) {
				continue // not owned by the requested tenant
			}
		}
		result = append(result, VM{
			ID:     entry.Vmid,
			Name:   entry.Name,
			Status: entry.Status,
			Node:   node,
		})
	}
	return result, nil
}
// ListStorages lists all storage pools defined on the cluster.
func (c *Client) ListStorages(ctx context.Context) ([]Storage, error) {
	var payload []struct {
		Storage string   `json:"storage"`
		Type    string   `json:"type"`
		Content []string `json:"content"`
		Shared  int      `json:"shared"`
		Enabled int      `json:"enabled"`
		Total   int64    `json:"total"`
		Used    int64    `json:"used"`
	}
	if err := c.httpClient.Get(ctx, "/storage", &payload); err != nil {
		return nil, errors.Wrap(err, "failed to list storages")
	}
	result := make([]Storage, 0, len(payload))
	for _, entry := range payload {
		result = append(result, Storage{
			Name:     entry.Storage,
			Type:     entry.Type,
			Content:  entry.Content,
			Shared:   entry.Shared == 1, // Proxmox encodes booleans as 0/1
			Enabled:  entry.Enabled == 1,
			Capacity: entry.Total,
			Used:     entry.Used,
		})
	}
	return result, nil
}
// ListNetworks lists all network interfaces configured on a node.
func (c *Client) ListNetworks(ctx context.Context, node string) ([]Network, error) {
	var payload []struct {
		Iface     string `json:"iface"`
		Type      string `json:"type"`
		Bridge    string `json:"bridge"`
		VLAN      int    `json:"vlan"`
		Active    int    `json:"active"`
		Autostart int    `json:"autostart"`
	}
	if err := c.httpClient.Get(ctx, fmt.Sprintf("/nodes/%s/network", node), &payload); err != nil {
		return nil, errors.Wrap(err, "failed to list networks")
	}
	result := make([]Network, 0, len(payload))
	for _, entry := range payload {
		result = append(result, Network{
			Name:      entry.Iface,
			Type:      entry.Type,
			Bridge:    entry.Bridge,
			VLAN:      entry.VLAN,
			Active:    entry.Active == 1, // Proxmox encodes booleans as 0/1
			Autostart: entry.Autostart == 1,
		})
	}
	return result, nil
}
// GetClusterInfo aggregates cluster status, the PVE version, and the node
// list into a single ClusterInfo value.
func (c *Client) GetClusterInfo(ctx context.Context) (*ClusterInfo, error) {
	var entries []struct {
		Name   string `json:"name"`
		Type   string `json:"type"`
		Quorum int    `json:"quorate"`
	}
	if err := c.httpClient.Get(ctx, "/cluster/status", &entries); err != nil {
		return nil, errors.Wrap(err, "failed to get cluster status")
	}
	var versionPayload struct {
		Version string `json:"version"`
	}
	if err := c.httpClient.Get(ctx, "/version", &versionPayload); err != nil {
		return nil, errors.Wrap(err, "failed to get version")
	}
	nodes, err := c.ListNodes(ctx)
	if err != nil {
		return nil, errors.Wrap(err, "failed to list nodes")
	}
	// Quorum is reported on the entry of type "cluster".
	hasQuorum := false
	for _, entry := range entries {
		if entry.Type == "cluster" && entry.Quorum == 1 {
			hasQuorum = true
			break
		}
	}
	return &ClusterInfo{
		Name:    "proxmox-cluster",
		Nodes:   nodes,
		Quorum:  hasQuorum,
		Version: versionPayload.Version,
	}, nil
}