#!/usr/bin/env python3
"""
Fix Broken References Script
Automatically fixes broken markdown links based on file moves during cleanup
"""

import json
import os
import re
from collections import defaultdict
from pathlib import Path

# Mapping of old paths to new paths (from cleanup).
# NOTE(review): not consulted by the code below — the script instead probes the
# destination directories directly (see find_file_in_new_location). Kept as a
# record of the cleanup moves and for potential pattern-based rewriting.
PATH_MAPPINGS = {
    # Root → reports/status/
    r'^BESU_.*\.md$': 'reports/status/',
    r'^FIREFLY_.*\.md$': 'reports/status/',
    r'^DBIS_.*\.md$': 'reports/status/',
    r'^.*STATUS.*\.md$': 'reports/status/',
    r'^.*COMPLETE.*\.md$': 'reports/status/',
    r'^.*FINAL.*\.md$': 'reports/status/',
    r'^.*REPORT.*\.md$': 'reports/status/',
    # Root → reports/analyses/
    r'^.*ANALYSIS.*\.md$': 'reports/analyses/',
    r'^IP_CONFLICT.*\.md$': 'reports/analyses/',
    r'^PHASE1_IP.*\.md$': 'reports/analyses/',
    # Root → docs/
    r'^CLOUDFLARE_API_SETUP\.md$': 'docs/04-configuration/',
    r'^CLOUDFLARE_TUNNEL.*\.md$': 'docs/04-configuration/',
    r'^SETUP_TUNNEL.*\.md$': 'docs/04-configuration/',
    r'^TUNNEL.*\.md$': 'docs/04-configuration/',
    r'^NGINX_CONFIGURATIONS.*\.md$': 'docs/04-configuration/',
    r'^NO_SSH_ACCESS.*\.md$': 'docs/09-troubleshooting/',
    r'^TROUBLESHOOT.*\.md$': 'docs/09-troubleshooting/',
    r'^FIX_TUNNEL.*\.md$': 'docs/09-troubleshooting/',
    r'^R630-04.*\.md$': 'docs/09-troubleshooting/',
    r'^LIST_VMS.*\.md$': 'docs/01-getting-started/',
    r'^THIRDWEB_RPC.*\.md$': 'docs/01-getting-started/',
    r'^CHAIN138_TOKEN.*\.md$': 'docs/11-references/',
    r'^OMADA.*\.md$': 'docs/11-references/',
    r'^GET_EMAIL.*\.md$': 'docs/11-references/',
    # Specific file mappings
    'docs/ENV_STANDARDIZATION.md': 'docs/04-configuration/ENV_STANDARDIZATION.md',
    'docs/MCP_SETUP.md': 'docs/04-configuration/MCP_SETUP.md',
    'MCP_SETUP.md': 'docs/04-configuration/MCP_SETUP.md',
}

# Path fragments identifying submodules / vendored trees that must not be edited.
EXCLUDE_PATHS = [
    'ProxmoxVE/',
    'smom-dbis-138/',
    'explorer-monorepo/',
    'metamask-integration/',
    'metaverseDubai/',
    'miracles_in_motion/',
    'dbis_core/',
    'gru-docs/',
    'node_modules/',
    '.git/',
]


def should_process_file(file_path):
    """Return True if *file_path* belongs to the main project.

    Files whose path contains any EXCLUDE_PATHS fragment (submodules,
    vendored code, .git) are skipped.
    """
    path_str = str(file_path)
    # Don't process submodules
    if any(exclude in path_str for exclude in EXCLUDE_PATHS):
        return False
    # Only process files in our main project
    return True


def find_file_in_new_location(filename):
    """Return the new repo-relative path for *filename*, or None if not found.

    Probes the destination directories created during the cleanup, in
    priority order: reports/status, reports/analyses, reports, then the
    known docs subdirectories.
    """
    # FIX: the placeholder interpolation had been lost (every candidate path
    # used a literal "(unknown)" instead of the filename), so no moved file
    # could ever be located. Restored the {filename} interpolation.
    candidate_dirs = [
        'reports/status',
        'reports/analyses',
        'reports',
        'docs/01-getting-started',
        'docs/04-configuration',
        'docs/09-troubleshooting',
        'docs/11-references',
    ]
    for directory in candidate_dirs:
        candidate = f'{directory}/{filename}'
        if Path(candidate).exists():
            return candidate
    return None


def fix_references_in_file(file_path):
    """Fix broken markdown references in a single file.

    Rewrites ``[text](path)`` links whose target file was moved during
    cleanup. The file is written back only when at least one link changed.

    Returns a list of human-readable change descriptions (empty when no
    changes were made), or a single-element ``["  Error: ..."]`` list if the
    file could not be processed.
    """
    try:
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
            content = f.read()

        original_content = content
        changes = []

        # Pattern for markdown links: [text](path)
        link_pattern = r'\[([^\]]+)\]\(([^)]+)\)'

        def replace_link(match):
            link_text = match.group(1)
            link_path = match.group(2)

            # Skip external links and anchor-only links.
            if link_path.startswith('http') or link_path.startswith('#'):
                return match.group(0)

            # Split off an optional "#anchor" suffix so it can be re-attached
            # to the rewritten path.
            if '#' in link_path:
                file_part, anchor = link_path.split('#', 1)
                anchor_part = '#' + anchor
            else:
                file_part = link_path
                anchor_part = ''

            filename = Path(file_part).name

            # Try to find the file in its new location.
            new_location = find_file_in_new_location(filename)
            if new_location:
                # FIX: Path.relative_to raises ValueError whenever the target
                # is not a descendant of the current file's directory (the
                # common case), which previously forced a root-absolute link
                # most markdown renderers cannot resolve. os.path.relpath
                # computes a correct "../"-style relative path instead.
                relative_path = os.path.relpath(new_location, start=file_path.parent)
                changes.append(f"  Fixed: {link_path} → {relative_path}{anchor_part}")
                return f'[{link_text}]({relative_path}{anchor_part})'

            # Target unknown — leave the link untouched.
            return match.group(0)

        content = re.sub(link_pattern, replace_link, content)

        if content != original_content:
            with open(file_path, 'w', encoding='utf-8') as f:
                f.write(content)
            return changes

        return []
    except Exception as e:
        # Best-effort per file: report the failure instead of aborting the run.
        return [f"  Error: {e}"]


def main():
    """Walk the repo, fix links in every eligible *.md file, save a report."""
    root = Path('.')
    fixed_count = 0
    total_changes = []

    print("🔧 Fixing broken references...")
    print("")

    # Process markdown files
    for md_file in root.rglob('*.md'):
        if not should_process_file(md_file):
            continue

        changes = fix_references_in_file(md_file)
        if changes:
            fixed_count += 1
            print(f"✅ Fixed: {md_file}")
            for change in changes:
                print(change)
            total_changes.extend([(str(md_file), c) for c in changes])

    print("")
    print(f"✅ Fixed references in {fixed_count} files")
    print(f"   Total changes: {len(total_changes)}")

    # Save report
    report = {
        'files_fixed': fixed_count,
        'total_changes': len(total_changes),
        'changes': total_changes,
    }
    with open('REFERENCE_FIXES_REPORT.json', 'w') as f:
        json.dump(report, f, indent=2)

    print("✅ Report saved: REFERENCE_FIXES_REPORT.json")


if __name__ == '__main__':
    main()