Add markdown file analysis tools and reports
This commit is contained in:
365
scripts/analyze-markdown-files.py
Executable file
365
scripts/analyze-markdown-files.py
Executable file
@@ -0,0 +1,365 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Comprehensive Markdown File Analysis Script
|
||||
Analyzes all markdown files in the project for:
|
||||
- File dates (creation, modification)
|
||||
- Duplicate patterns
|
||||
- Misplaced files
|
||||
- Content inconsistencies
|
||||
- Outdated information
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import json
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from collections import defaultdict
|
||||
from typing import Dict, List, Tuple, Set
|
||||
import hashlib
|
||||
|
||||
class MarkdownAnalyzer:
    """Scan a directory tree for markdown files and report on their age,
    naming patterns, placement, exact-duplicate content, and content-level
    issues (TODOs, deprecation markers, placeholder dates).

    Typical use::

        report = MarkdownAnalyzer(project_root).analyze()
    """

    # Directories that are never descended into while scanning.
    EXCLUDE_DIRS = {'.git', 'node_modules', '__pycache__', '.next', 'dist', 'build', 'venv', '.venv'}

    # Filename classification patterns (bucket name -> compiled regex).
    NAME_PATTERNS = {
        'complete': re.compile(r'COMPLETE', re.I),
        'final': re.compile(r'FINAL', re.I),
        'status': re.compile(r'STATUS', re.I),
        'timestamped': re.compile(r'_\d{8}_\d{6}|\d{8}_\d{6}'),
        'fix': re.compile(r'FIX|QUICK_FIX|RUN_NOW|EXECUTE', re.I),
        'report': re.compile(r'REPORT|SUMMARY|ANALYSIS|DIAGNOSTIC', re.I),
        'temporary': re.compile(r'NOW|READY|EXECUTE|RUN_', re.I),
    }

    # Root-level files that legitimately live in the project root.
    _ROOT_WHITELIST = ('README.md', 'PROJECT_STRUCTURE.md')

    def __init__(self, root_dir: str):
        """Create an analyzer rooted at *root_dir* (str or Path)."""
        self.root_dir = Path(root_dir)
        self.files: List[Dict] = []                       # one metadata dict per markdown file
        self.duplicates = defaultdict(list)               # digest -> every file sharing that content
        self.patterns: Dict[str, List[Dict]] = {name: [] for name in self.NAME_PATTERNS}
        self.misplaced: List[Dict] = []                   # files that appear to be in the wrong dir
        self.content_hashes: Dict[str, List[Dict]] = {}   # digest -> all files with that content
        self.file_metadata: List[Dict] = []               # unused here; kept for API compatibility

    def analyze(self) -> Dict:
        """Run the full analysis pipeline and return the report dict."""
        print("🔍 Scanning markdown files...")
        self._scan_files()
        print(f"📊 Found {len(self.files)} markdown files")

        print("\n📅 Analyzing file dates...")
        self._analyze_dates()

        print("\n🔎 Identifying patterns...")
        self._identify_patterns()

        print("\n📍 Finding misplaced files...")
        self._find_misplaced()

        print("\n🔗 Checking for duplicates...")
        self._check_duplicates()

        print("\n📝 Analyzing content...")
        self._analyze_content()

        return self._generate_report()

    def _scan_files(self):
        """Collect filesystem metadata for every markdown file below the root."""
        for md_file in self.root_dir.rglob('*.md'):
            # Skip anything inside an excluded directory.
            if any(part in self.EXCLUDE_DIRS for part in md_file.parts):
                continue

            try:
                stat = md_file.stat()
                rel_path = md_file.relative_to(self.root_dir)

                self.files.append({
                    'path': str(rel_path),
                    'full_path': str(md_file),
                    'size': stat.st_size,
                    'modified': datetime.fromtimestamp(stat.st_mtime),
                    'accessed': datetime.fromtimestamp(stat.st_atime),
                    # True creation time exists only where the platform exposes
                    # st_birthtime (macOS/BSD).  The original guard tested
                    # st_birthtime but then read st_ctime (metadata-change
                    # time on Linux) — fixed to read st_birthtime.
                    'created': datetime.fromtimestamp(stat.st_birthtime) if hasattr(stat, 'st_birthtime') else None,
                    'directory': str(rel_path.parent),
                    'name': md_file.name
                })
            except (OSError, PermissionError) as e:
                print(f"⚠️ Error accessing {md_file}: {e}")

    def _analyze_dates(self):
        """Derive age in days and a coarse age bucket for each file."""
        now = datetime.now()
        for file_info in self.files:
            days_old = (now - file_info['modified']).days
            file_info['days_old'] = days_old
            # The original chain had two branches (<7 and <30) that both
            # yielded 'recent'; collapsed to a single <30 test.
            file_info['age_category'] = (
                'recent' if days_old < 30 else
                'moderate' if days_old < 90 else
                'old' if days_old < 365 else
                'very_old'
            )

    def _identify_patterns(self):
        """Bucket files by which NAME_PATTERNS their filename matches."""
        for file_info in self.files:
            name = file_info['name']
            for pattern_name, pattern in self.NAME_PATTERNS.items():
                if pattern.search(name):
                    self.patterns[pattern_name].append(file_info)
                    file_info[f'has_{pattern_name}'] = True

    def _find_misplaced(self):
        """Flag files whose name suggests they belong in another directory."""
        root_files = [f for f in self.files if f['directory'] == '.']
        docs_files = [f for f in self.files if f['directory'].startswith('docs')]

        # Reports in root
        for f in root_files:
            if any(keyword in f['name'].upper() for keyword in ('REPORT', 'STATUS', 'INVENTORY', 'DIAGNOSTIC', 'ANALYSIS')):
                if f['name'] not in self._ROOT_WHITELIST:
                    self.misplaced.append({
                        'file': f,
                        'current': 'root',
                        'should_be': 'reports/',
                        'reason': 'Report file in root directory'
                    })

        # Status/completion files in docs
        for f in docs_files:
            if any(keyword in f['name'].upper() for keyword in ('COMPLETE', 'FINAL', 'STATUS', 'MIGRATION_COMPLETE')):
                self.misplaced.append({
                    'file': f,
                    'current': f['directory'],
                    'should_be': 'reports/',
                    'reason': 'Status/completion report in docs directory'
                })

        # Temporary fix guides in project root
        for f in root_files:
            if any(keyword in f['name'].upper() for keyword in ('FIX_', 'QUICK_FIX', 'RUN_NOW', 'EXECUTE')):
                self.misplaced.append({
                    'file': f,
                    'current': 'root',
                    'should_be': 'docs/09-troubleshooting/archive/',
                    'reason': 'Temporary fix guide in root'
                })

    def _check_duplicates(self):
        """Group files by exact content so duplicates can be reported.

        Fix: the previous version only recorded the *second and later* copy
        of each digest, so a pair of identical files was never reported
        (the group length never exceeded 1) and the first copy was always
        missing from larger groups.  Every file is now appended to its
        digest group, and ``self.duplicates`` points at the complete group.
        """
        for file_info in self.files:
            try:
                # md5 is used purely as a content fingerprint, not for security.
                digest = hashlib.md5(Path(file_info['full_path']).read_bytes()).hexdigest()
            except OSError:
                continue  # best-effort: unreadable files are simply skipped

            group = self.content_hashes.setdefault(digest, [])
            group.append(file_info)
            if len(group) > 1:
                self.duplicates[digest] = group

    def _analyze_content(self):
        """Scan file content for TODOs, deprecation markers, and dates."""
        for file_info in self.files:
            try:
                with open(file_info['full_path'], 'r', encoding='utf-8', errors='ignore') as f:
                    content = f.read()
            except OSError as e:
                # Record the failure instead of silently skipping the file.
                file_info['line_count'] = 0
                file_info['error'] = str(e)
                continue

            file_info['line_count'] = len(content.split('\n'))
            file_info['has_todo'] = 'TODO' in content or 'FIXME' in content
            file_info['has_deprecated'] = 'DEPRECATED' in content or 'OBSOLETE' in content
            file_info['has_date'] = bool(re.search(r'\d{4}-\d{2}-\d{2}|\d{1,2}/\d{1,2}/\d{4}', content))

            # Unexpanded shell date substitutions left behind by templates.
            if re.search(r'\$\(date\)|date \+', content):
                file_info['has_placeholder_date'] = True

    def _generate_report(self) -> Dict:
        """Assemble the final report dict from everything collected above."""
        report = {
            'summary': {
                'total_files': len(self.files),
                'total_size_mb': sum(f['size'] for f in self.files) / (1024 * 1024),
                'by_age': defaultdict(int),
                'by_directory': defaultdict(int)
            },
            'patterns': {},
            'misplaced': [],
            'duplicates': [],
            'old_files': [],
            'empty_files': [],
            'issues': []
        }

        # Summary stats
        for f in self.files:
            report['summary']['by_age'][f['age_category']] += 1
            report['summary']['by_directory'][f['directory']] += 1

        # Pattern counts
        for pattern_name, files in self.patterns.items():
            report['patterns'][pattern_name] = {
                'count': len(files),
                'files': [f['path'] for f in files[:20]]  # Limit to 20
            }

        # Misplaced files
        report['misplaced'] = [
            {
                'path': m['file']['path'],
                'current': m['current'],
                'should_be': m['should_be'],
                'reason': m['reason']
            }
            for m in self.misplaced
        ]

        # Duplicate content (each group now includes the first copy too)
        for hash_val, files in self.duplicates.items():
            if len(files) > 1:
                report['duplicates'].append({
                    'hash': hash_val[:8],
                    'count': len(files),
                    'files': [f['path'] for f in files]
                })

        # Old files (>90 days)
        report['old_files'] = [
            {
                'path': f['path'],
                'days_old': f['days_old'],
                'modified': f['modified'].isoformat()
            }
            for f in self.files if f['days_old'] > 90
        ]

        # Empty or very small files
        report['empty_files'] = [
            {
                'path': f['path'],
                'size': f['size'],
                'line_count': f.get('line_count', 0)
            }
            for f in self.files if f['size'] < 100 or f.get('line_count', 0) < 5
        ]

        # Per-file issue list
        for f in self.files:
            issues = []
            if f.get('has_placeholder_date'):
                issues.append('Contains placeholder date')
            if f.get('has_deprecated'):
                issues.append('Marks itself as deprecated')
            if f['days_old'] > 365:
                issues.append('Very old (>1 year)')
            if f['size'] < 50:
                issues.append('Very small file')

            if issues:
                report['issues'].append({
                    'path': f['path'],
                    'issues': issues
                })

        return report
|
||||
|
||||
def main():
    """Run the analyzer over the project root and write JSON + markdown reports.

    Returns the report dict so callers/tests can inspect it.
    """
    root_dir = Path(__file__).parent.parent
    analyzer = MarkdownAnalyzer(root_dir)
    report = analyzer.analyze()

    # Save JSON report.  encoding is pinned so non-ASCII file paths don't
    # trip a locale-dependent codec; default=str serializes the datetime
    # objects embedded in the report.
    json_file = root_dir / 'MARKDOWN_ANALYSIS.json'
    with open(json_file, 'w', encoding='utf-8') as f:
        json.dump(report, f, indent=2, default=str)
    print(f"\n✅ JSON report saved to: {json_file}")

    # Generate markdown report (same encoding rationale as above).
    md_file = root_dir / 'MARKDOWN_ANALYSIS_REPORT.md'
    with open(md_file, 'w', encoding='utf-8') as f:
        f.write(generate_markdown_report(report))
    print(f"✅ Markdown report saved to: {md_file}")

    return report
|
||||
|
||||
def generate_markdown_report(report: Dict) -> str:
    """Render the analysis *report* dict as a human-readable markdown document."""
    out: List[str] = []
    emit = out.append

    emit("# Markdown Files Analysis Report\n")
    emit(f"**Generated**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")

    # Summary section
    summary = report['summary']
    emit("## Summary\n")
    emit(f"- **Total Files**: {summary['total_files']}")
    emit(f"- **Total Size**: {summary['total_size_mb']:.2f} MB\n")

    emit("### Files by Age\n")
    for age_name, total in sorted(summary['by_age'].items()):
        emit(f"- **{age_name.title()}**: {total}")
    emit("")

    # One subsection per filename pattern, capped at 10 listed files.
    emit("## File Patterns\n")
    for name, info in report['patterns'].items():
        emit(f"### {name.title()} ({info['count']} files)\n")
        for entry in info['files'][:10]:
            emit(f"- `{entry}`")
        if info['count'] > 10:
            emit(f"- ... and {info['count'] - 10} more")
        emit("")

    # Misplaced files (first 50)
    emit("## Misplaced Files\n")
    emit(f"Found **{len(report['misplaced'])}** misplaced files:\n")
    for item in report['misplaced'][:50]:
        emit(f"- **{item['path']}**")
        emit(f"  - Current: `{item['current']}`")
        emit(f"  - Should be: `{item['should_be']}`")
        emit(f"  - Reason: {item['reason']}\n")

    # Duplicate groups (first 20)
    emit("## Duplicate Content\n")
    emit(f"Found **{len(report['duplicates'])}** sets of duplicate files:\n")
    for group in report['duplicates'][:20]:
        emit(f"- **{group['count']} files** with same content:")
        for entry in group['files']:
            emit(f"  - `{entry}`")
        emit("")

    # Oldest files first (first 50)
    emit("## Old Files (>90 days)\n")
    emit(f"Found **{len(report['old_files'])}** old files:\n")
    oldest_first = sorted(report['old_files'], key=lambda x: x['days_old'], reverse=True)
    for entry in oldest_first[:50]:
        emit(f"- **{entry['path']}** ({entry['days_old']} days old, modified: {entry['modified'][:10]})")
    emit("")

    # Per-file issues (first 50)
    emit("## Files with Issues\n")
    emit(f"Found **{len(report['issues'])}** files with issues:\n")
    for item in report['issues'][:50]:
        emit(f"- **{item['path']}**")
        for note in item['issues']:
            emit(f"  - {note}")
    emit("")

    return "\n".join(out)
|
||||
|
||||
# Script entry point: run the analysis when executed directly.
if __name__ == '__main__':
    main()
|
||||
308
scripts/check-content-inconsistencies.py
Executable file
308
scripts/check-content-inconsistencies.py
Executable file
@@ -0,0 +1,308 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Content Inconsistency Checker
|
||||
Compares related markdown files for inconsistencies in:
|
||||
- Dates
|
||||
- Status information
|
||||
- Configuration values
|
||||
- References to other files
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import json
|
||||
from pathlib import Path
|
||||
from collections import defaultdict
|
||||
from typing import Dict, List, Set, Tuple
|
||||
from datetime import datetime
|
||||
|
||||
class ContentInconsistencyChecker:
    """Compare related markdown files for inconsistent dates, status
    markers, cross-references, and configuration values.

    Typical use::

        report = ContentInconsistencyChecker(project_root).check()
    """

    # Directories that are never scanned.
    EXCLUDE_DIRS = {'.git', 'node_modules', '__pycache__', '.next', 'dist', 'build', 'venv'}

    def __init__(self, root_dir: str):
        """Create a checker rooted at *root_dir* (str or Path)."""
        self.root_dir = Path(root_dir)
        self.inconsistencies: List[Dict] = []     # accumulated findings
        self.file_contents: Dict[str, Dict] = {}  # rel path -> {'content', 'path', 'lines'}

    def check(self) -> Dict:
        """Run every consistency check and return the report dict."""
        print("🔍 Checking content inconsistencies...")

        # Load file contents
        self._load_files()

        # Check for inconsistencies
        print("\n📅 Checking date inconsistencies...")
        self._check_dates()

        print("\n📊 Checking status inconsistencies...")
        self._check_status()

        print("\n🔗 Checking cross-references...")
        self._check_references()

        print("\n⚙️ Checking configuration values...")
        self._check_config_values()

        print("\n📝 Checking duplicate content...")
        self._check_duplicate_content()

        return self._generate_report()

    def _load_files(self):
        """Read every markdown file under the root into memory."""
        for md_file in self.root_dir.rglob('*.md'):
            if any(part in self.EXCLUDE_DIRS for part in md_file.parts):
                continue

            try:
                with open(md_file, 'r', encoding='utf-8', errors='ignore') as f:
                    content = f.read()
            except OSError:
                continue  # best-effort: skip unreadable files

            rel_path = str(md_file.relative_to(self.root_dir))
            self.file_contents[rel_path] = {
                'content': content,
                'path': rel_path,
                'lines': content.split('\n')
            }

    def _check_dates(self):
        """Flag dates older than one year, scanned per project area."""
        date_patterns = [
            r'(\d{4}-\d{2}-\d{2})',            # YYYY-MM-DD
            r'(\d{1,2}/\d{1,2}/\d{4})',        # MM/DD/YYYY
            r'Date[:\s]+(\d{4}-\d{2}-\d{2})',
            r'Generated[:\s]+(\d{4}-\d{2}-\d{2})',
            r'Last Updated[:\s]+(\d{4}-\d{2}-\d{2})',
        ]

        # Group files by project/component
        project_files = defaultdict(list)
        for path in self.file_contents:
            if 'rpc-translator-138' in path:
                project_files['rpc-translator-138'].append(path)
            elif path.startswith('docs/'):
                project_files['docs'].append(path)
            elif path.startswith('reports/'):
                project_files['reports'].append(path)
            elif '/' not in path:
                # Root-level file.  (The original also tested
                # path.count('/') == 0, which is the same condition.)
                project_files['root'].append(path)

        now = datetime.now()
        for project, files in project_files.items():
            dates_found = []
            for file_path in files:
                content = self.file_contents[file_path]['content']
                for pattern in date_patterns:
                    for match in re.findall(pattern, content):
                        dates_found.append((file_path, match))

            # Check for very old dates (>1 year)
            for file_path, date_str in dates_found:
                try:
                    if '-' in date_str:
                        date_obj = datetime.strptime(date_str, '%Y-%m-%d')
                    elif '/' in date_str and len(date_str.split('/')) == 3:
                        date_obj = datetime.strptime(date_str, '%m/%d/%Y')
                    else:
                        continue
                except ValueError:
                    # Regex matched but it isn't a real date (e.g. 2026-13-99);
                    # the original used a bare except here.
                    continue

                days_diff = (now - date_obj).days
                if days_diff > 365:
                    self.inconsistencies.append({
                        'type': 'old_date',
                        'file': file_path,
                        'issue': f'Date {date_str} is {days_diff} days old',
                        'severity': 'medium'
                    })

    def _check_status(self):
        """Flag groups of related status files whose statuses disagree."""
        status_patterns = [
            r'Status[:\s]+([✅❌🔄⚠️]+|COMPLETE|INCOMPLETE|PENDING|ACTIVE|INACTIVE)',
            r'\*\*Status\*\*[:\s]+([✅❌🔄⚠️]+|COMPLETE|INCOMPLETE|PENDING)',
        ]

        # Group related status files by a stripped-down base name
        status_groups = defaultdict(list)
        for path in self.file_contents:
            filename = Path(path).name
            if 'COMPLETE' in filename or 'STATUS' in filename or 'FINAL' in filename:
                # Extract base name
                base = re.sub(r'_(COMPLETE|FINAL|STATUS).*', '', filename)
                base = re.sub(r'COMPLETE|FINAL|STATUS', '', base)
                status_groups[base].append(path)

        # Check for conflicting statuses
        for base, files in status_groups.items():
            if len(files) <= 1:
                continue
            statuses = []
            for file_path in files:
                content = self.file_contents[file_path]['content']
                for pattern in status_patterns:
                    matches = re.findall(pattern, content, re.IGNORECASE)
                    statuses.extend((file_path, m) for m in matches)

            # Compare case-insensitively: the regexes match with IGNORECASE,
            # so 'Complete' vs 'COMPLETE' is not a genuine conflict (the
            # original counted them as two different statuses).
            if len({s[1].upper() for s in statuses}) > 1:
                self.inconsistencies.append({
                    'type': 'conflicting_status',
                    'files': files,
                    'issue': f'Multiple status files for {base} with different statuses',
                    'severity': 'high'
                })

    def _check_references(self):
        """Flag markdown links whose target file does not exist."""
        reference_pattern = r'\[([^\]]+)\]\(([^)]+)\)'
        # Resolve the root once; all link targets are compared against it.
        root = self.root_dir.resolve()

        for path, data in self.file_contents.items():
            for _link_text, link_path in re.findall(reference_pattern, data['content']):
                # Skip external links
                if link_path.startswith(('http', 'mailto:')):
                    continue

                # Only the file part is checked; any '#anchor' is dropped.
                file_path = link_path.split('#', 1)[0]

                if file_path.startswith('/'):
                    # Treat absolute-looking links as project-root-relative.
                    relative_resolved = Path(file_path.lstrip('/'))
                else:
                    # FIX: resolve against the project root, not the process
                    # CWD.  The old code did (rel_parent / target).resolve(),
                    # which anchors at os.getcwd(); relative_to(root) then
                    # raised ValueError and *every* link was silently skipped
                    # whenever the script ran from outside the project root.
                    resolved = (root / Path(path).parent / file_path).resolve()
                    try:
                        relative_resolved = resolved.relative_to(root)
                    except ValueError:
                        # Path is outside project root, skip
                        continue

                # Check if file exists
                if not (root / relative_resolved).exists():
                    self.inconsistencies.append({
                        'type': 'broken_reference',
                        'file': path,
                        'issue': f'Broken link to {link_path}',
                        'severity': 'medium'
                    })

    def _check_config_values(self):
        """Flag components whose docs reference suspiciously many IPs."""
        # Look for IP addresses, VMIDs
        ip_pattern = r'192\.168\.11\.(\d+)'
        vmid_pattern = r'VMID[:\s]+(\d+)'

        configs_by_component = defaultdict(lambda: defaultdict(set))

        for path, data in self.file_contents.items():
            content = data['content']
            # Hoisted out of the match loops: same value for every match.
            component = self._identify_component(path)

            for ip in re.findall(ip_pattern, content):
                configs_by_component[component]['ips'].add(f'192.168.11.{ip}')

            for vmid in re.findall(vmid_pattern, content, re.IGNORECASE):
                configs_by_component[component]['vmids'].add(vmid)

        # Check for inconsistencies (same component, different values)
        for component, configs in configs_by_component.items():
            if len(configs['ips']) > 10:  # Too many IPs might indicate inconsistency
                self.inconsistencies.append({
                    'type': 'too_many_ips',
                    'component': component,
                    'issue': f'Component {component} references {len(configs["ips"])} different IPs',
                    'severity': 'low'
                })

    def _check_duplicate_content(self):
        """Flag pairs of files whose first 10 lines are identical."""
        file_signatures = {}

        for path, data in self.file_contents.items():
            # hash() values are only compared within this process, so
            # per-run string-hash randomization is not a problem here.
            signature = hash('\n'.join(data['lines'][:10]))

            if signature in file_signatures:
                self.inconsistencies.append({
                    'type': 'duplicate_intro',
                    'files': [file_signatures[signature], path],
                    'issue': 'Files have identical first 10 lines',
                    'severity': 'low'
                })
            else:
                file_signatures[signature] = path

    def _identify_component(self, path: str) -> str:
        """Map a file path onto the project component it documents."""
        if 'rpc-translator' in path:
            return 'rpc-translator-138'
        if 'besu' in path.lower():
            return 'besu'
        if 'dbis' in path.lower():
            return 'dbis'
        if 'firefly' in path.lower():
            return 'firefly'
        return 'other'

    def _generate_report(self) -> Dict:
        """Summarize the collected findings by type and severity."""
        report = {
            'summary': {
                'total_inconsistencies': len(self.inconsistencies),
                'by_type': defaultdict(int),
                'by_severity': defaultdict(int)
            },
            'inconsistencies': []
        }

        for inc in self.inconsistencies:
            report['summary']['by_type'][inc['type']] += 1
            report['summary']['by_severity'][inc['severity']] += 1
            report['inconsistencies'].append(inc)

        return report
|
||||
|
||||
def main():
    """Run the checker over the project root and save the JSON report.

    Returns the report dict so callers/tests can inspect it.
    """
    root_dir = Path(__file__).parent.parent
    checker = ContentInconsistencyChecker(root_dir)
    report = checker.check()

    # Save report.  encoding is pinned so non-ASCII paths don't trip a
    # locale-dependent codec; default=str covers non-JSON-native values.
    json_file = root_dir / 'CONTENT_INCONSISTENCIES.json'
    with open(json_file, 'w', encoding='utf-8') as f:
        json.dump(report, f, indent=2, default=str)
    print(f"\n✅ Report saved to: {json_file}")

    # Print summary
    print("\n📊 Summary:")
    print(f"  Total inconsistencies: {report['summary']['total_inconsistencies']}")
    print(f"  By type: {dict(report['summary']['by_type'])}")
    print(f"  By severity: {dict(report['summary']['by_severity'])}")

    return report
|
||||
|
||||
# Script entry point: run the consistency checks when executed directly.
if __name__ == '__main__':
    main()
|
||||
319
scripts/cleanup-markdown-files.sh
Executable file
319
scripts/cleanup-markdown-files.sh
Executable file
@@ -0,0 +1,319 @@
|
||||
#!/bin/bash
# Markdown Files Cleanup Script
# Automatically organizes markdown files based on analysis

# Abort on errors, unset variables, and failures anywhere in a pipeline.
set -euo pipefail

# Resolve this script's directory, derive the project root one level up,
# and run everything relative to the project root.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
cd "$PROJECT_ROOT"

# Colors for output
# NOTE: the escapes are stored literally; echo -e interprets them at print time.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Dry-run mode (set to false to actually move files)
DRY_RUN=${DRY_RUN:-true}

# Log file
# Timestamped so repeated runs never clobber an earlier log.
LOG_FILE="$PROJECT_ROOT/MARKDOWN_CLEANUP_LOG_$(date +%Y%m%d_%H%M%S).log"
|
||||
|
||||
# Shared writer: render one colored line to stdout and append it to the log.
_emit() {
    echo -e "$1" | tee -a "$LOG_FILE"
}

# Timestamped informational message.
log() {
    _emit "${BLUE}[$(date +'%Y-%m-%d %H:%M:%S')]${NC} $1"
}

# Non-fatal warning.
warn() {
    _emit "${YELLOW}[WARN]${NC} $1"
}

# Error message (does not exit by itself).
error() {
    _emit "${RED}[ERROR]${NC} $1"
}

# Success confirmation.
success() {
    _emit "${GREEN}[OK]${NC} $1"
}
|
||||
|
||||
# move_file SRC DEST REASON
# Move SRC to DEST (creating DEST's parent directory first), honoring
# DRY_RUN.  Returns 1 when SRC is not a regular file or the move fails.
move_file() {
    local src="$1"
    local dest="$2"
    local reason="$3"
    local dest_dir

    if [ ! -f "$src" ]; then
        warn "File not found: $src"
        return 1
    fi

    # Create destination directory if needed.
    # Declared above and assigned separately: 'local x=$(...)' would mask
    # the command substitution's exit status under 'set -e' (SC2155).
    dest_dir=$(dirname "$dest")
    if [ "$DRY_RUN" = "true" ]; then
        log "Would move: $src -> $dest"
        log "  Reason: $reason"
        log "  Would create directory: $dest_dir"
    else
        mkdir -p "$dest_dir"
        if mv "$src" "$dest" 2>/dev/null; then
            success "Moved: $src -> $dest"
            echo "  Reason: $reason" >> "$LOG_FILE"
        else
            error "Failed to move: $src -> $dest"
            return 1
        fi
    fi
}
|
||||
|
||||
# Create necessary directories
# Ensures the target layout exists (or, in dry-run mode, reports what
# would be created).
create_directories() {
    log "Creating directory structure..."
    local target
    for target in \
        "reports/archive/2026-01-05" \
        "reports/status" \
        "reports/inventories" \
        "reports/analyses" \
        "docs/09-troubleshooting/archive" \
        "rpc-translator-138/docs/archive"
    do
        if [ "$DRY_RUN" = "true" ]; then
            log "Would create: $target"
        else
            mkdir -p "$target"
            success "Created: $target"
        fi
    done
}
|
||||
|
||||
# Move timestamped inventory files
# Archives point-in-time snapshot reports (*_YYYYMMDD_HHMMSS.md) from the
# project root into the dated archive directory.
move_timestamped_inventories() {
    log "Moving timestamped inventory files..."
    local files=(
        "CONTAINER_INVENTORY_20260105_142214.md"
        "CONTAINER_INVENTORY_20260105_142314.md"
        "CONTAINER_INVENTORY_20260105_142357.md"
        "CONTAINER_INVENTORY_20260105_142455.md"
        "CONTAINER_INVENTORY_20260105_142712.md"
        "CONTAINER_INVENTORY_20260105_142753.md"
        "CONTAINER_INVENTORY_20260105_142842.md"
        "CONTAINER_INVENTORY_20260105_144309.md"
        "CONTAINER_INVENTORY_20260105_153516.md"
        "CONTAINER_INVENTORY_20260105_154200.md"
        "SERVICE_DEPENDENCIES_20260105_143608.md"
        "SERVICE_DEPENDENCIES_20260105_143624.md"
        "IP_AVAILABILITY_20260105_143535.md"
        "DHCP_CONTAINERS_20260105_143507.md"
    )

    for file in "${files[@]}"; do
        # Quietly skip entries that were already moved or never existed.
        if [ -f "$file" ]; then
            move_file "$file" "reports/archive/2026-01-05/$file" "Timestamped inventory/report file"
        fi
    done
}
|
||||
|
||||
# Move status/completion reports from root to reports
# Scans root-level *.md files and moves anything whose name looks like a
# status/report artifact into reports/status/.
move_status_reports() {
    log "Moving status/completion reports from root..."

    # Pattern matching for status reports.
    # IFS= keeps leading/trailing whitespace in filenames intact (plain
    # 'read -r' would strip it and the subsequent mv would miss the file).
    find . -maxdepth 1 -name "*.md" -type f | while IFS= read -r file; do
        filename=$(basename "$file")

        # Skip essential files
        if [[ "$filename" == "README.md" ]] || [[ "$filename" == "PROJECT_STRUCTURE.md" ]]; then
            continue
        fi

        # Check if it's a status/report file
        if [[ "$filename" =~ (STATUS|COMPLETE|FINAL|REPORT|SUMMARY|ANALYSIS|DIAGNOSTIC|INVENTORY) ]]; then
            move_file "$file" "reports/status/$filename" "Status/completion report in root"
        fi
    done
}
|
||||
|
||||
# Move VMID-specific reports
# Relocates root-level VMID*.md report files into reports/.
move_vmid_reports() {
    log "Moving VMID-specific reports..."

    # IFS= preserves leading/trailing whitespace in filenames; -r keeps
    # backslashes literal, so unusual names survive the pipeline intact.
    find . -maxdepth 1 -name "VMID*.md" -type f | while IFS= read -r file; do
        filename=$(basename "$file")
        move_file "$file" "reports/$filename" "VMID-specific report"
    done
}
|
||||
|
||||
# Move IP conflict and network analysis reports
# Known root-level network/IP report filenames (exact names, no globs);
# each is moved into reports/analyses/ when present.
move_network_reports() {
    log "Moving network analysis reports..."

    local files=(
        "IP_CONFLICT_ANALYSIS.md"
        "IP_CONFLICT_192.168.11.14_RESOLUTION.md"
        "IP_CONFLICTS_RESOLUTION_COMPLETE.md"
        "VMID_IP_CONFLICTS_ANALYSIS.md"
        "VMID_IP_ADDRESS_LIST.md"
        "FINAL_VMID_IP_MAPPING.md"
        "IP_ASSIGNMENT_PLAN.md"
        "PHASE1_IP_CONFLICT_RESOLUTION.md"
        "PHASE1_IP_INVESTIGATION_COMPLETE.md"
        "PHASE1_IP_INVESTIGATION_STATUS.md"
        "R630-04_IP_CONFLICT_DISCOVERY.md"
        "RESERVED_IP_CONFLICTS_ANALYSIS.md"
        "RESERVED_IP_FIX_COMPLETE.md"
        "RESERVED_IP_FIX_COMPLETE_FINAL.md"
        "RESERVED_IP_FIX_SUMMARY.md"
        "DHCP_CONTAINERS_LIST.md"
        "DHCP_TO_STATIC_CONVERSION_COMPLETE.md"
        "DHCP_TO_STATIC_CONVERSION_FINAL_REPORT.md"
    )

    for file in "${files[@]}"; do
        # Quietly skip files that are absent (already moved or never generated).
        if [ -f "$file" ]; then
            move_file "$file" "reports/analyses/$file" "Network/IP analysis report"
        fi
    done
}
|
||||
|
||||
# Move service status reports
# Mix of exact filenames and glob patterns; entries containing '*' are
# quoted, so they reach find(1) as -name patterns rather than being
# expanded by the shell here.
move_service_reports() {
    log "Moving service status reports..."

    local files=(
        "BLOCK_PRODUCTION_REVIEW.md"
        "BLOCK_PRODUCTION_STATUS.md"
        "SERVICE_VERIFICATION_REPORT.md"
        "RPC_ENDPOINT_DIAGNOSTICS_REPORT.md"
        "RPC_SSL_ISSUE_SUMMARY.md"
        "RPC_TRANSACTION_FAILURE_INVESTIGATION.md"
        "RPC_TRANSACTION_FAILURE_ROOT_CAUSE.md"
        "BESU_*.md"
        "FIREFLY_*.md"
        "DBIS_*.md"
        "EXPLORER_*.md"
        "BLOCKSCOUT_*.md"
    )

    # Handle specific files
    for pattern in "${files[@]}"; do
        find . -maxdepth 1 -name "$pattern" -type f | while read -r file; do
            filename=$(basename "$file")
            # Skip if it's a script or config file
            if [[ ! "$filename" =~ (\.sh|\.py|\.js|\.json)$ ]]; then
                move_file "$file" "reports/status/$filename" "Service status report"
            fi
        done
    done
}
|
||||
|
||||
# Move temporary fix guides from rpc-translator-138
# Archives throwaway fix/status docs into rpc-translator-138/docs/archive/,
# keeping README and the recognized permanent docs in place.
move_rpc_translator_temp_files() {
    log "Moving temporary files from rpc-translator-138..."

    if [ ! -d "rpc-translator-138" ]; then
        warn "rpc-translator-138 directory not found"
        return
    fi

    # Glob patterns are quoted so find(1) expands them, not the shell.
    local temp_patterns=(
        "FIX_*.md"
        "QUICK_FIX*.md"
        "RUN_NOW.md"
        "EXECUTE_NOW.md"
        "EXECUTION_READY.md"
        "LOAD_KEYS_NOW.md"
        "FIX_PERMISSIONS*.md"
        "*COMPLETE*.md"
        "*FINAL*.md"
        "*STATUS*.md"
    )

    for pattern in "${temp_patterns[@]}"; do
        find rpc-translator-138 -maxdepth 1 -name "$pattern" -type f | while read -r file; do
            filename=$(basename "$file")
            # Skip README and important docs
            if [[ "$filename" != "README.md" ]] && [[ ! "$filename" =~ ^(DEPLOYMENT|API_METHODS|QUICK_REFERENCE|QUICK_START|QUICK_SETUP) ]]; then
                move_file "$file" "rpc-translator-138/docs/archive/$filename" "Temporary fix/status file"
            fi
        done
    done
}
|
||||
|
||||
# Move completion/migration status files from docs
# Relocates status artifacts that ended up in docs/ into reports/.
move_docs_status_files() {
    log "Moving status files from docs directory..."

    if [ ! -d "docs" ]; then
        warn "docs directory not found"
        return
    fi

    # FIX: -type f was missing here (every sibling mover has it).  The -o
    # alternatives are grouped with \( \) so -type f applies to all three;
    # without it a *directory* whose name matched would reach move_file,
    # fail its -f test, return 1, and abort the whole script under
    # 'set -euo pipefail'.  IFS= keeps filename whitespace intact.
    find docs -maxdepth 1 \( -name "*COMPLETE*.md" -o -name "*FINAL*.md" -o -name "*MIGRATION*.md" \) -type f | while IFS= read -r file; do
        filename=$(basename "$file")
        # Skip if it's actual documentation
        if [[ ! "$filename" =~ ^(DOCUMENTATION|CONTRIBUTOR|STYLE|GUIDE|README) ]]; then
            move_file "$file" "reports/$filename" "Status file in docs directory"
        fi
    done
}
|
||||
|
||||
# Consolidate duplicate status files (keep most recent)
consolidate_duplicates() {
    log "Identifying duplicate status files..."

    # This is a placeholder - actual consolidation requires content comparison
    # For now, we'll just log potential duplicates
    local status_files=(
        "ALL_TASKS_COMPLETE.md"
        "ALL_TASKS_COMPLETE_FINAL.md"
        "ALL_STEPS_COMPLETE.md"
        "ALL_NEXT_STEPS_COMPLETE.md"
    )

    for file in "${status_files[@]}"; do
        # Only warn; nothing is moved or deleted by this function.
        if [ -f "$file" ]; then
            warn "Potential duplicate: $file (consider consolidating)"
        fi
    done
}
|
||||
|
||||
# Main execution
# Orchestrates the whole cleanup: prints a banner, creates target
# directories, runs each mover in turn, then closes with next-step hints
# when running in dry-run mode.
main() {
    log "========================================="
    log "Markdown Files Cleanup Script"
    log "========================================="
    log "Project Root: $PROJECT_ROOT"
    log "Dry Run: $DRY_RUN"
    log "Log File: $LOG_FILE"
    log ""

    if [ "$DRY_RUN" = "true" ]; then
        warn "DRY RUN MODE - No files will be moved"
        warn "Set DRY_RUN=false to actually move files"
        log ""
    fi

    create_directories
    move_timestamped_inventories
    move_status_reports
    move_vmid_reports
    move_network_reports
    move_service_reports
    move_rpc_translator_temp_files
    move_docs_status_files
    consolidate_duplicates

    log ""
    log "========================================="
    log "Cleanup complete!"
    log "========================================="

    if [ "$DRY_RUN" = "true" ]; then
        log ""
        log "Review the log above, then run with:"
        log "  DRY_RUN=false $0"
    fi
}

main "$@"
|
||||
Reference in New Issue
Block a user