Add markdown file analysis tools and reports

This commit is contained in:
defiQUG
2026-01-06 01:42:29 -08:00
parent b45c2006be
commit 1edcec953c
20 changed files with 9323 additions and 8 deletions

365
scripts/analyze-markdown-files.py Executable file
View File

@@ -0,0 +1,365 @@
#!/usr/bin/env python3
"""
Comprehensive Markdown File Analysis Script
Analyzes all markdown files in the project for:
- File dates (creation, modification)
- Duplicate patterns
- Misplaced files
- Content inconsistencies
- Outdated information
"""
import os
import re
import json
from pathlib import Path
from datetime import datetime
from collections import defaultdict
from typing import Dict, List, Tuple, Set
import hashlib
class MarkdownAnalyzer:
    """Scan a directory tree for markdown files and report on their age,
    naming patterns, location, duplication and simple content issues.

    Usage: ``MarkdownAnalyzer(root).analyze()`` returns the report dict.
    """

    def __init__(self, root_dir: str):
        self.root_dir = Path(root_dir)
        self.files: List[Dict] = []          # one metadata dict per markdown file
        self.duplicates = defaultdict(list)  # content hash -> ALL files with that hash
        self.patterns = {
            'complete': [],
            'final': [],
            'status': [],
            'timestamped': [],
            'fix': [],
            'report': [],
            'temporary': []
        }
        self.misplaced = []
        self.content_hashes = {}             # content hash -> files (kept for compatibility)
        self.file_metadata = []

    def analyze(self) -> Dict:
        """Run the full analysis pipeline and return the report dict."""
        print("🔍 Scanning markdown files...")
        self._scan_files()
        print(f"📊 Found {len(self.files)} markdown files")
        print("\n📅 Analyzing file dates...")
        self._analyze_dates()
        print("\n🔎 Identifying patterns...")
        self._identify_patterns()
        print("\n📍 Finding misplaced files...")
        self._find_misplaced()
        print("\n🔗 Checking for duplicates...")
        self._check_duplicates()
        print("\n📝 Analyzing content...")
        self._analyze_content()
        return self._generate_report()

    def _scan_files(self):
        """Collect stat metadata for every markdown file under root_dir."""
        exclude_dirs = {'.git', 'node_modules', '__pycache__', '.next', 'dist', 'build', 'venv', '.venv'}
        for md_file in self.root_dir.rglob('*.md'):
            # Skip excluded directories anywhere in the path
            if any(part in exclude_dirs for part in md_file.parts):
                continue
            try:
                stat = md_file.stat()
                rel_path = md_file.relative_to(self.root_dir)
                self.files.append({
                    'path': str(rel_path),
                    'full_path': str(md_file),
                    'size': stat.st_size,
                    'modified': datetime.fromtimestamp(stat.st_mtime),
                    'accessed': datetime.fromtimestamp(stat.st_atime),
                    # BUGFIX: the original tested for st_birthtime but then read
                    # st_ctime.  Use the real birth time where the platform
                    # provides it; on Linux st_ctime is metadata-change time,
                    # not creation, so fall back to None rather than mislabel.
                    'created': (datetime.fromtimestamp(stat.st_birthtime)
                                if hasattr(stat, 'st_birthtime') else None),
                    'directory': str(rel_path.parent),
                    'name': md_file.name
                })
            except (OSError, PermissionError) as e:
                print(f"⚠️ Error accessing {md_file}: {e}")

    def _analyze_dates(self):
        """Annotate each file with its age in days and an age bucket."""
        now = datetime.now()
        for file_info in self.files:
            days_old = (now - file_info['modified']).days
            file_info['days_old'] = days_old
            # NOTE: the original had two identical 'recent' branches
            # (<7 and <30); collapsed without changing the resulting labels.
            file_info['age_category'] = (
                'recent' if days_old < 30 else
                'moderate' if days_old < 90 else
                'old' if days_old < 365 else
                'very_old'
            )

    def _identify_patterns(self):
        """Bucket files whose names match known report/status conventions."""
        patterns = {
            'complete': re.compile(r'COMPLETE', re.I),
            'final': re.compile(r'FINAL', re.I),
            'status': re.compile(r'STATUS', re.I),
            'timestamped': re.compile(r'_\d{8}_\d{6}|\d{8}_\d{6}'),
            'fix': re.compile(r'FIX|QUICK_FIX|RUN_NOW|EXECUTE', re.I),
            'report': re.compile(r'REPORT|SUMMARY|ANALYSIS|DIAGNOSTIC', re.I),
            'temporary': re.compile(r'NOW|READY|EXECUTE|RUN_', re.I)
        }
        for file_info in self.files:
            name = file_info['name']
            for pattern_name, pattern in patterns.items():
                if pattern.search(name):
                    self.patterns[pattern_name].append(file_info)
                    file_info[f'has_{pattern_name}'] = True

    def _find_misplaced(self):
        """Flag files that live in the wrong directory by project convention."""
        root_files = [f for f in self.files if f['directory'] == '.']
        docs_files = [f for f in self.files if f['directory'].startswith('docs')]
        # Reports in root
        for f in root_files:
            if any(keyword in f['name'].upper() for keyword in ('REPORT', 'STATUS', 'INVENTORY', 'DIAGNOSTIC', 'ANALYSIS')):
                if f['name'] not in ('README.md', 'PROJECT_STRUCTURE.md'):
                    self.misplaced.append({
                        'file': f,
                        'current': 'root',
                        'should_be': 'reports/',
                        'reason': 'Report file in root directory'
                    })
        # Status/completion files in docs
        for f in docs_files:
            if any(keyword in f['name'].upper() for keyword in ('COMPLETE', 'FINAL', 'STATUS', 'MIGRATION_COMPLETE')):
                self.misplaced.append({
                    'file': f,
                    'current': f['directory'],
                    'should_be': 'reports/',
                    'reason': 'Status/completion report in docs directory'
                })
        # Temporary fix guides in project root
        for f in root_files:
            if any(keyword in f['name'].upper() for keyword in ('FIX_', 'QUICK_FIX', 'RUN_NOW', 'EXECUTE')):
                self.misplaced.append({
                    'file': f,
                    'current': 'root',
                    'should_be': 'docs/09-troubleshooting/archive/',
                    'reason': 'Temporary fix guide in root'
                })

    def _check_duplicates(self):
        """Group files by content hash; groups with >1 file are duplicates."""
        for file_info in self.files:
            try:
                with open(file_info['full_path'], 'rb') as f:
                    # md5 is used purely as a fast content fingerprint here,
                    # not for anything security-sensitive.
                    content_hash = hashlib.md5(f.read()).hexdigest()
            except OSError:
                continue  # unreadable file: skip it rather than abort the scan
            # BUGFIX: the original recorded only the 2nd+ occurrence in
            # self.duplicates, so a *pair* of identical files produced a
            # group of length 1 and was dropped by the len>1 filter in
            # _generate_report.  Record every occurrence instead.
            self.duplicates[content_hash].append(file_info)
            self.content_hashes.setdefault(content_hash, []).append(file_info)

    def _analyze_content(self):
        """Annotate each file with a line count and simple content flags."""
        for file_info in self.files:
            try:
                with open(file_info['full_path'], 'r', encoding='utf-8', errors='ignore') as f:
                    content = f.read()
                lines = content.split('\n')
                file_info['line_count'] = len(lines)
                file_info['has_todo'] = 'TODO' in content or 'FIXME' in content
                file_info['has_deprecated'] = 'DEPRECATED' in content or 'OBSOLETE' in content
                file_info['has_date'] = bool(re.search(r'\d{4}-\d{2}-\d{2}|\d{1,2}/\d{1,2}/\d{4}', content))
                # Unexpanded shell `date` commands left in the document
                if re.search(r'\$\(date\)|date \+', content):
                    file_info['has_placeholder_date'] = True
            except Exception as e:
                file_info['line_count'] = 0
                file_info['error'] = str(e)

    def _generate_report(self) -> Dict:
        """Assemble the final report dict from the collected annotations."""
        report = {
            'summary': {
                'total_files': len(self.files),
                'total_size_mb': sum(f['size'] for f in self.files) / (1024 * 1024),
                'by_age': defaultdict(int),
                'by_directory': defaultdict(int)
            },
            'patterns': {},
            'misplaced': [],
            'duplicates': [],
            'old_files': [],
            'empty_files': [],
            'issues': []
        }
        # Summary stats
        for f in self.files:
            report['summary']['by_age'][f['age_category']] += 1
            report['summary']['by_directory'][f['directory']] += 1
        # Pattern counts (file list capped at 20 to keep the report readable)
        for pattern_name, files in self.patterns.items():
            report['patterns'][pattern_name] = {
                'count': len(files),
                'files': [f['path'] for f in files[:20]]
            }
        # Misplaced files
        report['misplaced'] = [
            {
                'path': m['file']['path'],
                'current': m['current'],
                'should_be': m['should_be'],
                'reason': m['reason']
            }
            for m in self.misplaced
        ]
        # Duplicate content: only hashes shared by more than one file
        for hash_val, files in self.duplicates.items():
            if len(files) > 1:
                report['duplicates'].append({
                    'hash': hash_val[:8],
                    'count': len(files),
                    'files': [f['path'] for f in files]
                })
        # Old files (>90 days)
        report['old_files'] = [
            {
                'path': f['path'],
                'days_old': f['days_old'],
                'modified': f['modified'].isoformat()
            }
            for f in self.files if f['days_old'] > 90
        ]
        # Empty or very small files
        report['empty_files'] = [
            {
                'path': f['path'],
                'size': f['size'],
                'line_count': f.get('line_count', 0)
            }
            for f in self.files if f['size'] < 100 or f.get('line_count', 0) < 5
        ]
        # Per-file issue list
        for f in self.files:
            issues = []
            if f.get('has_placeholder_date'):
                issues.append('Contains placeholder date')
            if f.get('has_deprecated'):
                issues.append('Marks itself as deprecated')
            if f['days_old'] > 365:
                issues.append('Very old (>1 year)')
            if f['size'] < 50:
                issues.append('Very small file')
            if issues:
                report['issues'].append({
                    'path': f['path'],
                    'issues': issues
                })
        return report
def main():
    """Analyze the project root and write JSON + markdown report files."""
    project_root = Path(__file__).parent.parent
    report = MarkdownAnalyzer(project_root).analyze()

    # Machine-readable output (default=str stringifies datetime objects).
    json_path = project_root / 'MARKDOWN_ANALYSIS.json'
    with open(json_path, 'w') as fh:
        json.dump(report, fh, indent=2, default=str)
    print(f"\n✅ JSON report saved to: {json_path}")

    # Human-readable output
    md_path = project_root / 'MARKDOWN_ANALYSIS_REPORT.md'
    with open(md_path, 'w') as fh:
        fh.write(generate_markdown_report(report))
    print(f"✅ Markdown report saved to: {md_path}")
    return report
def generate_markdown_report(report: Dict) -> str:
    """Render the analysis report dict as a human-readable markdown string."""
    out: List[str] = []
    emit = out.append

    # Header
    emit("# Markdown Files Analysis Report\n")
    emit(f"**Generated**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")

    # Summary
    summary = report['summary']
    emit("## Summary\n")
    emit(f"- **Total Files**: {summary['total_files']}")
    emit(f"- **Total Size**: {summary['total_size_mb']:.2f} MB\n")
    emit("### Files by Age\n")
    for age, count in sorted(summary['by_age'].items()):
        emit(f"- **{age.title()}**: {count}")
    emit("")

    # Patterns (at most 10 filenames shown per pattern)
    emit("## File Patterns\n")
    for pattern_name, data in report['patterns'].items():
        emit(f"### {pattern_name.title()} ({data['count']} files)\n")
        for file_path in data['files'][:10]:
            emit(f"- `{file_path}`")
        if data['count'] > 10:
            emit(f"- ... and {data['count'] - 10} more")
        emit("")

    # Misplaced files (capped at 50 entries)
    emit("## Misplaced Files\n")
    emit(f"Found **{len(report['misplaced'])}** misplaced files:\n")
    for entry in report['misplaced'][:50]:
        out.extend([
            f"- **{entry['path']}**",
            f"  - Current: `{entry['current']}`",
            f"  - Should be: `{entry['should_be']}`",
            f"  - Reason: {entry['reason']}\n",
        ])

    # Duplicates (capped at 20 groups)
    emit("## Duplicate Content\n")
    emit(f"Found **{len(report['duplicates'])}** sets of duplicate files:\n")
    for dup in report['duplicates'][:20]:
        emit(f"- **{dup['count']} files** with same content:")
        out.extend(f"  - `{file_path}`" for file_path in dup['files'])
    emit("")

    # Old files, oldest first (capped at 50)
    emit("## Old Files (>90 days)\n")
    emit(f"Found **{len(report['old_files'])}** old files:\n")
    oldest_first = sorted(report['old_files'], key=lambda x: x['days_old'], reverse=True)
    for f in oldest_first[:50]:
        emit(f"- **{f['path']}** ({f['days_old']} days old, modified: {f['modified'][:10]})")
    emit("")

    # Per-file issues (capped at 50)
    emit("## Files with Issues\n")
    emit(f"Found **{len(report['issues'])}** files with issues:\n")
    for issue in report['issues'][:50]:
        emit(f"- **{issue['path']}**")
        out.extend(f"  - {i}" for i in issue['issues'])
    emit("")

    return "\n".join(out)
# Entry point: allow use both as a script and as an importable module.
if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,308 @@
#!/usr/bin/env python3
"""
Content Inconsistency Checker
Compares related markdown files for inconsistencies in:
- Dates
- Status information
- Configuration values
- References to other files
"""
import os
import re
import json
from pathlib import Path
from collections import defaultdict
from typing import Dict, List, Set, Tuple
from datetime import datetime
class ContentInconsistencyChecker:
    """Compare related markdown files for inconsistent dates, statuses,
    cross-references, configuration values and duplicated intros.

    Usage: ``ContentInconsistencyChecker(root).check()`` returns the report.
    """

    def __init__(self, root_dir: str):
        self.root_dir = Path(root_dir)
        self.inconsistencies: List[Dict] = []
        self.file_contents: Dict[str, Dict] = {}  # rel path -> {content, path, lines}

    def check(self) -> Dict:
        """Run all consistency checks and return the report dict."""
        print("🔍 Checking content inconsistencies...")
        # Load file contents
        self._load_files()
        # Check for inconsistencies
        print("\n📅 Checking date inconsistencies...")
        self._check_dates()
        print("\n📊 Checking status inconsistencies...")
        self._check_status()
        print("\n🔗 Checking cross-references...")
        self._check_references()
        print("\n⚙️ Checking configuration values...")
        self._check_config_values()
        print("\n📝 Checking duplicate content...")
        self._check_duplicate_content()
        return self._generate_report()

    def _load_files(self):
        """Read every markdown file under root_dir into memory."""
        exclude_dirs = {'.git', 'node_modules', '__pycache__', '.next', 'dist', 'build', 'venv'}
        for md_file in self.root_dir.rglob('*.md'):
            if any(part in exclude_dirs for part in md_file.parts):
                continue
            try:
                with open(md_file, 'r', encoding='utf-8', errors='ignore') as f:
                    content = f.read()
            except OSError as e:
                # BUGFIX: was a silent `except Exception: pass`; at least
                # report the file we could not read.
                print(f"⚠️ Skipping {md_file}: {e}")
                continue
            rel_path = str(md_file.relative_to(self.root_dir))
            self.file_contents[rel_path] = {
                'content': content,
                'path': rel_path,
                'lines': content.split('\n')
            }

    def _check_dates(self):
        """Flag dates more than a year old, grouped per project/component."""
        date_patterns = [
            r'(\d{4}-\d{2}-\d{2})',       # YYYY-MM-DD
            r'(\d{1,2}/\d{1,2}/\d{4})',   # MM/DD/YYYY
            r'Date[:\s]+(\d{4}-\d{2}-\d{2})',
            r'Generated[:\s]+(\d{4}-\d{2}-\d{2})',
            r'Last Updated[:\s]+(\d{4}-\d{2}-\d{2})',
        ]
        # Group files by project/component
        project_files = defaultdict(list)
        for path in self.file_contents:
            if 'rpc-translator-138' in path:
                project_files['rpc-translator-138'].append(path)
            elif path.startswith('docs/'):
                project_files['docs'].append(path)
            elif path.startswith('reports/'):
                project_files['reports'].append(path)
            elif '/' not in path or path.count('/') == 0:
                project_files['root'].append(path)
        # Check dates within each project
        for project, files in project_files.items():
            dates_found = []
            for file_path in files:
                content = self.file_contents[file_path]['content']
                for pattern in date_patterns:
                    for match in re.findall(pattern, content):
                        dates_found.append((file_path, match))
            # Flag very old dates (>1 year)
            now = datetime.now()
            for file_path, date_str in dates_found:
                try:
                    if '-' in date_str:
                        date_obj = datetime.strptime(date_str, '%Y-%m-%d')
                    elif '/' in date_str:
                        if len(date_str.split('/')) != 3:
                            continue
                        date_obj = datetime.strptime(date_str, '%m/%d/%Y')
                    else:
                        continue
                except ValueError:
                    # BUGFIX: was a bare `except: pass`; only malformed date
                    # strings should be skipped here, nothing else.
                    continue
                days_diff = (now - date_obj).days
                if days_diff > 365:
                    self.inconsistencies.append({
                        'type': 'old_date',
                        'file': file_path,
                        'issue': f'Date {date_str} is {days_diff} days old',
                        'severity': 'medium'
                    })

    def _check_status(self):
        """Flag groups of related status files that report different statuses."""
        status_patterns = [
            r'Status[:\s]+([✅❌🔄⚠️]+|COMPLETE|INCOMPLETE|PENDING|ACTIVE|INACTIVE)',
            r'\*\*Status\*\*[:\s]+([✅❌🔄⚠️]+|COMPLETE|INCOMPLETE|PENDING)',
        ]
        # Group related status files by the base name left after stripping
        # the COMPLETE/FINAL/STATUS markers.
        status_groups = defaultdict(list)
        for path in self.file_contents:
            filename = Path(path).name
            if 'COMPLETE' in filename or 'STATUS' in filename or 'FINAL' in filename:
                base = re.sub(r'_(COMPLETE|FINAL|STATUS).*', '', filename)
                base = re.sub(r'COMPLETE|FINAL|STATUS', '', base)
                status_groups[base].append(path)
        # Check for conflicting statuses within a group
        for base, files in status_groups.items():
            if len(files) > 1:
                statuses = []
                for file_path in files:
                    content = self.file_contents[file_path]['content']
                    for pattern in status_patterns:
                        matches = re.findall(pattern, content, re.IGNORECASE)
                        statuses.extend((file_path, m) for m in matches)
                if len(set(s[1] for s in statuses)) > 1:
                    self.inconsistencies.append({
                        'type': 'conflicting_status',
                        'files': files,
                        'issue': f'Multiple status files for {base} with different statuses',
                        'severity': 'high'
                    })

    def _check_references(self):
        """Flag markdown links whose targets do not exist in the project."""
        reference_pattern = r'\[([^\]]+)\]\(([^)]+)\)'
        root = self.root_dir.resolve()
        for path, data in self.file_contents.items():
            for _text, link_path in re.findall(reference_pattern, data['content']):
                # Skip external links, mail links and same-page anchors.
                # (BUGFIX: mailto:/#anchor links were previously reported
                # as broken file references.)
                if link_path.startswith(('http', 'mailto:', '#')):
                    continue
                # Drop any anchor fragment from the target path
                file_path = link_path.split('#', 1)[0]
                if file_path.startswith('/'):
                    # Absolute links are taken as project-root relative
                    relative_resolved = Path(file_path.lstrip('/'))
                else:
                    # BUGFIX: the original resolved relative links against the
                    # process CWD, so every link was silently skipped whenever
                    # the script ran from outside the project root.  Resolve
                    # relative to the *linking file*, anchored at the root.
                    resolved = (root / Path(path).parent / file_path).resolve()
                    try:
                        relative_resolved = resolved.relative_to(root)
                    except ValueError:
                        # Target escapes the project root; out of scope
                        continue
                if not (root / relative_resolved).exists():
                    self.inconsistencies.append({
                        'type': 'broken_reference',
                        'file': path,
                        'issue': f'Broken link to {link_path}',
                        'severity': 'medium'
                    })

    def _check_config_values(self):
        """Flag components that reference an implausible number of IPs."""
        # Look for IP addresses and VMIDs in the 192.168.11.0/24 fleet
        ip_pattern = r'192\.168\.11\.(\d+)'
        vmid_pattern = r'VMID[:\s]+(\d+)'
        configs_by_component = defaultdict(lambda: defaultdict(set))
        for path, data in self.file_contents.items():
            content = data['content']
            component = self._identify_component(path)
            # Extract IPs
            for ip in re.findall(ip_pattern, content):
                configs_by_component[component]['ips'].add(f'192.168.11.{ip}')
            # Extract VMIDs
            for vmid in re.findall(vmid_pattern, content, re.IGNORECASE):
                configs_by_component[component]['vmids'].add(vmid)
        # Heuristic: a single component citing >10 distinct IPs suggests
        # stale or contradictory documentation.
        for component, configs in configs_by_component.items():
            if len(configs['ips']) > 10:
                self.inconsistencies.append({
                    'type': 'too_many_ips',
                    'component': component,
                    'issue': f'Component {component} references {len(configs["ips"])} different IPs',
                    'severity': 'low'
                })

    def _check_duplicate_content(self):
        """Flag files whose first 10 lines are identical (likely duplicates)."""
        # NOTE: uses hash() of the intro text, which is stable within a run
        # but salted per process -- fine for intra-run grouping only.
        file_signatures = {}
        for path, data in self.file_contents.items():
            signature = hash('\n'.join(data['lines'][:10]))
            if signature in file_signatures:
                self.inconsistencies.append({
                    'type': 'duplicate_intro',
                    'files': [file_signatures[signature], path],
                    'issue': 'Files have identical first 10 lines',
                    'severity': 'low'
                })
            else:
                file_signatures[signature] = path

    def _identify_component(self, path: str) -> str:
        """Map a file path onto the project component it documents."""
        if 'rpc-translator' in path:
            return 'rpc-translator-138'
        elif 'besu' in path.lower():
            return 'besu'
        elif 'dbis' in path.lower():
            return 'dbis'
        elif 'firefly' in path.lower():
            return 'firefly'
        else:
            return 'other'

    def _generate_report(self) -> Dict:
        """Tally the collected inconsistencies into a summary report."""
        report = {
            'summary': {
                'total_inconsistencies': len(self.inconsistencies),
                'by_type': defaultdict(int),
                'by_severity': defaultdict(int)
            },
            'inconsistencies': []
        }
        for inc in self.inconsistencies:
            report['summary']['by_type'][inc['type']] += 1
            report['summary']['by_severity'][inc['severity']] += 1
            report['inconsistencies'].append(inc)
        return report
def main():
    """Run the checker over the project root, save the report, print totals."""
    project_root = Path(__file__).parent.parent
    report = ContentInconsistencyChecker(project_root).check()

    # Save report
    json_path = project_root / 'CONTENT_INCONSISTENCIES.json'
    with open(json_path, 'w') as fh:
        json.dump(report, fh, indent=2, default=str)
    print(f"\n✅ Report saved to: {json_path}")

    # Print summary
    summary = report['summary']
    print("\n📊 Summary:")
    print(f" Total inconsistencies: {summary['total_inconsistencies']}")
    print(f" By type: {dict(summary['by_type'])}")
    print(f" By severity: {dict(summary['by_severity'])}")
    return report
# Entry point: allow use both as a script and as an importable module.
if __name__ == '__main__':
    main()

319
scripts/cleanup-markdown-files.sh Executable file
View File

@@ -0,0 +1,319 @@
#!/bin/bash
# Markdown Files Cleanup Script
# Automatically organizes markdown files based on analysis
# Abort on errors, on unset variables, and on failures inside pipelines.
set -euo pipefail
# Resolve this script's directory and the project root (its parent),
# then run everything relative to the project root.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
cd "$PROJECT_ROOT"
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
# Dry-run mode (set to false to actually move files)
DRY_RUN=${DRY_RUN:-true}
# Log file
# Timestamped so repeated runs never clobber an earlier log.
LOG_FILE="$PROJECT_ROOT/MARKDOWN_CLEANUP_LOG_$(date +%Y%m%d_%H%M%S).log"
# Logging helpers: each prints a colour-tagged message to stdout and
# mirrors the same line (ANSI codes included) into "$LOG_FILE" via tee.
log() {
    echo -e "${BLUE}[$(date +'%Y-%m-%d %H:%M:%S')]${NC} $1" | tee -a "$LOG_FILE"
}
# Non-fatal warning.
warn() {
    echo -e "${YELLOW}[WARN]${NC} $1" | tee -a "$LOG_FILE"
}
# Error message (does not exit; the caller decides what to do).
error() {
    echo -e "${RED}[ERROR]${NC} $1" | tee -a "$LOG_FILE"
}
# Success confirmation.
success() {
    echo -e "${GREEN}[OK]${NC} $1" | tee -a "$LOG_FILE"
}
# move_file SRC DEST REASON
# Move SRC to DEST (creating the destination directory), honoring DRY_RUN.
# BUGFIX: always returns 0.  The original returned 1 on failure, which --
# under `set -e` -- aborted the caller's whole `find | while` subshell on
# the first missing/unmovable file, silently skipping the remaining files.
# No caller inspects the return status.
move_file() {
    local src="$1"
    local dest="$2"
    local reason="$3"
    if [ ! -f "$src" ]; then
        warn "File not found: $src"
        return 0
    fi
    # Declare and assign separately so `local` does not mask the exit
    # status of the command substitution (shellcheck SC2155).
    local dest_dir
    dest_dir=$(dirname "$dest")
    if [ "$DRY_RUN" = "true" ]; then
        log "Would move: $src -> $dest"
        log " Reason: $reason"
        log " Would create directory: $dest_dir"
    else
        mkdir -p "$dest_dir"
        if mv "$src" "$dest" 2>/dev/null; then
            success "Moved: $src -> $dest"
            echo " Reason: $reason" >> "$LOG_FILE"
        else
            error "Failed to move: $src -> $dest"
        fi
    fi
    return 0
}
# Create necessary directories
# Ensure the report/archive directory layout exists, or in dry-run mode
# only log what would be created.
create_directories() {
    log "Creating directory structure..."
    local dir
    for dir in \
        "reports/archive/2026-01-05" \
        "reports/status" \
        "reports/inventories" \
        "reports/analyses" \
        "docs/09-troubleshooting/archive" \
        "rpc-translator-138/docs/archive"; do
        if [ "$DRY_RUN" = "true" ]; then
            log "Would create: $dir"
        else
            mkdir -p "$dir"
            success "Created: $dir"
        fi
    done
}
# Move timestamped inventory files
# Archive the point-in-time inventory/report snapshots (filenames carry a
# YYYYMMDD_HHMMSS timestamp) into reports/archive/2026-01-05/.
move_timestamped_inventories() {
    log "Moving timestamped inventory files..."
    # Explicit list of known snapshots produced by the 2026-01-05 runs.
    local files=(
        "CONTAINER_INVENTORY_20260105_142214.md"
        "CONTAINER_INVENTORY_20260105_142314.md"
        "CONTAINER_INVENTORY_20260105_142357.md"
        "CONTAINER_INVENTORY_20260105_142455.md"
        "CONTAINER_INVENTORY_20260105_142712.md"
        "CONTAINER_INVENTORY_20260105_142753.md"
        "CONTAINER_INVENTORY_20260105_142842.md"
        "CONTAINER_INVENTORY_20260105_144309.md"
        "CONTAINER_INVENTORY_20260105_153516.md"
        "CONTAINER_INVENTORY_20260105_154200.md"
        "SERVICE_DEPENDENCIES_20260105_143608.md"
        "SERVICE_DEPENDENCIES_20260105_143624.md"
        "IP_AVAILABILITY_20260105_143535.md"
        "DHCP_CONTAINERS_20260105_143507.md"
    )
    for file in "${files[@]}"; do
        # Only attempt the move when the snapshot is still in the root.
        if [ -f "$file" ]; then
            move_file "$file" "reports/archive/2026-01-05/$file" "Timestamped inventory/report file"
        fi
    done
}
# Move status/completion reports from root to reports
# Relocate status/report markdown files out of the repository root into
# reports/status/, leaving the essential top-level docs untouched.
move_status_reports() {
    log "Moving status/completion reports from root..."
    # BUGFIX: NUL-delimited iteration (-print0 / read -d '') so filenames
    # containing spaces or leading/trailing whitespace are handled safely;
    # plain `while read -r` splits and trims on IFS.
    find . -maxdepth 1 -name "*.md" -type f -print0 | while IFS= read -r -d '' file; do
        filename=$(basename "$file")
        # Skip essential files
        if [[ "$filename" == "README.md" ]] || [[ "$filename" == "PROJECT_STRUCTURE.md" ]]; then
            continue
        fi
        # Check if it's a status/report file
        if [[ "$filename" =~ (STATUS|COMPLETE|FINAL|REPORT|SUMMARY|ANALYSIS|DIAGNOSTIC|INVENTORY) ]]; then
            move_file "$file" "reports/status/$filename" "Status/completion report in root"
        fi
    done
}
# Move VMID-specific reports
# Relocate VMID*.md reports from the repository root into reports/.
move_vmid_reports() {
    log "Moving VMID-specific reports..."
    # NOTE(review): the while body runs in a pipeline subshell, so any
    # variables it sets do not propagate back to this shell.
    find . -maxdepth 1 -name "VMID*.md" -type f | while read -r file; do
        filename=$(basename "$file")
        move_file "$file" "reports/$filename" "VMID-specific report"
    done
}
# Move IP conflict and network analysis reports
# Relocate the known network/IP analysis reports into reports/analyses/.
move_network_reports() {
    log "Moving network analysis reports..."
    # Explicit list of network-related report files expected in the root.
    local files=(
        "IP_CONFLICT_ANALYSIS.md"
        "IP_CONFLICT_192.168.11.14_RESOLUTION.md"
        "IP_CONFLICTS_RESOLUTION_COMPLETE.md"
        "VMID_IP_CONFLICTS_ANALYSIS.md"
        "VMID_IP_ADDRESS_LIST.md"
        "FINAL_VMID_IP_MAPPING.md"
        "IP_ASSIGNMENT_PLAN.md"
        "PHASE1_IP_CONFLICT_RESOLUTION.md"
        "PHASE1_IP_INVESTIGATION_COMPLETE.md"
        "PHASE1_IP_INVESTIGATION_STATUS.md"
        "R630-04_IP_CONFLICT_DISCOVERY.md"
        "RESERVED_IP_CONFLICTS_ANALYSIS.md"
        "RESERVED_IP_FIX_COMPLETE.md"
        "RESERVED_IP_FIX_COMPLETE_FINAL.md"
        "RESERVED_IP_FIX_SUMMARY.md"
        "DHCP_CONTAINERS_LIST.md"
        "DHCP_TO_STATIC_CONVERSION_COMPLETE.md"
        "DHCP_TO_STATIC_CONVERSION_FINAL_REPORT.md"
    )
    for file in "${files[@]}"; do
        if [ -f "$file" ]; then
            move_file "$file" "reports/analyses/$file" "Network/IP analysis report"
        fi
    done
}
# Move service status reports
# Relocate service-specific status/diagnostic reports into reports/status/.
move_service_reports() {
    log "Moving service status reports..."
    # Entries may be literal names or globs; each is passed (quoted) to
    # `find -name`, which performs its own pattern matching.
    local files=(
        "BLOCK_PRODUCTION_REVIEW.md"
        "BLOCK_PRODUCTION_STATUS.md"
        "SERVICE_VERIFICATION_REPORT.md"
        "RPC_ENDPOINT_DIAGNOSTICS_REPORT.md"
        "RPC_SSL_ISSUE_SUMMARY.md"
        "RPC_TRANSACTION_FAILURE_INVESTIGATION.md"
        "RPC_TRANSACTION_FAILURE_ROOT_CAUSE.md"
        "BESU_*.md"
        "FIREFLY_*.md"
        "DBIS_*.md"
        "EXPLORER_*.md"
        "BLOCKSCOUT_*.md"
    )
    # Handle specific files
    for pattern in "${files[@]}"; do
        find . -maxdepth 1 -name "$pattern" -type f | while read -r file; do
            filename=$(basename "$file")
            # Skip if it's a script or config file
            if [[ ! "$filename" =~ (\.sh|\.py|\.js|\.json)$ ]]; then
                move_file "$file" "reports/status/$filename" "Service status report"
            fi
        done
    done
}
# Move temporary fix guides from rpc-translator-138
# Archive one-off fix/status notes under rpc-translator-138/docs/archive/,
# keeping the README and the permanent reference docs in place.
move_rpc_translator_temp_files() {
    log "Moving temporary files from rpc-translator-138..."
    if [ ! -d "rpc-translator-138" ]; then
        warn "rpc-translator-138 directory not found"
        return
    fi
    # Filename patterns that identify temporary/one-off documents.
    local temp_patterns=(
        "FIX_*.md"
        "QUICK_FIX*.md"
        "RUN_NOW.md"
        "EXECUTE_NOW.md"
        "EXECUTION_READY.md"
        "LOAD_KEYS_NOW.md"
        "FIX_PERMISSIONS*.md"
        "*COMPLETE*.md"
        "*FINAL*.md"
        "*STATUS*.md"
    )
    for pattern in "${temp_patterns[@]}"; do
        find rpc-translator-138 -maxdepth 1 -name "$pattern" -type f | while read -r file; do
            filename=$(basename "$file")
            # Skip README and important docs
            if [[ "$filename" != "README.md" ]] && [[ ! "$filename" =~ ^(DEPLOYMENT|API_METHODS|QUICK_REFERENCE|QUICK_START|QUICK_SETUP) ]]; then
                move_file "$file" "rpc-translator-138/docs/archive/$filename" "Temporary fix/status file"
            fi
        done
    done
}
# Move completion/migration status files from docs
# Relocate status-style files from the top level of docs/ into reports/.
move_docs_status_files() {
    log "Moving status files from docs directory..."
    if [ ! -d "docs" ]; then
        warn "docs directory not found"
        return
    fi
    # BUGFIX: the -name tests must be grouped with \( ... \).  Without the
    # grouping, `-maxdepth 1` bound only to the first -name (implicit -a
    # binds tighter than -o), so *FINAL*.md and *MIGRATION*.md matched at
    # ANY depth under docs/.  Also restrict to regular files.
    find docs -maxdepth 1 -type f \( -name "*COMPLETE*.md" -o -name "*FINAL*.md" -o -name "*MIGRATION*.md" \) | while read -r file; do
        filename=$(basename "$file")
        # Skip if it's actual documentation
        if [[ ! "$filename" =~ ^(DOCUMENTATION|CONTRIBUTOR|STYLE|GUIDE|README) ]]; then
            move_file "$file" "reports/$filename" "Status file in docs directory"
        fi
    done
}
# Consolidate duplicate status files (keep most recent)
# Placeholder: real consolidation needs a content comparison, so for now
# this only warns about the known overlapping "all done" status files.
consolidate_duplicates() {
    log "Identifying duplicate status files..."
    local candidate
    for candidate in \
        "ALL_TASKS_COMPLETE.md" \
        "ALL_TASKS_COMPLETE_FINAL.md" \
        "ALL_STEPS_COMPLETE.md" \
        "ALL_NEXT_STEPS_COMPLETE.md"; do
        if [ -f "$candidate" ]; then
            warn "Potential duplicate: $candidate (consider consolidating)"
        fi
    done
}
# Main execution
# Orchestrates the full cleanup: directory creation, then each category of
# moves, then duplicate detection.  DRY_RUN is honored by every step.
main() {
    log "========================================="
    log "Markdown Files Cleanup Script"
    log "========================================="
    log "Project Root: $PROJECT_ROOT"
    log "Dry Run: $DRY_RUN"
    log "Log File: $LOG_FILE"
    log ""
    if [ "$DRY_RUN" = "true" ]; then
        warn "DRY RUN MODE - No files will be moved"
        warn "Set DRY_RUN=false to actually move files"
        log ""
    fi
    create_directories
    move_timestamped_inventories
    move_status_reports
    move_vmid_reports
    move_network_reports
    move_service_reports
    move_rpc_translator_temp_files
    move_docs_status_files
    consolidate_duplicates
    log ""
    log "========================================="
    log "Cleanup complete!"
    log "========================================="
    if [ "$DRY_RUN" = "true" ]; then
        log ""
        log "Review the log above, then run with:"
        log " DRY_RUN=false $0"
    fi
}
# Run the whole cleanup, forwarding any CLI arguments.
main "$@"