Add markdown file analysis tools and reports
This commit is contained in:
365
scripts/analyze-markdown-files.py
Executable file
365
scripts/analyze-markdown-files.py
Executable file
@@ -0,0 +1,365 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Comprehensive Markdown File Analysis Script
|
||||
Analyzes all markdown files in the project for:
|
||||
- File dates (creation, modification)
|
||||
- Duplicate patterns
|
||||
- Misplaced files
|
||||
- Content inconsistencies
|
||||
- Outdated information
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import json
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from collections import defaultdict
|
||||
from typing import Dict, List, Tuple, Set
|
||||
import hashlib
|
||||
|
||||
class MarkdownAnalyzer:
    """Scan a directory tree for markdown files and report on their age,
    naming patterns, placement, exact-duplicate content, and content-level
    issues (TODOs, deprecation markers, placeholder dates).

    Typical use::

        report = MarkdownAnalyzer(project_root).analyze()
    """

    # Directories that are never descended into while scanning.
    EXCLUDE_DIRS = {'.git', 'node_modules', '__pycache__', '.next', 'dist', 'build', 'venv', '.venv'}

    # Filename classification patterns (bucket name -> compiled regex).
    NAME_PATTERNS = {
        'complete': re.compile(r'COMPLETE', re.I),
        'final': re.compile(r'FINAL', re.I),
        'status': re.compile(r'STATUS', re.I),
        'timestamped': re.compile(r'_\d{8}_\d{6}|\d{8}_\d{6}'),
        'fix': re.compile(r'FIX|QUICK_FIX|RUN_NOW|EXECUTE', re.I),
        'report': re.compile(r'REPORT|SUMMARY|ANALYSIS|DIAGNOSTIC', re.I),
        'temporary': re.compile(r'NOW|READY|EXECUTE|RUN_', re.I),
    }

    # Root-level files that legitimately live in the project root.
    _ROOT_WHITELIST = ('README.md', 'PROJECT_STRUCTURE.md')

    def __init__(self, root_dir: str):
        """Create an analyzer rooted at *root_dir* (str or Path)."""
        self.root_dir = Path(root_dir)
        self.files: List[Dict] = []                       # one metadata dict per markdown file
        self.duplicates = defaultdict(list)               # digest -> every file sharing that content
        self.patterns: Dict[str, List[Dict]] = {name: [] for name in self.NAME_PATTERNS}
        self.misplaced: List[Dict] = []                   # files that appear to be in the wrong dir
        self.content_hashes: Dict[str, List[Dict]] = {}   # digest -> all files with that content
        self.file_metadata: List[Dict] = []               # unused here; kept for API compatibility

    def analyze(self) -> Dict:
        """Run the full analysis pipeline and return the report dict."""
        print("🔍 Scanning markdown files...")
        self._scan_files()
        print(f"📊 Found {len(self.files)} markdown files")

        print("\n📅 Analyzing file dates...")
        self._analyze_dates()

        print("\n🔎 Identifying patterns...")
        self._identify_patterns()

        print("\n📍 Finding misplaced files...")
        self._find_misplaced()

        print("\n🔗 Checking for duplicates...")
        self._check_duplicates()

        print("\n📝 Analyzing content...")
        self._analyze_content()

        return self._generate_report()

    def _scan_files(self):
        """Collect filesystem metadata for every markdown file below the root."""
        for md_file in self.root_dir.rglob('*.md'):
            # Skip anything inside an excluded directory.
            if any(part in self.EXCLUDE_DIRS for part in md_file.parts):
                continue

            try:
                stat = md_file.stat()
                rel_path = md_file.relative_to(self.root_dir)

                self.files.append({
                    'path': str(rel_path),
                    'full_path': str(md_file),
                    'size': stat.st_size,
                    'modified': datetime.fromtimestamp(stat.st_mtime),
                    'accessed': datetime.fromtimestamp(stat.st_atime),
                    # True creation time exists only where the platform exposes
                    # st_birthtime (macOS/BSD).  The original guard tested
                    # st_birthtime but then read st_ctime (metadata-change
                    # time on Linux) — fixed to read st_birthtime.
                    'created': datetime.fromtimestamp(stat.st_birthtime) if hasattr(stat, 'st_birthtime') else None,
                    'directory': str(rel_path.parent),
                    'name': md_file.name
                })
            except (OSError, PermissionError) as e:
                print(f"⚠️ Error accessing {md_file}: {e}")

    def _analyze_dates(self):
        """Derive age in days and a coarse age bucket for each file."""
        now = datetime.now()
        for file_info in self.files:
            days_old = (now - file_info['modified']).days
            file_info['days_old'] = days_old
            # The original chain had two branches (<7 and <30) that both
            # yielded 'recent'; collapsed to a single <30 test.
            file_info['age_category'] = (
                'recent' if days_old < 30 else
                'moderate' if days_old < 90 else
                'old' if days_old < 365 else
                'very_old'
            )

    def _identify_patterns(self):
        """Bucket files by which NAME_PATTERNS their filename matches."""
        for file_info in self.files:
            name = file_info['name']
            for pattern_name, pattern in self.NAME_PATTERNS.items():
                if pattern.search(name):
                    self.patterns[pattern_name].append(file_info)
                    file_info[f'has_{pattern_name}'] = True

    def _find_misplaced(self):
        """Flag files whose name suggests they belong in another directory."""
        root_files = [f for f in self.files if f['directory'] == '.']
        docs_files = [f for f in self.files if f['directory'].startswith('docs')]

        # Reports in root
        for f in root_files:
            if any(keyword in f['name'].upper() for keyword in ('REPORT', 'STATUS', 'INVENTORY', 'DIAGNOSTIC', 'ANALYSIS')):
                if f['name'] not in self._ROOT_WHITELIST:
                    self.misplaced.append({
                        'file': f,
                        'current': 'root',
                        'should_be': 'reports/',
                        'reason': 'Report file in root directory'
                    })

        # Status/completion files in docs
        for f in docs_files:
            if any(keyword in f['name'].upper() for keyword in ('COMPLETE', 'FINAL', 'STATUS', 'MIGRATION_COMPLETE')):
                self.misplaced.append({
                    'file': f,
                    'current': f['directory'],
                    'should_be': 'reports/',
                    'reason': 'Status/completion report in docs directory'
                })

        # Temporary fix guides in project root
        for f in root_files:
            if any(keyword in f['name'].upper() for keyword in ('FIX_', 'QUICK_FIX', 'RUN_NOW', 'EXECUTE')):
                self.misplaced.append({
                    'file': f,
                    'current': 'root',
                    'should_be': 'docs/09-troubleshooting/archive/',
                    'reason': 'Temporary fix guide in root'
                })

    def _check_duplicates(self):
        """Group files by exact content so duplicates can be reported.

        Fix: the previous version only recorded the *second and later* copy
        of each digest, so a pair of identical files was never reported
        (the group length never exceeded 1) and the first copy was always
        missing from larger groups.  Every file is now appended to its
        digest group, and ``self.duplicates`` points at the complete group.
        """
        for file_info in self.files:
            try:
                # md5 is used purely as a content fingerprint, not for security.
                digest = hashlib.md5(Path(file_info['full_path']).read_bytes()).hexdigest()
            except OSError:
                continue  # best-effort: unreadable files are simply skipped

            group = self.content_hashes.setdefault(digest, [])
            group.append(file_info)
            if len(group) > 1:
                self.duplicates[digest] = group

    def _analyze_content(self):
        """Scan file content for TODOs, deprecation markers, and dates."""
        for file_info in self.files:
            try:
                with open(file_info['full_path'], 'r', encoding='utf-8', errors='ignore') as f:
                    content = f.read()
            except OSError as e:
                # Record the failure instead of silently skipping the file.
                file_info['line_count'] = 0
                file_info['error'] = str(e)
                continue

            file_info['line_count'] = len(content.split('\n'))
            file_info['has_todo'] = 'TODO' in content or 'FIXME' in content
            file_info['has_deprecated'] = 'DEPRECATED' in content or 'OBSOLETE' in content
            file_info['has_date'] = bool(re.search(r'\d{4}-\d{2}-\d{2}|\d{1,2}/\d{1,2}/\d{4}', content))

            # Unexpanded shell date substitutions left behind by templates.
            if re.search(r'\$\(date\)|date \+', content):
                file_info['has_placeholder_date'] = True

    def _generate_report(self) -> Dict:
        """Assemble the final report dict from everything collected above."""
        report = {
            'summary': {
                'total_files': len(self.files),
                'total_size_mb': sum(f['size'] for f in self.files) / (1024 * 1024),
                'by_age': defaultdict(int),
                'by_directory': defaultdict(int)
            },
            'patterns': {},
            'misplaced': [],
            'duplicates': [],
            'old_files': [],
            'empty_files': [],
            'issues': []
        }

        # Summary stats
        for f in self.files:
            report['summary']['by_age'][f['age_category']] += 1
            report['summary']['by_directory'][f['directory']] += 1

        # Pattern counts
        for pattern_name, files in self.patterns.items():
            report['patterns'][pattern_name] = {
                'count': len(files),
                'files': [f['path'] for f in files[:20]]  # Limit to 20
            }

        # Misplaced files
        report['misplaced'] = [
            {
                'path': m['file']['path'],
                'current': m['current'],
                'should_be': m['should_be'],
                'reason': m['reason']
            }
            for m in self.misplaced
        ]

        # Duplicate content (each group now includes the first copy too)
        for hash_val, files in self.duplicates.items():
            if len(files) > 1:
                report['duplicates'].append({
                    'hash': hash_val[:8],
                    'count': len(files),
                    'files': [f['path'] for f in files]
                })

        # Old files (>90 days)
        report['old_files'] = [
            {
                'path': f['path'],
                'days_old': f['days_old'],
                'modified': f['modified'].isoformat()
            }
            for f in self.files if f['days_old'] > 90
        ]

        # Empty or very small files
        report['empty_files'] = [
            {
                'path': f['path'],
                'size': f['size'],
                'line_count': f.get('line_count', 0)
            }
            for f in self.files if f['size'] < 100 or f.get('line_count', 0) < 5
        ]

        # Per-file issue list
        for f in self.files:
            issues = []
            if f.get('has_placeholder_date'):
                issues.append('Contains placeholder date')
            if f.get('has_deprecated'):
                issues.append('Marks itself as deprecated')
            if f['days_old'] > 365:
                issues.append('Very old (>1 year)')
            if f['size'] < 50:
                issues.append('Very small file')

            if issues:
                report['issues'].append({
                    'path': f['path'],
                    'issues': issues
                })

        return report
|
||||
|
||||
def main():
    """Run the analyzer over the project root and write JSON + markdown reports.

    Returns the report dict so callers/tests can inspect it.
    """
    root_dir = Path(__file__).parent.parent
    analyzer = MarkdownAnalyzer(root_dir)
    report = analyzer.analyze()

    # Save JSON report.  encoding is pinned so non-ASCII file paths don't
    # trip a locale-dependent codec; default=str serializes the datetime
    # objects embedded in the report.
    json_file = root_dir / 'MARKDOWN_ANALYSIS.json'
    with open(json_file, 'w', encoding='utf-8') as f:
        json.dump(report, f, indent=2, default=str)
    print(f"\n✅ JSON report saved to: {json_file}")

    # Generate markdown report (same encoding rationale as above).
    md_file = root_dir / 'MARKDOWN_ANALYSIS_REPORT.md'
    with open(md_file, 'w', encoding='utf-8') as f:
        f.write(generate_markdown_report(report))
    print(f"✅ Markdown report saved to: {md_file}")

    return report
|
||||
|
||||
def generate_markdown_report(report: Dict) -> str:
    """Render the analysis *report* dict as a human-readable markdown document."""
    out: List[str] = []
    emit = out.append

    emit("# Markdown Files Analysis Report\n")
    emit(f"**Generated**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")

    # Summary section
    summary = report['summary']
    emit("## Summary\n")
    emit(f"- **Total Files**: {summary['total_files']}")
    emit(f"- **Total Size**: {summary['total_size_mb']:.2f} MB\n")

    emit("### Files by Age\n")
    for age_name, total in sorted(summary['by_age'].items()):
        emit(f"- **{age_name.title()}**: {total}")
    emit("")

    # One subsection per filename pattern, capped at 10 listed files.
    emit("## File Patterns\n")
    for name, info in report['patterns'].items():
        emit(f"### {name.title()} ({info['count']} files)\n")
        for entry in info['files'][:10]:
            emit(f"- `{entry}`")
        if info['count'] > 10:
            emit(f"- ... and {info['count'] - 10} more")
        emit("")

    # Misplaced files (first 50)
    emit("## Misplaced Files\n")
    emit(f"Found **{len(report['misplaced'])}** misplaced files:\n")
    for item in report['misplaced'][:50]:
        emit(f"- **{item['path']}**")
        emit(f"  - Current: `{item['current']}`")
        emit(f"  - Should be: `{item['should_be']}`")
        emit(f"  - Reason: {item['reason']}\n")

    # Duplicate groups (first 20)
    emit("## Duplicate Content\n")
    emit(f"Found **{len(report['duplicates'])}** sets of duplicate files:\n")
    for group in report['duplicates'][:20]:
        emit(f"- **{group['count']} files** with same content:")
        for entry in group['files']:
            emit(f"  - `{entry}`")
        emit("")

    # Oldest files first (first 50)
    emit("## Old Files (>90 days)\n")
    emit(f"Found **{len(report['old_files'])}** old files:\n")
    oldest_first = sorted(report['old_files'], key=lambda x: x['days_old'], reverse=True)
    for entry in oldest_first[:50]:
        emit(f"- **{entry['path']}** ({entry['days_old']} days old, modified: {entry['modified'][:10]})")
    emit("")

    # Per-file issues (first 50)
    emit("## Files with Issues\n")
    emit(f"Found **{len(report['issues'])}** files with issues:\n")
    for item in report['issues'][:50]:
        emit(f"- **{item['path']}**")
        for note in item['issues']:
            emit(f"  - {note}")
    emit("")

    return "\n".join(out)
|
||||
|
||||
# Script entry point: run the analysis when executed directly.
if __name__ == '__main__':
    main()
|
||||
308
scripts/check-content-inconsistencies.py
Executable file
308
scripts/check-content-inconsistencies.py
Executable file
@@ -0,0 +1,308 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Content Inconsistency Checker
|
||||
Compares related markdown files for inconsistencies in:
|
||||
- Dates
|
||||
- Status information
|
||||
- Configuration values
|
||||
- References to other files
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import json
|
||||
from pathlib import Path
|
||||
from collections import defaultdict
|
||||
from typing import Dict, List, Set, Tuple
|
||||
from datetime import datetime
|
||||
|
||||
class ContentInconsistencyChecker:
    """Compare related markdown files for inconsistent dates, status
    markers, cross-references, and configuration values.

    Typical use::

        report = ContentInconsistencyChecker(project_root).check()
    """

    # Directories that are never scanned.
    EXCLUDE_DIRS = {'.git', 'node_modules', '__pycache__', '.next', 'dist', 'build', 'venv'}

    def __init__(self, root_dir: str):
        """Create a checker rooted at *root_dir* (str or Path)."""
        self.root_dir = Path(root_dir)
        self.inconsistencies: List[Dict] = []     # accumulated findings
        self.file_contents: Dict[str, Dict] = {}  # rel path -> {'content', 'path', 'lines'}

    def check(self) -> Dict:
        """Run every consistency check and return the report dict."""
        print("🔍 Checking content inconsistencies...")

        # Load file contents
        self._load_files()

        # Check for inconsistencies
        print("\n📅 Checking date inconsistencies...")
        self._check_dates()

        print("\n📊 Checking status inconsistencies...")
        self._check_status()

        print("\n🔗 Checking cross-references...")
        self._check_references()

        print("\n⚙️ Checking configuration values...")
        self._check_config_values()

        print("\n📝 Checking duplicate content...")
        self._check_duplicate_content()

        return self._generate_report()

    def _load_files(self):
        """Read every markdown file under the root into memory."""
        for md_file in self.root_dir.rglob('*.md'):
            if any(part in self.EXCLUDE_DIRS for part in md_file.parts):
                continue

            try:
                with open(md_file, 'r', encoding='utf-8', errors='ignore') as f:
                    content = f.read()
            except OSError:
                continue  # best-effort: skip unreadable files

            rel_path = str(md_file.relative_to(self.root_dir))
            self.file_contents[rel_path] = {
                'content': content,
                'path': rel_path,
                'lines': content.split('\n')
            }

    def _check_dates(self):
        """Flag dates older than one year, scanned per project area."""
        date_patterns = [
            r'(\d{4}-\d{2}-\d{2})',            # YYYY-MM-DD
            r'(\d{1,2}/\d{1,2}/\d{4})',        # MM/DD/YYYY
            r'Date[:\s]+(\d{4}-\d{2}-\d{2})',
            r'Generated[:\s]+(\d{4}-\d{2}-\d{2})',
            r'Last Updated[:\s]+(\d{4}-\d{2}-\d{2})',
        ]

        # Group files by project/component
        project_files = defaultdict(list)
        for path in self.file_contents:
            if 'rpc-translator-138' in path:
                project_files['rpc-translator-138'].append(path)
            elif path.startswith('docs/'):
                project_files['docs'].append(path)
            elif path.startswith('reports/'):
                project_files['reports'].append(path)
            elif '/' not in path:
                # Root-level file.  (The original also tested
                # path.count('/') == 0, which is the same condition.)
                project_files['root'].append(path)

        now = datetime.now()
        for project, files in project_files.items():
            dates_found = []
            for file_path in files:
                content = self.file_contents[file_path]['content']
                for pattern in date_patterns:
                    for match in re.findall(pattern, content):
                        dates_found.append((file_path, match))

            # Check for very old dates (>1 year)
            for file_path, date_str in dates_found:
                try:
                    if '-' in date_str:
                        date_obj = datetime.strptime(date_str, '%Y-%m-%d')
                    elif '/' in date_str and len(date_str.split('/')) == 3:
                        date_obj = datetime.strptime(date_str, '%m/%d/%Y')
                    else:
                        continue
                except ValueError:
                    # Regex matched but it isn't a real date (e.g. 2026-13-99);
                    # the original used a bare except here.
                    continue

                days_diff = (now - date_obj).days
                if days_diff > 365:
                    self.inconsistencies.append({
                        'type': 'old_date',
                        'file': file_path,
                        'issue': f'Date {date_str} is {days_diff} days old',
                        'severity': 'medium'
                    })

    def _check_status(self):
        """Flag groups of related status files whose statuses disagree."""
        status_patterns = [
            r'Status[:\s]+([✅❌🔄⚠️]+|COMPLETE|INCOMPLETE|PENDING|ACTIVE|INACTIVE)',
            r'\*\*Status\*\*[:\s]+([✅❌🔄⚠️]+|COMPLETE|INCOMPLETE|PENDING)',
        ]

        # Group related status files by a stripped-down base name
        status_groups = defaultdict(list)
        for path in self.file_contents:
            filename = Path(path).name
            if 'COMPLETE' in filename or 'STATUS' in filename or 'FINAL' in filename:
                # Extract base name
                base = re.sub(r'_(COMPLETE|FINAL|STATUS).*', '', filename)
                base = re.sub(r'COMPLETE|FINAL|STATUS', '', base)
                status_groups[base].append(path)

        # Check for conflicting statuses
        for base, files in status_groups.items():
            if len(files) <= 1:
                continue
            statuses = []
            for file_path in files:
                content = self.file_contents[file_path]['content']
                for pattern in status_patterns:
                    matches = re.findall(pattern, content, re.IGNORECASE)
                    statuses.extend((file_path, m) for m in matches)

            # Compare case-insensitively: the regexes match with IGNORECASE,
            # so 'Complete' vs 'COMPLETE' is not a genuine conflict (the
            # original counted them as two different statuses).
            if len({s[1].upper() for s in statuses}) > 1:
                self.inconsistencies.append({
                    'type': 'conflicting_status',
                    'files': files,
                    'issue': f'Multiple status files for {base} with different statuses',
                    'severity': 'high'
                })

    def _check_references(self):
        """Flag markdown links whose target file does not exist."""
        reference_pattern = r'\[([^\]]+)\]\(([^)]+)\)'
        # Resolve the root once; all link targets are compared against it.
        root = self.root_dir.resolve()

        for path, data in self.file_contents.items():
            for _link_text, link_path in re.findall(reference_pattern, data['content']):
                # Skip external links
                if link_path.startswith(('http', 'mailto:')):
                    continue

                # Only the file part is checked; any '#anchor' is dropped.
                file_path = link_path.split('#', 1)[0]

                if file_path.startswith('/'):
                    # Treat absolute-looking links as project-root-relative.
                    relative_resolved = Path(file_path.lstrip('/'))
                else:
                    # FIX: resolve against the project root, not the process
                    # CWD.  The old code did (rel_parent / target).resolve(),
                    # which anchors at os.getcwd(); relative_to(root) then
                    # raised ValueError and *every* link was silently skipped
                    # whenever the script ran from outside the project root.
                    resolved = (root / Path(path).parent / file_path).resolve()
                    try:
                        relative_resolved = resolved.relative_to(root)
                    except ValueError:
                        # Path is outside project root, skip
                        continue

                # Check if file exists
                if not (root / relative_resolved).exists():
                    self.inconsistencies.append({
                        'type': 'broken_reference',
                        'file': path,
                        'issue': f'Broken link to {link_path}',
                        'severity': 'medium'
                    })

    def _check_config_values(self):
        """Flag components whose docs reference suspiciously many IPs."""
        # Look for IP addresses, VMIDs
        ip_pattern = r'192\.168\.11\.(\d+)'
        vmid_pattern = r'VMID[:\s]+(\d+)'

        configs_by_component = defaultdict(lambda: defaultdict(set))

        for path, data in self.file_contents.items():
            content = data['content']
            # Hoisted out of the match loops: same value for every match.
            component = self._identify_component(path)

            for ip in re.findall(ip_pattern, content):
                configs_by_component[component]['ips'].add(f'192.168.11.{ip}')

            for vmid in re.findall(vmid_pattern, content, re.IGNORECASE):
                configs_by_component[component]['vmids'].add(vmid)

        # Check for inconsistencies (same component, different values)
        for component, configs in configs_by_component.items():
            if len(configs['ips']) > 10:  # Too many IPs might indicate inconsistency
                self.inconsistencies.append({
                    'type': 'too_many_ips',
                    'component': component,
                    'issue': f'Component {component} references {len(configs["ips"])} different IPs',
                    'severity': 'low'
                })

    def _check_duplicate_content(self):
        """Flag pairs of files whose first 10 lines are identical."""
        file_signatures = {}

        for path, data in self.file_contents.items():
            # hash() values are only compared within this process, so
            # per-run string-hash randomization is not a problem here.
            signature = hash('\n'.join(data['lines'][:10]))

            if signature in file_signatures:
                self.inconsistencies.append({
                    'type': 'duplicate_intro',
                    'files': [file_signatures[signature], path],
                    'issue': 'Files have identical first 10 lines',
                    'severity': 'low'
                })
            else:
                file_signatures[signature] = path

    def _identify_component(self, path: str) -> str:
        """Map a file path onto the project component it documents."""
        if 'rpc-translator' in path:
            return 'rpc-translator-138'
        if 'besu' in path.lower():
            return 'besu'
        if 'dbis' in path.lower():
            return 'dbis'
        if 'firefly' in path.lower():
            return 'firefly'
        return 'other'

    def _generate_report(self) -> Dict:
        """Summarize the collected findings by type and severity."""
        report = {
            'summary': {
                'total_inconsistencies': len(self.inconsistencies),
                'by_type': defaultdict(int),
                'by_severity': defaultdict(int)
            },
            'inconsistencies': []
        }

        for inc in self.inconsistencies:
            report['summary']['by_type'][inc['type']] += 1
            report['summary']['by_severity'][inc['severity']] += 1
            report['inconsistencies'].append(inc)

        return report
|
||||
|
||||
def main():
    """Run the checker over the project root and save the JSON report.

    Returns the report dict so callers/tests can inspect it.
    """
    root_dir = Path(__file__).parent.parent
    checker = ContentInconsistencyChecker(root_dir)
    report = checker.check()

    # Save report.  encoding is pinned so non-ASCII paths don't trip a
    # locale-dependent codec; default=str covers non-JSON-native values.
    json_file = root_dir / 'CONTENT_INCONSISTENCIES.json'
    with open(json_file, 'w', encoding='utf-8') as f:
        json.dump(report, f, indent=2, default=str)
    print(f"\n✅ Report saved to: {json_file}")

    # Print summary
    print("\n📊 Summary:")
    print(f"  Total inconsistencies: {report['summary']['total_inconsistencies']}")
    print(f"  By type: {dict(report['summary']['by_type'])}")
    print(f"  By severity: {dict(report['summary']['by_severity'])}")

    return report
|
||||
|
||||
# Script entry point: run the consistency checks when executed directly.
if __name__ == '__main__':
    main()
|
||||
319
scripts/cleanup-markdown-files.sh
Executable file
319
scripts/cleanup-markdown-files.sh
Executable file
@@ -0,0 +1,319 @@
|
||||
#!/bin/bash
# Markdown Files Cleanup Script
# Automatically organizes markdown files based on analysis

# Abort on errors, unset variables, and failures anywhere in a pipeline.
set -euo pipefail

# Resolve this script's directory, derive the project root one level up,
# and run everything relative to the project root.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
cd "$PROJECT_ROOT"

# Colors for output
# NOTE: the escapes are stored literally; echo -e interprets them at print time.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Dry-run mode (set to false to actually move files)
DRY_RUN=${DRY_RUN:-true}

# Log file
# Timestamped so repeated runs never clobber an earlier log.
LOG_FILE="$PROJECT_ROOT/MARKDOWN_CLEANUP_LOG_$(date +%Y%m%d_%H%M%S).log"
|
||||
|
||||
# Shared writer: render one colored line to stdout and append it to the log.
_emit() {
    echo -e "$1" | tee -a "$LOG_FILE"
}

# Timestamped informational message.
log() {
    _emit "${BLUE}[$(date +'%Y-%m-%d %H:%M:%S')]${NC} $1"
}

# Non-fatal warning.
warn() {
    _emit "${YELLOW}[WARN]${NC} $1"
}

# Error message (does not exit by itself).
error() {
    _emit "${RED}[ERROR]${NC} $1"
}

# Success confirmation.
success() {
    _emit "${GREEN}[OK]${NC} $1"
}
|
||||
|
||||
# move_file SRC DEST REASON
# Move SRC to DEST (creating DEST's parent directory first), honoring
# DRY_RUN.  Returns 1 when SRC is not a regular file or the move fails.
move_file() {
    local src="$1"
    local dest="$2"
    local reason="$3"
    local dest_dir

    if [ ! -f "$src" ]; then
        warn "File not found: $src"
        return 1
    fi

    # Create destination directory if needed.
    # Declared above and assigned separately: 'local x=$(...)' would mask
    # the command substitution's exit status under 'set -e' (SC2155).
    dest_dir=$(dirname "$dest")
    if [ "$DRY_RUN" = "true" ]; then
        log "Would move: $src -> $dest"
        log "  Reason: $reason"
        log "  Would create directory: $dest_dir"
    else
        mkdir -p "$dest_dir"
        if mv "$src" "$dest" 2>/dev/null; then
            success "Moved: $src -> $dest"
            echo "  Reason: $reason" >> "$LOG_FILE"
        else
            error "Failed to move: $src -> $dest"
            return 1
        fi
    fi
}
|
||||
|
||||
# Create necessary directories
# Ensures the target layout exists (or, in dry-run mode, reports what
# would be created).
create_directories() {
    log "Creating directory structure..."
    local target
    for target in \
        "reports/archive/2026-01-05" \
        "reports/status" \
        "reports/inventories" \
        "reports/analyses" \
        "docs/09-troubleshooting/archive" \
        "rpc-translator-138/docs/archive"
    do
        if [ "$DRY_RUN" = "true" ]; then
            log "Would create: $target"
        else
            mkdir -p "$target"
            success "Created: $target"
        fi
    done
}
|
||||
|
||||
# Move timestamped inventory files
# Archives point-in-time snapshot reports (*_YYYYMMDD_HHMMSS.md) from the
# project root into the dated archive directory.
move_timestamped_inventories() {
    log "Moving timestamped inventory files..."
    local files=(
        "CONTAINER_INVENTORY_20260105_142214.md"
        "CONTAINER_INVENTORY_20260105_142314.md"
        "CONTAINER_INVENTORY_20260105_142357.md"
        "CONTAINER_INVENTORY_20260105_142455.md"
        "CONTAINER_INVENTORY_20260105_142712.md"
        "CONTAINER_INVENTORY_20260105_142753.md"
        "CONTAINER_INVENTORY_20260105_142842.md"
        "CONTAINER_INVENTORY_20260105_144309.md"
        "CONTAINER_INVENTORY_20260105_153516.md"
        "CONTAINER_INVENTORY_20260105_154200.md"
        "SERVICE_DEPENDENCIES_20260105_143608.md"
        "SERVICE_DEPENDENCIES_20260105_143624.md"
        "IP_AVAILABILITY_20260105_143535.md"
        "DHCP_CONTAINERS_20260105_143507.md"
    )

    for file in "${files[@]}"; do
        # Quietly skip entries that were already moved or never existed.
        if [ -f "$file" ]; then
            move_file "$file" "reports/archive/2026-01-05/$file" "Timestamped inventory/report file"
        fi
    done
}
|
||||
|
||||
# Move status/completion reports from root to reports
# Scans root-level *.md files and moves anything whose name looks like a
# status/report artifact into reports/status/.
move_status_reports() {
    log "Moving status/completion reports from root..."

    # Pattern matching for status reports.
    # IFS= keeps leading/trailing whitespace in filenames intact (plain
    # 'read -r' would strip it and the subsequent mv would miss the file).
    find . -maxdepth 1 -name "*.md" -type f | while IFS= read -r file; do
        filename=$(basename "$file")

        # Skip essential files
        if [[ "$filename" == "README.md" ]] || [[ "$filename" == "PROJECT_STRUCTURE.md" ]]; then
            continue
        fi

        # Check if it's a status/report file
        if [[ "$filename" =~ (STATUS|COMPLETE|FINAL|REPORT|SUMMARY|ANALYSIS|DIAGNOSTIC|INVENTORY) ]]; then
            move_file "$file" "reports/status/$filename" "Status/completion report in root"
        fi
    done
}
|
||||
|
||||
# Move VMID-specific reports
# Relocates root-level VMID*.md report files into reports/.
move_vmid_reports() {
    log "Moving VMID-specific reports..."

    # IFS= preserves leading/trailing whitespace in filenames; -r keeps
    # backslashes literal, so unusual names survive the pipeline intact.
    find . -maxdepth 1 -name "VMID*.md" -type f | while IFS= read -r file; do
        filename=$(basename "$file")
        move_file "$file" "reports/$filename" "VMID-specific report"
    done
}
|
||||
|
||||
# Move IP conflict and network analysis reports
# Known root-level network/IP report filenames (exact names, no globs);
# each is moved into reports/analyses/ when present.
move_network_reports() {
    log "Moving network analysis reports..."

    local files=(
        "IP_CONFLICT_ANALYSIS.md"
        "IP_CONFLICT_192.168.11.14_RESOLUTION.md"
        "IP_CONFLICTS_RESOLUTION_COMPLETE.md"
        "VMID_IP_CONFLICTS_ANALYSIS.md"
        "VMID_IP_ADDRESS_LIST.md"
        "FINAL_VMID_IP_MAPPING.md"
        "IP_ASSIGNMENT_PLAN.md"
        "PHASE1_IP_CONFLICT_RESOLUTION.md"
        "PHASE1_IP_INVESTIGATION_COMPLETE.md"
        "PHASE1_IP_INVESTIGATION_STATUS.md"
        "R630-04_IP_CONFLICT_DISCOVERY.md"
        "RESERVED_IP_CONFLICTS_ANALYSIS.md"
        "RESERVED_IP_FIX_COMPLETE.md"
        "RESERVED_IP_FIX_COMPLETE_FINAL.md"
        "RESERVED_IP_FIX_SUMMARY.md"
        "DHCP_CONTAINERS_LIST.md"
        "DHCP_TO_STATIC_CONVERSION_COMPLETE.md"
        "DHCP_TO_STATIC_CONVERSION_FINAL_REPORT.md"
    )

    for file in "${files[@]}"; do
        # Quietly skip files that are absent (already moved or never generated).
        if [ -f "$file" ]; then
            move_file "$file" "reports/analyses/$file" "Network/IP analysis report"
        fi
    done
}
|
||||
|
||||
# Move service status reports
# Mix of exact filenames and glob patterns; entries containing '*' are
# quoted, so they reach find(1) as -name patterns rather than being
# expanded by the shell here.
move_service_reports() {
    log "Moving service status reports..."

    local files=(
        "BLOCK_PRODUCTION_REVIEW.md"
        "BLOCK_PRODUCTION_STATUS.md"
        "SERVICE_VERIFICATION_REPORT.md"
        "RPC_ENDPOINT_DIAGNOSTICS_REPORT.md"
        "RPC_SSL_ISSUE_SUMMARY.md"
        "RPC_TRANSACTION_FAILURE_INVESTIGATION.md"
        "RPC_TRANSACTION_FAILURE_ROOT_CAUSE.md"
        "BESU_*.md"
        "FIREFLY_*.md"
        "DBIS_*.md"
        "EXPLORER_*.md"
        "BLOCKSCOUT_*.md"
    )

    # Handle specific files
    for pattern in "${files[@]}"; do
        find . -maxdepth 1 -name "$pattern" -type f | while read -r file; do
            filename=$(basename "$file")
            # Skip if it's a script or config file
            if [[ ! "$filename" =~ (\.sh|\.py|\.js|\.json)$ ]]; then
                move_file "$file" "reports/status/$filename" "Service status report"
            fi
        done
    done
}
|
||||
|
||||
# Move temporary fix guides from rpc-translator-138
# Archives throwaway fix/status docs into rpc-translator-138/docs/archive/,
# keeping README and the recognized permanent docs in place.
move_rpc_translator_temp_files() {
    log "Moving temporary files from rpc-translator-138..."

    if [ ! -d "rpc-translator-138" ]; then
        warn "rpc-translator-138 directory not found"
        return
    fi

    # Glob patterns are quoted so find(1) expands them, not the shell.
    local temp_patterns=(
        "FIX_*.md"
        "QUICK_FIX*.md"
        "RUN_NOW.md"
        "EXECUTE_NOW.md"
        "EXECUTION_READY.md"
        "LOAD_KEYS_NOW.md"
        "FIX_PERMISSIONS*.md"
        "*COMPLETE*.md"
        "*FINAL*.md"
        "*STATUS*.md"
    )

    for pattern in "${temp_patterns[@]}"; do
        find rpc-translator-138 -maxdepth 1 -name "$pattern" -type f | while read -r file; do
            filename=$(basename "$file")
            # Skip README and important docs
            if [[ "$filename" != "README.md" ]] && [[ ! "$filename" =~ ^(DEPLOYMENT|API_METHODS|QUICK_REFERENCE|QUICK_START|QUICK_SETUP) ]]; then
                move_file "$file" "rpc-translator-138/docs/archive/$filename" "Temporary fix/status file"
            fi
        done
    done
}
|
||||
|
||||
# Move completion/migration status files from docs
# Relocates status artifacts that ended up in docs/ into reports/.
move_docs_status_files() {
    log "Moving status files from docs directory..."

    if [ ! -d "docs" ]; then
        warn "docs directory not found"
        return
    fi

    # FIX: -type f was missing here (every sibling mover has it).  The -o
    # alternatives are grouped with \( \) so -type f applies to all three;
    # without it a *directory* whose name matched would reach move_file,
    # fail its -f test, return 1, and abort the whole script under
    # 'set -euo pipefail'.  IFS= keeps filename whitespace intact.
    find docs -maxdepth 1 \( -name "*COMPLETE*.md" -o -name "*FINAL*.md" -o -name "*MIGRATION*.md" \) -type f | while IFS= read -r file; do
        filename=$(basename "$file")
        # Skip if it's actual documentation
        if [[ ! "$filename" =~ ^(DOCUMENTATION|CONTRIBUTOR|STYLE|GUIDE|README) ]]; then
            move_file "$file" "reports/$filename" "Status file in docs directory"
        fi
    done
}
|
||||
|
||||
# Consolidate duplicate status files (keep most recent)
consolidate_duplicates() {
    log "Identifying duplicate status files..."

    # This is a placeholder - actual consolidation requires content comparison
    # For now, we'll just log potential duplicates
    local status_files=(
        "ALL_TASKS_COMPLETE.md"
        "ALL_TASKS_COMPLETE_FINAL.md"
        "ALL_STEPS_COMPLETE.md"
        "ALL_NEXT_STEPS_COMPLETE.md"
    )

    for file in "${status_files[@]}"; do
        # Only warn; nothing is moved or deleted by this function.
        if [ -f "$file" ]; then
            warn "Potential duplicate: $file (consider consolidating)"
        fi
    done
}
|
||||
|
||||
# Main execution
# Orchestrates the whole cleanup: prints a banner, creates target
# directories, runs each mover in turn, then closes with next-step hints
# when running in dry-run mode.
main() {
    log "========================================="
    log "Markdown Files Cleanup Script"
    log "========================================="
    log "Project Root: $PROJECT_ROOT"
    log "Dry Run: $DRY_RUN"
    log "Log File: $LOG_FILE"
    log ""

    if [ "$DRY_RUN" = "true" ]; then
        warn "DRY RUN MODE - No files will be moved"
        warn "Set DRY_RUN=false to actually move files"
        log ""
    fi

    create_directories
    move_timestamped_inventories
    move_status_reports
    move_vmid_reports
    move_network_reports
    move_service_reports
    move_rpc_translator_temp_files
    move_docs_status_files
    consolidate_duplicates

    log ""
    log "========================================="
    log "Cleanup complete!"
    log "========================================="

    if [ "$DRY_RUN" = "true" ]; then
        log ""
        log "Review the log above, then run with:"
        log "  DRY_RUN=false $0"
    fi
}

main "$@"
|
||||
Reference in New Issue
Block a user