Enhance CROSS_REFERENCE_VERIFICATION_REPORT.md with comprehensive link verification results, including a summary of scanned links, valid links, broken links, and invalid anchors. Update GLOSSARY.md to include pronunciations, usage examples, and related terms for key concepts. Revise IMPLEMENTATION_STATUS.md to reflect updated task completion metrics and enhance tracking of implementation phases. Standardize reference links across various operational examples for improved navigation and consistency.
This commit is contained in:
262
scripts/verify_links.py
Executable file
262
scripts/verify_links.py
Executable file
@@ -0,0 +1,262 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
DBIS Cross-Reference Verification Script
|
||||
Automated link verification and cross-reference checking
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from collections import defaultdict
|
||||
|
||||
# Colors for terminal output
|
||||
class Colors:
    """ANSI escape sequences used to colorize terminal output.

    Wrap text as ``f"{Colors.RED}text{Colors.NC}"``; ``NC`` resets the
    terminal back to its default attributes.
    """

    RED = '\033[0;31m'
    GREEN = '\033[0;32m'
    YELLOW = '\033[1;33m'  # attribute 1 = bold/bright
    BLUE = '\033[0;34m'
    NC = '\033[0m'  # No Color
|
||||
|
||||
def find_markdown_files(root_dir):
    """Collect the paths of all ``.md`` files located beneath *root_dir*.

    The ``.git``, ``node_modules`` and ``__pycache__`` directories are
    never descended into.  Returned paths are ``root_dir`` joined with the
    relative location of each file, in ``os.walk`` traversal order.
    """
    skipped = ['.git', 'node_modules', '__pycache__']
    found = []
    for current_dir, subdirs, filenames in os.walk(root_dir):
        # Slice assignment mutates the list os.walk holds, pruning the walk.
        subdirs[:] = [name for name in subdirs if name not in skipped]
        found.extend(
            os.path.join(current_dir, name)
            for name in filenames
            if name.endswith('.md')
        )
    return found
|
||||
|
||||
def extract_links(content, file_path):
    """Extract all inline markdown links from *content*.

    Recognizes ``[text](target)`` where the target is a path, an optional
    ``#anchor`` and an optional quoted title (``[text](path "Title")``).
    The title and any padding whitespace are removed so that ``'path'``
    holds only the link destination.

    Args:
        content: Full text of a markdown document.
        file_path: Path of the document; stored verbatim in each result.

    Returns:
        A list of dicts with the keys ``'text'``, ``'path'``, ``'line'``
        (1-based line of the link's opening bracket) and ``'source_file'``.
    """
    # Pattern: [text](path) or [text](path#anchor)
    link_pattern = re.compile(r'\[([^\]]+)\]\(([^)]+)\)')
    # A trailing title: whitespace followed by a single- or double-quoted
    # string.  Left in place it would become part of the path and make an
    # otherwise valid link look broken.
    title_pattern = re.compile(r'''\s+(?:"[^"]*"|'[^']*')\s*$''')

    links = []
    for match in link_pattern.finditer(content):
        target = title_pattern.sub('', match.group(2).strip())
        links.append({
            'text': match.group(1),
            'path': target,
            # Count newlines in place rather than slicing a copy per match.
            'line': content.count('\n', 0, match.start()) + 1,
            'source_file': file_path,
        })
    return links
|
||||
|
||||
def resolve_path(link_path, source_file):
    """Resolve a markdown link target to a filesystem path and an anchor.

    Args:
        link_path: Link destination as written in the document.
        source_file: Path of the document that contains the link.

    Returns:
        A ``(path, anchor)`` tuple:

        * ``(None, 'external')`` for ``http://``, ``https://`` and
          ``mailto:`` links;
        * ``(source_file, 'anchor')`` for anchor-only links (``#section``);
        * otherwise ``(full_path, anchor)``, where ``anchor`` is the text
          after the first ``#`` or ``None`` when the link has no ``#``.

    The project root is taken to be the parent of the directory that
    contains this script.
    """
    # Skip external links
    if link_path.startswith(('http://', 'https://', 'mailto:')):
        return None, 'external'

    # Handle anchor-only links
    if link_path.startswith('#'):
        return source_file, 'anchor'

    # Split path and anchor
    file_part, hash_sep, fragment = link_path.partition('#')
    anchor = fragment if hash_sep else None

    project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    # Make the source directory absolute so the root comparison below never
    # mixes relative and absolute paths (os.path.commonpath raises
    # ValueError in that case).
    source_dir = os.path.abspath(os.path.dirname(source_file))

    if file_part.startswith('/'):
        # Absolute path from project root
        full_path = os.path.join(project_root, file_part.lstrip('/'))
    elif file_part.startswith('../'):
        # Relative path going up - resolve relative to source directory
        full_path = os.path.normpath(os.path.join(source_dir, file_part))
        try:
            inside_root = (
                os.path.commonpath([project_root, full_path]) == project_root
            )
        except ValueError:
            # Raised e.g. for paths on different drives: treat as outside.
            inside_root = False
        if not inside_root:
            # Path went outside project root: drop the leading ../ segments
            # and resolve the remainder from the project root instead.
            rel_path = file_part
            while rel_path.startswith('../'):
                rel_path = rel_path[3:]
            full_path = os.path.join(project_root, rel_path)
    else:
        # Relative path in same directory or subdirectory
        full_path = os.path.normpath(os.path.join(source_dir, file_part))

    return full_path, anchor
|
||||
|
||||
def _heading_slug(heading):
    """Return the GitHub-style anchor slug for a markdown heading's text."""
    slug = heading.strip().lower()
    slug = re.sub(r'[^\w\s-]', '', slug)  # drop punctuation
    return re.sub(r'\s', '-', slug)  # each whitespace char becomes a hyphen


def _anchor_exists(content, anchor):
    """Return True if *anchor* matches a heading in markdown *content*."""
    # Loose check: the anchor text appears verbatim after a heading marker.
    if re.search(rf'#+\s+.*{re.escape(anchor)}', content, re.IGNORECASE):
        return True
    # Slug check: "#my-section" should match a "## My Section" heading.
    wanted = anchor.lower()
    headings = re.findall(
        r'^#{1,6}[ \t]+(.+?)[ \t]*#*[ \t]*$', content, re.MULTILINE
    )
    return any(_heading_slug(heading) == wanted for heading in headings)


def verify_link(link, project_root):
    """Verify if a link is valid.

    Args:
        link: Dict with at least the keys ``'path'`` and ``'source_file'``.
        project_root: Accepted for interface compatibility; not used here.

    Returns:
        An ``(is_valid, link_type, error)`` tuple.  ``link_type`` is one of
        ``'external'``, ``'anchor'``, ``'valid'``, ``'invalid_anchor'``,
        ``'missing_file'`` or ``'error'``; ``error`` is a message string
        for invalid links and ``None`` otherwise.
    """
    link_path = link['path']
    source_file = link['source_file']

    # Anchor-only links are valid.  This has to be decided from the link
    # itself: resolve_path reports them as (source_file, 'anchor'), so the
    # resolved path is never the literal string 'anchor'.
    if link_path.startswith('#'):
        return True, 'anchor', None

    resolved_path, anchor = resolve_path(link_path, source_file)

    if resolved_path is None:
        return True, 'external', None  # External links are considered valid

    if not os.path.isfile(resolved_path):
        return False, 'missing_file', f"File not found: {resolved_path}"

    # An empty or missing anchor needs no further check.
    if anchor:
        try:
            with open(resolved_path, 'r', encoding='utf-8') as f:
                content = f.read()
        except (OSError, UnicodeDecodeError) as e:
            return False, 'error', str(e)
        if not _anchor_exists(content, anchor):
            return False, 'invalid_anchor', f"Anchor '{anchor}' not found"

    return True, 'valid', None
|
||||
|
||||
def _verify_all(md_files, project_root):
    """Verify every link in *md_files*; return ``(stats, broken_links)``.

    Each broken link is printed as it is found.
    """
    stats = {
        'total': 0,
        'valid': 0,
        'external': 0,
        'broken': 0,
        'invalid_anchor': 0,
    }
    broken_links = []

    for md_file in md_files:
        try:
            with open(md_file, 'r', encoding='utf-8') as f:
                content = f.read()

            for link in extract_links(content, md_file):
                stats['total'] += 1
                is_valid, link_type, error = verify_link(link, project_root)

                if link_type == 'external':
                    stats['external'] += 1
                    stats['valid'] += 1
                elif is_valid:
                    stats['valid'] += 1
                else:
                    stats['broken'] += 1
                    if link_type == 'invalid_anchor':
                        stats['invalid_anchor'] += 1
                    broken_links.append({
                        'link': link,
                        'type': link_type,
                        'error': error,
                    })
                    rel_file = os.path.relpath(link['source_file'], project_root)
                    print(f"{Colors.RED}✗{Colors.NC} {rel_file}:{link['line']} -> {link['path']}")
                    if error:
                        print(f" {Colors.YELLOW}Error:{Colors.NC} {error}")
        except Exception as e:
            # Best-effort per file: report the problem and keep scanning.
            print(f"{Colors.RED}Error reading {md_file}:{Colors.NC} {e}")

    return stats, broken_links


def _build_report(stats, broken_links, success_rate, project_root):
    """Return the markdown text of the verification report."""
    # Local import: the module only imports ``datetime`` from ``datetime``.
    from datetime import timezone

    # Timezone-aware replacement for the deprecated datetime.utcnow().
    generated = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")

    parts = [f"""# CROSS-REFERENCE VERIFICATION REPORT
## Automated Link Verification Results

**Generated:** {generated}
**Project Root:** {project_root}

---

## SUMMARY

- **Total Links Scanned:** {stats['total']}
- **Valid Links:** {stats['valid']}
- **External Links:** {stats['external']}
- **Broken Links:** {stats['broken']}
- **Invalid Anchors:** {stats['invalid_anchor']}
- **Success Rate:** {success_rate:.2f}%

---

## BROKEN LINKS

"""]

    if broken_links:
        for broken in broken_links:
            link = broken['link']
            rel_file = os.path.relpath(link['source_file'], project_root)
            parts.append(f"- **{rel_file}:{link['line']}** -> `{link['path']}`\n")
            if broken['error']:
                # Two-space indent so Markdown nests this under the entry.
                parts.append(f"  - Error: {broken['error']}\n")
            # NOTE(review): emits a blank line after every entry; confirm
            # this matches the intended placement in the original script.
            parts.append("\n")
    else:
        parts.append("✅ No broken links found!\n\n")

    parts.append("""---

## RECOMMENDATIONS

1. Fix all broken links identified above
2. Verify and correct invalid anchors
3. Update cross-references in affected documents
4. Re-run verification after fixes

---

**END OF VERIFICATION REPORT**
""")

    return "".join(parts)


def main():
    """Main verification function.

    Scans every markdown file under the project root (the parent of this
    script's directory), verifies the links they contain, writes
    ``CROSS_REFERENCE_VERIFICATION_REPORT.md`` into the project root and
    prints a summary.

    Returns:
        ``0`` when no broken links were found, ``1`` otherwise.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    project_root = os.path.dirname(script_dir)
    report_file = os.path.join(project_root, 'CROSS_REFERENCE_VERIFICATION_REPORT.md')

    print("=" * 50)
    print("DBIS Cross-Reference Verification")
    print("=" * 50)
    print()
    print(f"{Colors.BLUE}Project Root:{Colors.NC} {project_root}")
    print()

    # Find all markdown files
    print(f"{Colors.BLUE}Scanning markdown files...{Colors.NC}")
    md_files = find_markdown_files(project_root)
    print(f"Found {len(md_files)} markdown files")
    print()

    # Extract and verify links
    stats, broken_links = _verify_all(md_files, project_root)

    # Generate report
    success_rate = (stats['valid'] / stats['total'] * 100) if stats['total'] > 0 else 0
    report_content = _build_report(stats, broken_links, success_rate, project_root)

    with open(report_file, 'w', encoding='utf-8') as f:
        f.write(report_content)

    # Print summary
    print()
    print("=" * 50)
    print("Verification Summary")
    print("=" * 50)
    print()
    print(f"{Colors.GREEN}Total Links Scanned:{Colors.NC} {stats['total']}")
    print(f"{Colors.GREEN}Valid Links:{Colors.NC} {stats['valid']}")
    print(f"{Colors.BLUE}External Links:{Colors.NC} {stats['external']}")
    print(f"{Colors.RED}Broken Links:{Colors.NC} {stats['broken']}")
    if stats['invalid_anchor'] > 0:
        print(f"{Colors.YELLOW}Invalid Anchors:{Colors.NC} {stats['invalid_anchor']}")
    print(f"{Colors.BLUE}Success Rate:{Colors.NC} {success_rate:.2f}%")
    print()
    print(f"{Colors.BLUE}Report generated:{Colors.NC} {report_file}")
    print()
    print(f"{Colors.GREEN}✓ Verification complete!{Colors.NC}")

    return 0 if stats['broken'] == 0 else 1
|
||||
|
||||
if __name__ == '__main__':
    # Use main()'s return value (0 = clean, 1 = broken links) as exit status.
    raise SystemExit(main())
|
||||
|
||||
Reference in New Issue
Block a user