Update CROSS_REFERENCE_VERIFICATION_REPORT.md with revised link verification results, which show an increase in total scanned links and valid links and a significant reduction in broken links and invalid anchors. Enhance Cross_Reference_Verification_Process.md with clearer examples for link formats. Modify verify_links.py to improve anchor verification by checking for both HTML and markdown anchors, enhancing overall link validation accuracy.

This commit is contained in:
defiQUG
2025-12-08 04:03:47 -08:00
parent 13bad3aef2
commit 9ed0f0db3d
6 changed files with 781 additions and 87 deletions

View File

@@ -59,6 +59,22 @@ def resolve_path(link_path, source_file):
# Handle anchor-only links
if link_path.startswith('#'):
anchor = link_path[1:] # Remove the #
# Verify anchor exists in source file
try:
with open(source_file, 'r', encoding='utf-8') as f:
content = f.read()
# Check for HTML anchor
html_pattern = rf'<a\s+id=["\']{re.escape(anchor)}["\']'
if re.search(html_pattern, content, re.IGNORECASE):
return source_file, None # Valid anchor
# Check for heading
normalized = anchor.lower().replace(' ', '-')
heading_pattern = rf'#+\s+.*{re.escape(normalized)}'
if re.search(heading_pattern, content, re.IGNORECASE):
return source_file, None # Valid anchor
except:
pass
return source_file, 'anchor'
# Split path and anchor
@@ -109,12 +125,23 @@ def verify_link(link, project_root):
try:
with open(resolved_path, 'r', encoding='utf-8') as f:
content = f.read()
# Check for anchor in headings
# Check for anchor in headings or HTML anchor tags
# Try heading pattern first
anchor_pattern = rf'#+\s+.*{re.escape(anchor)}'
if re.search(anchor_pattern, content, re.IGNORECASE):
return True, 'valid', None
else:
return False, 'invalid_anchor', f"Anchor '{anchor}' not found"
# Try HTML anchor tag
html_anchor_pattern = rf'<a\s+id=["\']{re.escape(anchor)}["\']'
if re.search(html_anchor_pattern, content, re.IGNORECASE):
return True, 'valid', None
# Try markdown anchor (lowercase, spaces to hyphens)
normalized_anchor = anchor.lower().replace(' ', '-')
if normalized_anchor in content.lower():
# Check if it's in a heading
heading_pattern = rf'#+\s+.*{re.escape(normalized_anchor)}'
if re.search(heading_pattern, content, re.IGNORECASE):
return True, 'valid', None
return False, 'invalid_anchor', f"Anchor '{anchor}' not found"
except Exception as e:
return False, 'error', str(e)
return True, 'valid', None