Update CROSS_REFERENCE_VERIFICATION_REPORT.md with revised link verification results: the totals of scanned links and valid links increase, while broken links and invalid anchors drop significantly. Enhance Cross_Reference_Verification_Process.md with clearer examples of link formats. Modify verify_links.py to improve anchor verification by checking for HTML anchor tags (`<a id="...">`) as well as markdown heading anchors (lowercased, spaces converted to hyphens), improving overall link validation accuracy.

2025-12-08 04:03:47 -08:00
parent 13bad3aef2
commit 9ed0f0db3d
6 changed files with 781 additions and 87 deletions
--- a/scripts/verify_links.py
+++ b/scripts/verify_links.py
@@ -59,6 +59,22 @@ def resolve_path(link_path, source_file):
    
    # Handle anchor-only links
    if link_path.startswith('#'):
+        anchor = link_path[1:]  # Remove the #
+        # Verify anchor exists in source file
+        try:
+            with open(source_file, 'r', encoding='utf-8') as f:
+                content = f.read()
+                # Check for HTML anchor
+                html_pattern = rf'<a\s+id=["\']{re.escape(anchor)}["\']'
+                if re.search(html_pattern, content, re.IGNORECASE):
+                    return source_file, None  # Valid anchor
+                # Check for heading
+                normalized = anchor.lower().replace(' ', '-')
+                heading_pattern = rf'#+\s+.*{re.escape(normalized)}'
+                if re.search(heading_pattern, content, re.IGNORECASE):
+                    return source_file, None  # Valid anchor
+        except:
+            pass
        return source_file, 'anchor'
    
    # Split path and anchor
@@ -109,12 +125,23 @@ def verify_link(link, project_root):
            try:
                with open(resolved_path, 'r', encoding='utf-8') as f:
                    content = f.read()
-                    # Check for anchor in headings
+                    # Check for anchor in headings or HTML anchor tags
+                    # Try heading pattern first
                    anchor_pattern = rf'#+\s+.*{re.escape(anchor)}'
                    if re.search(anchor_pattern, content, re.IGNORECASE):
                        return True, 'valid', None
-                    else:
-                        return False, 'invalid_anchor', f"Anchor '{anchor}' not found"
+                    # Try HTML anchor tag
+                    html_anchor_pattern = rf'<a\s+id=["\']{re.escape(anchor)}["\']'
+                    if re.search(html_anchor_pattern, content, re.IGNORECASE):
+                        return True, 'valid', None
+                    # Try markdown anchor (lowercase, spaces to hyphens)
+                    normalized_anchor = anchor.lower().replace(' ', '-')
+                    if normalized_anchor in content.lower():
+                        # Check if it's in a heading
+                        heading_pattern = rf'#+\s+.*{re.escape(normalized_anchor)}'
+                        if re.search(heading_pattern, content, re.IGNORECASE):
+                            return True, 'valid', None
+                    return False, 'invalid_anchor', f"Anchor '{anchor}' not found"
            except Exception as e:
                return False, 'error', str(e)
        return True, 'valid', None