Update CROSS_REFERENCE_VERIFICATION_REPORT.md with revised link verification results: the totals of scanned links and valid links increase, while broken links and invalid anchors drop significantly. Enhance Cross_Reference_Verification_Process.md with clearer examples of link formats. Modify verify_links.py to improve anchor verification by checking for both HTML anchor tags and markdown heading anchors, improving overall link validation accuracy.
This commit is contained in:
@@ -59,6 +59,22 @@ def resolve_path(link_path, source_file):
|
||||
|
||||
# Handle anchor-only links
|
||||
if link_path.startswith('#'):
|
||||
anchor = link_path[1:] # Remove the #
|
||||
# Verify anchor exists in source file
|
||||
try:
|
||||
with open(source_file, 'r', encoding='utf-8') as f:
|
||||
content = f.read()
|
||||
# Check for HTML anchor
|
||||
html_pattern = rf'<a\s+id=["\']{re.escape(anchor)}["\']'
|
||||
if re.search(html_pattern, content, re.IGNORECASE):
|
||||
return source_file, None # Valid anchor
|
||||
# Check for heading
|
||||
normalized = anchor.lower().replace(' ', '-')
|
||||
heading_pattern = rf'#+\s+.*{re.escape(normalized)}'
|
||||
if re.search(heading_pattern, content, re.IGNORECASE):
|
||||
return source_file, None # Valid anchor
|
||||
except:
|
||||
pass
|
||||
return source_file, 'anchor'
|
||||
|
||||
# Split path and anchor
|
||||
@@ -109,12 +125,23 @@ def verify_link(link, project_root):
|
||||
try:
|
||||
with open(resolved_path, 'r', encoding='utf-8') as f:
|
||||
content = f.read()
|
||||
# Check for anchor in headings
|
||||
# Check for anchor in headings or HTML anchor tags
|
||||
# Try heading pattern first
|
||||
anchor_pattern = rf'#+\s+.*{re.escape(anchor)}'
|
||||
if re.search(anchor_pattern, content, re.IGNORECASE):
|
||||
return True, 'valid', None
|
||||
else:
|
||||
return False, 'invalid_anchor', f"Anchor '{anchor}' not found"
|
||||
# Try HTML anchor tag
|
||||
html_anchor_pattern = rf'<a\s+id=["\']{re.escape(anchor)}["\']'
|
||||
if re.search(html_anchor_pattern, content, re.IGNORECASE):
|
||||
return True, 'valid', None
|
||||
# Try markdown anchor (lowercase, spaces to hyphens)
|
||||
normalized_anchor = anchor.lower().replace(' ', '-')
|
||||
if normalized_anchor in content.lower():
|
||||
# Check if it's in a heading
|
||||
heading_pattern = rf'#+\s+.*{re.escape(normalized_anchor)}'
|
||||
if re.search(heading_pattern, content, re.IGNORECASE):
|
||||
return True, 'valid', None
|
||||
return False, 'invalid_anchor', f"Anchor '{anchor}' not found"
|
||||
except Exception as e:
|
||||
return False, 'error', str(e)
|
||||
return True, 'valid', None
|
||||
|
||||
Reference in New Issue
Block a user