Files
dbis_docs/scripts/verify_cross_references_simple.sh

190 lines
5.2 KiB
Bash
Raw Permalink Normal View History

#!/bin/bash
#
# DBIS cross-reference verification (simplified).
# Automated link verification and cross-reference checking.

set -e

# ANSI colour escapes; kept as literal backslash sequences so that
# `echo -e` / `printf %b` expand them at print time.
NC='\033[0m' # reset / no colour
BLUE='\033[0;34m'
YELLOW='\033[1;33m'
GREEN='\033[0;32m'
RED='\033[0;31m'

# Paths: the project root is the parent of the directory holding this script,
# and both output files are written directly into it.
PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
BROKEN_LINKS_FILE="${PROJECT_ROOT}/BROKEN_LINKS.txt"
REPORT_FILE="${PROJECT_ROOT}/CROSS_REFERENCE_VERIFICATION_REPORT.md"

# Running tallies for the scan.
BROKEN_COUNT=0
VALID_LINKS=0
TOTAL_LINKS=0
# Start-up banner.
printf '%s\n' \
  "==========================================" \
  "DBIS Cross-Reference Verification" \
  "==========================================" \
  ""
printf '%b\n' "${BLUE}Project Root:${NC} $PROJECT_ROOT"
printf '\n'

# Write a placeholder report with zeroed figures. The quoted delimiter keeps
# the template literal (no expansion takes place inside it).
cat << 'EOF' > "$REPORT_FILE"
# CROSS-REFERENCE VERIFICATION REPORT
## Automated Link Verification Results
**Generated:**
**Project Root:**
---
## SUMMARY
- **Total Links Scanned:** 0
- **Valid Links:** 0
- **Broken Links:** 0
- **Success Rate:** 0.00%
---
## BROKEN LINKS
EOF
#######################################
# Decide whether a markdown link target resolves to an existing file.
# Globals:
#   PROJECT_ROOT (read) - base directory for targets that begin with "/"
# Arguments:
#   $1 - link target as written in the document; may carry a "#anchor"
#   $2 - path of the markdown file that contains the link
# Returns:
#   0 when the target is external (http://, https://, mailto:), is an
#     anchor inside the containing document, or names an existing regular
#     file;
#   1 otherwise.
#######################################
check_file_exists() {
  local file_path="$1"
  local source_file="$2"
  # Everything below is local so that no value from a previous call can
  # influence the verdict for the current link.
  local source_dir target full_path

  # Declared above and assigned here so a dirname failure is not masked
  # by `local`.
  source_dir="$(dirname "$source_file")"

  # External links - skip for now
  if [[ "$file_path" == http://* || "$file_path" == https://* || "$file_path" == mailto:* ]]; then
    return 0
  fi

  # Strip the anchor before any path handling so "file.md#section" and
  # "../file.md#section" are both checked against "file.md".
  target="${file_path%%#*}"

  # A bare "#section" points into the document that contains the link,
  # which necessarily exists.
  if [[ -z "$target" ]]; then
    return 0
  fi

  if [[ "$target" == /* ]]; then
    # Leading slash: interpreted relative to the project root.
    full_path="${PROJECT_ROOT}${target}"
  else
    # Same directory, subdirectory, or "../" - the filesystem resolves any
    # ".." components when the path is tested.
    full_path="${source_dir}/${target}"
  fi

  if [[ -f "$full_path" ]]; then
    return 0
  fi
  return 1
}
# Main verification process: walk every markdown file under PROJECT_ROOT,
# pull out each inline link and tally whether its target exists.
echo -e "${BLUE}Scanning markdown files...${NC}"
echo ""

# The broken-links list is only ever appended to, so a file left behind by
# an earlier run would be reported again. Start each run without one.
rm -f -- "$BROKEN_LINKS_FILE"

# Both loops read from process substitutions instead of pipes. A `while` on
# the right-hand side of a pipe runs in a subshell, and the counters updated
# there would be back at 0 by the time the report is written.
while IFS= read -r -d '' file; do
  relative_file="${file#"$PROJECT_ROOT"/}"

  # Skip certain directories. The test runs on the project-relative path so
  # that a checkout located under some ".../scripts/..." directory is not
  # skipped wholesale.
  case "/$relative_file" in
    */node_modules/* | */.git/* | */scripts/*) continue ;;
  esac

  while IFS= read -r link_match; do
    # link_match looks like "[text](target)"; keep what lies between the
    # first "](" and the closing ")".
    link_path=${link_match#*']('}
    link_path=${link_path%')'}
    if [[ -z "$link_path" ]]; then
      continue
    fi

    TOTAL_LINKS=$((TOTAL_LINKS + 1))

    # check_file_exists returns 0 for http/https/mailto targets, so external
    # links are counted as valid here without a separate test.
    if check_file_exists "$link_path" "$file"; then
      VALID_LINKS=$((VALID_LINKS + 1))
    else
      BROKEN_COUNT=$((BROKEN_COUNT + 1))
      # Colour goes to the terminal only; the list file stays plain text
      # because it is later copied into the markdown report.
      printf '%b %s\n' "${RED}${NC}" "$relative_file -> $link_path"
      printf '%s\n' "$relative_file -> $link_path" >> "$BROKEN_LINKS_FILE"
    fi
  # In a POSIX bracket expression a backslash is an ordinary character, so
  # "any character but ]" has to be spelled [^]] (the ] placed first).
  # A file without links makes grep exit 1; that status is not propagated
  # from a process substitution, so `set -e` is unaffected.
  done < <(grep -oE '\[[^]]+\]\([^)]+\)' "$file" 2>/dev/null)
done < <(find "$PROJECT_ROOT" -name "*.md" -type f -print0)
# Replace the placeholder report with one built from the final tallies.
# Everything printed inside the group is redirected into REPORT_FILE.
{
  printf '**Generated:** %s\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')"
  printf '%s\n' \
    "**Project Root:** $PROJECT_ROOT" \
    "" \
    "---" \
    "" \
    "## SUMMARY" \
    "" \
    "- **Total Links Scanned:** $TOTAL_LINKS" \
    "- **Valid Links:** $VALID_LINKS" \
    "- **Broken Links:** $BROKEN_COUNT"

  # A percentage only makes sense once at least one link has been counted.
  if (( TOTAL_LINKS > 0 )); then
    SUCCESS_RATE=$(awk -v ok="$VALID_LINKS" -v all="$TOTAL_LINKS" \
      'BEGIN { printf "%.2f", (ok / all) * 100 }')
    printf '%s\n' "- **Success Rate:** ${SUCCESS_RATE}%"
  else
    printf '%s\n' "- **Success Rate:** N/A"
  fi

  printf '%s\n' \
    "" \
    "---" \
    "" \
    "## BROKEN LINKS" \
    ""

  # The list file is copied in verbatim when it exists.
  if [[ -f "$BROKEN_LINKS_FILE" ]]; then
    cat "$BROKEN_LINKS_FILE"
  else
    printf '%s\n' "✅ No broken links found!"
  fi

  printf '%s\n' \
    "" \
    "---" \
    "" \
    "## RECOMMENDATIONS" \
    "" \
    "1. Fix all broken links identified above" \
    "2. Verify and correct invalid paths" \
    "3. Update cross-references in affected documents" \
    "4. Re-run verification after fixes" \
    "" \
    "---" \
    "" \
    "**END OF VERIFICATION REPORT**"
} > "$REPORT_FILE"
# Console recap of the run, followed by the locations of the output files.
printf '%s\n' \
  "" \
  "==========================================" \
  "Verification Summary" \
  "==========================================" \
  ""
printf '%b\n' \
  "${GREEN}Total Links Scanned:${NC} $TOTAL_LINKS" \
  "${GREEN}Valid Links:${NC} $VALID_LINKS" \
  "${RED}Broken Links:${NC} $BROKEN_COUNT"

# The rate line is omitted entirely when nothing was counted.
if (( TOTAL_LINKS > 0 )); then
  SUCCESS_RATE=$(awk -v ok="$VALID_LINKS" -v all="$TOTAL_LINKS" \
    'BEGIN { printf "%.2f", (ok / all) * 100 }')
  printf '%b\n' "${BLUE}Success Rate:${NC} ${SUCCESS_RATE}%"
fi

printf '\n'
printf '%b\n' "${BLUE}Report generated:${NC} $REPORT_FILE"
# Mention the list file only if one is present on disk.
if [[ -f "$BROKEN_LINKS_FILE" ]]; then
  printf '%b\n' "${BLUE}Broken links list:${NC} $BROKEN_LINKS_FILE"
fi
printf '\n'
printf '%b\n' "${GREEN}✓ Verification complete!${NC}"