#!/bin/bash
# DBIS Cross-Reference Verification Script
# Automated link verification and cross-reference checking
#
# Scans every Markdown file below the project root (the parent directory of
# the directory holding this script), checks that each local link points at an
# existing file and that any "#anchor" resolves to a heading or explicit HTML
# anchor in the target, prints a summary, and writes a Markdown report to
# CROSS_REFERENCE_VERIFICATION_REPORT.md in the project root.

set -e

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# Configuration
PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
REPORT_FILE="${PROJECT_ROOT}/CROSS_REFERENCE_VERIFICATION_REPORT.md"
BROKEN_LINKS=()
MISSING_FILES=()
INVALID_ANCHORS=()
TOTAL_LINKS=0
VALID_LINKS=0

# ERE for an inline markdown link "[text](target)". Group 1 is the text,
# group 2 the target. "[^]]" and "[]]" are used because a backslash inside a
# POSIX bracket expression is a literal backslash, not an escape. The pattern
# is kept in a variable so grep and [[ =~ ]] share exactly the same regex.
LINK_REGEX='\[([^]]+)[]]\(([^)]+)\)'

# ERE for a link target that starts with a URI scheme (http:, mailto:, ...).
URI_SCHEME_REGEX='^[A-Za-z][A-Za-z0-9+.-]*:'

echo "=========================================="
echo "DBIS Cross-Reference Verification"
echo "=========================================="
echo ""

#######################################
# Print every inline markdown link found in a file, one per line.
# Globals:   LINK_REGEX (read)
# Arguments: $1 - path of the markdown file to scan
# Outputs:   each "[text](target)" match on its own line on stdout
# Returns:   always 0
#######################################
extract_links() {
  local file="$1"
  # grep exits non-zero when nothing matches; a file without links is normal,
  # so the status is deliberately discarded.
  grep -oE -- "$LINK_REGEX" "$file" || true
}

#######################################
# Map a link target to the filesystem path it refers to.
# Globals:   PROJECT_ROOT (read)
# Arguments: $1 - link target; anything from the first "#" on is ignored
#            $2 - path of the markdown file that contains the link
# Outputs:   the resolved path on stdout
#######################################
resolve_link_path() {
  local link_path="${1%%#*}"
  local source_file="$2"

  if [[ -z "$link_path" ]]; then
    # Anchor-only link such as "#section": it refers to the source file.
    printf '%s\n' "$source_file"
  elif [[ "$link_path" == /* ]]; then
    # Absolute path from project root
    printf '%s\n' "${PROJECT_ROOT}${link_path}"
  else
    # Relative to the directory of the source file. "../" components are left
    # in the path for the filesystem to resolve, so a missing directory simply
    # yields a path that does not exist.
    printf '%s\n' "$(dirname "$source_file")/${link_path}"
  fi
}

#######################################
# Check that a link target refers to an existing regular file.
# Arguments: $1 - link target (an optional "#anchor" suffix is ignored)
#            $2 - path of the markdown file that contains the link
# Returns:   0 if the target file exists, 1 otherwise
#######################################
verify_file_link() {
  local link_path="$1"
  local source_file="$2"
  local file_path

  file_path="$(resolve_link_path "$link_path" "$source_file")"
  [[ -f "$file_path" ]]
}

#######################################
# Check that an anchor exists in a markdown file.
#
# The anchor is accepted when the file contains an explicit name="..." or
# id="..." attribute with that exact value, or when it equals the slug of one
# of the file's ATX ("#"-prefixed) headings. Slugs are derived GitHub-style:
# lowercase the heading, drop every character other than a-z, 0-9, space, "_"
# and "-", turn spaces into hyphens, and append "-1", "-2", ... to repeated
# headings. The comparison is case-insensitive. Lines inside fenced code
# blocks are not treated as headings.
# NOTE(review): non-ASCII heading characters are dropped by this slug rule,
# so anchors to such headings may be reported as invalid - confirm whether
# the documents need that.
#
# Arguments: $1 - path of the file expected to contain the anchor
#            $2 - anchor name without the leading "#"
# Returns:   0 if the anchor is empty or found, 1 otherwise
#######################################
verify_anchor() {
  local file_path="$1"
  local anchor="$2"

  if [[ -z "$anchor" ]]; then
    return 0 # No anchor to verify
  fi
  if [[ ! -r "$file_path" ]]; then
    return 1
  fi

  # Explicit HTML anchors; fixed-string search so the anchor text is never
  # interpreted as a regular expression.
  if grep -qF \
    -e "name=\"${anchor}\"" -e "id=\"${anchor}\"" \
    -e "name='${anchor}'" -e "id='${anchor}'" \
    -- "$file_path"; then
    return 0
  fi

  # Heading slugs. The anchor is handed over through the environment so awk
  # does not process backslash escapes in it.
  ANCHOR="$anchor" awk '
    BEGIN { want = tolower(ENVIRON["ANCHOR"]) }
    /^[ \t]*(```|~~~)/ { in_fence = !in_fence; next }
    in_fence { next }
    /^#+[ \t]+/ {
      slug = tolower($0)
      sub(/\r$/, "", slug)
      sub(/^#+[ \t]+/, "", slug)
      sub(/[ \t]+#*[ \t]*$/, "", slug)
      gsub(/[^a-z0-9 _-]/, "", slug)
      gsub(/ /, "-", slug)
      n = seen[slug]++
      if (n > 0) slug = slug "-" n
      if (slug == want) { found = 1; exit }
    }
    END { exit !found }
  ' "$file_path"
}

# Main verification process
echo "Scanning markdown files..."
echo ""

# Both loops read from process substitutions rather than pipes so that they
# run in the current shell; counters and arrays updated inside a piped
# "while" would be lost when its subshell exits.
while IFS= read -r -d '' file; do
  relative_file="${file#"$PROJECT_ROOT"/}"

  # Extract all links from file
  while IFS= read -r link_line; do
    [[ -n "$link_line" ]] || continue
    [[ "$link_line" =~ $LINK_REGEX ]] || continue

    link_path="${BASH_REMATCH[2]}"
    TOTAL_LINKS=$((TOTAL_LINKS + 1))

    # Skip external links: anything with a URI scheme or a protocol-relative
    # "//host" target is not a file in this project.
    if [[ "$link_path" =~ $URI_SCHEME_REGEX || "$link_path" == //* ]]; then
      VALID_LINKS=$((VALID_LINKS + 1))
      continue
    fi

    # Split path and anchor
    file_part="${link_path%%#*}"
    anchor_part=""
    if [[ "$link_path" == *"#"* ]]; then
      anchor_part="${link_path#*#}"
    fi

    # Verify file
    if ! verify_file_link "$file_part" "$file"; then
      BROKEN_LINKS+=("$relative_file -> $link_path")
      MISSING_FILES+=("$link_path")
      printf '%b Broken link in %s: %s\n' \
        "${RED}ERROR:${NC}" "$relative_file" "$link_path"
      continue
    fi

    # Verify anchor if present
    if [[ -n "$anchor_part" ]]; then
      full_file_path="$(resolve_link_path "$file_part" "$file")"
      if ! verify_anchor "$full_file_path" "$anchor_part"; then
        INVALID_ANCHORS+=("$relative_file -> $link_path (anchor: $anchor_part)")
        printf '%b Invalid anchor in %s: %s\n' \
          "${YELLOW}WARNING:${NC}" "$relative_file" "$link_path"
        continue
      fi
    fi

    VALID_LINKS=$((VALID_LINKS + 1))
  done < <(extract_links "$file")
done < <(
  # Skip node_modules and .git trees entirely instead of walking them.
  find "$PROJECT_ROOT" \
    \( -type d \( -name node_modules -o -name .git \) \) -prune \
    -o -type f -name "*.md" -print0
)

# A project without any links must not trigger a division by zero.
if ((TOTAL_LINKS > 0)); then
  SUCCESS_RATE="$(awk -v valid="$VALID_LINKS" -v total="$TOTAL_LINKS" \
    'BEGIN { printf "%.2f", (valid / total) * 100 }')%"
else
  SUCCESS_RATE="N/A"
fi

# Generate report
echo ""
echo "=========================================="
echo "Verification Summary"
echo "=========================================="
echo ""
echo "Total Links Found: $TOTAL_LINKS"
echo "Valid Links: $VALID_LINKS"
echo "Broken Links: ${#BROKEN_LINKS[@]}"
echo "Invalid Anchors: ${#INVALID_ANCHORS[@]}"
echo ""

# Create report file
cat > "$REPORT_FILE" << EOF
# CROSS-REFERENCE VERIFICATION REPORT
## Automated Link Verification Results

**Generated:** $(date -u +"%Y-%m-%d %H:%M:%S UTC")
**Project Root:** $PROJECT_ROOT

---

## SUMMARY

- **Total Links Scanned:** $TOTAL_LINKS
- **Valid Links:** $VALID_LINKS
- **Broken Links:** ${#BROKEN_LINKS[@]}
- **Invalid Anchors:** ${#INVALID_ANCHORS[@]}
- **Success Rate:** $SUCCESS_RATE

---

## BROKEN LINKS

EOF

if [[ ${#BROKEN_LINKS[@]} -eq 0 ]]; then
  echo "✅ No broken links found!" >> "$REPORT_FILE"
else
  for link in "${BROKEN_LINKS[@]}"; do
    printf '%s\n' "- $link" >> "$REPORT_FILE"
  done
fi

cat >> "$REPORT_FILE" << EOF

---

## INVALID ANCHORS

EOF

if [[ ${#INVALID_ANCHORS[@]} -eq 0 ]]; then
  echo "✅ No invalid anchors found!" >> "$REPORT_FILE"
else
  for anchor in "${INVALID_ANCHORS[@]}"; do
    printf '%s\n' "- $anchor" >> "$REPORT_FILE"
  done
fi

cat >> "$REPORT_FILE" << EOF

---

## RECOMMENDATIONS

1. Fix all broken links identified above
2. Verify and correct invalid anchors
3. Update cross-references in affected documents
4. Re-run verification after fixes

---

**END OF VERIFICATION REPORT**
EOF

echo "Report generated: $REPORT_FILE"
echo ""
printf '%b\n' "${GREEN}Verification complete!${NC}"