#!/usr/bin/env bash
# Sync Wormhole AI documentation exports for offline use, MCP mirror, and RAG prep.
# See docs/04-configuration/WORMHOLE_AI_RESOURCES_LLM_PLAYBOOK.md
#
# Options:
#   --dry-run      print the output directory and exit without touching anything
#   --full-jsonl   also download llms-full.jsonl (large)
#   -h, --help     show usage
# Environment:
#   WORMHOLE_AI_DOCS_DIR  output directory
#                         (default: <repo root>/third-party/wormhole-ai-docs)
#   INCLUDE_FULL_JSONL    any non-empty value other than "0" acts like --full-jsonl
# Requires: curl, python3
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
OUT="${WORMHOLE_AI_DOCS_DIR:-$REPO_ROOT/third-party/wormhole-ai-docs}"
BASE="https://wormhole.com/docs"
MANIFEST="$OUT/manifest.json"
CATEGORIES=(
  basics
  ntt
  connect
  wtt
  settlement
  executor
  multigov
  queries
  transfer
  typescript-sdk
  solidity-sdk
  cctp
  reference
)

# Path of the download currently in flight (empty when none). Removed on exit
# so an aborted run never leaves a partial file for the manifest to pick up.
FETCH_PARTIAL=""

#######################################
# Print usage and exit.
# Arguments: $1 - exit status (default 0)
# Outputs:   usage text to stdout when status is 0, to stderr otherwise
#######################################
usage() {
  local code="${1:-0}"
  local fd=1
  if [[ "$code" -ne 0 ]]; then
    fd=2
  fi
  {
    echo "Usage: $0 [--dry-run] [--full-jsonl]"
    echo " WORMHOLE_AI_DOCS_DIR output directory (default: third-party/wormhole-ai-docs)"
    echo " INCLUDE_FULL_JSONL=1 or pass --full-jsonl to download llms-full.jsonl (large)"
  } >&"$fd"
  exit "$code"
}

#######################################
# Abort with status 127 if a required external command is missing.
# Arguments: $1 - command name
#######################################
require_cmd() {
  if ! command -v "$1" >/dev/null 2>&1; then
    echo "Required command not found: $1" >&2
    exit 127
  fi
}

# EXIT trap handler: delete the in-flight partial download, if any.
cleanup_partial() {
  if [[ -n "$FETCH_PARTIAL" ]]; then
    rm -f -- "$FETCH_PARTIAL"
  fi
}
trap cleanup_partial EXIT

#######################################
# Download a URL to a destination path.
# The body is written to a sibling temp file and renamed into place only after
# curl succeeds, so a failed transfer leaves any existing file untouched.
# Globals:   FETCH_PARTIAL (written)
# Arguments: $1 - source URL, $2 - destination file path
# Returns:   curl's (or mv's) non-zero status on failure
#######################################
fetch() {
  local url="$1"
  local dest="$2"
  echo " GET $url -> $dest"
  FETCH_PARTIAL="${dest}.part.$$"
  curl -fsSL --connect-timeout 30 --max-time 600 -o "$FETCH_PARTIAL" "$url" || return
  mv -f -- "$FETCH_PARTIAL" "$dest" || return
  FETCH_PARTIAL=""
}

#######################################
# Write $MANIFEST: sync timestamp (UTC), base URL, and sha256 + byte size of
# every file under $OUT except manifest.json and README.md.
# Globals:   OUT, MANIFEST, BASE (read, exported to the Python helper)
#######################################
write_manifest() {
  export OUT MANIFEST BASE
  python3 <<'PY'
import hashlib
import json
import os
from datetime import datetime, timezone
from pathlib import Path

out = Path(os.environ["OUT"])
base = os.environ["BASE"]
manifest_path = Path(os.environ["MANIFEST"])


def digest(path):
    """Return (sha256 hex digest, size in bytes), reading the file in chunks."""
    hasher = hashlib.sha256()
    size = 0
    with path.open("rb") as fh:
        for chunk in iter(lambda: fh.read(1 << 20), b""):
            hasher.update(chunk)
            size += len(chunk)
    return hasher.hexdigest(), size


files = {}
for p in sorted(out.rglob("*")):
    if not p.is_file():
        continue
    # The manifest itself and README.md are not listed in the manifest.
    if p.name in ("manifest.json", "README.md"):
        continue
    rel = str(p.relative_to(out)).replace("\\", "/")
    sha256, size = digest(p)
    files[rel] = {
        "sha256": sha256,
        "bytes": size,
    }

doc = {
    "synced_at_utc": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
    "base_url": base,
    "files": files,
}
manifest_path.write_text(json.dumps(doc, indent=2) + "\n", encoding="utf-8")
PY
}

#######################################
# Parse options, download the documentation exports, and write the manifest.
# Arguments: the script's command-line arguments
#######################################
main() {
  local dry_run=0
  local include_full=0
  local name

  while [[ $# -gt 0 ]]; do
    case "$1" in
      --dry-run) dry_run=1 ;;
      --full-jsonl) include_full=1 ;;
      -h | --help) usage 0 ;;
      *)
        echo "Unknown option: $1" >&2
        usage 1
        ;;
    esac
    shift
  done

  if [[ -n "${INCLUDE_FULL_JSONL:-}" && "$INCLUDE_FULL_JSONL" != "0" ]]; then
    include_full=1
  fi

  echo "Output directory: $OUT"
  if [[ "$dry_run" -eq 1 ]]; then
    echo "[dry-run] would mkdir, curl downloads, write manifest.json"
    exit 0
  fi

  # Fail before any download if a tool needed later in the run is missing.
  require_cmd curl
  require_cmd python3

  mkdir -p "$OUT/categories"

  fetch "$BASE/llms.txt" "$OUT/llms.txt"
  fetch "$BASE/ai/site-index.json" "$OUT/site-index.json"
  for name in "${CATEGORIES[@]}"; do
    fetch "$BASE/ai/categories/${name}.md" "$OUT/categories/${name}.md"
  done

  if [[ "$include_full" -eq 1 ]]; then
    fetch "$BASE/ai/llms-full.jsonl" "$OUT/llms-full.jsonl"
  else
    # Not requested this run: drop any earlier copy so the manifest does not
    # list a file that was not refreshed.
    rm -f -- "$OUT/llms-full.jsonl"
  fi

  write_manifest
  echo "Done. Manifest: $MANIFEST"
}

main "$@"