#!/usr/bin/env python3
"""Validate global compliance matrix index and jurisdiction drill-down manifests."""

from __future__ import annotations

import json
import sys
from pathlib import Path
from typing import NoReturn


def fail(message: str) -> NoReturn:
    """Abort the run by raising ``SystemExit`` with an ``error:``-prefixed message.

    Annotated ``NoReturn`` so type checkers know that code following a
    ``fail(...)`` call is unreachable.
    """
    raise SystemExit(f"error: {message}")


def load_json(path: Path) -> dict:
    """Read *path* as UTF-8 JSON and return its top-level object.

    Aborts via :func:`fail` when the file is missing, cannot be read or
    decoded, is not valid JSON, or when its top-level value is not a JSON
    object (every caller goes on to call ``.get`` on the result).
    """
    try:
        text = path.read_text(encoding="utf-8")
    except FileNotFoundError:
        fail(f"missing JSON file: {path}")
    except (OSError, UnicodeDecodeError) as exc:
        # e.g. the path is a directory, is unreadable, or is not UTF-8 text.
        fail(f"cannot read JSON file {path}: {exc}")
    try:
        data = json.loads(text)
    except json.JSONDecodeError as exc:
        fail(f"invalid JSON in {path}: {exc}")
    if not isinstance(data, dict):
        # Valid JSON can still be a list, string or number.
        fail(f"{path} top-level JSON value must be an object")
    return data


def require_non_empty_str(obj: dict, key: str, label: str) -> str:
    """Return ``obj[key]`` stripped of surrounding whitespace.

    Aborts via :func:`fail` when the key is absent, the value is not a
    string, or the value is empty after stripping. *label* identifies the
    enclosing object in the error message.
    """
    val = obj.get(key)
    if not isinstance(val, str) or not val.strip():
        fail(f"{label} missing non-empty string {key}")
    return val.strip()


def check_repo_path(repo_root: Path, rel: str, label: str) -> None:
    """Abort via :func:`fail` unless ``repo_root / rel`` is an existing file."""
    p = repo_root / rel
    if not p.is_file():
        fail(f"{label} path is not a file: {rel}")


def _check_iso_alpha2(iso: object, label: str) -> None:
    """Validate an optional ``iso_3166_alpha2`` value.

    ``None`` and the empty string are both accepted as "not set". Any other
    value must be a string of exactly two ASCII letters; ``str.isalpha``
    alone would also accept non-ASCII letters.
    """
    if iso is None or iso == "":
        return
    if (
        not isinstance(iso, str)
        or len(iso) != 2
        or not (iso.isascii() and iso.isalpha())
    ):
        fail(f"{label} iso_3166_alpha2 must be null or two letters")


def validate_package(repo_root: Path, pkg: dict, label: str) -> None:
    """Validate one compliance package entry.

    Requires non-empty string ``package_id``, ``kind`` and ``title``. The
    single-path keys are optional (absent or ``None`` is accepted); the
    list keys are optional, but when present must be non-empty arrays.
    Every referenced path must be an existing file under *repo_root*.
    """
    if not isinstance(pkg, dict):
        fail(f"{label} must be an object")
    require_non_empty_str(pkg, "package_id", label)
    require_non_empty_str(pkg, "kind", label)
    require_non_empty_str(pkg, "title", label)

    for key in (
        "canonical_matrix_json",
        "spreadsheet_csv",
    ):
        if key in pkg and pkg[key] is not None:
            rel = pkg[key]
            if not isinstance(rel, str) or not rel.strip():
                fail(f"{label}.{key} must be a non-empty string when set")
            check_repo_path(repo_root, rel.strip(), f"{label}.{key}")

    for list_key in (
        "human_readable_md",
        "policy_md",
        "entry_point_md",
        "index_md",
    ):
        if list_key not in pkg:
            continue
        items = pkg[list_key]
        if not isinstance(items, list) or not items:
            fail(f"{label}.{list_key} must be a non-empty array when present")
        for i, rel in enumerate(items):
            if not isinstance(rel, str) or not rel.strip():
                fail(f"{label}.{list_key}[{i}] must be a non-empty string")
            check_repo_path(repo_root, rel.strip(), f"{label}.{list_key}[{i}]")


def validate_jurisdiction_manifest(repo_root: Path, path: Path) -> None:
    """Validate a single jurisdiction manifest file.

    Checks the schema version, identity fields, the optional ISO code, the
    ``audit_engagements`` array (which may be empty), the non-empty
    ``regulators`` and ``compliance_packages`` arrays, and finally that the
    file stem equals the manifest's ``jurisdiction_id``. Aborts via
    :func:`fail` on the first violation.
    """
    data = load_json(path)
    if data.get("schema_version") != 1:
        fail(f"{path} schema_version must be 1")
    jid = require_non_empty_str(data, "jurisdiction_id", str(path))
    require_non_empty_str(data, "display_name", str(path))
    _check_iso_alpha2(data.get("iso_3166_alpha2"), str(path))

    engagements = data.get("audit_engagements")
    if not isinstance(engagements, list):
        fail(f"{path} audit_engagements must be an array")
    for i, eng in enumerate(engagements):
        el = f"{path}:audit_engagements[{i}]"
        if not isinstance(eng, dict):
            fail(f"{el} must be an object")
        # closure_matrix is optional; any falsy value is treated as "not set".
        if "closure_matrix" in eng and eng["closure_matrix"]:
            rel = eng["closure_matrix"]
            if not isinstance(rel, str):
                fail(f"{el}.closure_matrix must be a string")
            check_repo_path(repo_root, rel.strip(), f"{el}.closure_matrix")

    regulators = data.get("regulators")
    if not isinstance(regulators, list) or not regulators:
        fail(f"{path} regulators must be a non-empty array")
    for i, reg in enumerate(regulators):
        if not isinstance(reg, dict):
            fail(f"{path}:regulators[{i}] must be an object")
        require_non_empty_str(reg, "short", f"{path}:regulators[{i}]")
        require_non_empty_str(reg, "full_name", f"{path}:regulators[{i}]")

    packages = data.get("compliance_packages")
    if not isinstance(packages, list) or not packages:
        fail(f"{path} compliance_packages must be a non-empty array")
    for i, pkg in enumerate(packages):
        validate_package(repo_root, pkg, f"{path}:compliance_packages[{i}]")

    if path.stem != jid:
        fail(
            f"{path} file name must be {jid}.json (stem {path.stem!r} != jurisdiction_id {jid!r})"
        )


def main() -> int:
    """Validate the global index and every jurisdiction manifest it lists.

    The repository root is taken to be two directories above the directory
    containing this script. Returns 0 on success; any violation aborts via
    :func:`fail`.
    """
    repo_root = Path(__file__).resolve().parents[2]
    index_path = repo_root / "config/compliance-matrix/global-compliance-matrix-index.json"
    index = load_json(index_path)
    if index.get("schema_version") != 1:
        fail(f"{index_path} schema_version must be 1")
    require_non_empty_str(index, "title", str(index_path))
    require_non_empty_str(index, "matrix_version", str(index_path))

    rows = index.get("jurisdictions")
    if not isinstance(rows, list) or not rows:
        fail(f"{index_path} jurisdictions must be a non-empty array")

    seen_ids: set[str] = set()
    for i, row in enumerate(rows):
        label = f"{index_path}:jurisdictions[{i}]"
        if not isinstance(row, dict):
            fail(f"{label} must be an object")
        jid = require_non_empty_str(row, "jurisdiction_id", label)
        if jid in seen_ids:
            fail(f"{label} duplicate jurisdiction_id {jid}")
        seen_ids.add(jid)
        require_non_empty_str(row, "display_name", label)
        require_non_empty_str(row, "status", label)

        manifest_rel = require_non_empty_str(row, "jurisdiction_manifest", label)
        manifest_path = repo_root / manifest_rel
        if not manifest_path.is_file():
            fail(f"{label} jurisdiction_manifest not found: {manifest_rel}")

        iso = row.get("iso_3166_alpha2")
        _check_iso_alpha2(iso, label)

        # Cross-check the identity fields between the index row and the
        # manifest before running the full manifest validation.
        inner = load_json(manifest_path)
        if inner.get("jurisdiction_id") != jid:
            fail(f"{manifest_path} jurisdiction_id {inner.get('jurisdiction_id')!r} != index {jid!r}")
        inner_iso = inner.get("iso_3166_alpha2")
        if inner_iso != iso:
            fail(
                f"{manifest_path} iso_3166_alpha2 {inner_iso!r} must match index row {iso!r} "
                f"for jurisdiction_id {jid}"
            )
        validate_jurisdiction_manifest(repo_root, manifest_path)

    print(f"OK global compliance matrix index ({len(rows)} jurisdictions, manifests validated)")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())