# tests/test_agi_stack.py

"""Smoke tests for AGI stack: executive, memory, verification, world model, skills, multi-agent, governance, tooling."""

import pytest

from fusionagi.core import GoalManager, Scheduler, BlockersAndCheckpoints, SchedulerMode, FallbackMode
from fusionagi.schemas.goal import Goal, GoalBudget, GoalStatus, Blocker, Checkpoint
from fusionagi.memory import SemanticMemory, ProceduralMemory, TrustMemory, ConsolidationJob
from fusionagi.verification import OutcomeVerifier, ContradictionDetector, FormalValidators
from fusionagi.world_model import SimpleWorldModel, run_rollout
from fusionagi.schemas.plan import Plan, PlanStep
from fusionagi.skills import SkillLibrary, SkillInduction, SkillVersioning
from fusionagi.schemas.skill import Skill, SkillKind
from fusionagi.governance import AuditLog, PolicyEngine, IntentAlignment
from fusionagi.schemas.audit import AuditEventType
from fusionagi.multi_agent import consensus_vote, arbitrate
from fusionagi.agents import AdversarialReviewerAgent
from fusionagi.tools import DocsConnector, DBConnector, CodeRunnerConnector


class TestExecutive:
    """Smoke checks for GoalManager budgets, Scheduler retries, and BlockersAndCheckpoints."""

    def test_goal_manager_budget(self):
        """Expect a goal with a 10.0 time budget to be within budget after 5.0 is
        recorded and over budget once a further 10.0 is recorded."""
        manager = GoalManager()
        budget = GoalBudget(time_seconds=10.0, compute_budget=100.0)
        goal = Goal(goal_id="g1", objective="Test", budget=budget)
        manager.add_goal(goal)

        registered = manager.get_goal("g1")
        assert registered is not None

        manager.record_time("g1", 5.0)
        over_after_first = manager.is_over_budget("g1")
        assert not over_after_first

        manager.record_time("g1", 10.0)
        over_after_second = manager.is_over_budget("g1")
        assert over_after_second

    def test_scheduler_fallback(self):
        """Expect retries to be allowed until two are recorded (max_retries_per_step=2),
        after which next_fallback yields FallbackMode.RETRY."""
        task_id, step_id = "t1", "s1"
        scheduler = Scheduler(default_mode=SchedulerMode.ACT, max_retries_per_step=2)

        assert scheduler.next_mode(task_id, step_id) == SchedulerMode.ACT
        assert scheduler.should_retry(task_id, step_id)

        # Record exactly as many retries as the configured per-step maximum.
        for _ in range(2):
            scheduler.record_retry(task_id, step_id)
        assert not scheduler.should_retry(task_id, step_id)

        fallback = scheduler.next_fallback(task_id)
        assert fallback == FallbackMode.RETRY

    def test_blockers_checkpoints(self):
        """Expect one blocker to be listed for the task and a latest checkpoint to exist
        after one of each has been added."""
        tracker = BlockersAndCheckpoints()

        blocker = Blocker(blocker_id="b1", task_id="t1", reason="Waiting")
        tracker.add_blocker(blocker)
        task_blockers = tracker.get_blockers("t1")
        assert len(task_blockers) == 1

        checkpoint = Checkpoint(checkpoint_id="c1", task_id="t1", step_ids_completed=["s1"])
        tracker.add_checkpoint(checkpoint)
        latest = tracker.get_latest_checkpoint("t1")
        assert latest is not None


class TestMemory:
    """Smoke checks for SemanticMemory, ProceduralMemory, and TrustMemory."""

    def test_semantic(self):
        """Expect a stored fact to be retrievable by id and to show up in a domain query."""
        memory = SemanticMemory()
        memory.add_fact("f1", "The sky is blue", domain="weather")

        stored = memory.get_fact("f1")
        assert stored["statement"] == "The sky is blue"

        weather_facts = memory.query(domain="weather")
        assert len(weather_facts) == 1

    def test_procedural_trust(self):
        """Expect an added skill to be found by name and a claim added as verified
        to be reported as verified."""
        procedural = ProceduralMemory()
        skill = Skill(skill_id="s1", name="Close month", description="Close month-end")
        procedural.add_skill(skill)
        found = procedural.get_skill_by_name("Close month")
        assert found is not None

        trust = TrustMemory()
        trust.add("c1", verified=True, source="test")
        verified = trust.is_verified("c1")
        assert verified


class TestVerification:
    """Smoke checks for OutcomeVerifier, ContradictionDetector, and FormalValidators."""

    def test_outcome_verifier(self):
        """Expect a {"result": ...} outcome to verify as True and an {"error": ...} outcome as False."""
        verifier = OutcomeVerifier()

        success_verdict = verifier.verify({"result": "ok"})
        assert success_verdict is True

        failure_verdict = verifier.verify({"error": "fail"})
        assert failure_verdict is False

    def test_contradiction_detector(self):
        """Expect check() on a fresh detector to return an empty list for a single statement."""
        detector = ContradictionDetector()
        findings = detector.check("It is not raining")
        assert findings == []

    def test_formal_validators(self):
        """Expect validate_json to report True as the first element of its result for a JSON object string."""
        validators = FormalValidators()
        # The second element of the returned pair is not inspected by this test.
        is_valid, _message = validators.validate_json('{"a": 1}')
        assert is_valid is True


class TestWorldModel:
    """Smoke check for run_rollout over a SimpleWorldModel."""

    def test_rollout(self):
        """Expect a two-step plan rolled out from an empty state to return True as the
        first value and a second value of length 2."""
        steps = [PlanStep(id="s1", description="Step 1"), PlanStep(id="s2", description="Step 2")]
        plan = Plan(steps=steps)
        model = SimpleWorldModel()

        # The third returned value is not inspected by this test.
        succeeded, transitions, _state = run_rollout(plan, {}, model)

        assert succeeded is True
        assert len(transitions) == 2


class TestSkills:
    """Smoke check for SkillLibrary, SkillInduction, and SkillVersioning."""

    def test_library_induction_versioning(self):
        """Expect a registered skill to be found by name, one trace to yield one
        candidate, and one recorded success to give success_count == 1."""
        library = SkillLibrary()
        skill = Skill(skill_id="s1", name="Routine", description="Test")
        library.register(skill)
        looked_up = library.get_by_name("Routine")
        assert looked_up is not None

        induction = SkillInduction()
        traces = [[{"step_id": "a", "tool": "t1"}]]
        proposed = induction.propose_from_traces(traces)
        assert len(proposed) == 1

        versioning = SkillVersioning()
        versioning.record_success("s1", 1)
        info = versioning.get_info("s1", 1)
        assert info.success_count == 1


class TestGovernance:
    """Smoke check for AuditLog, PolicyEngine, and IntentAlignment."""

    def test_audit_policy_intent(self):
        """Expect an appended audit event to return a truthy value and be listed under
        its task, and both the policy check and the intent check to report True."""
        log = AuditLog()
        event_id = log.append(AuditEventType.TOOL_CALL, "executor", action="run", task_id="t1")
        assert event_id
        task_events = log.get_by_task("t1")
        assert len(task_events) == 1

        # Only the first element of each check() result is asserted on.
        engine = PolicyEngine()
        is_allowed, _ = engine.check("run", {"tool_name": "read"})
        assert is_allowed is True

        alignment = IntentAlignment()
        is_aligned, _ = alignment.check("Summarize", "summarize the doc")
        assert is_aligned is True


class TestMultiAgent:
    """Smoke check for the consensus_vote and arbitrate helpers."""

    def test_consensus_arbitrate(self):
        """Expect consensus_vote to return "a" for ["a", "a", "b"] and arbitrate to
        return a proposal whose "plan" is "p1" for the two proposals given."""
        votes = ["a", "a", "b"]
        winner = consensus_vote(votes)
        assert winner == "a"

        proposals = [{"plan": "p1"}, {"plan": "p2"}]
        chosen = arbitrate(proposals)
        assert chosen["plan"] == "p1"


class TestConnectors:
    """Smoke check for DocsConnector, DBConnector, and CodeRunnerConnector."""

    def test_connectors(self):
        """Expect each connector's invoke() result to contain either an "error" key
        or the connector-specific key listed for it below."""
        # (connector class, action, arguments, key expected when no "error" is present)
        cases = (
            (DocsConnector, "read", {"path": "/x"}, "content"),
            (DBConnector, "query", {"query": "SELECT 1"}, "rows"),
            (CodeRunnerConnector, "run", {"code": "1+1", "language": "python"}, "stdout"),
        )
        # Each connector is constructed, invoked, and checked before the next one is built.
        for connector_cls, action, arguments, expected_key in cases:
            connector = connector_cls()
            result = connector.invoke(action, arguments)
            assert "error" in result or expected_key in result
Initial commit: add .gitignore and README 2026-02-09 21:51:42 -08:00			`"""Smoke tests for AGI stack: executive, memory, verification, world model, skills, multi-agent, governance, tooling."""`

			`import pytest`

			`from fusionagi.core import GoalManager, Scheduler, BlockersAndCheckpoints, SchedulerMode, FallbackMode`
			`from fusionagi.schemas.goal import Goal, GoalBudget, GoalStatus, Blocker, Checkpoint`
			`from fusionagi.memory import SemanticMemory, ProceduralMemory, TrustMemory, ConsolidationJob`
			`from fusionagi.verification import OutcomeVerifier, ContradictionDetector, FormalValidators`
			`from fusionagi.world_model import SimpleWorldModel, run_rollout`
			`from fusionagi.schemas.plan import Plan, PlanStep`
			`from fusionagi.skills import SkillLibrary, SkillInduction, SkillVersioning`
			`from fusionagi.schemas.skill import Skill, SkillKind`
			`from fusionagi.governance import AuditLog, PolicyEngine, IntentAlignment`
			`from fusionagi.schemas.audit import AuditEventType`
			`from fusionagi.multi_agent import consensus_vote, arbitrate`
			`from fusionagi.agents import AdversarialReviewerAgent`
			`from fusionagi.tools import DocsConnector, DBConnector, CodeRunnerConnector`


			`class TestExecutive:`
			`def test_goal_manager_budget(self):`
			`gm = GoalManager()`
			`g = Goal(goal_id="g1", objective="Test", budget=GoalBudget(time_seconds=10.0, compute_budget=100.0))`
			`gm.add_goal(g)`
			`assert gm.get_goal("g1") is not None`
			`gm.record_time("g1", 5.0)`
			`assert not gm.is_over_budget("g1")`
			`gm.record_time("g1", 10.0)`
			`assert gm.is_over_budget("g1")`

			`def test_scheduler_fallback(self):`
			`s = Scheduler(default_mode=SchedulerMode.ACT, max_retries_per_step=2)`
			`assert s.next_mode("t1", "s1") == SchedulerMode.ACT`
			`assert s.should_retry("t1", "s1")`
			`s.record_retry("t1", "s1")`
			`s.record_retry("t1", "s1")`
			`assert not s.should_retry("t1", "s1")`
			`fb = s.next_fallback("t1")`
			`assert fb == FallbackMode.RETRY`

			`def test_blockers_checkpoints(self):`
			`bc = BlockersAndCheckpoints()`
			`bc.add_blocker(Blocker(blocker_id="b1", task_id="t1", reason="Waiting"))`
			`assert len(bc.get_blockers("t1")) == 1`
			`bc.add_checkpoint(Checkpoint(checkpoint_id="c1", task_id="t1", step_ids_completed=["s1"]))`
			`assert bc.get_latest_checkpoint("t1") is not None`


			`class TestMemory:`
			`def test_semantic(self):`
			`sm = SemanticMemory()`
			`sm.add_fact("f1", "The sky is blue", domain="weather")`
			`assert sm.get_fact("f1")["statement"] == "The sky is blue"`
			`assert len(sm.query(domain="weather")) == 1`

			`def test_procedural_trust(self):`
			`pm = ProceduralMemory()`
			`sk = Skill(skill_id="s1", name="Close month", description="Close month-end")`
			`pm.add_skill(sk)`
			`assert pm.get_skill_by_name("Close month") is not None`
			`tm = TrustMemory()`
			`tm.add("c1", verified=True, source="test")`
			`assert tm.is_verified("c1")`


			`class TestVerification:`
			`def test_outcome_verifier(self):`
			`v = OutcomeVerifier()`
			`assert v.verify({"result": "ok"}) is True`
			`assert v.verify({"error": "fail"}) is False`

			`def test_contradiction_detector(self):`
			`d = ContradictionDetector()`
			`assert d.check("It is not raining") == []`

			`def test_formal_validators(self):`
			`fv = FormalValidators()`
			`ok, msg = fv.validate_json('{"a": 1}')`
			`assert ok is True`


			`class TestWorldModel:`
			`def test_rollout(self):`
			`plan = Plan(steps=[PlanStep(id="s1", description="Step 1"), PlanStep(id="s2", description="Step 2")])`
			`wm = SimpleWorldModel()`
			`ok, trans, state = run_rollout(plan, {}, wm)`
			`assert ok is True`
			`assert len(trans) == 2`


			`class TestSkills:`
			`def test_library_induction_versioning(self):`
			`lib = SkillLibrary()`
			`sk = Skill(skill_id="s1", name="Routine", description="Test")`
			`lib.register(sk)`
			`assert lib.get_by_name("Routine") is not None`
			`ind = SkillInduction()`
			`candidates = ind.propose_from_traces([[{"step_id": "a", "tool": "t1"}]])`
			`assert len(candidates) == 1`
			`ver = SkillVersioning()`
			`ver.record_success("s1", 1)`
			`assert ver.get_info("s1", 1).success_count == 1`


			`class TestGovernance:`
			`def test_audit_policy_intent(self):`
			`audit = AuditLog()`
			`eid = audit.append(AuditEventType.TOOL_CALL, "executor", action="run", task_id="t1")`
			`assert eid`
			`assert len(audit.get_by_task("t1")) == 1`
			`pe = PolicyEngine()`
			`allowed, _ = pe.check("run", {"tool_name": "read"})`
			`assert allowed is True`
			`ia = IntentAlignment()`
			`ok, _ = ia.check("Summarize", "summarize the doc")`
			`assert ok is True`


			`class TestMultiAgent:`
			`def test_consensus_arbitrate(self):`
			`out = consensus_vote(["a", "a", "b"])`
			`assert out == "a"`
			`prop = arbitrate([{"plan": "p1"}, {"plan": "p2"}])`
			`assert prop["plan"] == "p1"`


			`class TestConnectors:`
			`def test_connectors(self):`
			`doc = DocsConnector()`
			`r = doc.invoke("read", {"path": "/x"})`
			`assert "error" in r or "content" in r`
			`db = DBConnector()`
			`r = db.invoke("query", {"query": "SELECT 1"})`
			`assert "error" in r or "rows" in r`
			`code = CodeRunnerConnector()`
			`r = code.invoke("run", {"code": "1+1", "language": "python"})`
			`assert "error" in r or "stdout" in r`