138 lines
5.2 KiB
Python
138 lines
5.2 KiB
Python
|
|
"""Smoke tests for AGI stack: executive, memory, verification, world model, skills, multi-agent, governance, tooling."""
|
||
|
|
|
||
|
|
import pytest
|
||
|
|
|
||
|
|
from fusionagi.core import GoalManager, Scheduler, BlockersAndCheckpoints, SchedulerMode, FallbackMode
|
||
|
|
from fusionagi.schemas.goal import Goal, GoalBudget, GoalStatus, Blocker, Checkpoint
|
||
|
|
from fusionagi.memory import SemanticMemory, ProceduralMemory, TrustMemory, ConsolidationJob
|
||
|
|
from fusionagi.verification import OutcomeVerifier, ContradictionDetector, FormalValidators
|
||
|
|
from fusionagi.world_model import SimpleWorldModel, run_rollout
|
||
|
|
from fusionagi.schemas.plan import Plan, PlanStep
|
||
|
|
from fusionagi.skills import SkillLibrary, SkillInduction, SkillVersioning
|
||
|
|
from fusionagi.schemas.skill import Skill, SkillKind
|
||
|
|
from fusionagi.governance import AuditLog, PolicyEngine, IntentAlignment
|
||
|
|
from fusionagi.schemas.audit import AuditEventType
|
||
|
|
from fusionagi.multi_agent import consensus_vote, arbitrate
|
||
|
|
from fusionagi.agents import AdversarialReviewerAgent
|
||
|
|
from fusionagi.tools import DocsConnector, DBConnector, CodeRunnerConnector
|
||
|
|
|
||
|
|
|
||
|
|
class TestExecutive:
    """Smoke checks for GoalManager, Scheduler and BlockersAndCheckpoints."""

    def test_goal_manager_budget(self):
        """Register a goal with a 10.0 s time budget and check the over-budget flag."""
        manager = GoalManager()
        budget = GoalBudget(time_seconds=10.0, compute_budget=100.0)
        goal = Goal(goal_id="g1", objective="Test", budget=budget)
        manager.add_goal(goal)
        stored_goal = manager.get_goal("g1")
        assert stored_goal is not None

        # After recording 5.0 the goal is expected to still be within budget.
        manager.record_time("g1", 5.0)
        over_after_first = manager.is_over_budget("g1")
        assert not over_after_first

        # After recording a further 10.0 the goal is expected to be over budget.
        manager.record_time("g1", 10.0)
        over_after_second = manager.is_over_budget("g1")
        assert over_after_second

    def test_scheduler_fallback(self):
        """Use up the two allowed retries for a step, then ask for a fallback."""
        scheduler = Scheduler(default_mode=SchedulerMode.ACT, max_retries_per_step=2)
        task_id = "t1"
        step_id = "s1"

        assert scheduler.next_mode(task_id, step_id) == SchedulerMode.ACT
        assert scheduler.should_retry(task_id, step_id)

        # Two recorded retries match max_retries_per_step=2 above.
        for _ in range(2):
            scheduler.record_retry(task_id, step_id)
        assert not scheduler.should_retry(task_id, step_id)

        fallback = scheduler.next_fallback(task_id)
        assert fallback == FallbackMode.RETRY

    def test_blockers_checkpoints(self):
        """Store one blocker and one checkpoint for a task and read them back."""
        tracker = BlockersAndCheckpoints()

        blocker = Blocker(blocker_id="b1", task_id="t1", reason="Waiting")
        tracker.add_blocker(blocker)
        task_blockers = tracker.get_blockers("t1")
        assert len(task_blockers) == 1

        checkpoint = Checkpoint(checkpoint_id="c1", task_id="t1", step_ids_completed=["s1"])
        tracker.add_checkpoint(checkpoint)
        latest = tracker.get_latest_checkpoint("t1")
        assert latest is not None
|
||
|
|
|
||
|
|
|
||
|
|
class TestMemory:
    """Smoke checks for SemanticMemory, ProceduralMemory and TrustMemory."""

    def test_semantic(self):
        """Add a single fact and retrieve it by id and by domain."""
        memory = SemanticMemory()
        memory.add_fact("f1", "The sky is blue", domain="weather")

        fact = memory.get_fact("f1")
        assert fact["statement"] == "The sky is blue"

        weather_facts = memory.query(domain="weather")
        assert len(weather_facts) == 1

    def test_procedural_trust(self):
        """Store a skill in procedural memory and a verified entry in trust memory."""
        procedural = ProceduralMemory()
        skill = Skill(skill_id="s1", name="Close month", description="Close month-end")
        procedural.add_skill(skill)
        found_skill = procedural.get_skill_by_name("Close month")
        assert found_skill is not None

        trust = TrustMemory()
        trust.add("c1", verified=True, source="test")
        verified = trust.is_verified("c1")
        assert verified
|
||
|
|
|
||
|
|
|
||
|
|
class TestVerification:
    """Smoke checks for OutcomeVerifier, ContradictionDetector and FormalValidators."""

    def test_outcome_verifier(self):
        """A result payload is expected to verify; an error payload is not."""
        verifier = OutcomeVerifier()
        success_payload = {"result": "ok"}
        failure_payload = {"error": "fail"}

        success_verdict = verifier.verify(success_payload)
        assert success_verdict is True

        failure_verdict = verifier.verify(failure_payload)
        assert failure_verdict is False

    def test_contradiction_detector(self):
        """Checking a single statement is expected to return an empty list."""
        detector = ContradictionDetector()
        findings = detector.check("It is not raining")
        assert findings == []

    def test_formal_validators(self):
        """A well-formed JSON document is expected to validate."""
        validators = FormalValidators()
        # Only the first element of the returned pair is asserted on.
        is_valid, _message = validators.validate_json('{"a": 1}')
        assert is_valid is True
|
||
|
|
|
||
|
|
|
||
|
|
class TestWorldModel:
    """Smoke check for run_rollout with SimpleWorldModel."""

    def test_rollout(self):
        """Roll out a two-step plan from an empty state and expect two transitions."""
        first_step = PlanStep(id="s1", description="Step 1")
        second_step = PlanStep(id="s2", description="Step 2")
        plan = Plan(steps=[first_step, second_step])
        world_model = SimpleWorldModel()

        # The third element of the returned triple is not asserted on.
        succeeded, transitions, _final_state = run_rollout(plan, {}, world_model)

        assert succeeded is True
        assert len(transitions) == 2
|
||
|
|
|
||
|
|
|
||
|
|
class TestSkills:
    """Smoke check for SkillLibrary, SkillInduction and SkillVersioning."""

    def test_library_induction_versioning(self):
        """Register a skill, propose candidates from one trace, and record a success."""
        library = SkillLibrary()
        skill = Skill(skill_id="s1", name="Routine", description="Test")
        library.register(skill)
        registered = library.get_by_name("Routine")
        assert registered is not None

        induction = SkillInduction()
        single_trace = [{"step_id": "a", "tool": "t1"}]
        candidates = induction.propose_from_traces([single_trace])
        assert len(candidates) == 1

        versioning = SkillVersioning()
        versioning.record_success("s1", 1)
        version_info = versioning.get_info("s1", 1)
        assert version_info.success_count == 1
|
||
|
|
|
||
|
|
|
||
|
|
class TestGovernance:
    """Smoke check for AuditLog, PolicyEngine and IntentAlignment."""

    def test_audit_policy_intent(self):
        """Append one audit event, then run one policy check and one intent check."""
        audit_log = AuditLog()
        event_id = audit_log.append(
            AuditEventType.TOOL_CALL,
            "executor",
            action="run",
            task_id="t1",
        )
        assert event_id
        task_events = audit_log.get_by_task("t1")
        assert len(task_events) == 1

        # Both checks return a pair; only the first element is asserted on.
        policy_engine = PolicyEngine()
        allowed, _ = policy_engine.check("run", {"tool_name": "read"})
        assert allowed is True

        intent_alignment = IntentAlignment()
        aligned, _ = intent_alignment.check("Summarize", "summarize the doc")
        assert aligned is True
|
||
|
|
|
||
|
|
|
||
|
|
class TestMultiAgent:
    """Smoke check for consensus_vote and arbitrate."""

    def test_consensus_arbitrate(self):
        """Expect "a" to win a 2-to-1 vote and the first proposal to be picked."""
        votes = ["a", "a", "b"]
        winner = consensus_vote(votes)
        assert winner == "a"

        proposals = [{"plan": "p1"}, {"plan": "p2"}]
        chosen = arbitrate(proposals)
        assert chosen["plan"] == "p1"
|
||
|
|
|
||
|
|
|
||
|
|
class TestConnectors:
    """Smoke check for DocsConnector, DBConnector and CodeRunnerConnector."""

    def test_connectors(self):
        """Invoke each connector once; accept either an error or a payload key."""
        docs_connector = DocsConnector()
        docs_result = docs_connector.invoke("read", {"path": "/x"})
        assert "error" in docs_result or "content" in docs_result

        db_connector = DBConnector()
        db_result = db_connector.invoke("query", {"query": "SELECT 1"})
        assert "error" in db_result or "rows" in db_result

        code_runner = CodeRunnerConnector()
        run_request = {"code": "1+1", "language": "python"}
        run_result = code_runner.invoke("run", run_request)
        assert "error" in run_result or "stdout" in run_result
|