Files
FusionAGI/fusionagi/reasoning/decomposition.py
defiQUG c052b07662
Some checks failed
Tests / test (3.10) (push) Has been cancelled
Tests / test (3.11) (push) Has been cancelled
Tests / test (3.12) (push) Has been cancelled
Tests / lint (push) Has been cancelled
Tests / docker (push) Has been cancelled
Initial commit: add .gitignore and README
2026-02-09 21:51:42 -08:00

208 lines
6.5 KiB
Python

"""Recursive semantic decomposition: split text into atomic units."""
from __future__ import annotations
import re
import uuid
from typing import Any
from fusionagi.reasoning.native import analyze_prompt
from fusionagi.schemas.atomic import (
AtomicSemanticUnit,
AtomicUnitType,
DecompositionResult,
RelationType,
SemanticRelation,
)
from fusionagi._logger import logger
def _make_unit_id(prefix: str = "asu") -> str:
"""Generate unique unit ID."""
return f"{prefix}_{uuid.uuid4().hex[:12]}"
def _is_atomic(text: str, min_words: int = 3) -> bool:
"""Check if text is irreducible (atomic)."""
content = " ".join(text.split()).strip()
if not content or len(content) < 10:
return True
words = len(content.split())
return words <= min_words
def _extract_questions(text: str) -> list[str]:
"""Extract explicit questions from text."""
questions: list[str] = []
content = " ".join(text.split()).strip()
q_parts = re.split(r"\?+", content)
for part in q_parts[:-1]:
q = part.strip()
if len(q) > 10:
questions.append(q + "?")
if not questions and any(w in content.lower() for w in ["how", "what", "why", "when", "where", "who"]):
questions.append(content)
return questions[:5]
def _extract_constraints(text: str) -> list[str]:
"""Extract constraint signals from text."""
constraints: list[str] = []
patterns = [
r"must\s+(\w[\w\s]+?)(?:\.|$)",
r"should\s+(\w[\w\s]+?)(?:\.|$)",
r"cannot\s+(\w[\w\s]+?)(?:\.|$)",
r"require[sd]?\s+(\w[\w\s]+?)(?:\.|$)",
r"constraint[s]?:\s*(\w[\w\s]+?)(?:\.|$)",
r"assume[sd]?\s+(\w[\w\s]+?)(?:\.|$)",
]
for pat in patterns:
for m in re.finditer(pat, text, re.I):
constraints.append(m.group(1).strip())
return list(dict.fromkeys(constraints))[:10]
def _extract_entities(text: str) -> list[str]:
"""Extract entity-like phrases."""
entities = re.findall(r'"([^"]+)"', text)
entities += re.findall(r"\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b", text)
return list(dict.fromkeys(e for e in entities if len(e) > 2))[:10]
def decompose_recursive(
    text: str,
    max_depth: int = 3,
    parent_id: str | None = None,
    current_depth: int = 0,
    source_ref: str | None = None,
) -> DecompositionResult:
    """
    Recursively decompose text into atomic semantic units.

    Extracts entities, constraints, intents, assumptions, questions; recurses
    on non-atomic segments. Integrates with native analyze_prompt for intent
    and domain signals.

    Args:
        text: Input text to decompose.
        max_depth: Maximum recursion depth.
        parent_id: Parent unit ID for decomposition tree.
        current_depth: Current recursion depth.
        source_ref: Optional source reference.

    Returns:
        DecompositionResult with units and relations.
    """
    content = " ".join(text.split()).strip()
    if not content:
        return DecompositionResult(units=[], relations=[], depth=current_depth)

    units: list[AtomicSemanticUnit] = []
    relations: list[SemanticRelation] = []
    analysis = analyze_prompt(content)

    # Root unit representing this whole segment; content is previewed at 500 chars.
    root_id = _make_unit_id()
    preview = content if len(content) <= 500 else content[:500] + "..."
    root_type = (
        AtomicUnitType.INTENT if analysis.intent == "question" else AtomicUnitType.FACT
    )
    units.append(
        AtomicSemanticUnit(
            unit_id=root_id,
            content=preview,
            type=root_type,
            confidence=0.8,
            parent_id=parent_id,
            source_ref=source_ref,
            metadata={"intent": analysis.intent},
        )
    )
    if parent_id:
        relations.append(
            SemanticRelation(
                from_id=parent_id, to_id=root_id, relation_type=RelationType.LOGICAL
            )
        )

    def _attach(child_text: str, unit_type: AtomicUnitType, confidence: float) -> None:
        # Register one child unit under the root plus its linking relation.
        child_id = _make_unit_id()
        units.append(
            AtomicSemanticUnit(
                unit_id=child_id,
                content=child_text,
                type=unit_type,
                confidence=confidence,
                parent_id=root_id,
                source_ref=source_ref,
            )
        )
        relations.append(
            SemanticRelation(
                from_id=root_id, to_id=child_id, relation_type=RelationType.LOGICAL
            )
        )

    # Surface questions, constraints, and entities as atomic children.
    for question in _extract_questions(content):
        _attach(question, AtomicUnitType.QUESTION, 0.9)
    for constraint in _extract_constraints(content):
        _attach(constraint, AtomicUnitType.CONSTRAINT, 0.85)
    for entity in _extract_entities(content):
        _attach(entity, AtomicUnitType.FACT, 0.9)

    # Recurse per sentence while the segment is divisible and depth remains.
    if current_depth < max_depth and not _is_atomic(content, min_words=8):
        sentences = re.split(r"[.!?]\s+", content)
        if len(sentences) > 1:
            for raw in sentences:
                sentence = raw.strip()
                if len(sentence) <= 20:
                    continue
                child = decompose_recursive(
                    sentence,
                    max_depth=max_depth,
                    parent_id=root_id,
                    current_depth=current_depth + 1,
                    source_ref=source_ref,
                )
                units.extend(child.units)
                relations.extend(child.relations)

    # Keep only the first occurrence of each unit_id.
    seen_ids: set[str] = set()
    unique_units: list[AtomicSemanticUnit] = []
    for unit in units:
        if unit.unit_id in seen_ids:
            continue
        seen_ids.add(unit.unit_id)
        unique_units.append(unit)

    logger.debug(
        "Decomposition complete",
        extra={"depth": current_depth, "units": len(unique_units), "relations": len(relations)},
    )
    return DecompositionResult(
        units=unique_units,
        relations=relations,
        depth=current_depth,
    )