Files
opus-orchestrator-ai/opus_orchestrator/nonfiction/intake.py
T
mrhavens 8cf833c729 feat: Content-Based Purpose Inference
Added content_infer.py - analyzes existing content to infer purpose:

- ContentPurposeInferer class
- Analyzes blog posts, articles, text
- Detects signals: tutorials, explainers, transformation stories, etc.
- Returns purpose, confidence, reasoning

Updated intake.py to weight all signals:
1. Explicit flags (weight: 1.0)
2. Content inference (weight: 0.4) - NEW
3. Keyword classification (weight: 0.3)
4. Conversational (weight: 0.5)

Now if you point at a blog:
- Tutorial posts → LEARN_HANDS_ON
- Explainers → UNDERSTAND
- Transformation stories → TRANSFORM
- Reviews/Comparisons → DECIDE
- Reference docs → REFERENCE
- Journey/Biography → BE_INSPIRED
2026-03-13 20:50:36 +00:00

383 lines
13 KiB
Python

"""Intake Agent for Nonfiction Book Classification.
A conversational agent that determines the reader purpose and best framework
by asking clarifying questions or using available signals.
This agent intelligently combines:
1. Explicit user flags (--purpose learn)
2. Keyword classification from concept
3. Content inference from existing blog/posts
4. Conversational intake (asking questions)
The agent weights all inputs to make the best decision.
"""
from dataclasses import dataclass, field
from enum import Enum
from typing import Optional
from opus_orchestrator.nonfiction.classifier import PurposeClassifier, ReaderPurpose
from opus_orchestrator.nonfiction.content_infer import ContentPurposeInferer, ContentAnalysis
from opus_orchestrator.nonfiction_taxonomy import (
select_framework,
get_frameworks_for_purpose,
NonfictionCategory,
)
class IntakeMode(str, Enum):
    """Strategy the intake agent uses to resolve the reader purpose.

    Members are also plain strings, so a mode can be built straight from
    its string value, e.g. ``IntakeMode("auto")``.
    """

    # Ask clarifying questions
    CONVERSATIONAL = "conversational"
    # Use the classifier only
    AUTO = "auto"
    # Trust explicit flags only
    EXPLICIT = "explicit"
@dataclass
class IntakeInput:
    """Bundle of every signal a caller may hand to the intake agent.

    All fields are optional; anything left at its default is treated as
    "not provided".
    """

    # --- Explicit flags (take priority when an explicit purpose is set) ---
    explicit_purpose: Optional[str] = None
    explicit_category: Optional[str] = None
    explicit_framework: Optional[str] = None

    # --- Concept description fed to the keyword classifier ---
    concept: str = ""
    target_audience: str = ""
    intended_outcome: str = ""

    # --- Existing material used for content-based inference ---
    content: str = ""
    content_title: str = ""
    blog_posts: list = field(default_factory=list)

    # --- Earlier question/answer pairs from a conversational session ---
    answers: dict[str, str] = field(default_factory=dict)
@dataclass
class IntakeResult:
    """Outcome of an intake run: the chosen purpose, framework and rationale."""

    # Reader purpose the agent settled on.
    purpose: ReaderPurpose
    # Confidence attached to that decision.
    confidence: float
    # Parsed category, or None when none was supplied or it did not parse.
    category: Optional[NonfictionCategory]
    # Framework returned by ``select_framework`` for the purpose/category.
    framework: dict
    # Human-readable explanation of how the decision was reached.
    reasoning: str
    # Which signal drove the decision:
    # "explicit" | "classifier" | "content" | "hybrid"
    source: str
    # Content inference output, when content was analyzed.
    content_analysis: Optional[ContentAnalysis] = None
    # Raw signals gathered along the way, kept for inspection.
    all_signals: dict = field(default_factory=dict)
class IntakeAgent:
    """Intelligent agent for determining book purpose and framework.

    This agent acts as a decision layer that:
    1. Respects explicit user choices (highest priority)
    2. Infers purpose from existing content when content is supplied
    3. Uses keyword classification of the concept
    4. Combines the content and keyword signals into a weighted decision

    NOTE(review): ``IntakeInput.answers`` is not read anywhere in this class,
    so previous conversational answers currently carry no weight.
    """

    # Questions for each purpose (for conversational mode)
    PURPOSE_QUESTIONS = {
        ReaderPurpose.LEARN_HANDS_ON: [
            "Should readers be able to DO something specific after reading?",
            "Is this about learning a skill or completing a project?",
            "Do you want step-by-step instructions?",
        ],
        ReaderPurpose.UNDERSTAND: [
            "Is the goal to GRASP a concept or theory?",
            "Do you want readers to understand how something works?",
            "Is this about building mental models?",
        ],
        ReaderPurpose.TRANSFORM: [
            "Is this about personal CHANGE or growth?",
            "Do you want readers to become different?",
            "Is this a self-help or motivational book?",
        ],
        ReaderPurpose.DECIDE: [
            "Is this helping readers MAKE A DECISION?",
            "Are you comparing options or choices?",
            "Do you want to help them choose between alternatives?",
        ],
        ReaderPurpose.REFERENCE: [
            "Is this a COMPREHENSIVE REFERENCE or manual?",
            "Will readers look up specific information?",
            "Is completeness more important than narrative?",
        ],
        ReaderPurpose.BE_INSPIRED: [
            "Is this an INSPIRATIONAL story or biography?",
            "Do you want readers to feel motivated?",
            "Is this about a journey or triumph?",
        ],
    }

    # Weight applied to the content-based inference confidence.
    CONTENT_INFERENCE_WEIGHT = 0.4
    # Weight applied to the keyword classifier confidence.
    KEYWORD_CLASSIFICATION_WEIGHT = 0.3
    # Content inference at or below this confidence is left out of scoring.
    MIN_CONTENT_CONFIDENCE = 0.3
    # Ceiling on the confidence reported for a weighted (non-explicit) decision.
    MAX_CONFIDENCE = 0.95
    # In conversational mode, a confidence below this asks for more input.
    LOW_CONFIDENCE_THRESHOLD = 0.4
    # Maximum number of questions returned when no purpose is specified.
    MAX_GENERAL_QUESTIONS = 5

    def __init__(self, llm_client=None):
        """Create the agent and its classifier / content-inference helpers.

        Args:
            llm_client: Optional client forwarded to ``PurposeClassifier``
                and kept on the instance.
        """
        self.classifier = PurposeClassifier(llm_client)
        self.content_inferer = ContentPurposeInferer()
        self.llm_client = llm_client

    async def process(self, intake: IntakeInput, mode: IntakeMode = IntakeMode.AUTO) -> IntakeResult:
        """Process intake and determine purpose and framework.

        Resolution order:
        1. An explicit purpose short-circuits everything else.
        2. In EXPLICIT mode without an explicit purpose, the low-confidence
           default result is returned.
        3. Otherwise content inference (weight ``CONTENT_INFERENCE_WEIGHT``)
           and keyword classification (weight
           ``KEYWORD_CLASSIFICATION_WEIGHT``) are scored per purpose and the
           highest-scoring purpose wins.

        Args:
            intake: All available input signals
            mode: How to resolve (conversational, auto, explicit)

        Returns:
            IntakeResult with purpose, framework, and reasoning
        """
        signals: dict = {}  # Track all signals for reasoning

        # Step 1: explicit flags (highest priority)
        if intake.explicit_purpose:
            return self._process_explicit(intake)
        if mode == IntakeMode.EXPLICIT:
            return self._need_more_info(intake)

        # Step 2: content inference (blog posts take precedence over raw content)
        content_result = None
        if intake.blog_posts:
            content_result = self.content_inferer.infer_from_blog(intake.blog_posts)
        elif intake.content:
            content_result = self.content_inferer.analyze(
                intake.content,
                title=intake.content_title,
            )
        if intake.content or intake.blog_posts:
            signals["content"] = content_result

        # Step 3: keyword classification from concept
        classifier_result = self._classify_concept(intake)
        signals["concept"] = classifier_result

        # Step 4: weighted decision. ``contributors`` records which signals
        # actually added score to each purpose so the reported source matches
        # what decided the winner.
        purpose_scores: dict[ReaderPurpose, float] = {p: 0.0 for p in ReaderPurpose}
        contributors: dict[ReaderPurpose, set] = {p: set() for p in ReaderPurpose}

        if content_result and content_result.confidence > self.MIN_CONTENT_CONFIDENCE:
            purpose_scores[content_result.purpose] += (
                content_result.confidence * self.CONTENT_INFERENCE_WEIGHT
            )
            contributors[content_result.purpose].add("content")

        classifier_score = classifier_result.confidence * self.KEYWORD_CLASSIFICATION_WEIGHT
        purpose_scores[classifier_result.purpose] += classifier_score
        if classifier_score > 0:
            contributors[classifier_result.purpose].add("classifier")

        best_purpose = max(purpose_scores, key=purpose_scores.get)
        best_score = purpose_scores[best_purpose]
        if best_score <= 0:
            # Nothing scored: max() would just return the first enum member,
            # so fall back to the classifier's own pick instead.
            best_purpose = classifier_result.purpose

        confidence = min(self.MAX_CONFIDENCE, best_score)

        # If confidence is low and in conversational mode, ask questions
        if confidence < self.LOW_CONFIDENCE_THRESHOLD and mode == IntakeMode.CONVERSATIONAL:
            return self._need_more_info(intake)

        # Label the source by the signals that backed the winning purpose.
        winning_signals = contributors[best_purpose]
        if winning_signals == {"content", "classifier"}:
            source = "hybrid"
        elif winning_signals == {"content"}:
            source = "content"
        else:
            source = "classifier"

        category = self._parse_category(intake.explicit_category)
        framework = select_framework(
            purpose=best_purpose,
            category=category,
            # Honor an explicitly requested framework even without a purpose flag.
            user_preferred_framework=intake.explicit_framework,
        )

        reasons = []
        if content_result:
            reasons.append(f"content: {content_result.reasoning}")
        reasons.append(f"concept: {classifier_result.reasoning}")

        return IntakeResult(
            purpose=best_purpose,
            confidence=confidence,
            category=category,
            framework=framework,
            reasoning=" | ".join(reasons),
            source=source,
            content_analysis=content_result,
            all_signals=signals,
        )

    @staticmethod
    def _parse_category(raw_category: Optional[str]) -> Optional[NonfictionCategory]:
        """Convert a user-supplied category string to a NonfictionCategory.

        Returns None when nothing was supplied or the value is not a valid
        category; an unrecognized category is deliberately non-fatal.
        """
        if not raw_category:
            return None
        try:
            return NonfictionCategory(raw_category.strip().lower())
        except ValueError:
            return None

    def _classify_concept(self, intake: IntakeInput):
        """Run the keyword classifier over the concept fields of ``intake``."""
        return self.classifier._keyword_classify(
            concept=intake.concept,
            target_audience=intake.target_audience,
            intended_outcome=intake.intended_outcome,
        )

    def _process_explicit(self, intake: IntakeInput) -> IntakeResult:
        """Process when user provided explicit purpose.

        An explicit purpose that is not a valid ``ReaderPurpose`` value falls
        back to keyword classification of the concept.
        """
        try:
            purpose = ReaderPurpose(intake.explicit_purpose.strip().lower())
        except ValueError:
            # Invalid purpose, fall back to classifier
            return self._process_auto(intake)

        category = self._parse_category(intake.explicit_category)
        framework = select_framework(
            purpose=purpose,
            category=category,
            user_preferred_framework=intake.explicit_framework,
        )
        return IntakeResult(
            purpose=purpose,
            confidence=1.0,
            category=category,
            framework=framework,
            reasoning=f"Explicit user selection: {intake.explicit_purpose}",
            source="explicit",
            content_analysis=None,
            all_signals={"explicit": intake.explicit_purpose},
        )

    def _process_auto(self, intake: IntakeInput) -> IntakeResult:
        """Auto-classify from concept."""
        result = self._classify_concept(intake)
        return self._build_result_from_classification(intake, result, "classifier")

    def _build_result_from_classification(
        self,
        intake: IntakeInput,
        classifier_result,
        source: str,
    ) -> IntakeResult:
        """Build result from classification.

        Args:
            intake: Original input; supplies the optional category/framework.
            classifier_result: Object exposing ``purpose``, ``confidence``
                and ``reasoning``.
            source: Label stored on the result's ``source`` field.
        """
        purpose = classifier_result.purpose
        category = self._parse_category(intake.explicit_category)
        framework = select_framework(
            purpose=purpose,
            category=category,
            # Keep the user's framework preference on the fallback path too.
            user_preferred_framework=intake.explicit_framework,
        )
        return IntakeResult(
            purpose=purpose,
            confidence=classifier_result.confidence,
            category=category,
            framework=framework,
            reasoning=classifier_result.reasoning,
            source=source,
        )

    def _need_more_info(self, intake: IntakeInput) -> IntakeResult:
        """Return the low-confidence default used when input is ambiguous.

        No questions are asked here yet: the result defaults to UNDERSTAND
        with confidence 0.3 and source "intake". ``intake`` is currently
        unused.
        """
        return IntakeResult(
            purpose=ReaderPurpose.UNDERSTAND,
            confidence=0.3,
            category=None,
            framework=select_framework(purpose=ReaderPurpose.UNDERSTAND),
            reasoning="Input ambiguous - defaulted to UNDERSTAND. Use --purpose flag for explicit selection.",
            source="intake",
            content_analysis=None,
            all_signals={},
        )

    def get_questions(self, purpose: Optional[ReaderPurpose] = None) -> list[str]:
        """Get clarifying questions for a purpose.

        Args:
            purpose: The purpose to get questions for, or None for general

        Returns:
            A new list of questions. For a known purpose, all of its
            questions; otherwise the first ``MAX_GENERAL_QUESTIONS``
            questions in declaration order.
        """
        if purpose and purpose in self.PURPOSE_QUESTIONS:
            # Copy so callers cannot mutate the shared class-level list.
            return list(self.PURPOSE_QUESTIONS[purpose])

        questions = [
            question
            for q_list in self.PURPOSE_QUESTIONS.values()
            for question in q_list
        ]
        return questions[: self.MAX_GENERAL_QUESTIONS]

    def get_available_purposes(self) -> list[str]:
        """Get list of available purpose options for menu."""
        return [p.value for p in ReaderPurpose]

    def get_available_categories(self) -> list[str]:
        """Get list of available category options."""
        return [c.value for c in NonfictionCategory]
# Convenience function
async def determine_intake(
    concept: str = "",
    purpose: Optional[str] = None,
    category: Optional[str] = None,
    framework: Optional[str] = None,
    target_audience: str = "",
    intended_outcome: str = "",
    mode: str = "auto",
    content: str = "",
    content_title: str = "",
    blog_posts: Optional[list] = None,
) -> IntakeResult:
    """Convenience function to process intake.

    Builds an ``IntakeInput`` from the arguments, creates a fresh
    ``IntakeAgent`` (without an LLM client) and runs ``process`` on it.

    Args:
        concept: Book concept/title
        purpose: Explicit purpose (overrides classification)
        category: Explicit category
        framework: Explicit framework
        target_audience: Target audience description
        intended_outcome: What the book achieves
        mode: "auto", "conversational", or "explicit" (case-insensitive,
            surrounding whitespace ignored)
        content: Existing text to use for content-based inference
        content_title: Title accompanying ``content``
        blog_posts: Existing blog posts to use for content-based inference

    Returns:
        IntakeResult with purpose, framework, etc.

    Raises:
        ValueError: If ``mode`` is not a valid ``IntakeMode`` value.
    """
    intake = IntakeInput(
        explicit_purpose=purpose,
        explicit_category=category,
        explicit_framework=framework,
        concept=concept,
        target_audience=target_audience,
        intended_outcome=intended_outcome,
        content=content,
        content_title=content_title,
        # Copy so the input object never aliases the caller's list.
        blog_posts=list(blog_posts) if blog_posts else [],
    )
    agent = IntakeAgent()
    # Normalize so values such as "Auto" or " explicit " are accepted.
    mode_enum = IntakeMode(mode.strip().lower())
    return await agent.process(intake, mode_enum)