feat: Content-Based Purpose Inference

Added content_infer.py - analyzes existing content to infer purpose:

- ContentPurposeInferer class
- Analyzes blog posts, articles, text
- Detects signals: tutorials, explainers, transformation stories, etc.
- Returns purpose, confidence, reasoning

Updated intake.py to weight all signals:
1. Explicit flags (weight: 1.0)
2. Content inference (weight: 0.4) - NEW
3. Keyword classification (weight: 0.3)
4. Conversational (weight: 0.5)

Now if you point at a blog:
- Tutorial posts → LEARN_HANDS_ON
- Explainers → UNDERSTAND
- Transformation stories → TRANSFORM
- Reviews/Comparisons → DECIDE
- Reference docs → REFERENCE
- Journey/Biography → BE_INSPIRED
This commit is contained in:
2026-03-13 20:50:36 +00:00
parent b46e87ff76
commit 8cf833c729
3 changed files with 320 additions and 12 deletions
+102 -12
View File
@@ -6,7 +6,8 @@ by asking clarifying questions or using available signals.
This agent intelligently combines:
1. Explicit user flags (--purpose learn)
2. Keyword classification from concept
3. Conversational intake (asking questions)
3. Content inference from existing blog/posts
4. Conversational intake (asking questions)
The agent weights all inputs to make the best decision.
"""
@@ -16,6 +17,7 @@ from enum import Enum
from typing import Optional
from opus_orchestrator.nonfiction.classifier import PurposeClassifier, ReaderPurpose
from opus_orchestrator.nonfiction.content_infer import ContentPurposeInferer, ContentAnalysis
from opus_orchestrator.nonfiction_taxonomy import (
select_framework,
get_frameworks_for_purpose,
@@ -43,7 +45,12 @@ class IntakeInput:
target_audience: str = ""
intended_outcome: str = ""
# Option 3: Previous Q&A (if conversational)
# Option 3: Existing content (for inference)
content: str = ""
content_title: str = ""
blog_posts: list = field(default_factory=list)
# Option 4: Previous Q&A (if conversational)
answers: dict[str, str] = field(default_factory=dict)
@@ -55,7 +62,9 @@ class IntakeResult:
category: Optional[NonfictionCategory]
framework: dict
reasoning: str
source: str # "explicit" | "classifier" | "intake" | "hybrid"
source: str # "explicit" | "classifier" | "content" | "hybrid"
content_analysis: Optional[ContentAnalysis] = None
all_signals: dict = field(default_factory=dict)
class IntakeAgent:
@@ -102,13 +111,23 @@ class IntakeAgent:
],
}
# Content inference
CONTENT_INFERENCE_WEIGHT = 0.4 # Weight for content-based inference
def __init__(self, llm_client=None):
self.classifier = PurposeClassifier(llm_client)
self.content_inferer = ContentPurposeInferer()
self.llm_client = llm_client
async def process(self, intake: IntakeInput, mode: IntakeMode = IntakeMode.AUTO) -> IntakeResult:
"""Process intake and determine purpose and framework.
All signals are weighted:
1. Explicit flags (weight: 1.0) - highest priority
2. Content inference (weight: 0.4) - from existing blog/posts
3. Keyword classification (weight: 0.3) - from concept
4. Conversational (weight: 0.5) - from Q&A
Args:
intake: All available input signals
mode: How to resolve (conversational, auto, explicit)
@@ -116,31 +135,98 @@ class IntakeAgent:
Returns:
IntakeResult with purpose, framework, and reasoning
"""
signals = {} # Track all signals for reasoning
# Step 1: Check explicit flags (highest priority)
if intake.explicit_purpose:
return self._process_explicit(intake)
# Step 2: Use classifier for clear cases
if mode == IntakeMode.EXPLICIT:
return self._need_more_info(intake)
# Step 3: Auto-classify from concept
# Step 2: Content inference (if content provided)
content_result = None
if intake.content or intake.blog_posts:
if intake.blog_posts:
content_result = self.content_inferer.infer_from_blog(intake.blog_posts)
elif intake.content:
content_result = self.content_inferer.analyze(
intake.content,
title=intake.content_title
)
signals["content"] = content_result
# Step 3: Keyword classification from concept
classifier_result = self.classifier._keyword_classify(
concept=intake.concept,
target_audience=intake.target_audience,
intended_outcome=intake.intended_outcome,
)
signals["concept"] = classifier_result
# If high confidence, use it
if classifier_result.confidence >= 0.7:
return self._build_result_from_classification(intake, classifier_result, "classifier")
# Step 4: WEIGHTED DECISION - combine signals
purpose_scores: dict[ReaderPurpose, float] = {p: 0.0 for p in ReaderPurpose}
# Step 4: If conversational and low confidence, ask questions
if mode == IntakeMode.CONVERSATIONAL and classifier_result.confidence < 0.5:
# Add content inference (if available)
if content_result and content_result.confidence > 0.3:
purpose_scores[content_result.purpose] += (
content_result.confidence * self.CONTENT_INFERENCE_WEIGHT
)
# Add classifier result
purpose_scores[classifier_result.purpose] += (
classifier_result.confidence * 0.3
)
# Find winning purpose
best_purpose = max(purpose_scores, key=purpose_scores.get)
best_score = purpose_scores[best_purpose]
# Calculate final confidence
confidence = min(0.95, best_score)
# If confidence is low and in conversational mode, ask questions
if confidence < 0.4 and mode == IntakeMode.CONVERSATIONAL:
return self._need_more_info(intake)
# Step 5: Fall back to classification even with medium confidence
return self._build_result_from_classification(intake, classifier_result, "classifier")
# Determine source
if content_result and content_result.confidence > 0.5:
source = "content"
elif content_result and classifier_result.confidence > 0.3:
source = "hybrid"
else:
source = "classifier"
# Get category from input
category = None
if intake.explicit_category:
try:
category = NonfictionCategory(intake.explicit_category.lower())
except ValueError:
pass
# Select framework
framework = select_framework(
purpose=best_purpose,
category=category,
)
# Build reasoning
reasons = []
if content_result:
reasons.append(f"content: {content_result.reasoning}")
reasons.append(f"concept: {classifier_result.reasoning}")
return IntakeResult(
purpose=best_purpose,
confidence=confidence,
category=category,
framework=framework,
reasoning=" | ".join(reasons),
source=source,
content_analysis=content_result,
all_signals=signals,
)
def _process_explicit(self, intake: IntakeInput) -> IntakeResult:
"""Process when user provided explicit purpose."""
@@ -171,6 +257,8 @@ class IntakeAgent:
framework=framework,
reasoning=f"Explicit user selection: {intake.explicit_purpose}",
source="explicit",
content_analysis=None,
all_signals={"explicit": intake.explicit_purpose},
)
def _process_auto(self, intake: IntakeInput) -> IntakeResult:
@@ -224,6 +312,8 @@ class IntakeAgent:
framework=select_framework(purpose=ReaderPurpose.UNDERSTAND),
reasoning="Input ambiguous - defaulted to UNDERSTAND. Use --purpose flag for explicit selection.",
source="intake",
content_analysis=None,
all_signals={},
)
def get_questions(self, purpose: Optional[ReaderPurpose] = None) -> list[str]: