feat: Content-Based Purpose Inference

Added content_infer.py - analyzes existing content to infer purpose:

- ContentPurposeInferer class
- Analyzes blog posts, articles, text
- Detects signals: tutorials, explainers, transformation stories, etc.
- Returns purpose, confidence, reasoning

Updated intake.py to weight all signals:
1. Explicit flags (weight: 1.0)
2. Content inference (weight: 0.4) - NEW
3. Keyword classification (weight: 0.3)
4. Conversational (weight: 0.5)

Now, if you point the intake at an existing blog, the purpose is inferred from its content:
- Tutorial posts → LEARN_HANDS_ON
- Explainers → UNDERSTAND
- Transformation stories → TRANSFORM
- Reviews/Comparisons → DECIDE
- Reference docs → REFERENCE
- Journey/Biography → BE_INSPIRED
This commit is contained in:
2026-03-13 20:50:36 +00:00
parent b46e87ff76
commit 8cf833c729
3 changed files with 320 additions and 12 deletions
+10
View File
@@ -3,6 +3,7 @@
Key components:
- classifier: Classifies user input into ReaderPurpose
- intake: Conversational intake agent for high-fidelity intent
- content_infer: Infers purpose from existing blog/content
"""
from opus_orchestrator.nonfiction.classifier import (
@@ -18,6 +19,11 @@ from opus_orchestrator.nonfiction.intake import (
IntakeMode,
determine_intake,
)
from opus_orchestrator.nonfiction.content_infer import (
ContentPurposeInferer,
ContentAnalysis,
infer_purpose_from_content,
)
__all__ = [
# Classifier
@@ -31,4 +37,8 @@ __all__ = [
"IntakeResult",
"IntakeMode",
"determine_intake",
# Content Inference
"ContentPurposeInferer",
"ContentAnalysis",
"infer_purpose_from_content",
]
@@ -0,0 +1,208 @@
"""Content-Based Purpose Inference.
Analyzes existing content to infer the reader purpose.
This allows the system to determine purpose from blog posts, articles, etc.
"""
import re
from dataclasses import dataclass
from typing import Optional

from opus_orchestrator.nonfiction.classifier import ReaderPurpose
@dataclass
class ContentAnalysis:
    """Outcome of a content-based purpose inference.

    Attributes:
        purpose: The reader purpose judged to fit the content best.
        confidence: Heuristic confidence attached to ``purpose``.
        reasoning: Short human-readable explanation of the decision.
        signals: Evidence collected during the analysis, as a dict.
    """

    purpose: ReaderPurpose
    confidence: float
    reasoning: str
    signals: dict
class ContentPurposeInferer:
    """Infers reader purpose from existing content.

    Analyzes blog posts, articles, or other content to determine
    what kind of book this content would become.

    The inference is a keyword heuristic: every purpose owns a list of
    indicator phrases (1.0 point each) and structure words (0.5 point each).
    Phrases are matched as whole words, case-insensitively, so that short
    indicators such as "api" or "top" do not fire inside unrelated words
    like "rapid" or "stop".
    """

    # Content patterns that indicate purpose
    CONTENT_SIGNALS = {
        ReaderPurpose.LEARN_HANDS_ON: {
            "indicators": [
                "step by step", "how to", "tutorial", "guide to",
                "instructions", "learn to", "course", "workshop",
                "example code", "exercise", "practice", "build a",
                "create a", "implement", "getting started",
            ],
            "structure": ["step", "chapter", "lesson", "module", "exercise"],
        },
        ReaderPurpose.UNDERSTAND: {
            "indicators": [
                "why", "explains", "understand", "concept of",
                "the nature of", "how it works", "mechanism",
                "deep dive", "analysis", "framework", "principles",
                "mental model", "theory", "psychology", "science",
            ],
            "structure": ["overview", "background", "core concepts", "implications"],
        },
        ReaderPurpose.TRANSFORM: {
            # The bare stopwords "from" and "to" used to be listed here. They
            # occur in virtually every English text, which handed TRANSFORM a
            # constant head start regardless of the content, so they were
            # dropped.
            "indicators": [
                "i was", "i became", "my journey", "transformation",
                "overcoming", "struggle", "breakthrough", "changed my life",
                "how i", "becoming", "awakening",
                "healing", "recovery", "manifest", "empower",
            ],
            "structure": ["before", "after", "journey", "struggle", "triumph"],
        },
        ReaderPurpose.DECIDE: {
            "indicators": [
                "compared to", "versus", "pros and cons", "should you",
                "which is better", "is it worth", "decision", "choose",
                "analysis", "recommendation", "best", "top", "ranking",
                "tradeoff", "evaluation", "case study",
            ],
            "structure": ["comparison", "versus", "pros", "cons", "verdict"],
        },
        ReaderPurpose.REFERENCE: {
            "indicators": [
                "reference", "documentation", "api", "specification",
                "manual", "handbook", "comprehensive", "complete guide",
                "all about", "definitive", "index", "table of contents",
            ],
            "structure": ["reference", "api", "syntax", "parameters", "examples"],
        },
        ReaderPurpose.BE_INSPIRED: {
            "indicators": [
                "story", "journey", "triumph", "against all odds",
                "inspiration", "motivation", "life lesson", "wisdom",
                "legacy", "calling", "warrior", "hero", "unstoppable",
            ],
            "structure": ["chapter one", "the beginning", "the end", "epilogue"],
        },
    }

    # Negative signals (reduce confidence): each hit subtracts 0.5 from the
    # score of the purpose it is listed under.
    NEGATION_PATTERNS = {
        ReaderPurpose.LEARN_HANDS_ON: ["theory", "why", "explain", "concept"],
        ReaderPurpose.TRANSFORM: ["reference", "documentation", "api"],
    }

    @staticmethod
    def _contains_phrase(text: str, phrase: str) -> bool:
        """Return True if *phrase* occurs in *text* as whole word(s).

        *text* is expected to be lower-cased already. A trailing plural
        ("s"/"es") on the last word is tolerated, and the words of a
        multi-word phrase may be separated by any run of whitespace.
        """
        words = phrase.lower().split()
        if not words:
            return False
        body = r"\s+".join(re.escape(word) for word in words)
        # Lookarounds instead of \b so phrases that start or end with a
        # non-word character still behave sensibly.
        pattern = rf"(?<!\w){body}(?:e?s)?(?!\w)"
        return re.search(pattern, text) is not None

    def analyze(
        self,
        content: str,
        title: str = "",
        meta_description: str = "",
    ) -> ContentAnalysis:
        """Analyze content to infer purpose.

        Args:
            content: The text content to analyze
            title: Title of the content
            meta_description: Meta description if available

        Returns:
            ContentAnalysis with inferred purpose. When nothing matches, the
            purpose is simply the first ``ReaderPurpose`` member and the
            confidence is 0.3, so callers should treat that result as
            "unknown".
        """
        # Combine all text; `or ""` keeps a stray None from being rendered
        # as the literal word "None".
        full_text = " ".join(
            (title or "", meta_description or "", content or "")
        ).lower()

        # Score each purpose
        scores: dict[ReaderPurpose, float] = {p: 0.0 for p in ReaderPurpose}
        signal_counts: dict[ReaderPurpose, list[str]] = {p: [] for p in ReaderPurpose}

        for purpose, patterns in self.CONTENT_SIGNALS.items():
            # Count indicator matches
            for indicator in patterns["indicators"]:
                if self._contains_phrase(full_text, indicator):
                    scores[purpose] += 1.0
                    signal_counts[purpose].append(indicator)

            # Structure words count for half an indicator
            for structure in patterns.get("structure", []):
                if self._contains_phrase(full_text, structure):
                    scores[purpose] += 0.5

        # Apply negations (reduce scores, never below zero)
        for purpose, negations in self.NEGATION_PATTERNS.items():
            for negation in negations:
                if self._contains_phrase(full_text, negation):
                    scores[purpose] = max(0.0, scores[purpose] - 0.5)

        # Normalize scores so they sum to 1
        total_score = sum(scores.values())
        if total_score > 0:
            normalized = {p: s / total_score for p, s in scores.items()}
        else:
            normalized = {p: 0.1 for p in ReaderPurpose}  # Uniform if no matches

        # Find best match (ties resolve to the first purpose in enum order)
        best_purpose = max(normalized, key=normalized.get)
        best_score = normalized[best_purpose]

        # Calculate confidence from the winner's share of the total score
        if best_score > 0.5:
            confidence = min(0.95, 0.5 + best_score * 0.5)
        elif best_score > 0.2:
            confidence = min(0.7, 0.3 + best_score * 0.4)
        else:
            confidence = 0.3

        # Build reasoning from the first few matched indicators
        signals = signal_counts[best_purpose]
        if signals:
            reasoning = f"Content signals: {', '.join(signals[:5])}"
        else:
            reasoning = "No strong signals - purpose unclear"

        return ContentAnalysis(
            purpose=best_purpose,
            confidence=confidence,
            reasoning=reasoning,
            signals={p.value: c for p, c in signal_counts.items() if c},
        )

    def infer_from_blog(self, blog_posts: list[dict]) -> ContentAnalysis:
        """Infer purpose from multiple blog posts.

        Args:
            blog_posts: List of dicts with 'title', 'content', 'excerpt'.
                Missing or None values are treated as empty strings.

        Returns:
            Aggregated ContentAnalysis
        """
        text_parts: list[str] = []
        titles: list[str] = []
        for post in blog_posts:
            text_parts.append(post.get("content") or "")
            text_parts.append(post.get("excerpt") or "")
            titles.append(post.get("title") or "")

        result = self.analyze(" ".join(text_parts), title="; ".join(titles))

        # With more than three posts, boost confidence slightly - but only
        # when some indicator actually matched. Boosting the no-match
        # fallback would make an arbitrary purpose look credible.
        if len(blog_posts) > 3 and result.signals:
            result.confidence = min(0.95, result.confidence + 0.1)

        return result
# Convenience function
def infer_purpose_from_content(
    content: str,
    title: str = "",
    meta_description: str = "",
) -> ContentAnalysis:
    """Infer the reader purpose of a single piece of content.

    Shortcut that builds a throwaway ``ContentPurposeInferer`` and runs
    its ``analyze`` method on the given text.

    Args:
        content: The text content
        title: Title of the content
        meta_description: Optional meta description

    Returns:
        ContentAnalysis with inferred purpose
    """
    return ContentPurposeInferer().analyze(
        content,
        title=title,
        meta_description=meta_description,
    )
+102 -12
View File
@@ -6,7 +6,8 @@ by asking clarifying questions or using available signals.
This agent intelligently combines:
1. Explicit user flags (--purpose learn)
2. Keyword classification from concept
3. Conversational intake (asking questions)
3. Content inference from existing blog/posts
4. Conversational intake (asking questions)
The agent weights all inputs to make the best decision.
"""
@@ -16,6 +17,7 @@ from enum import Enum
from typing import Optional
from opus_orchestrator.nonfiction.classifier import PurposeClassifier, ReaderPurpose
from opus_orchestrator.nonfiction.content_infer import ContentPurposeInferer, ContentAnalysis
from opus_orchestrator.nonfiction_taxonomy import (
select_framework,
get_frameworks_for_purpose,
@@ -43,7 +45,12 @@ class IntakeInput:
target_audience: str = ""
intended_outcome: str = ""
# Option 3: Previous Q&A (if conversational)
# Option 3: Existing content (for inference)
content: str = ""
content_title: str = ""
blog_posts: list = field(default_factory=list)
# Option 4: Previous Q&A (if conversational)
answers: dict[str, str] = field(default_factory=dict)
@@ -55,7 +62,9 @@ class IntakeResult:
category: Optional[NonfictionCategory]
framework: dict
reasoning: str
source: str # "explicit" | "classifier" | "intake" | "hybrid"
source: str # "explicit" | "classifier" | "content" | "hybrid"
content_analysis: Optional[ContentAnalysis] = None
all_signals: dict = field(default_factory=dict)
class IntakeAgent:
@@ -102,13 +111,23 @@ class IntakeAgent:
],
}
# Content inference
CONTENT_INFERENCE_WEIGHT = 0.4 # Weight for content-based inference
def __init__(self, llm_client=None):
self.classifier = PurposeClassifier(llm_client)
self.content_inferer = ContentPurposeInferer()
self.llm_client = llm_client
async def process(self, intake: IntakeInput, mode: IntakeMode = IntakeMode.AUTO) -> IntakeResult:
"""Process intake and determine purpose and framework.
All signals are weighted:
1. Explicit flags (weight: 1.0) - highest priority
2. Content inference (weight: 0.4) - from existing blog/posts
3. Keyword classification (weight: 0.3) - from concept
4. Conversational (weight: 0.5) - from Q&A
Args:
intake: All available input signals
mode: How to resolve (conversational, auto, explicit)
@@ -116,31 +135,98 @@ class IntakeAgent:
Returns:
IntakeResult with purpose, framework, and reasoning
"""
signals = {} # Track all signals for reasoning
# Step 1: Check explicit flags (highest priority)
if intake.explicit_purpose:
return self._process_explicit(intake)
# Step 2: Use classifier for clear cases
if mode == IntakeMode.EXPLICIT:
return self._need_more_info(intake)
# Step 3: Auto-classify from concept
# Step 2: Content inference (if content provided)
content_result = None
if intake.content or intake.blog_posts:
if intake.blog_posts:
content_result = self.content_inferer.infer_from_blog(intake.blog_posts)
elif intake.content:
content_result = self.content_inferer.analyze(
intake.content,
title=intake.content_title
)
signals["content"] = content_result
# Step 3: Keyword classification from concept
classifier_result = self.classifier._keyword_classify(
concept=intake.concept,
target_audience=intake.target_audience,
intended_outcome=intake.intended_outcome,
)
signals["concept"] = classifier_result
# If high confidence, use it
if classifier_result.confidence >= 0.7:
return self._build_result_from_classification(intake, classifier_result, "classifier")
# Step 4: WEIGHTED DECISION - combine signals
purpose_scores: dict[ReaderPurpose, float] = {p: 0.0 for p in ReaderPurpose}
# Step 4: If conversational and low confidence, ask questions
if mode == IntakeMode.CONVERSATIONAL and classifier_result.confidence < 0.5:
# Add content inference (if available)
if content_result and content_result.confidence > 0.3:
purpose_scores[content_result.purpose] += (
content_result.confidence * self.CONTENT_INFERENCE_WEIGHT
)
# Add classifier result
purpose_scores[classifier_result.purpose] += (
classifier_result.confidence * 0.3
)
# Find winning purpose
best_purpose = max(purpose_scores, key=purpose_scores.get)
best_score = purpose_scores[best_purpose]
# Calculate final confidence
confidence = min(0.95, best_score)
# If confidence is low and in conversational mode, ask questions
if confidence < 0.4 and mode == IntakeMode.CONVERSATIONAL:
return self._need_more_info(intake)
# Step 5: Fall back to classification even with medium confidence
return self._build_result_from_classification(intake, classifier_result, "classifier")
# Determine source
if content_result and content_result.confidence > 0.5:
source = "content"
elif content_result and classifier_result.confidence > 0.3:
source = "hybrid"
else:
source = "classifier"
# Get category from input
category = None
if intake.explicit_category:
try:
category = NonfictionCategory(intake.explicit_category.lower())
except ValueError:
pass
# Select framework
framework = select_framework(
purpose=best_purpose,
category=category,
)
# Build reasoning
reasons = []
if content_result:
reasons.append(f"content: {content_result.reasoning}")
reasons.append(f"concept: {classifier_result.reasoning}")
return IntakeResult(
purpose=best_purpose,
confidence=confidence,
category=category,
framework=framework,
reasoning=" | ".join(reasons),
source=source,
content_analysis=content_result,
all_signals=signals,
)
def _process_explicit(self, intake: IntakeInput) -> IntakeResult:
"""Process when user provided explicit purpose."""
@@ -171,6 +257,8 @@ class IntakeAgent:
framework=framework,
reasoning=f"Explicit user selection: {intake.explicit_purpose}",
source="explicit",
content_analysis=None,
all_signals={"explicit": intake.explicit_purpose},
)
def _process_auto(self, intake: IntakeInput) -> IntakeResult:
@@ -224,6 +312,8 @@ class IntakeAgent:
framework=select_framework(purpose=ReaderPurpose.UNDERSTAND),
reasoning="Input ambiguous - defaulted to UNDERSTAND. Use --purpose flag for explicit selection.",
source="intake",
content_analysis=None,
all_signals={},
)
def get_questions(self, purpose: Optional[ReaderPurpose] = None) -> list[str]: