Files
opus-orchestrator-ai/opus_orchestrator/nonfiction/content_infer.py
T
mrhavens 8cf833c729 feat: Content-Based Purpose Inference
Added content_infer.py - analyzes existing content to infer purpose:

- ContentPurposeInferer class
- Analyzes blog posts, articles, text
- Detects signals: tutorials, explainers, transformation stories, etc.
- Returns purpose, confidence, reasoning

Updated intake.py to weight all signals:
1. Explicit flags (weight: 1.0)
2. Content inference (weight: 0.4) - NEW
3. Keyword classification (weight: 0.3)
4. Conversational (weight: 0.5)

Now if you point at a blog:
- Tutorial posts → LEARN_HANDS_ON
- Explainers → UNDERSTAND
- Transformation stories → TRANSFORM
- Reviews/Comparisons → DECIDE
- Reference docs → REFERENCE
- Journey/Biography → BE_INSPIRED
2026-03-13 20:50:36 +00:00

209 lines
7.4 KiB
Python

"""Content-Based Purpose Inference.
Analyzes existing content to infer the reader purpose.
This allows the system to determine purpose from blog posts, articles, etc.
"""
import re
from dataclasses import dataclass
from typing import Optional

from opus_orchestrator.nonfiction.classifier import ReaderPurpose
@dataclass
class ContentAnalysis:
    """Result of analyzing content for purpose.

    Instances are produced by ``ContentPurposeInferer.analyze``.
    """

    # The purpose whose signals scored highest for the analyzed text.
    purpose: ReaderPurpose
    # Heuristic certainty of the inference; ``analyze`` yields values
    # between 0.3 and 0.95.
    confidence: float
    # Short human-readable explanation naming the matched indicators, or a
    # note that no strong signals were found.
    reasoning: str
    # Maps each purpose's ``.value`` to the list of indicator phrases matched
    # for it; purposes with no matched indicators are omitted.
    signals: dict
class ContentPurposeInferer:
    """Infers reader purpose from existing content.

    Analyzes blog posts, articles, or other content to determine
    what kind of book this content would become.

    Scoring is keyword based: every purpose has a list of indicator phrases
    (1.0 point each) and structure markers (0.5 point each). Phrases are
    matched as whole words, case-insensitively, so a short indicator such as
    "api" or "top" does not fire inside "rapid" or "stop". Inflected forms
    (e.g. "steps", "implementing") only match if they are listed explicitly.
    """

    # Content patterns that indicate purpose
    CONTENT_SIGNALS = {
        ReaderPurpose.LEARN_HANDS_ON: {
            "indicators": [
                "step by step", "how to", "tutorial", "guide to",
                "instructions", "learn to", "course", "workshop",
                "example code", "exercise", "practice", "build a",
                "create a", "implement", "getting started",
            ],
            "structure": ["step", "chapter", "lesson", "module", "exercise"],
        },
        ReaderPurpose.UNDERSTAND: {
            "indicators": [
                "why", "explains", "understand", "concept of",
                "the nature of", "how it works", "mechanism",
                "deep dive", "analysis", "framework", "principles",
                "mental model", "theory", "psychology", "science",
            ],
            "structure": ["overview", "background", "core concepts", "implications"],
        },
        ReaderPurpose.TRANSFORM: {
            # NOTE: the bare words "from" and "to" used to be listed here.
            # They occur in virtually every English text, so they pushed all
            # content toward TRANSFORM and have been dropped.
            "indicators": [
                "i was", "i became", "my journey", "transformation",
                "overcoming", "struggle", "breakthrough", "changed my life",
                "how i", "becoming", "awakening",
                "healing", "recovery", "manifest", "empower",
            ],
            "structure": ["before", "after", "journey", "struggle", "triumph"],
        },
        ReaderPurpose.DECIDE: {
            "indicators": [
                "compared to", "versus", "pros and cons", "should you",
                "which is better", "is it worth", "decision", "choose",
                "analysis", "recommendation", "best", "top", "ranking",
                "tradeoff", "evaluation", "case study",
            ],
            "structure": ["comparison", "versus", "pros", "cons", "verdict"],
        },
        ReaderPurpose.REFERENCE: {
            "indicators": [
                "reference", "documentation", "api", "specification",
                "manual", "handbook", "comprehensive", "complete guide",
                "all about", "definitive", "index", "table of contents",
            ],
            "structure": ["reference", "api", "syntax", "parameters", "examples"],
        },
        ReaderPurpose.BE_INSPIRED: {
            "indicators": [
                "story", "journey", "triumph", "against all odds",
                "inspiration", "motivation", "life lesson", "wisdom",
                "legacy", "calling", "warrior", "hero", "unstoppable",
            ],
            "structure": ["chapter one", "the beginning", "the end", "epilogue"],
        },
    }

    # Negative signals (each match takes 0.5 off the purpose's score)
    NEGATION_PATTERNS = {
        ReaderPurpose.LEARN_HANDS_ON: ["theory", "why", "explain", "concept"],
        ReaderPurpose.TRANSFORM: ["reference", "documentation", "api"],
    }

    @staticmethod
    def _mentions(text: str, phrase: str) -> bool:
        """Return True if ``phrase`` occurs in ``text`` as whole words.

        Args:
            text: Already lower-cased text to search.
            phrase: Indicator phrase; may contain several words.

        Returns:
            True when the phrase is present and is not embedded in a longer
            word. Any run of whitespace (including line breaks) is accepted
            between the words of a multi-word phrase.
        """
        words = phrase.lower().split()
        if not words:
            return False
        body = r"\s+".join(re.escape(word) for word in words)
        # Lookarounds instead of \b so phrases that start or end with a
        # non-word character would still be anchored correctly.
        return re.search(rf"(?<!\w){body}(?!\w)", text) is not None

    def analyze(
        self,
        content: str,
        title: str = "",
        meta_description: str = "",
    ) -> ContentAnalysis:
        """Analyze content to infer purpose.

        Args:
            content: The text content to analyze
            title: Title of the content
            meta_description: Meta description if available

        Returns:
            ContentAnalysis with inferred purpose. When nothing matches, the
            first ``ReaderPurpose`` member is returned with confidence 0.3
            and a reasoning string saying the purpose is unclear.
        """
        # Combine all text; ``or ""`` keeps a None field from being folded in
        # as the literal word "none".
        full_text = " ".join(
            part or "" for part in (title, meta_description, content)
        ).lower()

        # Score each purpose
        scores: dict[ReaderPurpose, float] = {p: 0.0 for p in ReaderPurpose}
        signal_counts: dict[ReaderPurpose, list[str]] = {p: [] for p in ReaderPurpose}

        for purpose, patterns in self.CONTENT_SIGNALS.items():
            # Indicator phrases are the primary evidence: 1.0 each.
            for indicator in patterns["indicators"]:
                if self._mentions(full_text, indicator):
                    scores[purpose] += 1.0
                    signal_counts[purpose].append(indicator)
            # Structure markers are weaker evidence: 0.5 each.
            for structure in patterns.get("structure", []):
                if self._mentions(full_text, structure):
                    scores[purpose] += 0.5

        # Apply negations (reduce scores, never below zero)
        for purpose, negations in self.NEGATION_PATTERNS.items():
            for negation in negations:
                if self._mentions(full_text, negation):
                    scores[purpose] = max(0.0, scores[purpose] - 0.5)

        # Normalize scores into each purpose's share of the total
        total_score = sum(scores.values())
        if total_score > 0:
            normalized = {p: s / total_score for p, s in scores.items()}
        else:
            # No matches at all: give every purpose the same low share so the
            # confidence below lands on the 0.3 floor.
            normalized = {p: 0.1 for p in ReaderPurpose}

        # Find best match (ties resolve to the earliest ReaderPurpose member)
        best_purpose = max(normalized, key=normalized.get)
        best_score = normalized[best_purpose]

        # Calculate confidence from how dominant the winning share is
        if best_score > 0.5:
            confidence = min(0.95, 0.5 + best_score * 0.5)
        elif best_score > 0.2:
            confidence = min(0.7, 0.3 + best_score * 0.4)
        else:
            confidence = 0.3

        # Build reasoning from at most five matched indicators
        signals = signal_counts[best_purpose]
        if signals:
            reasoning = f"Content signals: {', '.join(signals[:5])}"
        else:
            reasoning = "No strong signals - purpose unclear"

        return ContentAnalysis(
            purpose=best_purpose,
            confidence=confidence,
            reasoning=reasoning,
            signals={p.value: c for p, c in signal_counts.items() if c},
        )

    def infer_from_blog(self, blog_posts: list[dict]) -> ContentAnalysis:
        """Infer purpose from multiple blog posts.

        Args:
            blog_posts: List of dicts with 'title', 'content', 'excerpt'.
                Missing keys and None values are treated as empty text.

        Returns:
            Aggregated ContentAnalysis. With more than three posts the
            confidence is raised by 0.1, capped at 0.95.
        """
        bodies: list[str] = []
        titles: list[str] = []
        for post in blog_posts:
            # ``or ""`` also covers keys that are present but set to None.
            bodies.append(post.get("content") or "")
            bodies.append(post.get("excerpt") or "")
            titles.append(post.get("title") or "")

        result = self.analyze(" ".join(bodies), title="; ".join(titles))

        # A larger sample of posts makes the inference slightly more reliable
        if len(blog_posts) > 3:
            result.confidence = min(0.95, result.confidence + 0.1)
        return result
# Convenience function
def infer_purpose_from_content(
    content: str,
    title: str = "",
    meta_description: str = "",
) -> ContentAnalysis:
    """Infer reader purpose from content without managing an inferer instance.

    Builds a throwaway ``ContentPurposeInferer`` and delegates to its
    ``analyze`` method.

    Args:
        content: The text content
        title: Title of the content
        meta_description: Optional meta description

    Returns:
        ContentAnalysis with inferred purpose
    """
    return ContentPurposeInferer().analyze(
        content,
        title=title,
        meta_description=meta_description,
    )