feat: Content-Based Purpose Inference
Added content_infer.py, which analyzes existing content to infer the reader purpose:
- ContentPurposeInferer class
- Analyzes blog posts, articles, and other text
- Detects signals: tutorials, explainers, transformation stories, etc.
- Returns purpose, confidence, and reasoning

Updated intake.py to weight all signals:
1. Explicit flags (weight: 1.0)
2. Content inference (weight: 0.4) - NEW
3. Keyword classification (weight: 0.3)
4. Conversational (weight: 0.5)

Now if you point at a blog:
- Tutorial posts → LEARN_HANDS_ON
- Explainers → UNDERSTAND
- Transformation stories → TRANSFORM
- Reviews/Comparisons → DECIDE
- Reference docs → REFERENCE
- Journey/Biography → BE_INSPIRED
This commit is contained in:
@@ -0,0 +1,208 @@
|
||||
"""Content-Based Purpose Inference.
|
||||
|
||||
Analyzes existing content to infer the reader purpose.
|
||||
This allows the system to determine purpose from blog posts, articles, etc.
|
||||
"""
|
||||
|
||||
import re
from dataclasses import dataclass
from typing import Optional

from opus_orchestrator.nonfiction.classifier import ReaderPurpose
|
||||
|
||||
|
||||
@dataclass
class ContentAnalysis:
    """Outcome of a content-based purpose inference.

    Bundles the purpose that was selected for a piece of content with a
    confidence value, a short human-readable justification, and the
    matched signals that were recorded during analysis.
    """

    # The reader purpose selected for the analyzed content.
    purpose: ReaderPurpose
    # Heuristic confidence attached to ``purpose``.
    confidence: float
    # Short human-readable explanation of the choice.
    reasoning: str
    # Matched signals, grouped per purpose.
    signals: dict
|
||||
|
||||
|
||||
class ContentPurposeInferer:
    """Infers reader purpose from existing content.

    Analyzes blog posts, articles, or other content to determine
    what kind of book this content would become.

    Scoring is a keyword heuristic. Every indicator phrase found in the
    text adds ``INDICATOR_WEIGHT`` to its purpose, every structure term
    adds ``STRUCTURE_WEIGHT``, and every negation term found subtracts
    ``NEGATION_PENALTY`` (a score never drops below zero). Terms are
    matched case-insensitively and must start at a word boundary, so
    "api" matches "api" and "apis" but not "rapid" or "capital".
    """

    # Score added for each matched indicator phrase / structure term, and
    # score removed for each matched negation term.
    INDICATOR_WEIGHT = 1.0
    STRUCTURE_WEIGHT = 0.5
    NEGATION_PENALTY = 0.5

    # Confidence returned when no purpose stands out, and the upper cap.
    FLOOR_CONFIDENCE = 0.3
    MAX_CONFIDENCE = 0.95

    # Content patterns that indicate purpose
    CONTENT_SIGNALS = {
        ReaderPurpose.LEARN_HANDS_ON: {
            "indicators": [
                "step by step", "how to", "tutorial", "guide to",
                "instructions", "learn to", "course", "workshop",
                "example code", "exercise", "practice", "build a",
                "create a", "implement", "getting started",
            ],
            "structure": ["step", "chapter", "lesson", "module", "exercise"],
        },
        ReaderPurpose.UNDERSTAND: {
            "indicators": [
                "why", "explains", "understand", "concept of",
                "the nature of", "how it works", "mechanism",
                "deep dive", "analysis", "framework", "principles",
                "mental model", "theory", "psychology", "science",
            ],
            "structure": ["overview", "background", "core concepts", "implications"],
        },
        ReaderPurpose.TRANSFORM: {
            # NOTE(review): "from" and "to" are ordinary English words and
            # will match almost any text even with word-boundary matching;
            # consider replacing them with a more specific phrase.
            "indicators": [
                "i was", "i became", "my journey", "transformation",
                "overcoming", "struggle", "breakthrough", "changed my life",
                "how i", "from", "to", "becoming", "awakening",
                "healing", "recovery", "manifest", "empower",
            ],
            "structure": ["before", "after", "journey", "struggle", "triumph"],
        },
        ReaderPurpose.DECIDE: {
            "indicators": [
                "compared to", "versus", "pros and cons", "should you",
                "which is better", "is it worth", "decision", "choose",
                "analysis", "recommendation", "best", "top", "ranking",
                "tradeoff", "evaluation", "case study",
            ],
            "structure": ["comparison", "versus", "pros", "cons", "verdict"],
        },
        ReaderPurpose.REFERENCE: {
            "indicators": [
                "reference", "documentation", "api", "specification",
                "manual", "handbook", "comprehensive", "complete guide",
                "all about", "definitive", "index", "table of contents",
            ],
            "structure": ["reference", "api", "syntax", "parameters", "examples"],
        },
        ReaderPurpose.BE_INSPIRED: {
            "indicators": [
                "story", "journey", "triumph", "against all odds",
                "inspiration", "motivation", "life lesson", "wisdom",
                "legacy", "calling", "warrior", "hero", "unstoppable",
            ],
            "structure": ["chapter one", "the beginning", "the end", "epilogue"],
        },
    }

    # Negative signals (reduce confidence)
    NEGATION_PATTERNS = {
        ReaderPurpose.LEARN_HANDS_ON: ["theory", "why", "explain", "concept"],
        ReaderPurpose.TRANSFORM: ["reference", "documentation", "api"],
    }

    @staticmethod
    def _contains_term(text: str, term: str) -> bool:
        """Return True if ``term`` occurs in ``text`` starting at a word boundary.

        ``text`` is expected to be lower-cased already. Only the start of
        the term is anchored, so inflected forms ("tutorials",
        "implementation") still match while mid-word hits ("api" inside
        "rapid", "top" inside "stop") do not.
        """
        return re.search(r"\b" + re.escape(term.lower()), text) is not None

    def analyze(
        self,
        content: str,
        title: str = "",
        meta_description: str = "",
    ) -> ContentAnalysis:
        """Analyze content to infer purpose.

        Args:
            content: The text content to analyze
            title: Title of the content
            meta_description: Meta description if available

        Returns:
            ContentAnalysis with inferred purpose. ``signals`` maps each
            purpose's ``value`` to the indicator phrases matched for it
            (purposes without matches are omitted). When nothing matches,
            the first ``ReaderPurpose`` member is returned with
            ``FLOOR_CONFIDENCE``.
        """
        # Combine all text
        full_text = f"{title} {meta_description} {content}".lower()

        # Score each purpose
        scores: dict[ReaderPurpose, float] = {p: 0.0 for p in ReaderPurpose}
        signal_counts: dict[ReaderPurpose, list[str]] = {p: [] for p in ReaderPurpose}
        structure_hits: dict[ReaderPurpose, list[str]] = {p: [] for p in ReaderPurpose}

        for purpose, patterns in self.CONTENT_SIGNALS.items():
            # Count indicator matches
            for indicator in patterns["indicators"]:
                if self._contains_term(full_text, indicator):
                    scores[purpose] += self.INDICATOR_WEIGHT
                    signal_counts[purpose].append(indicator)

            # Check structure patterns; remember them so the reasoning can
            # explain a purpose that won on structure terms alone.
            for structure in patterns.get("structure", []):
                if self._contains_term(full_text, structure):
                    scores[purpose] += self.STRUCTURE_WEIGHT
                    structure_hits[purpose].append(structure)

        # Apply negations (reduce scores)
        for purpose, negations in self.NEGATION_PATTERNS.items():
            for negation in negations:
                if self._contains_term(full_text, negation):
                    scores[purpose] = max(0.0, scores[purpose] - self.NEGATION_PENALTY)

        # Normalize scores
        total_score = sum(scores.values())
        if total_score > 0:
            normalized = {p: s / total_score for p, s in scores.items()}
        else:
            # Equal placeholder share for every purpose when nothing scored;
            # it falls into the floor-confidence branch below.
            normalized = {p: 0.1 for p in ReaderPurpose}

        # Find best match (ties resolve to the earliest ReaderPurpose member)
        best_purpose = max(normalized, key=normalized.get)
        best_score = normalized[best_purpose]

        # Calculate confidence
        if best_score > 0.5:
            confidence = min(self.MAX_CONFIDENCE, 0.5 + best_score * 0.5)
        elif best_score > 0.2:
            confidence = min(0.7, 0.3 + best_score * 0.4)
        else:
            confidence = self.FLOOR_CONFIDENCE

        # Build reasoning
        signals = signal_counts[best_purpose]
        structures = structure_hits[best_purpose]
        if signals:
            reasoning = f"Content signals: {', '.join(signals[:5])}"
        elif structures:
            reasoning = f"Structure signals: {', '.join(structures[:5])}"
        else:
            reasoning = "No strong signals - purpose unclear"

        return ContentAnalysis(
            purpose=best_purpose,
            confidence=confidence,
            reasoning=reasoning,
            signals={p.value: c for p, c in signal_counts.items() if c},
        )

    def infer_from_blog(self, blog_posts: list[dict]) -> ContentAnalysis:
        """Infer purpose from multiple blog posts.

        The posts' content and excerpts are concatenated and analyzed as a
        single document, with the titles joined by "; ". Missing or
        ``None`` fields are treated as empty strings.

        Args:
            blog_posts: List of dicts with 'title', 'content', 'excerpt'

        Returns:
            Aggregated ContentAnalysis
        """
        parts: list[str] = []
        titles: list[str] = []

        for post in blog_posts:
            # ``or ""`` also covers keys that are present but set to None.
            parts.append(post.get("content") or "")
            parts.append(post.get("excerpt") or "")
            titles.append(post.get("title") or "")

        result = self.analyze(" ".join(parts), title="; ".join(titles))

        # If multiple posts, boost confidence slightly - but only when the
        # analysis actually singled out a purpose. At the floor the purpose
        # is not backed by a clear signal, and more posts do not change that.
        if len(blog_posts) > 3 and result.confidence > self.FLOOR_CONFIDENCE:
            result.confidence = min(self.MAX_CONFIDENCE, result.confidence + 0.1)

        return result
|
||||
|
||||
|
||||
# Convenience function
|
||||
def infer_purpose_from_content(
    content: str,
    title: str = "",
    meta_description: str = "",
) -> ContentAnalysis:
    """Infer the reader purpose of a single piece of content.

    Shortcut that creates a throwaway ``ContentPurposeInferer`` and runs
    its ``analyze`` method on the given text.

    Args:
        content: The text content
        title: Title of the content
        meta_description: Optional meta description

    Returns:
        ContentAnalysis with inferred purpose
    """
    return ContentPurposeInferer().analyze(
        content,
        title=title,
        meta_description=meta_description,
    )
|
||||
Reference in New Issue
Block a user