feat: Content-Based Purpose Inference
Added content_infer.py, which analyzes existing content to infer purpose:
- ContentPurposeInferer class
- Analyzes blog posts, articles, and other text
- Detects signals: tutorials, explainers, transformation stories, etc.
- Returns purpose, confidence, and reasoning

Updated intake.py to weight all signals:
1. Explicit flags (weight: 1.0)
2. Content inference (weight: 0.4) - NEW
3. Keyword classification (weight: 0.3)
4. Conversational (weight: 0.5)

Now, if you point it at a blog:
- Tutorial posts → LEARN_HANDS_ON
- Explainers → UNDERSTAND
- Transformation stories → TRANSFORM
- Reviews/Comparisons → DECIDE
- Reference docs → REFERENCE
- Journey/Biography → BE_INSPIRED
This commit is contained in:
@@ -3,6 +3,7 @@
|
|||||||
Key components:
|
Key components:
|
||||||
- classifier: Classifies user input into ReaderPurpose
|
- classifier: Classifies user input into ReaderPurpose
|
||||||
- intake: Conversational intake agent for high-fidelity intent
|
- intake: Conversational intake agent for high-fidelity intent
|
||||||
|
- content_infer: Infers purpose from existing blog/content
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from opus_orchestrator.nonfiction.classifier import (
|
from opus_orchestrator.nonfiction.classifier import (
|
||||||
@@ -18,6 +19,11 @@ from opus_orchestrator.nonfiction.intake import (
|
|||||||
IntakeMode,
|
IntakeMode,
|
||||||
determine_intake,
|
determine_intake,
|
||||||
)
|
)
|
||||||
|
from opus_orchestrator.nonfiction.content_infer import (
|
||||||
|
ContentPurposeInferer,
|
||||||
|
ContentAnalysis,
|
||||||
|
infer_purpose_from_content,
|
||||||
|
)
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
# Classifier
|
# Classifier
|
||||||
@@ -31,4 +37,8 @@ __all__ = [
|
|||||||
"IntakeResult",
|
"IntakeResult",
|
||||||
"IntakeMode",
|
"IntakeMode",
|
||||||
"determine_intake",
|
"determine_intake",
|
||||||
|
# Content Inference
|
||||||
|
"ContentPurposeInferer",
|
||||||
|
"ContentAnalysis",
|
||||||
|
"infer_purpose_from_content",
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -0,0 +1,208 @@
|
|||||||
|
"""Content-Based Purpose Inference.
|
||||||
|
|
||||||
|
Analyzes existing content to infer the reader purpose.
|
||||||
|
This allows the system to determine purpose from blog posts, articles, etc.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from opus_orchestrator.nonfiction.classifier import ReaderPurpose
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ContentAnalysis:
    """Outcome of inferring a reader purpose from a piece of content.

    Instances are produced by ``ContentPurposeInferer.analyze`` and
    ``ContentPurposeInferer.infer_from_blog``.
    """

    # The purpose that scored highest for the analysed text.
    purpose: ReaderPurpose
    # Heuristic confidence in ``purpose``; ``analyze`` yields values in the
    # range 0.3 (no usable evidence) to 0.95 (capped).
    confidence: float
    # Short human-readable explanation of why ``purpose`` was chosen.
    reasoning: str
    # Matched indicator phrases keyed by purpose value; purposes without any
    # matched indicator are omitted.
    signals: dict
|
||||||
|
|
||||||
|
|
||||||
|
class ContentPurposeInferer:
    """Infers reader purpose from existing content.

    Analyzes blog posts, articles, or other content to determine
    what kind of book this content would become.

    Scoring is a keyword heuristic: every purpose has a list of indicator
    phrases (1.0 point each) and structure markers (0.5 point each); some
    purposes also have negation phrases that subtract 0.5 point each.
    """

    # Content patterns that indicate purpose
    # NOTE(review): very common words such as "from" and "to" are still
    # counted when they occur as whole words, which biases TRANSFORM on
    # ordinary prose - consider replacing them with more specific phrases.
    CONTENT_SIGNALS = {
        ReaderPurpose.LEARN_HANDS_ON: {
            "indicators": [
                "step by step", "how to", "tutorial", "guide to",
                "instructions", "learn to", "course", "workshop",
                "example code", "exercise", "practice", "build a",
                "create a", "implement", "getting started",
            ],
            "structure": ["step", "chapter", "lesson", "module", "exercise"],
        },
        ReaderPurpose.UNDERSTAND: {
            "indicators": [
                "why", "explains", "understand", "concept of",
                "the nature of", "how it works", "mechanism",
                "deep dive", "analysis", "framework", "principles",
                "mental model", "theory", "psychology", "science",
            ],
            "structure": ["overview", "background", "core concepts", "implications"],
        },
        ReaderPurpose.TRANSFORM: {
            "indicators": [
                "i was", "i became", "my journey", "transformation",
                "overcoming", "struggle", "breakthrough", "changed my life",
                "how i", "from", "to", "becoming", "awakening",
                "healing", "recovery", "manifest", "empower",
            ],
            "structure": ["before", "after", "journey", "struggle", "triumph"],
        },
        ReaderPurpose.DECIDE: {
            "indicators": [
                "compared to", "versus", "pros and cons", "should you",
                "which is better", "is it worth", "decision", "choose",
                "analysis", "recommendation", "best", "top", "ranking",
                "tradeoff", "evaluation", "case study",
            ],
            "structure": ["comparison", "versus", "pros", "cons", "verdict"],
        },
        ReaderPurpose.REFERENCE: {
            "indicators": [
                "reference", "documentation", "api", "specification",
                "manual", "handbook", "comprehensive", "complete guide",
                "all about", "definitive", "index", "table of contents",
            ],
            "structure": ["reference", "api", "syntax", "parameters", "examples"],
        },
        ReaderPurpose.BE_INSPIRED: {
            "indicators": [
                "story", "journey", "triumph", "against all odds",
                "inspiration", "motivation", "life lesson", "wisdom",
                "legacy", "calling", "warrior", "hero", "unstoppable",
            ],
            "structure": ["chapter one", "the beginning", "the end", "epilogue"],
        },
    }

    # Negative signals (reduce confidence)
    NEGATION_PATTERNS = {
        ReaderPurpose.LEARN_HANDS_ON: ["theory", "why", "explain", "concept"],
        ReaderPurpose.TRANSFORM: ["reference", "documentation", "api"],
    }

    @staticmethod
    def _mentions(text: str, phrase: str) -> bool:
        """Return True if ``phrase`` occurs in ``text`` as whole words.

        A plain substring test would fire inside unrelated words ("to" in
        "tutorial", "api" in "rapid", "story" in "history"). The phrase must
        therefore start and end on a word boundary; a simple inflection
        suffix (s, es, ed, ing) on its last word is tolerated so that
        "tutorials" or "implementing" still count. Words of a multi-word
        phrase may be separated by any whitespace, including line breaks.

        Args:
            text: Lower-cased text to search.
            phrase: Indicator phrase; compared case-insensitively.

        Returns:
            True when the phrase is present, False otherwise (including for
            an empty phrase).
        """
        import re  # function-scope import: the module does not import re

        words = phrase.lower().split()
        if not words:
            return False
        pattern = (
            r"(?<!\w)"
            + r"\s+".join(re.escape(word) for word in words)
            + r"(?:s|es|ed|ing)?(?!\w)"
        )
        return re.search(pattern, text) is not None

    def analyze(
        self,
        content: str,
        title: str = "",
        meta_description: str = "",
    ) -> ContentAnalysis:
        """Analyze content to infer purpose.

        Args:
            content: The text content to analyze
            title: Title of the content
            meta_description: Meta description if available

        Returns:
            ContentAnalysis with inferred purpose. When nothing matches, the
            confidence is 0.3 and the reasoning states that the purpose is
            unclear; the reported purpose is then not meaningful.
        """
        # Combine all text; matching is case-insensitive.
        full_text = f"{title} {meta_description} {content}".lower()

        # Score each purpose
        scores: dict[ReaderPurpose, float] = {p: 0.0 for p in ReaderPurpose}
        signal_counts: dict[ReaderPurpose, list[str]] = {p: [] for p in ReaderPurpose}

        for purpose, patterns in self.CONTENT_SIGNALS.items():
            # Indicator phrases are the primary evidence and are recorded
            # so they can be quoted in the reasoning.
            for indicator in patterns["indicators"]:
                if self._mentions(full_text, indicator):
                    scores[purpose] += 1.0
                    signal_counts[purpose].append(indicator)

            # Structure markers are weaker evidence and are not recorded.
            for structure in patterns.get("structure", []):
                if self._mentions(full_text, structure):
                    scores[purpose] += 0.5

        # Apply negations (reduce scores, never below zero)
        for purpose, negations in self.NEGATION_PATTERNS.items():
            for negation in negations:
                if self._mentions(full_text, negation):
                    scores[purpose] = max(0.0, scores[purpose] - 0.5)

        # Normalize scores so they express each purpose's share of the evidence
        total_score = sum(scores.values())
        if total_score > 0:
            normalized = {p: s / total_score for p, s in scores.items()}
        else:
            normalized = {p: 0.1 for p in ReaderPurpose}  # Uniform if no matches

        # Find best match (ties resolve to the first purpose in enum order)
        best_purpose = max(normalized, key=normalized.get)
        best_score = normalized[best_purpose]

        # Calculate confidence from the winning share
        if best_score > 0.5:
            confidence = min(0.95, 0.5 + best_score * 0.5)
        elif best_score > 0.2:
            confidence = min(0.7, 0.3 + best_score * 0.4)
        else:
            confidence = 0.3

        # Build reasoning from at most five matched indicators
        signals = signal_counts[best_purpose]
        if signals:
            reasoning = f"Content signals: {', '.join(signals[:5])}"
        else:
            reasoning = "No strong signals - purpose unclear"

        return ContentAnalysis(
            purpose=best_purpose,
            confidence=confidence,
            reasoning=reasoning,
            signals={p.value: c for p, c in signal_counts.items() if c},
        )

    def infer_from_blog(self, blog_posts: list[dict]) -> ContentAnalysis:
        """Infer purpose from multiple blog posts.

        The posts' contents and excerpts are concatenated and analysed as a
        single document, with the titles joined by "; " as its title.

        Args:
            blog_posts: List of dicts with 'title', 'content', 'excerpt'.
                Missing or ``None`` values are treated as empty strings.

        Returns:
            Aggregated ContentAnalysis. With more than three posts the
            confidence is raised by 0.1 (capped at 0.95), but only when the
            winning purpose is backed by at least one matched indicator.
        """
        bodies: list[str] = []
        titles: list[str] = []

        for post in blog_posts:
            # "or ''" guards against keys that are present but set to None.
            bodies.append(post.get("content") or "")
            bodies.append(post.get("excerpt") or "")
            titles.append(post.get("title") or "")

        result = self.analyze(" ".join(bodies), title="; ".join(titles))

        # A larger sample makes a real signal more trustworthy. Without the
        # evidence check, an evidence-free result (confidence 0.3, arbitrary
        # purpose) would be lifted above the 0.3 floor by volume alone.
        has_evidence = bool(result.signals.get(result.purpose.value))
        if len(blog_posts) > 3 and has_evidence:
            result.confidence = min(0.95, result.confidence + 0.1)

        return result
|
||||||
|
|
||||||
|
|
||||||
|
# Convenience function
|
||||||
|
def infer_purpose_from_content(
    content: str,
    title: str = "",
    meta_description: str = "",
) -> ContentAnalysis:
    """Infer the reader purpose of a single piece of content.

    Shortcut that creates a throwaway ``ContentPurposeInferer`` and runs
    its ``analyze`` method on the given text.

    Args:
        content: The text content
        title: Title of the content
        meta_description: Optional meta description

    Returns:
        ContentAnalysis with inferred purpose
    """
    return ContentPurposeInferer().analyze(
        content,
        title=title,
        meta_description=meta_description,
    )
|
||||||
@@ -6,7 +6,8 @@ by asking clarifying questions or using available signals.
|
|||||||
This agent intelligently combines:
|
This agent intelligently combines:
|
||||||
1. Explicit user flags (--purpose learn)
|
1. Explicit user flags (--purpose learn)
|
||||||
2. Keyword classification from concept
|
2. Keyword classification from concept
|
||||||
3. Conversational intake (asking questions)
|
3. Content inference from existing blog/posts
|
||||||
|
4. Conversational intake (asking questions)
|
||||||
|
|
||||||
The agent weights all inputs to make the best decision.
|
The agent weights all inputs to make the best decision.
|
||||||
"""
|
"""
|
||||||
@@ -16,6 +17,7 @@ from enum import Enum
|
|||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
from opus_orchestrator.nonfiction.classifier import PurposeClassifier, ReaderPurpose
|
from opus_orchestrator.nonfiction.classifier import PurposeClassifier, ReaderPurpose
|
||||||
|
from opus_orchestrator.nonfiction.content_infer import ContentPurposeInferer, ContentAnalysis
|
||||||
from opus_orchestrator.nonfiction_taxonomy import (
|
from opus_orchestrator.nonfiction_taxonomy import (
|
||||||
select_framework,
|
select_framework,
|
||||||
get_frameworks_for_purpose,
|
get_frameworks_for_purpose,
|
||||||
@@ -43,7 +45,12 @@ class IntakeInput:
|
|||||||
target_audience: str = ""
|
target_audience: str = ""
|
||||||
intended_outcome: str = ""
|
intended_outcome: str = ""
|
||||||
|
|
||||||
# Option 3: Previous Q&A (if conversational)
|
# Option 3: Existing content (for inference)
|
||||||
|
content: str = ""
|
||||||
|
content_title: str = ""
|
||||||
|
blog_posts: list = field(default_factory=list)
|
||||||
|
|
||||||
|
# Option 4: Previous Q&A (if conversational)
|
||||||
answers: dict[str, str] = field(default_factory=dict)
|
answers: dict[str, str] = field(default_factory=dict)
|
||||||
|
|
||||||
|
|
||||||
@@ -55,7 +62,9 @@ class IntakeResult:
|
|||||||
category: Optional[NonfictionCategory]
|
category: Optional[NonfictionCategory]
|
||||||
framework: dict
|
framework: dict
|
||||||
reasoning: str
|
reasoning: str
|
||||||
source: str # "explicit" | "classifier" | "intake" | "hybrid"
|
source: str # "explicit" | "classifier" | "content" | "hybrid"
|
||||||
|
content_analysis: Optional[ContentAnalysis] = None
|
||||||
|
all_signals: dict = field(default_factory=dict)
|
||||||
|
|
||||||
|
|
||||||
class IntakeAgent:
|
class IntakeAgent:
|
||||||
@@ -102,13 +111,23 @@ class IntakeAgent:
|
|||||||
],
|
],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Content inference
|
||||||
|
CONTENT_INFERENCE_WEIGHT = 0.4 # Weight for content-based inference
|
||||||
|
|
||||||
def __init__(self, llm_client=None):
|
def __init__(self, llm_client=None):
|
||||||
self.classifier = PurposeClassifier(llm_client)
|
self.classifier = PurposeClassifier(llm_client)
|
||||||
|
self.content_inferer = ContentPurposeInferer()
|
||||||
self.llm_client = llm_client
|
self.llm_client = llm_client
|
||||||
|
|
||||||
async def process(self, intake: IntakeInput, mode: IntakeMode = IntakeMode.AUTO) -> IntakeResult:
|
async def process(self, intake: IntakeInput, mode: IntakeMode = IntakeMode.AUTO) -> IntakeResult:
|
||||||
"""Process intake and determine purpose and framework.
|
"""Process intake and determine purpose and framework.
|
||||||
|
|
||||||
|
All signals are weighted:
|
||||||
|
1. Explicit flags (weight: 1.0) - highest priority
|
||||||
|
2. Content inference (weight: 0.4) - from existing blog/posts
|
||||||
|
3. Keyword classification (weight: 0.3) - from concept
|
||||||
|
4. Conversational (weight: 0.5) - from Q&A
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
intake: All available input signals
|
intake: All available input signals
|
||||||
mode: How to resolve (conversational, auto, explicit)
|
mode: How to resolve (conversational, auto, explicit)
|
||||||
@@ -116,31 +135,98 @@ class IntakeAgent:
|
|||||||
Returns:
|
Returns:
|
||||||
IntakeResult with purpose, framework, and reasoning
|
IntakeResult with purpose, framework, and reasoning
|
||||||
"""
|
"""
|
||||||
|
signals = {} # Track all signals for reasoning
|
||||||
|
|
||||||
# Step 1: Check explicit flags (highest priority)
|
# Step 1: Check explicit flags (highest priority)
|
||||||
if intake.explicit_purpose:
|
if intake.explicit_purpose:
|
||||||
return self._process_explicit(intake)
|
return self._process_explicit(intake)
|
||||||
|
|
||||||
# Step 2: Use classifier for clear cases
|
|
||||||
if mode == IntakeMode.EXPLICIT:
|
if mode == IntakeMode.EXPLICIT:
|
||||||
return self._need_more_info(intake)
|
return self._need_more_info(intake)
|
||||||
|
|
||||||
# Step 3: Auto-classify from concept
|
# Step 2: Content inference (if content provided)
|
||||||
|
content_result = None
|
||||||
|
if intake.content or intake.blog_posts:
|
||||||
|
if intake.blog_posts:
|
||||||
|
content_result = self.content_inferer.infer_from_blog(intake.blog_posts)
|
||||||
|
elif intake.content:
|
||||||
|
content_result = self.content_inferer.analyze(
|
||||||
|
intake.content,
|
||||||
|
title=intake.content_title
|
||||||
|
)
|
||||||
|
signals["content"] = content_result
|
||||||
|
|
||||||
|
# Step 3: Keyword classification from concept
|
||||||
classifier_result = self.classifier._keyword_classify(
|
classifier_result = self.classifier._keyword_classify(
|
||||||
concept=intake.concept,
|
concept=intake.concept,
|
||||||
target_audience=intake.target_audience,
|
target_audience=intake.target_audience,
|
||||||
intended_outcome=intake.intended_outcome,
|
intended_outcome=intake.intended_outcome,
|
||||||
)
|
)
|
||||||
|
signals["concept"] = classifier_result
|
||||||
|
|
||||||
# If high confidence, use it
|
# Step 4: WEIGHTED DECISION - combine signals
|
||||||
if classifier_result.confidence >= 0.7:
|
purpose_scores: dict[ReaderPurpose, float] = {p: 0.0 for p in ReaderPurpose}
|
||||||
return self._build_result_from_classification(intake, classifier_result, "classifier")
|
|
||||||
|
|
||||||
# Step 4: If conversational and low confidence, ask questions
|
# Add content inference (if available)
|
||||||
if mode == IntakeMode.CONVERSATIONAL and classifier_result.confidence < 0.5:
|
if content_result and content_result.confidence > 0.3:
|
||||||
|
purpose_scores[content_result.purpose] += (
|
||||||
|
content_result.confidence * self.CONTENT_INFERENCE_WEIGHT
|
||||||
|
)
|
||||||
|
|
||||||
|
# Add classifier result
|
||||||
|
purpose_scores[classifier_result.purpose] += (
|
||||||
|
classifier_result.confidence * 0.3
|
||||||
|
)
|
||||||
|
|
||||||
|
# Find winning purpose
|
||||||
|
best_purpose = max(purpose_scores, key=purpose_scores.get)
|
||||||
|
best_score = purpose_scores[best_purpose]
|
||||||
|
|
||||||
|
# Calculate final confidence
|
||||||
|
confidence = min(0.95, best_score)
|
||||||
|
|
||||||
|
# If confidence is low and in conversational mode, ask questions
|
||||||
|
if confidence < 0.4 and mode == IntakeMode.CONVERSATIONAL:
|
||||||
return self._need_more_info(intake)
|
return self._need_more_info(intake)
|
||||||
|
|
||||||
# Step 5: Fall back to classification even with medium confidence
|
# Determine source
|
||||||
return self._build_result_from_classification(intake, classifier_result, "classifier")
|
if content_result and content_result.confidence > 0.5:
|
||||||
|
source = "content"
|
||||||
|
elif content_result and classifier_result.confidence > 0.3:
|
||||||
|
source = "hybrid"
|
||||||
|
else:
|
||||||
|
source = "classifier"
|
||||||
|
|
||||||
|
# Get category from input
|
||||||
|
category = None
|
||||||
|
if intake.explicit_category:
|
||||||
|
try:
|
||||||
|
category = NonfictionCategory(intake.explicit_category.lower())
|
||||||
|
except ValueError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Select framework
|
||||||
|
framework = select_framework(
|
||||||
|
purpose=best_purpose,
|
||||||
|
category=category,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Build reasoning
|
||||||
|
reasons = []
|
||||||
|
if content_result:
|
||||||
|
reasons.append(f"content: {content_result.reasoning}")
|
||||||
|
reasons.append(f"concept: {classifier_result.reasoning}")
|
||||||
|
|
||||||
|
return IntakeResult(
|
||||||
|
purpose=best_purpose,
|
||||||
|
confidence=confidence,
|
||||||
|
category=category,
|
||||||
|
framework=framework,
|
||||||
|
reasoning=" | ".join(reasons),
|
||||||
|
source=source,
|
||||||
|
content_analysis=content_result,
|
||||||
|
all_signals=signals,
|
||||||
|
)
|
||||||
|
|
||||||
def _process_explicit(self, intake: IntakeInput) -> IntakeResult:
|
def _process_explicit(self, intake: IntakeInput) -> IntakeResult:
|
||||||
"""Process when user provided explicit purpose."""
|
"""Process when user provided explicit purpose."""
|
||||||
@@ -171,6 +257,8 @@ class IntakeAgent:
|
|||||||
framework=framework,
|
framework=framework,
|
||||||
reasoning=f"Explicit user selection: {intake.explicit_purpose}",
|
reasoning=f"Explicit user selection: {intake.explicit_purpose}",
|
||||||
source="explicit",
|
source="explicit",
|
||||||
|
content_analysis=None,
|
||||||
|
all_signals={"explicit": intake.explicit_purpose},
|
||||||
)
|
)
|
||||||
|
|
||||||
def _process_auto(self, intake: IntakeInput) -> IntakeResult:
|
def _process_auto(self, intake: IntakeInput) -> IntakeResult:
|
||||||
@@ -224,6 +312,8 @@ class IntakeAgent:
|
|||||||
framework=select_framework(purpose=ReaderPurpose.UNDERSTAND),
|
framework=select_framework(purpose=ReaderPurpose.UNDERSTAND),
|
||||||
reasoning="Input ambiguous - defaulted to UNDERSTAND. Use --purpose flag for explicit selection.",
|
reasoning="Input ambiguous - defaulted to UNDERSTAND. Use --purpose flag for explicit selection.",
|
||||||
source="intake",
|
source="intake",
|
||||||
|
content_analysis=None,
|
||||||
|
all_signals={},
|
||||||
)
|
)
|
||||||
|
|
||||||
def get_questions(self, purpose: Optional[ReaderPurpose] = None) -> list[str]:
|
def get_questions(self, purpose: Optional[ReaderPurpose] = None) -> list[str]:
|
||||||
|
|||||||
Reference in New Issue
Block a user