From 8cf833c7290ad1a6179650bf11700d1d71616f62 Mon Sep 17 00:00:00 2001 From: Mark Randall Havens Date: Fri, 13 Mar 2026 20:50:36 +0000 Subject: [PATCH] feat: Content-Based Purpose Inference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Added content_infer.py - analyzes existing content to infer purpose: - ContentPurposeInferer class - Analyzes blog posts, articles, text - Detects signals: tutorials, explainers, transformation stories, etc. - Returns purpose, confidence, reasoning Updated intake.py to weight all signals: 1. Explicit flags (weight: 1.0) 2. Content inference (weight: 0.4) - NEW 3. Keyword classification (weight: 0.3) 4. Conversational (weight: 0.5) Now if you point at a blog: - Tutorial posts → LEARN_HANDS_ON - Explainers → UNDERSTAND - Transformation stories → TRANSFORM - Reviews/Comparisons → DECIDE - Reference docs → REFERENCE - Journey/Biography → BE_INSPIRED --- opus_orchestrator/nonfiction/__init__.py | 10 + opus_orchestrator/nonfiction/content_infer.py | 208 ++++++++++++++++++ opus_orchestrator/nonfiction/intake.py | 114 +++++++++- 3 files changed, 320 insertions(+), 12 deletions(-) create mode 100644 opus_orchestrator/nonfiction/content_infer.py diff --git a/opus_orchestrator/nonfiction/__init__.py b/opus_orchestrator/nonfiction/__init__.py index 1330dc6..a64da98 100644 --- a/opus_orchestrator/nonfiction/__init__.py +++ b/opus_orchestrator/nonfiction/__init__.py @@ -3,6 +3,7 @@ Key components: - classifier: Classifies user input into ReaderPurpose - intake: Conversational intake agent for high-fidelity intent +- content_infer: Infers purpose from existing blog/content """ from opus_orchestrator.nonfiction.classifier import ( @@ -18,6 +19,11 @@ from opus_orchestrator.nonfiction.intake import ( IntakeMode, determine_intake, ) +from opus_orchestrator.nonfiction.content_infer import ( + ContentPurposeInferer, + ContentAnalysis, + infer_purpose_from_content, +) __all__ = [ # Classifier @@ -31,4 +37,8 @@ __all__ = [ "IntakeResult", "IntakeMode", "determine_intake", + # Content Inference + "ContentPurposeInferer", + "ContentAnalysis", + "infer_purpose_from_content", ] diff --git a/opus_orchestrator/nonfiction/content_infer.py b/opus_orchestrator/nonfiction/content_infer.py new file mode 100644 index 0000000..94fb2b8 --- /dev/null +++ b/opus_orchestrator/nonfiction/content_infer.py @@ -0,0 +1,208 @@ +"""Content-Based Purpose Inference. + +Analyzes existing content to infer the reader purpose. +This allows the system to determine purpose from blog posts, articles, etc. +""" + +from dataclasses import dataclass +from typing import Optional + +from opus_orchestrator.nonfiction.classifier import ReaderPurpose + + +@dataclass +class ContentAnalysis: + """Result of analyzing content for purpose.""" + purpose: ReaderPurpose + confidence: float + reasoning: str + signals: dict + + +class ContentPurposeInferer: + """Infers reader purpose from existing content. + + Analyzes blog posts, articles, or other content to determine + what kind of book this content would become. + """ + + # Content patterns that indicate purpose + CONTENT_SIGNALS = { + ReaderPurpose.LEARN_HANDS_ON: { + "indicators": [ + "step by step", "how to", "tutorial", "guide to", + "instructions", "learn to", "course", "workshop", + "example code", "exercise", "practice", "build a", + "create a", "implement", "getting started", + ], + "structure": ["step", "chapter", "lesson", "module", "exercise"], + }, + ReaderPurpose.UNDERSTAND: { + "indicators": [ + "why", "explains", "understand", "concept of", + "the nature of", "how it works", "mechanism", + "deep dive", "analysis", "framework", "principles", + "mental model", "theory", "psychology", "science", + ], + "structure": ["overview", "background", "core concepts", "implications"], + }, + ReaderPurpose.TRANSFORM: { + "indicators": [ + "i was", "i became", "my journey", "transformation", + "overcoming", "struggle", "breakthrough", "changed my life", + "how i", "from", "to", "becoming", "awakening", + "healing", "recovery", "manifest", "empower", + ], + "structure": ["before", "after", "journey", "struggle", "triumph"], + }, + ReaderPurpose.DECIDE: { + "indicators": [ + "compared to", "versus", "pros and cons", "should you", + "which is better", "is it worth", "decision", "choose", + "analysis", "recommendation", "best", "top", "ranking", + "tradeoff", "evaluation", "case study", + ], + "structure": ["comparison", "versus", "pros", "cons", "verdict"], + }, + ReaderPurpose.REFERENCE: { + "indicators": [ + "reference", "documentation", "api", "specification", + "manual", "handbook", "comprehensive", "complete guide", + "all about", "definitive", "index", "table of contents", + ], + "structure": ["reference", "api", "syntax", "parameters", "examples"], + }, + ReaderPurpose.BE_INSPIRED: { + "indicators": [ + "story", "journey", "triumph", "against all odds", + "inspiration", "motivation", "life lesson", "wisdom", + "legacy", "calling", "warrior", "hero", "unstoppable", + ], + "structure": ["chapter one", "the beginning", "the end", "epilogue"], + }, + } + + # Negative signals (reduce confidence) + NEGATION_PATTERNS = { + ReaderPurpose.LEARN_HANDS_ON: ["theory", "why", "explain", "concept"], + ReaderPurpose.TRANSFORM: ["reference", "documentation", "api"], + } + + def analyze( + self, + content: str, + title: str = "", + meta_description: str = "", + ) -> ContentAnalysis: + """Analyze content to infer purpose. + + Args: + content: The text content to analyze + title: Title of the content + meta_description: Meta description if available + + Returns: + ContentAnalysis with inferred purpose + """ + # Combine all text + full_text = f"{title} {meta_description} {content}".lower() + + # Score each purpose + scores: dict[ReaderPurpose, float] = {p: 0.0 for p in ReaderPurpose} + signal_counts: dict[ReaderPurpose, list[str]] = {p: [] for p in ReaderPurpose} + + for purpose, patterns in self.CONTENT_SIGNALS.items(): + # Count indicator matches + for indicator in patterns["indicators"]: + if indicator.lower() in full_text: + scores[purpose] += 1.0 + signal_counts[purpose].append(indicator) + + # Check structure patterns + for structure in patterns.get("structure", []): + if structure.lower() in full_text: + scores[purpose] += 0.5 + + # Apply negations (reduce scores) + for purpose, negations in self.NEGATION_PATTERNS.items(): + for negation in negations: + if negation.lower() in full_text: + scores[purpose] = max(0, scores[purpose] - 0.5) + + # Normalize scores + total_score = sum(scores.values()) + if total_score > 0: + normalized = {p: s / total_score for p, s in scores.items()} + else: + normalized = {p: 0.1 for p in ReaderPurpose} # Uniform if no matches + + # Find best match + best_purpose = max(normalized, key=normalized.get) + best_score = normalized[best_purpose] + + # Calculate confidence + if best_score > 0.5: + confidence = min(0.95, 0.5 + best_score * 0.5) + elif best_score > 0.2: + confidence = min(0.7, 0.3 + best_score * 0.4) + else: + confidence = 0.3 + + # Build reasoning + signals = signal_counts[best_purpose] + if signals: + reasoning = f"Content signals: {', '.join(signals[:5])}" + else: + reasoning = "No strong signals - purpose unclear" + + return ContentAnalysis( + purpose=best_purpose, + confidence=confidence, + reasoning=reasoning, + signals={p.value: c for p, c in signal_counts.items() if c}, + ) + + def infer_from_blog(self, blog_posts: list[dict]) -> ContentAnalysis: + """Infer purpose from multiple blog posts. + + Args: + blog_posts: List of dicts with 'title', 'content', 'excerpt' + + Returns: + Aggregated ContentAnalysis + """ + all_text = "" + titles = [] + + for post in blog_posts: + all_text += post.get("content", "") + " " + all_text += post.get("excerpt", "") + " " + titles.append(post.get("title", "")) + + result = self.analyze(all_text, title="; ".join(titles)) + + # If multiple posts, boost confidence slightly + if len(blog_posts) > 3: + result.confidence = min(0.95, result.confidence + 0.1) + + return result + + +# Convenience function +def infer_purpose_from_content( + content: str, + title: str = "", + meta_description: str = "", +) -> ContentAnalysis: + """Convenience function to infer purpose from content. + + Args: + content: The text content + title: Title of the content + meta_description: Optional meta description + + Returns: + ContentAnalysis with inferred purpose + """ + inferer = ContentPurposeInferer() + return inferer.analyze(content, title, meta_description) diff --git a/opus_orchestrator/nonfiction/intake.py b/opus_orchestrator/nonfiction/intake.py index bcf276c..02ee609 100644 --- a/opus_orchestrator/nonfiction/intake.py +++ b/opus_orchestrator/nonfiction/intake.py @@ -6,7 +6,8 @@ by asking clarifying questions or using available signals. This agent intelligently combines: 1. Explicit user flags (--purpose learn) 2. Keyword classification from concept -3. Conversational intake (asking questions) +3. Content inference from existing blog/posts +4. Conversational intake (asking questions) The agent weights all inputs to make the best decision. """ @@ -16,6 +17,7 @@ from enum import Enum from typing import Optional from opus_orchestrator.nonfiction.classifier import PurposeClassifier, ReaderPurpose +from opus_orchestrator.nonfiction.content_infer import ContentPurposeInferer, ContentAnalysis from opus_orchestrator.nonfiction_taxonomy import ( select_framework, get_frameworks_for_purpose, @@ -43,7 +45,12 @@ class IntakeInput: target_audience: str = "" intended_outcome: str = "" - # Option 3: Previous Q&A (if conversational) + # Option 3: Existing content (for inference) + content: str = "" + content_title: str = "" + blog_posts: list = field(default_factory=list) + + # Option 4: Previous Q&A (if conversational) answers: dict[str, str] = field(default_factory=dict) @@ -55,7 +62,9 @@ class IntakeResult: category: Optional[NonfictionCategory] framework: dict reasoning: str - source: str # "explicit" | "classifier" | "intake" | "hybrid" + source: str # "explicit" | "classifier" | "content" | "hybrid" + content_analysis: Optional[ContentAnalysis] = None + all_signals: dict = field(default_factory=dict) class IntakeAgent: @@ -102,13 +111,23 @@ class IntakeAgent: ], } + # Content inference + CONTENT_INFERENCE_WEIGHT = 0.4 # Weight for content-based inference + def __init__(self, llm_client=None): self.classifier = PurposeClassifier(llm_client) + self.content_inferer = ContentPurposeInferer() self.llm_client = llm_client async def process(self, intake: IntakeInput, mode: IntakeMode = IntakeMode.AUTO) -> IntakeResult: """Process intake and determine purpose and framework. + All signals are weighted: + 1. Explicit flags (weight: 1.0) - highest priority + 2. Content inference (weight: 0.4) - from existing blog/posts + 3. Keyword classification (weight: 0.3) - from concept + 4. Conversational (weight: 0.5) - from Q&A + Args: intake: All available input signals mode: How to resolve (conversational, auto, explicit) @@ -116,31 +135,98 @@ class IntakeAgent: Returns: IntakeResult with purpose, framework, and reasoning """ + signals = {} # Track all signals for reasoning + # Step 1: Check explicit flags (highest priority) if intake.explicit_purpose: return self._process_explicit(intake) - # Step 2: Use classifier for clear cases if mode == IntakeMode.EXPLICIT: return self._need_more_info(intake) - # Step 3: Auto-classify from concept + # Step 2: Content inference (if content provided) + content_result = None + if intake.content or intake.blog_posts: + if intake.blog_posts: + content_result = self.content_inferer.infer_from_blog(intake.blog_posts) + elif intake.content: + content_result = self.content_inferer.analyze( + intake.content, + title=intake.content_title + ) + signals["content"] = content_result + + # Step 3: Keyword classification from concept classifier_result = self.classifier._keyword_classify( concept=intake.concept, target_audience=intake.target_audience, intended_outcome=intake.intended_outcome, ) + signals["concept"] = classifier_result - # If high confidence, use it - if classifier_result.confidence >= 0.7: - return self._build_result_from_classification(intake, classifier_result, "classifier") + # Step 4: WEIGHTED DECISION - combine signals + purpose_scores: dict[ReaderPurpose, float] = {p: 0.0 for p in ReaderPurpose} - # Step 4: If conversational and low confidence, ask questions - if mode == IntakeMode.CONVERSATIONAL and classifier_result.confidence < 0.5: + # Add content inference (if available) + if content_result and content_result.confidence > 0.3: + purpose_scores[content_result.purpose] += ( + content_result.confidence * self.CONTENT_INFERENCE_WEIGHT + ) + + # Add classifier result + purpose_scores[classifier_result.purpose] += ( + classifier_result.confidence * 0.3 + ) + + # Find winning purpose + best_purpose = max(purpose_scores, key=purpose_scores.get) + best_score = purpose_scores[best_purpose] + + # Calculate final confidence + confidence = min(0.95, best_score) + + # If confidence is low and in conversational mode, ask questions + if confidence < 0.4 and mode == IntakeMode.CONVERSATIONAL: return self._need_more_info(intake) - # Step 5: Fall back to classification even with medium confidence - return self._build_result_from_classification(intake, classifier_result, "classifier") + # Determine source + if content_result and content_result.confidence > 0.5: + source = "content" + elif content_result and classifier_result.confidence > 0.3: + source = "hybrid" + else: + source = "classifier" + + # Get category from input + category = None + if intake.explicit_category: + try: + category = NonfictionCategory(intake.explicit_category.lower()) + except ValueError: + pass + + # Select framework + framework = select_framework( + purpose=best_purpose, + category=category, + ) + + # Build reasoning + reasons = [] + if content_result: + reasons.append(f"content: {content_result.reasoning}") + reasons.append(f"concept: {classifier_result.reasoning}") + + return IntakeResult( + purpose=best_purpose, + confidence=confidence, + category=category, + framework=framework, + reasoning=" | ".join(reasons), + source=source, + content_analysis=content_result, + all_signals=signals, + ) def _process_explicit(self, intake: IntakeInput) -> IntakeResult: """Process when user provided explicit purpose.""" @@ -171,6 +257,8 @@ class IntakeAgent: framework=framework, reasoning=f"Explicit user selection: {intake.explicit_purpose}", source="explicit", + content_analysis=None, + all_signals={"explicit": intake.explicit_purpose}, ) def _process_auto(self, intake: IntakeInput) -> IntakeResult: @@ -224,6 +312,8 @@ class IntakeAgent: framework=select_framework(purpose=ReaderPurpose.UNDERSTAND), reasoning="Input ambiguous - defaulted to UNDERSTAND. Use --purpose flag for explicit selection.", source="intake", + content_analysis=None, + all_signals={}, ) def get_questions(self, purpose: Optional[ReaderPurpose] = None) -> list[str]: