"""Intake Agent for Nonfiction Book Classification. A conversational agent that determines the reader purpose and best framework by asking clarifying questions or using available signals. This agent intelligently combines: 1. Explicit user flags (--purpose learn) 2. Keyword classification from concept 3. Content inference from existing blog/posts 4. Conversational intake (asking questions) The agent weights all inputs to make the best decision. """ from dataclasses import dataclass, field from enum import Enum from typing import Optional from opus_orchestrator.nonfiction.classifier import PurposeClassifier, ReaderPurpose from opus_orchestrator.nonfiction.content_infer import ContentPurposeInferer, ContentAnalysis from opus_orchestrator.nonfiction_taxonomy import ( select_framework, get_frameworks_for_purpose, NonfictionCategory, ) class IntakeMode(str, Enum): """How the intake agent operates.""" CONVERSATIONAL = "conversational" # Ask questions AUTO = "auto" # Use classifier only EXPLICIT = "explicit" # Trust flags only @dataclass class IntakeInput: """All possible inputs to the intake agent.""" # Option 1: Explicit flags (highest priority if provided) explicit_purpose: Optional[str] = None explicit_category: Optional[str] = None explicit_framework: Optional[str] = None # Option 2: Concept for classification concept: str = "" target_audience: str = "" intended_outcome: str = "" # Option 3: Existing content (for inference) content: str = "" content_title: str = "" blog_posts: list = field(default_factory=list) # Option 4: Previous Q&A (if conversational) answers: dict[str, str] = field(default_factory=dict) @dataclass class IntakeResult: """Result from the intake agent.""" purpose: ReaderPurpose confidence: float category: Optional[NonfictionCategory] framework: dict reasoning: str source: str # "explicit" | "classifier" | "content" | "hybrid" content_analysis: Optional[ContentAnalysis] = None all_signals: dict = field(default_factory=dict) class IntakeAgent: """Intelligent agent for determining book purpose and framework. This agent acts as a decision layer that: 1. Respects explicit user choices (highest priority) 2. Uses keyword classification when input is clear 3. Asks clarifying questions when ambiguous 4. Combines all signals for best accuracy """ # Questions for each purpose (for conversational mode) PURPOSE_QUESTIONS = { ReaderPurpose.LEARN_HANDS_ON: [ "Should readers be able to DO something specific after reading?", "Is this about learning a skill or completing a project?", "Do you want step-by-step instructions?", ], ReaderPurpose.UNDERSTAND: [ "Is the goal to GRASP a concept or theory?", "Do you want readers to understand how something works?", "Is this about building mental models?", ], ReaderPurpose.TRANSFORM: [ "Is this about personal CHANGE or growth?", "Do you want readers to become different?", "Is this a self-help or motivational book?", ], ReaderPurpose.DECIDE: [ "Is this helping readers MAKE A DECISION?", "Are you comparing options or choices?", "Do you want to help them choose between alternatives?", ], ReaderPurpose.REFERENCE: [ "Is this a COMPREHENSIVE REFERENCE or manual?", "Will readers look up specific information?", "Is completeness more important than narrative?", ], ReaderPurpose.BE_INSPIRED: [ "Is this an INSPIRATIONAL story or biography?", "Do you want readers to feel motivated?", "Is this about a journey or triumph?", ], } # Content inference CONTENT_INFERENCE_WEIGHT = 0.4 # Weight for content-based inference def __init__(self, llm_client=None): self.classifier = PurposeClassifier(llm_client) self.content_inferer = ContentPurposeInferer() self.llm_client = llm_client async def process(self, intake: IntakeInput, mode: IntakeMode = IntakeMode.AUTO) -> IntakeResult: """Process intake and determine purpose and framework. All signals are weighted: 1. Explicit flags (weight: 1.0) - highest priority 2. Content inference (weight: 0.4) - from existing blog/posts 3. Keyword classification (weight: 0.3) - from concept 4. Conversational (weight: 0.5) - from Q&A Args: intake: All available input signals mode: How to resolve (conversational, auto, explicit) Returns: IntakeResult with purpose, framework, and reasoning """ signals = {} # Track all signals for reasoning # Step 1: Check explicit flags (highest priority) if intake.explicit_purpose: return self._process_explicit(intake) if mode == IntakeMode.EXPLICIT: return self._need_more_info(intake) # Step 2: Content inference (if content provided) content_result = None if intake.content or intake.blog_posts: if intake.blog_posts: content_result = self.content_inferer.infer_from_blog(intake.blog_posts) elif intake.content: content_result = self.content_inferer.analyze( intake.content, title=intake.content_title ) signals["content"] = content_result # Step 3: Keyword classification from concept classifier_result = self.classifier._keyword_classify( concept=intake.concept, target_audience=intake.target_audience, intended_outcome=intake.intended_outcome, ) signals["concept"] = classifier_result # Step 4: WEIGHTED DECISION - combine signals purpose_scores: dict[ReaderPurpose, float] = {p: 0.0 for p in ReaderPurpose} # Add content inference (if available) if content_result and content_result.confidence > 0.3: purpose_scores[content_result.purpose] += ( content_result.confidence * self.CONTENT_INFERENCE_WEIGHT ) # Add classifier result purpose_scores[classifier_result.purpose] += ( classifier_result.confidence * 0.3 ) # Find winning purpose best_purpose = max(purpose_scores, key=purpose_scores.get) best_score = purpose_scores[best_purpose] # Calculate final confidence confidence = min(0.95, best_score) # If confidence is low and in conversational mode, ask questions if confidence < 0.4 and mode == IntakeMode.CONVERSATIONAL: return self._need_more_info(intake) # Determine source if content_result and content_result.confidence > 0.5: source = "content" elif content_result and classifier_result.confidence > 0.3: source = "hybrid" else: source = "classifier" # Get category from input category = None if intake.explicit_category: try: category = NonfictionCategory(intake.explicit_category.lower()) except ValueError: pass # Select framework framework = select_framework( purpose=best_purpose, category=category, ) # Build reasoning reasons = [] if content_result: reasons.append(f"content: {content_result.reasoning}") reasons.append(f"concept: {classifier_result.reasoning}") return IntakeResult( purpose=best_purpose, confidence=confidence, category=category, framework=framework, reasoning=" | ".join(reasons), source=source, content_analysis=content_result, all_signals=signals, ) def _process_explicit(self, intake: IntakeInput) -> IntakeResult: """Process when user provided explicit purpose.""" try: purpose = ReaderPurpose(intake.explicit_purpose.lower()) except ValueError: # Invalid purpose, fall back to classifier return self._process_auto(intake) # Select framework category = None if intake.explicit_category: try: category = NonfictionCategory(intake.explicit_category.lower()) except ValueError: pass framework = select_framework( purpose=purpose, category=category, user_preferred_framework=intake.explicit_framework, ) return IntakeResult( purpose=purpose, confidence=1.0, category=category, framework=framework, reasoning=f"Explicit user selection: {intake.explicit_purpose}", source="explicit", content_analysis=None, all_signals={"explicit": intake.explicit_purpose}, ) def _process_auto(self, intake: IntakeInput) -> IntakeResult: """Auto-classify from concept.""" result = self.classifier._keyword_classify( concept=intake.concept, target_audience=intake.target_audience, intended_outcome=intake.intended_outcome, ) return self._build_result_from_classification(intake, result, "classifier") def _build_result_from_classification( self, intake: IntakeInput, classifier_result, source: str, ) -> IntakeResult: """Build result from classification.""" purpose = classifier_result.purpose # Get category from input category = None if intake.explicit_category: try: category = NonfictionCategory(intake.explicit_category.lower()) except ValueError: pass framework = select_framework( purpose=purpose, category=category, ) return IntakeResult( purpose=purpose, confidence=classifier_result.confidence, category=category, framework=framework, reasoning=classifier_result.reasoning, source=source, ) def _need_more_info(self, intake: IntakeInput) -> IntakeResult: """Return questions needed when input is ambiguous.""" # This would be used in conversational mode # For now, default to UNDERSTAND with low confidence return IntakeResult( purpose=ReaderPurpose.UNDERSTAND, confidence=0.3, category=None, framework=select_framework(purpose=ReaderPurpose.UNDERSTAND), reasoning="Input ambiguous - defaulted to UNDERSTAND. Use --purpose flag for explicit selection.", source="intake", content_analysis=None, all_signals={}, ) def get_questions(self, purpose: Optional[ReaderPurpose] = None) -> list[str]: """Get clarifying questions for a purpose. Args: purpose: The purpose to get questions for, or None for general Returns: List of questions to ask """ if purpose and purpose in self.PURPOSE_QUESTIONS: return self.PURPOSE_QUESTIONS[purpose] # Return all questions questions = [] for q_list in self.PURPOSE_QUESTIONS.values(): questions.extend(q_list) return questions[:5] # Limit to 5 def get_available_purposes(self) -> list[str]: """Get list of available purpose options for menu.""" return [p.value for p in ReaderPurpose] def get_available_categories(self) -> list[str]: """Get list of available category options.""" return [c.value for c in NonfictionCategory] # Convenience function async def determine_intake( concept: str = "", purpose: Optional[str] = None, category: Optional[str] = None, framework: Optional[str] = None, target_audience: str = "", intended_outcome: str = "", mode: str = "auto", ) -> IntakeResult: """Convenience function to process intake. Args: concept: Book concept/title purpose: Explicit purpose (overrides classification) category: Explicit category framework: Explicit framework target_audience: Target audience description intended_outcome: What the book achieves mode: "auto", "conversational", or "explicit" Returns: IntakeResult with purpose, framework, etc. """ intake = IntakeInput( explicit_purpose=purpose, explicit_category=category, explicit_framework=framework, concept=concept, target_audience=target_audience, intended_outcome=intended_outcome, ) agent = IntakeAgent() mode_enum = IntakeMode(mode) return await agent.process(intake, mode_enum)