Files
opus-orchestrator-ai/opus_orchestrator/nonfiction/classifier.py
T
mrhavens 4b4addedf7 feat: Issue #18 - Purpose Classifier for Nonfiction
- Created opus_orchestrator/nonfiction/classifier.py
  - PurposeClassifier class with keyword-based classification
  - LLM-enhanced classification (optional)
  - ReaderPurpose enum (6 purposes)
  - ClassificationResult dataclass

- Keyword classification covers:
  - LEARN_HANDS_ON: how to, learn to, tutorial, skills, etc.
  - UNDERSTAND: understand, why, concept, mental model, etc.
  - TRANSFORM: change, become, improve, habits, etc.
  - DECIDE: decide, choose, compare, vs, analysis
  - REFERENCE: manual, handbook, comprehensive, API
  - BE_INSPIRED: inspire, story, journey, biography

- Tests pass for all 6 purposes with high confidence

This is the foundation for the entire nonfiction pipeline (Issue #18).
2026-03-13 20:15:20 +00:00

267 lines
10 KiB
Python

"""Purpose Classifier for Nonfiction Books.
Classifies user input into ReaderPurpose - why the reader will be reading this book.
This is the foundation for the entire nonfiction pipeline.
Usage:
from opus_orchestrator.nonfiction.classifier import PurposeClassifier, ReaderPurpose
classifier = PurposeClassifier()
result = await classifier.classify(
concept="Leadership for introverts",
target_audience="Introverted professionals who want to develop leadership skills",
intended_outcome="Learn to lead with quiet confidence"
)
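print(result.purpose) # e.g. ReaderPurpose.TRANSFORM
print(result.confidence) # e.g. 0.87
print(result.reasoning) # e.g. "Target audience wants 'develop' - indicates self-transformation"
(The values above are illustrative; the actual result depends on whether the keyword or the LLM path produced it.)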
"""
import json
import logging
import re
from dataclasses import dataclass
from enum import Enum
from typing import Optional
class ReaderPurpose(str, Enum):
    """The reason a reader picks up a given nonfiction book.

    Each member also subclasses ``str``, so a member compares equal to its
    lowercase value and can be looked up from it, e.g.
    ``ReaderPurpose("decide") is ReaderPurpose.DECIDE``.
    """

    # Reader wants to DO something specific.
    LEARN_HANDS_ON = "learn_hands_on"

    # Reader wants to GRASP a concept deeply.
    UNDERSTAND = "understand"

    # Reader wants to CHANGE themselves.
    TRANSFORM = "transform"

    # Reader wants to make an informed decision.
    DECIDE = "decide"

    # Reader wants to LOOK UP information.
    REFERENCE = "reference"

    # Reader wants to feel motivated.
    BE_INSPIRED = "be_inspired"
@dataclass
class ClassificationResult:
    """Outcome of classifying a book idea by reader purpose."""

    # The purpose selected for the input.
    purpose: ReaderPurpose

    # Score attached to ``purpose``; callers compare it against thresholds
    # and against other candidate results (the higher one wins).
    confidence: float

    # Short human-readable explanation of how ``purpose`` was chosen.
    reasoning: str

    # Optional list of other candidate purposes; defaults to None.
    # NOTE(review): element type is not shown by the code visible here.
    alternative_purposes: Optional[list] = None
class PurposeClassifier:
    """Classify a nonfiction book idea into a ``ReaderPurpose``.

    A fast keyword pass runs first. When its confidence is below 0.8 and an
    LLM client was supplied, the LLM is asked as well and the more confident
    of the two results is returned.
    """

    # Phrases that count as evidence FOR a purpose (one point per phrase found).
    PURPOSE_KEYWORDS = {
        ReaderPurpose.LEARN_HANDS_ON: [
            "how to", "how-to", "learn to", "master", "step by step",
            "beginner's guide", "tutorial", "practical", "hands-on",
            "skills", "do it yourself", "build", "create", "make",
            "implement", "develop skills", "learn skills", "course",
            "workshop", "training", "teach yourself", "guide to",
            "becoming", "learn the basics", "fundamentals",
        ],
        ReaderPurpose.UNDERSTAND: [
            "understand", "why", "how it works", "explain", "concept",
            "mental model", "deep dive", "exploration", "the nature of",
            "the truth about", "what is", "meaning", "philosophy",
            "theory", "framework", "principles", "inside story",
            "real story", "hidden", "secret", "science of",
            "psychology of", "the way", "essence", "sapiens",
        ],
        ReaderPurpose.TRANSFORM: [
            "transform", "change", "become", "develop", "improve",
            "better", "overcome", "heal", "grow", "personal growth",
            "self-improvement", "self help", "empower", "breakthrough",
            "awakening", "journey", "awaken", "reinvent",
            "reclaim", "freedom", "love yourself", "healing",
            "recovery", "manifest", "attract", "abundance",
            "habits", "routines", "mindset", "productivity",
        ],
        ReaderPurpose.DECIDE: [
            "decide", "choose", "compare", "vs", "versus",
            "which is better", "pros and cons", "trade-off", "decision",
            "guide", "strategies", "strategy", "choosing", "selecting",
            "investment", "where to put", "how to allocate", "prioritize",
            "business case", "roi", "worth it", "should i", "analysis",
        ],
        ReaderPurpose.REFERENCE: [
            "reference", "manual", "handbook", "dictionary", "encyclopedia",
            "comprehensive", "complete guide", "all about", "definitive",
            "bible", "catalog", "directory", "index", "lookup",
            "specification", "documentation", "api", "technical",
            "architecture", "system design", "best practices",
        ],
        ReaderPurpose.BE_INSPIRED: [
            "inspire", "motivational", "biography", "memoir", "story",
            "life", "journey", "triumph", "overcoming", "against all odds",
            "unstoppable", "dream", "vision", "legacy", "purpose",
            "calling", "warrior", "hero", "legend", "icon",
        ],
    }

    # Phrases that count as evidence AGAINST a purpose (minus one point each,
    # never below zero).
    PURPOSE_NEGATIONS = {
        ReaderPurpose.LEARN_HANDS_ON: ["understand", "explain", "why", "concept"],
        ReaderPurpose.TRANSFORM: ["reference", "manual", "tutorial"],
        ReaderPurpose.UNDERSTAND: ["how to", "step by step", "tutorial"],
    }

    # Labels accepted from the LLM, including a few lenient spellings.
    _LLM_PURPOSE_ALIASES = {
        "learn_hands_on": ReaderPurpose.LEARN_HANDS_ON,
        "learn": ReaderPurpose.LEARN_HANDS_ON,
        "understand": ReaderPurpose.UNDERSTAND,
        "transform": ReaderPurpose.TRANSFORM,
        "decide": ReaderPurpose.DECIDE,
        "reference": ReaderPurpose.REFERENCE,
        "be_inspired": ReaderPurpose.BE_INSPIRED,
        "be inspired": ReaderPurpose.BE_INSPIRED,
    }

    def __init__(self, llm_client=None):
        """Create a classifier.

        Args:
            llm_client: Optional client used for the LLM fallback. It must
                expose an awaitable ``complete_async(system_prompt=...,
                user_prompt=..., temperature=..., max_tokens=...)`` whose
                result is the response text. When omitted, only keyword
                classification is performed.
        """
        self.llm_client = llm_client

    async def classify(
        self,
        concept: str,
        target_audience: str = "",
        intended_outcome: str = "",
    ) -> ClassificationResult:
        """Classify user input into ReaderPurpose.

        Args:
            concept: Book concept or title.
            target_audience: Optional description of the intended readers.
            intended_outcome: Optional description of what readers should gain.

        Returns:
            The keyword result when it is confident (>= 0.8), when no LLM
            client is configured, or when the LLM call fails; otherwise
            whichever of the keyword and LLM results has higher confidence.
        """
        keyword_result = self._keyword_classify(concept, target_audience, intended_outcome)
        if keyword_result.confidence >= 0.8 or not self.llm_client:
            return keyword_result

        try:
            llm_result = await self._llm_classify(concept, target_audience, intended_outcome)
        except Exception:
            # The LLM is a best-effort enhancement: never fail classification
            # because of it, but leave a trace so outages are diagnosable.
            logging.getLogger(__name__).warning(
                "LLM purpose classification failed; using keyword result",
                exc_info=True,
            )
            return keyword_result

        if llm_result.confidence > keyword_result.confidence:
            return llm_result
        return keyword_result

    @staticmethod
    def _keyword_in_text(keyword: str, text: str) -> bool:
        """Return True if *keyword* occurs in *text* starting at a word boundary.

        *text* is expected to be lowercased already. Anchoring only the start
        lets "transform" match "transformation" while keeping short keywords
        such as "api" or "vs" from firing inside "rapid" or "tvs".
        """
        pattern = r"(?<!\w)" + re.escape(keyword.lower())
        return re.search(pattern, text) is not None

    def _keyword_classify(
        self,
        concept: str,
        target_audience: str,
        intended_outcome: str,
    ) -> ClassificationResult:
        """Fast keyword-based classification.

        Each purpose scores one point per matching keyword and loses one point
        (floored at zero) per matching negation. The highest score wins; ties
        go to the purpose declared first in ``ReaderPurpose``.

        Returns:
            A result whose confidence is the winner's share of all points,
            capped at 0.95, or UNDERSTAND at 0.3 when nothing scored.
        """
        text = f"{concept} {target_audience} {intended_outcome}".lower()

        matched = {
            purpose: [kw for kw in keywords if self._keyword_in_text(kw, text)]
            for purpose, keywords in self.PURPOSE_KEYWORDS.items()
        }
        # Seed from the enum so every purpose has a score and tie-breaking
        # follows declaration order.
        scores = {purpose: len(matched.get(purpose, [])) for purpose in ReaderPurpose}

        for purpose, negations in self.PURPOSE_NEGATIONS.items():
            penalty = sum(1 for neg in negations if self._keyword_in_text(neg, text))
            scores[purpose] = max(0, scores[purpose] - penalty)

        # max() keeps the first maximal entry, i.e. enum declaration order.
        top_purpose, top_score = max(scores.items(), key=lambda item: item[1])
        if top_score == 0:
            return ClassificationResult(
                purpose=ReaderPurpose.UNDERSTAND,
                confidence=0.3,
                reasoning="No clear purpose keywords found, defaulting to UNDERSTAND",
            )

        # Share of all evidence held by the winner: a 2-vs-2 tie is 0.5, not
        # "certain". total_score >= top_score > 0, so the division is safe.
        total_score = sum(scores.values())
        confidence = min(0.95, top_score / total_score)

        matched_keywords = matched.get(top_purpose, [])
        return ClassificationResult(
            purpose=top_purpose,
            confidence=confidence,
            reasoning=f"Keywords matched: {', '.join(matched_keywords[:5])}",
        )

    async def _llm_classify(
        self,
        concept: str,
        target_audience: str,
        intended_outcome: str,
    ) -> ClassificationResult:
        """LLM-based classification.

        Builds a prompt from the three inputs, sends it through
        ``self.llm_client.complete_async`` and parses the reply.

        Raises:
            Whatever the client raises; ``classify`` handles that.
        """
        prompt = f"""Analyze this book concept and determine WHY a reader would read this book.
## Input
- Concept/Title: {concept}
- Target Audience: {target_audience or '(not specified)'}
- Intended Outcome: {intended_outcome or '(not specified)'}
## Options
1. LEARN_HANDS_ON: Reader wants to DO something specific
2. UNDERSTAND: Reader wants to GRASP a concept deeply
3. TRANSFORM: Reader wants to CHANGE themselves
4. DECIDE: Reader wants to make an informed decision
5. REFERENCE: Reader wants to LOOK UP information
6. BE_INSPIRED: Reader wants to feel motivated
## Output Format (JSON only)
{{
"purpose": "one of: learn_hands_on, understand, transform, decide, reference, be_inspired",
"confidence": 0.0 to 1.0,
"reasoning": "1-2 sentences explaining why"
}}
Analyze:"""
        result = await self.llm_client.complete_async(
            system_prompt="You are a book categorization system. Return ONLY valid JSON.",
            user_prompt=prompt,
            temperature=0.3,
            max_tokens=500,
        )
        return self._parse_llm_result(result)

    def _parse_llm_result(self, result: str) -> ClassificationResult:
        """Parse LLM response.

        Accepts a JSON object, either bare or inside a Markdown code fence,
        with ``purpose``, ``confidence`` and ``reasoning`` keys.

        Returns:
            The parsed result with confidence clamped to [0.0, 1.0]. Any
            malformed reply (no JSON, non-object JSON, non-numeric confidence,
            unknown or missing purpose) yields UNDERSTAND at confidence 0.3,
            so it can never outrank a real keyword match. Never raises.
        """
        failure = ClassificationResult(
            purpose=ReaderPurpose.UNDERSTAND,
            confidence=0.3,
            reasoning="LLM parse failed, defaulting to UNDERSTAND",
        )

        try:
            if "```json" in result:
                json_str = result.split("```json")[1].split("```")[0]
            elif "```" in result:
                json_str = result.split("```")[1].split("```")[0]
            else:
                start, end = result.find("{"), result.rfind("}") + 1
                if start < 0 or end <= start:
                    raise ValueError("No JSON found")
                json_str = result[start:end]

            data = json.loads(json_str)
            if not isinstance(data, dict):
                raise ValueError("LLM JSON is not an object")
            confidence = float(data.get("confidence", 0.7))
        except (json.JSONDecodeError, TypeError, ValueError):
            # TypeError covers a None/non-string response and a null confidence.
            return failure

        raw_purpose = data.get("purpose", "")
        purpose_key = raw_purpose.strip().lower() if isinstance(raw_purpose, str) else ""
        purpose = self._LLM_PURPOSE_ALIASES.get(purpose_key)
        if purpose is None:
            # An unrecognised label is not evidence for UNDERSTAND; do not
            # let it inherit the LLM's self-reported confidence.
            return failure

        return ClassificationResult(
            purpose=purpose,
            # Guard against replies such as 87 (percent) or negative values.
            confidence=min(1.0, max(0.0, confidence)),
            reasoning=str(data.get("reasoning") or "LLM classification"),
        )
async def classify_purpose(
    concept: str,
    target_audience: str = "",
    intended_outcome: str = "",
    llm_client=None,
) -> ClassificationResult:
    """Classify a book idea in one call, without managing a classifier.

    Builds a throwaway ``PurposeClassifier`` around ``llm_client`` and
    forwards the three text inputs to its ``classify`` coroutine.

    Args:
        concept: Book concept or title.
        target_audience: Optional description of the intended readers.
        intended_outcome: Optional description of what readers should gain.
        llm_client: Optional client handed to ``PurposeClassifier``.

    Returns:
        The ``ClassificationResult`` produced by ``PurposeClassifier.classify``.
    """
    one_shot = PurposeClassifier(llm_client)
    verdict = await one_shot.classify(concept, target_audience, intended_outcome)
    return verdict