feat: Issue #21 - Purpose-Based Critique Criteria

Created critique_criteria.py with purpose-specific evaluation:

TUTORIAL:
- Clarity (30%), Completeness (25%), Progressiveness (20%)
- Actionability (15%), Error Prevention (10%)

EXPLAINER:
- Analogy Quality (25%), Examples (25%), Mental Model (20%)
- Depth (15%), Misconceptions (15%)

TRANSFORMATION:
- Emotional Honesty (30%), Relatability (25%), Hope (20%)
- Specificity (15%), Actionability (10%)

DECIDE:
- Evidence Quality (30%), Balance (25%), Credibility (20%)
- Clarity (15%), Completeness (10%)

REFERENCE:
- Accuracy (35%), Completeness (30%), Organization (20%)
- Examples (15%)

INSPIRED:
- Emotional Impact (30%), Vision (25%), Authenticity (25%)
- Story Quality (20%)

Each criterion has:
- Weight
- Description
- Evaluation questions

Each criteria set has:
- Overall questions
- Pass threshold

Functions:
- get_critique_criteria(purpose) → CritiqueCriteriaSet
- evaluate_chapter(content, purpose) → criteria summary (scores to be filled in by an LLM)
- get_evaluation_prompt(content, purpose) → LLM prompt
- list_all_criteria() → overview
2026-03-13 22:30:26 +00:00
parent be64111515
commit d3bc43daca
2 changed files with 540 additions and 0 deletions
@@ -4,6 +4,7 @@ Key components:
 - classifier: Classifies user input into ReaderPurpose
 - intake: Conversational intake agent for high-fidelity intent
 - content_infer: Infers purpose from existing blog/content
+- critique_criteria: Purpose-specific evaluation criteria
 """

 from opus_orchestrator.nonfiction.classifier import (
@@ -24,6 +25,14 @@ from opus_orchestrator.nonfiction.content_infer import (
    ContentAnalysis,
    infer_purpose_from_content,
 )
+from opus_orchestrator.nonfiction.critique_criteria import (
+    CritiqueCriterion,
+    CritiqueCriteriaSet,
+    get_critique_criteria,
+    evaluate_chapter,
+    get_evaluation_prompt,
+    list_all_criteria,
+)

 __all__ = [
    # Classifier
@@ -41,4 +50,11 @@ __all__ = [
    "ContentPurposeInferer",
    "ContentAnalysis",
    "infer_purpose_from_content",
+    # Critique Criteria
+    "CritiqueCriterion",
+    "CritiqueCriteriaSet",
+    "get_critique_criteria",
+    "evaluate_chapter",
+    "get_evaluation_prompt",
+    "list_all_criteria",
 ]
@@ -0,0 +1,524 @@
+"""Purpose-Based Critique Criteria.
+
+Different evaluation criteria for different reader purposes.
+A tutorial is evaluated differently from a memoir.
+"""
+
+from dataclasses import dataclass
+from typing import Optional
+
+from opus_orchestrator.nonfiction import ReaderPurpose
+
+
+@dataclass
+class CritiqueCriterion:
+    """A single weighted criterion used when evaluating a chapter.
+
+    Instances are grouped into a ``CritiqueCriteriaSet``. Nothing here
+    validates the field values.
+    """
+    name: str  # short label, shown in the evaluation prompt
+    description: str  # one-line statement of what the criterion checks
+    weight: float  # 0.0 to 1.0; rendered as a percentage in the prompt
+    questions: list[str]  # evaluation questions associated with this criterion
+
+
+@dataclass
+class CritiqueCriteriaSet:
+    """Complete set of evaluation criteria for one reader purpose.
+
+    Bundles the weighted criteria with chapter-level questions and the
+    pass threshold that is reported alongside them.
+    """
+    purpose: ReaderPurpose  # the purpose this set is written for
+    criteria: list[CritiqueCriterion]  # weighted criteria, in the order they are listed
+    overall_questions: list[str]  # whole-chapter questions included in the prompt
+    pass_threshold: float = 0.7  # fraction; the sets in this module use 0.70 to 0.85
+
+
+# =============================================================================
+# CRITERIA BY PURPOSE
+# =============================================================================
+
+# Criteria for hands-on tutorials (ReaderPurpose.LEARN_HANDS_ON).
+# Five criteria whose weights sum to 1.0; pass threshold 0.70.
+TUTORIAL_CRITERIA = CritiqueCriteriaSet(
+    purpose=ReaderPurpose.LEARN_HANDS_ON,
+    criteria=[
+        CritiqueCriterion(
+            name="Clarity",
+            description="Can a beginner understand each step?",
+            weight=0.30,
+            questions=[
+                "Is each step explained clearly enough for a beginner?",
+                "Are there any ambiguous instructions?",
+                "Could someone with no prior knowledge complete this?",
+            ],
+        ),
+        CritiqueCriterion(
+            name="Completeness",
+            description="Are all prerequisites and steps covered?",
+            weight=0.25,
+            questions=[
+                "Are all prerequisites listed?",
+                "Is anything missing that the reader would need?",
+                "Are there gaps in the instructions?",
+            ],
+        ),
+        CritiqueCriterion(
+            name="Progressiveness",
+            description="Does complexity build gradually?",
+            weight=0.20,
+            questions=[
+                "Does each step build on the previous?",
+                "Is there too much complexity too soon?",
+                "Are advanced topics introduced appropriately?",
+            ],
+        ),
+        CritiqueCriterion(
+            name="Actionability",
+            description="Can reader immediately apply what they learned?",
+            weight=0.15,
+            questions=[
+                "Can the reader try this right now?",
+                "Are there exercises or practice opportunities?",
+                "Is there enough hand-holding for beginners?",
+            ],
+        ),
+        CritiqueCriterion(
+            name="Error Prevention",
+            description="Are common mistakes addressed?",
+            weight=0.10,
+            questions=[
+                "Are common pitfalls mentioned?",
+                "Is there troubleshooting guidance?",
+                "Does the writer anticipate reader mistakes?",
+            ],
+        ),
+    ],
+    overall_questions=[
+        "Can a complete beginner actually complete this tutorial?",
+        "Are the steps in the right order?",
+        "Is the pacing appropriate for learning?",
+    ],
+    pass_threshold=0.70,
+)
+
+
+# Criteria for explanatory content (ReaderPurpose.UNDERSTAND).
+# Five criteria whose weights sum to 1.0; pass threshold 0.70.
+# get_critique_criteria() also returns this set for any purpose that has
+# no entry in the registry.
+EXPLAINER_CRITERIA = CritiqueCriteriaSet(
+    purpose=ReaderPurpose.UNDERSTAND,
+    criteria=[
+        CritiqueCriterion(
+            name="Analogy Quality",
+            description="Do analogies make complex ideas click?",
+            weight=0.25,
+            questions=[
+                "Are there vivid, memorable analogies?",
+                "Do the analogies actually clarify the concept?",
+                "Are there enough analogies for different learning styles?",
+            ],
+        ),
+        CritiqueCriterion(
+            name="Examples",
+            description="Are there diverse, clear examples?",
+            weight=0.25,
+            questions=[
+                "Are there multiple examples?",
+                "Do examples cover different scenarios?",
+                "Are the examples relevant to the target audience?",
+            ],
+        ),
+        CritiqueCriterion(
+            name="Mental Model",
+            description="Does reader leave with a useful framework?",
+            weight=0.20,
+            questions=[
+                "Can the reader explain this to someone else?",
+                "Do they have a framework for thinking about this?",
+                "Is there a key insight they'll remember?",
+            ],
+        ),
+        CritiqueCriterion(
+            name="Depth",
+            description="Is there surface AND depth?",
+            weight=0.15,
+            questions=[
+                "Does this go beyond the obvious?",
+                "Is there nuance and complexity acknowledged?",
+                "Can beginners and intermediates both learn?",
+            ],
+        ),
+        CritiqueCriterion(
+            name="Misconceptions",
+            description="Are wrong views addressed?",
+            weight=0.15,
+            questions=[
+                "Does the writer address common misconceptions?",
+                "Is it clear what this is NOT?",
+                "Are there counterexamples?",
+            ],
+        ),
+    ],
+    overall_questions=[
+        "Would a reader understand this deeply after reading?",
+        "Do the analogies make sense?",
+        "Is there enough depth without being overwhelming?",
+    ],
+    pass_threshold=0.70,
+)
+
+
+# Criteria for personal-change content (ReaderPurpose.TRANSFORM).
+# Five criteria whose weights sum to 1.0; pass threshold 0.70.
+TRANSFORMATION_CRITERIA = CritiqueCriteriaSet(
+    purpose=ReaderPurpose.TRANSFORM,
+    criteria=[
+        CritiqueCriterion(
+            name="Emotional Honesty",
+            description="Does it include real struggles, not just success?",
+            weight=0.30,
+            questions=[
+                "Are the hard parts included?",
+                "Does it acknowledge that transformation is hard?",
+                "Is there vulnerability, not just triumph?",
+            ],
+        ),
+        CritiqueCriterion(
+            name="Relatability",
+            description="Would the target reader see themselves?",
+            weight=0.25,
+            questions=[
+                "Would someone in the target situation recognize themselves?",
+                "Is the struggle described vividly enough?",
+                "Does the reader feel understood?",
+            ],
+        ),
+        CritiqueCriterion(
+            name="Hope",
+            description="Does it build hope without false promises?",
+            weight=0.20,
+            questions=[
+                "Does this inspire hope?",
+                "Is the hope realistic?",
+                "Would a cynic roll their eyes?",
+            ],
+        ),
+        CritiqueCriterion(
+            name="Specificity",
+            description="Are there concrete details (names, moments)?",
+            weight=0.15,
+            questions=[
+                "Are there specific, vivid details?",
+                "Does it avoid generic advice?",
+                "Are there real stories, not just concepts?",
+            ],
+        ),
+        CritiqueCriterion(
+            name="Actionability",
+            description="Are there specific steps to start?",
+            weight=0.10,
+            questions=[
+                "Does the reader know how to start?",
+                "Are the first steps concrete?",
+                "Is there something they can do tomorrow?",
+            ],
+        ),
+    ],
+    overall_questions=[
+        "Would this actually inspire someone to change?",
+        "Is it emotionally honest?",
+        "Does it feel authentic?",
+    ],
+    pass_threshold=0.70,
+)
+
+
+# Criteria for decision-support content (ReaderPurpose.DECIDE).
+# Five criteria whose weights sum to 1.0; pass threshold 0.75, higher than
+# the 0.70 used by the tutorial, explainer, transformation and inspired sets.
+DECIDE_CRITERIA = CritiqueCriteriaSet(
+    purpose=ReaderPurpose.DECIDE,
+    criteria=[
+        CritiqueCriterion(
+            name="Evidence Quality",
+            description="Are claims backed by data/studies?",
+            weight=0.30,
+            questions=[
+                "Are there credible sources?",
+                "Is the evidence sufficient?",
+                "Are statistics used appropriately?",
+            ],
+        ),
+        CritiqueCriterion(
+            name="Balance",
+            description="Are counterarguments addressed fairly?",
+            weight=0.25,
+            questions=[
+                "Does the writer acknowledge other perspectives?",
+                "Is it biased or fair?",
+                "Are the tradeoffs explored?",
+            ],
+        ),
+        CritiqueCriterion(
+            name="Credibility",
+            description="Are sources trustworthy?",
+            weight=0.20,
+            questions=[
+                "Would an expert trust this?",
+                "Are sources credible?",
+                "Is there any misleading information?",
+            ],
+        ),
+        CritiqueCriterion(
+            name="Clarity",
+            description="Is the recommendation clear?",
+            weight=0.15,
+            questions=[
+                "Does the reader know what to decide?",
+                "Is the conclusion clear?",
+                "Is there ambiguity that could confuse?",
+            ],
+        ),
+        CritiqueCriterion(
+            name="Completeness",
+            description="Are all relevant factors considered?",
+            weight=0.10,
+            questions=[
+                "Is anything important missing?",
+                "Are all sides represented?",
+                "Would the reader need additional research?",
+            ],
+        ),
+    ],
+    overall_questions=[
+        "Can the reader make an informed decision after reading?",
+        "Is the evidence convincing?",
+        "Are all perspectives represented fairly?",
+    ],
+    pass_threshold=0.75,
+)
+
+
+# Criteria for reference material (ReaderPurpose.REFERENCE).
+# Four criteria whose weights sum to 1.0; pass threshold 0.85, the highest
+# of the sets defined in this module.
+REFERENCE_CRITERIA = CritiqueCriteriaSet(
+    purpose=ReaderPurpose.REFERENCE,
+    criteria=[
+        CritiqueCriterion(
+            name="Accuracy",
+            description="Is all information correct?",
+            weight=0.35,
+            questions=[
+                "Is everything factually correct?",
+                "Are there any errors?",
+                "Would an expert approve this?",
+            ],
+        ),
+        CritiqueCriterion(
+            name="Completeness",
+            description="Is nothing important missing?",
+            weight=0.30,
+            questions=[
+                "Is this comprehensive?",
+                "Are there obvious gaps?",
+                "Would someone need another source?",
+            ],
+        ),
+        CritiqueCriterion(
+            name="Organization",
+            description="Is it easy to find things?",
+            weight=0.20,
+            questions=[
+                "Is the structure logical?",
+                "Can you find what you need quickly?",
+                "Is there a good index/table of contents?",
+            ],
+        ),
+        CritiqueCriterion(
+            name="Examples",
+            description="Are there enough examples?",
+            weight=0.15,
+            questions=[
+                "Is every concept illustrated with an example?",
+                "Are the examples clear?",
+                "Do they cover edge cases?",
+            ],
+        ),
+    ],
+    overall_questions=[
+        "Is this a definitive reference?",
+        "Would someone need another book after this?",
+        "Is everything accurate and complete?",
+    ],
+    pass_threshold=0.85,
+)
+
+
+# Criteria for inspirational content (ReaderPurpose.BE_INSPIRED).
+# Four criteria whose weights sum to 1.0; pass threshold 0.70.
+INSPIRED_CRITERIA = CritiqueCriteriaSet(
+    purpose=ReaderPurpose.BE_INSPIRED,
+    criteria=[
+        CritiqueCriterion(
+            name="Emotional Impact",
+            description="Does it move the reader emotionally?",
+            weight=0.30,
+            questions=[
+                "Does this inspire?",
+                "Would readers feel something?",
+                "Is there passion and authenticity?",
+            ],
+        ),
+        CritiqueCriterion(
+            name="Vision",
+            description="Is there a vivid picture of what's possible?",
+            weight=0.25,
+            questions=[
+                "Does it paint a compelling vision?",
+                "Can the reader see themselves in the story?",
+                "Is there something to aspire to?",
+            ],
+        ),
+        CritiqueCriterion(
+            name="Authenticity",
+            description="Does it feel real, not manufactured?",
+            weight=0.25,
+            questions=[
+                "Is this genuine or performative?",
+                "Does the writer have credibility?",
+                "Would a cynic be moved?",
+            ],
+        ),
+        CritiqueCriterion(
+            name="Story Quality",
+            description="Is it a compelling narrative?",
+            weight=0.20,
+            questions=[
+                "Is it a good story?",
+                "Are there memorable moments?",
+                "Does it have a satisfying arc?",
+            ],
+        ),
+    ],
+    overall_questions=[
+        "Would this inspire someone to act?",
+        "Is it emotionally resonant?",
+        "Would this change someone's life?",
+    ],
+    pass_threshold=0.70,
+)
+
+
+# =============================================================================
+# REGISTRY
+# =============================================================================
+
+# Lookup table from reader purpose to its criteria set. Each key is read
+# from the set's own ``purpose`` field, so the table cannot drift from the
+# definitions it indexes.
+PURPOSE_CRITERIA = {
+    criteria_set.purpose: criteria_set
+    for criteria_set in (
+        TUTORIAL_CRITERIA,
+        EXPLAINER_CRITERIA,
+        TRANSFORMATION_CRITERIA,
+        DECIDE_CRITERIA,
+        REFERENCE_CRITERIA,
+        INSPIRED_CRITERIA,
+    )
+}
+
+
+# =============================================================================
+# FUNCTIONS
+# =============================================================================
+
+def get_critique_criteria(purpose: ReaderPurpose) -> CritiqueCriteriaSet:
+    """Look up the criteria set registered for ``purpose``.
+
+    Args:
+        purpose: The reader purpose to evaluate against.
+
+    Returns:
+        The matching entry from ``PURPOSE_CRITERIA``. A purpose with no
+        registered entry falls back to ``EXPLAINER_CRITERIA`` instead of
+        raising.
+    """
+    try:
+        return PURPOSE_CRITERIA[purpose]
+    except KeyError:
+        # Unregistered purposes are evaluated as explanatory content.
+        return EXPLAINER_CRITERIA
+
+
+def evaluate_chapter(
+    chapter_content: str,
+    purpose: ReaderPurpose,
+    chapter_number: int = 1,
+) -> dict:
+    """Build the evaluation scaffold for a chapter.
+
+    No scoring happens here yet: ``chapter_content`` and ``chapter_number``
+    are accepted but not read, and the result carries no scores.
+
+    Args:
+        chapter_content: The chapter text (currently unused).
+        purpose: The reader purpose; selects the criteria set.
+        chapter_number: Which chapter (currently unused).
+
+    Returns:
+        Dict holding the requested purpose value, the purpose value of the
+        criteria set actually selected, the name and weight of each
+        criterion, the overall questions, the pass threshold, and a note
+        that scores are to be supplied by an LLM.
+    """
+    selected = get_critique_criteria(purpose)
+
+    weighted_criteria = []
+    for criterion in selected.criteria:
+        weighted_criteria.append(
+            {"name": criterion.name, "weight": criterion.weight}
+        )
+
+    # TODO: expand to run the actual LLM evaluation and attach scores.
+    summary = {"purpose": purpose.value}
+    # Reported separately from "purpose" so a fallback selection is visible.
+    summary["criteria_set"] = selected.purpose.value
+    summary["criteria"] = weighted_criteria
+    summary["overall_questions"] = selected.overall_questions
+    summary["pass_threshold"] = selected.pass_threshold
+    summary["note"] = "LLM evaluation would fill in actual scores"
+    return summary
+
+
+def get_evaluation_prompt(
+    chapter_content: str,
+    purpose: ReaderPurpose,
+    max_chars: int = 2000,
+) -> str:
+    """Generate an LLM prompt for purpose-specific evaluation.
+
+    Args:
+        chapter_content: The chapter to evaluate.
+        purpose: The reader purpose; selects the criteria set via
+            ``get_critique_criteria``.
+        max_chars: Maximum number of chapter characters embedded in the
+            prompt. Longer chapters are cut at this length and marked
+            with a trailing ``...``.
+
+    Returns:
+        A prompt asking the LLM to score each criterion on a 0-10 scale
+        and to reply as JSON.
+
+    Raises:
+        ValueError: If ``max_chars`` is negative.
+    """
+    if max_chars < 0:
+        raise ValueError("max_chars must be non-negative")
+
+    criteria_set = get_critique_criteria(purpose)
+
+    criteria_text = "\n".join(
+        f"- **{c.name}** ({c.weight*100:.0f}%): {c.description}"
+        for c in criteria_set.criteria
+    )
+
+    questions_text = "\n".join(
+        f"- {q}"
+        for q in criteria_set.overall_questions
+    )
+
+    # Only flag truncation when text was actually dropped, so a short
+    # chapter does not look cut off to the evaluator.
+    excerpt = chapter_content[:max_chars]
+    if len(chapter_content) > max_chars:
+        excerpt += "..."
+
+    # Thresholds are stored as fractions, while the prompt scores on 0-10.
+    # Without this line the model has no basis for the "pass" field.
+    pass_mark = criteria_set.pass_threshold * 10
+
+    prompt = f"""Evaluate this chapter for a {purpose.value} book.
+
+CRITERIA (score each 0-10):
+{criteria_text}
+
+OVERALL QUESTIONS (answer these):
+{questions_text}
+
+PASS THRESHOLD: overall_score of at least {pass_mark:.1f} out of 10.
+
+Chapter to evaluate:
+---
+{excerpt}
+---
+
+Return your evaluation as JSON:
+{{
+    "scores": {{
+        "criterion_name": score,
+        ...
+    }},
+    "overall_score": 0-10,
+    "pass": true/false,
+    "strengths": ["..."],
+    "weaknesses": ["..."],
+    "recommendations": ["..."]
+}}
+"""
+    return prompt
+
+
+def list_all_criteria() -> dict:
+    """Summarise every registered criteria set.
+
+    Returns:
+        Dict keyed by each registered purpose's ``value``. Each entry holds
+        the set's ``purpose.name``, the number of criteria, the pass
+        threshold, and the criterion names in their listed order.
+    """
+    overview = {}
+    for registered_purpose, criteria_set in PURPOSE_CRITERIA.items():
+        criterion_names = [criterion.name for criterion in criteria_set.criteria]
+        overview[registered_purpose.value] = {
+            "name": criteria_set.purpose.name,
+            "criteria_count": len(criterion_names),
+            "pass_threshold": criteria_set.pass_threshold,
+            "criteria": criterion_names,
+        }
+    return overview