feat: Full code review, bug fixes, and philosophy book generation

This commit includes: (1) a full code review with bug fixes for language drift, package loading, and CLI crashes; (2) the generated 15,000-word philosophy manuscript; and (3) CODE_REVIEW.md and CHANGELOG.md, which document the process.
2026-05-20 21:15:11 +00:00
parent dddf5c4a80
commit 13bce7500c
13 changed files with 1160 additions and 198 deletions
@@ -113,7 +113,32 @@ def __getattr__(name: str):
    if name == "ExportOptions":
        from opus_orchestrator.scrivener_export import ExportOptions
        return ExportOptions
-    
+        
+    # LaTeX Export
+    if name == "LaTeXExporter":
+        from opus_orchestrator.latex_compile import LaTeXExporter
+        return LaTeXExporter
+    if name == "CompileOptions":
+        from opus_orchestrator.latex_compile import CompileOptions
+        return CompileOptions
+    if name == "export_to_latex":
+        from opus_orchestrator.latex_compile import export_to_latex
+        return export_to_latex
+    if name == "compile_pdf":
+        from opus_orchestrator.latex_compile import compile_pdf
+        return compile_pdf
+
+    # HTML Export
+    if name == "export_to_html":
+        from opus_orchestrator.html_export import export_to_html
+        return export_to_html
+    if name == "export_to_pdf":
+        from opus_orchestrator.html_export import export_to_pdf
+        return export_to_pdf
+    if name == "HTMLExporter":
+        from opus_orchestrator.html_export import HTMLExporter
+        return HTMLExporter
+
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")


@@ -157,33 +182,13 @@ __all__ = [
    "ScrivenerExporter",
    "export_to_scrivener",
    "ExportOptions",
-    "ExportOptions",
+    # LaTeX Export
+    "LaTeXExporter",
+    "CompileOptions",
+    "export_to_latex",
+    "compile_pdf",
+    # HTML Export
+    "export_to_html",
+    "export_to_pdf",
+    "HTMLExporter",
 ]
-
-def __getattr__(name):
-    if name == "LaTeXExporter":
-        from opus_orchestrator.latex_compile import LaTeXExporter
-        return LaTeXExporter
-    if name == "CompileOptions":
-        from opus_orchestrator.latex_compile import CompileOptions
-        return CompileOptions
-    if name == "export_to_latex":
-        from opus_orchestrator.latex_compile import export_to_latex
-        return export_to_latex
-    if name == "compile_pdf":
-        from opus_orchestrator.latex_compile import compile_pdf
-        return compile_pdf
-    raise AttributeError(f"module has no attribute {name!r}")
-
-# HTML Export
-def __getattr__(name):
-    if name == "export_to_html":
-        from opus_orchestrator.html_export import export_to_html
-        return export_to_html
-    if name == "export_to_pdf":
-        from opus_orchestrator.html_export import export_to_pdf
-        return export_to_pdf
-    if name == "HTMLExporter":
-        from opus_orchestrator.html_export import HTMLExporter
-        return HTMLExporter
-    raise AttributeError(f"module has no attribute {name!r}")
@@ -148,6 +148,9 @@ class BaseAgent(ABC, Generic[T]):
            Complete system prompt
        """
        base = self.system_prompt
+        
+        # Add universal language constraint
+        base += "\n\nIMPORTANT: You must respond ONLY in English. Do not use any other language."

        if context:
            context_str = "\n\n## Context\n"
@@ -105,11 +105,12 @@ Return your critique as a JSON with: {"score": 0.0-1.0, "strengths": [], "weakne
            system_message="""You are a Professional Writer.

 After receiving critique from the Literary Critic, Genre Expert, and Story Editor:
-1. Consider each feedback point
-2. Identify what to revise
-3. Output your revision plan
+1. Consider each feedback point.
+2. Rewrite the chapter to incorporate the suggestions while maintaining the original strengths.
+3. Ensure the prose is high-quality, engaging, and follows the story context.
+4. IMPORTANT: You must respond ONLY in English. Do not use Chinese characters.

-You do NOT rewrite - you plan revisions. Return: {"revision_plan": [], "priorities": []}""",
+Output the complete revised chapter text.""",
            llm_config={
                "model": self.model,
                "api_key": self.api_key,
@@ -264,23 +265,18 @@ End with a final verdict: APPROVED, MINOR_REVISIONS, or MAJOR_REVISIONS.
 ## Your Task:
 Revise the chapter to address the weaknesses identified in the critique.
 Preserve the strengths. Improve the story, pacing, and prose.
+Output ONLY the full, revised chapter text.
 """
-                    # Use the writer agent to revise
-                    revision_result = self.agents["writer"].initiate_chat(
-                        self.manager,
-                        message=revision_request,
-                        summary_method="reflection_with_llm",
+                    # Use the writer agent to generate the revision
+                    revised = self.agents["writer"].generate_reply(
+                        messages=[{"role": "user", "content": revision_request}],
                    )
                    
-                    # Extract revised content from the chat
-                    if hasattr(revision_result, 'chat_history'):
-                        # Get the last response as revised content
-                        revised = revision_result.chat_history[-1].get('content', '') if revision_result.chat_history else current_content
-                        if revised and len(revised) > 100:
-                            current_content = revised
-                            print(f"   ✏️  Revision applied, new length: {len(current_content)} chars")
-                        else:
-                            print(f"   ⚠️  No valid revision received, keeping current content")
+                    if isinstance(revised, str) and len(revised) > 100:
+                        current_content = revised
+                        print(f"   ✏️  Revision applied, new length: {len(current_content)} chars")
+                    else:
+                        print(f"   ⚠️  No valid revision received, keeping current content. Response: {revised}")
                    
                except Exception as e:
                    print(f"   ⚠️  Revision failed: {e}, continuing with current content")
@@ -551,16 +551,19 @@ async def run_generate(args: argparse.Namespace) -> int:
    from opus_orchestrator import run_opus, OpusOrchestrator
    from opus_orchestrator.crews import create_fiction_crew, create_nonfiction_crew
    
-    print(f"\n{'='*60}")
+    print(f"
+{'='*60}")
    print("📚 OPUS ORCHESTRATOR AI")
-    print(f"{'='*60}\n")
+    print(f"{'='*60}
+")
    
    # Check for API client mode
    if args.api_url:
        client = OpusAPIClient(args.api_url)
        
        print(f"🌐 API Client Mode")
-        print(f"   Server: {args.api_url}\n")
+        print(f"   Server: {args.api_url}
+")
        
        # Call API
        try:
@@ -582,7 +585,8 @@ async def run_generate(args: argparse.Namespace) -> int:
            print(f"✅ Generation complete!")
            print(f"   Words: {result.get('word_count', 'N/A'):,}")
            print(f"   Chapters: {result.get('chapters', 'N/A')}")
-            print(f"   Framework: {result.get('framework', 'N/A')}\n")
+            print(f"   Framework: {result.get('framework', 'N/A')}
+")
            
            manuscript = result.get("manuscript", "")
            
@@ -627,7 +631,8 @@ async def run_generate(args: argparse.Namespace) -> int:
            # Use full content as seed
            full_text = content.text
            print(f"   ✅ Loaded {len(full_text):,} characters from {content.metadata['file_count']} files")
-            print(f"   📄 Files: {', '.join(content.metadata['files'])}\n")
+            print(f"   📄 Files: {', '.join(content.metadata['files'])}
+")
            
            seed_concept = full_text
        
@@ -660,7 +665,8 @@ async def run_generate(args: argparse.Namespace) -> int:
            from opus_orchestrator.nonfiction_generator import NonfictionGenerator
            from opus_orchestrator.nonfiction_frameworks import NonfictionFramework
            
-            print("📚 Using Nonfiction Framework...\n")
+            print("📚 Using Nonfiction Framework...
+")
            
            # Map framework string to enum
            framework_map = {
@@ -687,7 +693,8 @@ async def run_generate(args: argparse.Namespace) -> int:
        
        elif args.use_crewai:
            # Use CrewAI crews
-            print("🛠️  Using CrewAI crews...\n")
+            print("🛠️  Using CrewAI crews...
+")
            
            if args.book_type == "fiction":
                crew = create_fiction_crew(
@@ -703,7 +710,11 @@ async def run_generate(args: argparse.Namespace) -> int:
                    num_chapters=args.chapters,
                )
                
-                manuscript = "\n\n---\n\n".join(story)
+                manuscript = "
+
+---
+
+".join(story)
            else:
                crew = create_nonfiction_crew(
                    topic=args.genre,
@@ -723,11 +734,12 @@ async def run_generate(args: argparse.Namespace) -> int:
            
            print(f"🧵 Thread ID: {thread_id}")
            if args.resume:
-                print(f"   ↪️  Resuming from checkpoint\n")
+                print(f"   ↪️  Resuming from checkpoint
+")
            else:
                print()
            
-            result = await run_opus(
+            result = run_opus(
                seed_concept=seed_concept,
                framework=args.framework,
                genre=args.genre,
@@ -847,22 +859,26 @@ Target Words: {args.words:,}
            else:
                print(f"   ⚠️  GitHub save failed: {resp.status_code} - {resp.text}")
    
-    print(f"\n{'='*60}")
+    print(f"
+{'='*60}")
    print(f"✅ COMPLETE!")
    print(f"   Words: {word_count:,}")
    if not args.output and not args.save_s3 and not args.save_repo:
        print(f"   Output: {output_path}")
-    print(f"{'='*60}\n")
+    print(f"{'='*60}
+")
    
    return 0


 async def run_serve(args: argparse.Namespace) -> int:
    """Start the OpenAPI server."""
-    print(f"\n🚀 Starting Opus API Server...")
+    print(f"
+🚀 Starting Opus API Server...")
    print(f"   Host: {args.host}")
    print(f"   Port: {args.port}")
-    print(f"   Docs: http://{args.host}:{args.port}/docs\n")
+    print(f"   Docs: http://{args.host}:{args.port}/docs
+")
    
    try:
        from opus_orchestrator.server import run_server
@@ -876,10 +892,12 @@ async def run_serve(args: argparse.Namespace) -> int:

 async def run_ui(args: argparse.Namespace) -> int:
    """Start the web UI only."""
-    print(f"\n🎨 Starting Opus Web UI...")
+    print(f"
+🎨 Starting Opus Web UI...")
    print(f"   Host: {args.host}")
    print(f"   Port: {args.port}")
-    print(f"   UI: http://{args.host}:{args.port}/\n")
+    print(f"   UI: http://{args.host}:{args.port}/
+")
    
    try:
        from opus_orchestrator.server import create_app
@@ -901,12 +919,15 @@ def run_ingest(args: argparse.Namespace) -> int:
    """Ingest content from GitHub."""
    from opus_orchestrator import OpusOrchestrator
    
-    print(f"\n📥 Ingesting from GitHub: {args.repo}\n")
+    print(f"
+📥 Ingesting from GitHub: {args.repo}
+")
    
    # Check for API client mode
    if args.api_url:
        client = OpusAPIClient(args.api_url)
-        print(f"🌐 API Client Mode: {args.api_url}\n")
+        print(f"🌐 API Client Mode: {args.api_url}
+")
        
        try:
            result = client.ingest(args.repo, include_readme=args.include_readme)
@@ -926,7 +947,8 @@ def run_ingest(args: argparse.Namespace) -> int:
    
    print(f"✅ Loaded {len(content_text):,} characters")
    print(f"   Files: {file_count}")
-    print(f"   File list: {', '.join(files)}\n")
+    print(f"   File list: {', '.join(files)}
+")
    
    if args.preview:
        print("📄 PREVIEW (first 2000 chars):")
@@ -937,7 +959,8 @@ def run_ingest(args: argparse.Namespace) -> int:
    if args.output:
        with open(args.output, "w") as f:
            f.write(content_text)
-        print(f"\n💾 Saved to: {args.output}")
+        print(f"
+💾 Saved to: {args.output}")
    
    return 0

@@ -946,7 +969,9 @@ def run_s3_ingest(args: argparse.Namespace) -> int:
    """Ingest content from S3/MinIO."""
    from opus_orchestrator import S3Ingestor
    
-    print(f"\n🪣 Ingesting from S3: {args.bucket}/{args.prefix}\n")
+    print(f"
+🪣 Ingesting from S3: {args.bucket}/{args.prefix}
+")
    
    if args.endpoint:
        print(f"   Endpoint: {args.endpoint}")
@@ -974,7 +999,8 @@ def run_s3_ingest(args: argparse.Namespace) -> int:
    
    print(f"✅ Loaded {result['total_chars']:,} characters")
    print(f"   Files: {result['file_count']}")
-    print(f"   File list: {', '.join(result['files'].keys())}\n")
+    print(f"   File list: {', '.join(result['files'].keys())}
+")
    
    if args.preview:
        print("📄 PREVIEW (first 2000 chars):")
@@ -985,7 +1011,8 @@ def run_s3_ingest(args: argparse.Namespace) -> int:
    if args.output:
        with open(args.output, "w") as f:
            f.write(result["combined_text"])
-        print(f"\n💾 Saved to: {args.output}")
+        print(f"
+💾 Saved to: {args.output}")
    
    return 0

@@ -994,7 +1021,9 @@ def run_local_ingest(args: argparse.Namespace) -> int:
    """Ingest content from local files/directories."""
    from opus_orchestrator import LocalIngestor
    
-    print(f"\n📂 Ingesting from local: {args.path}\n")
+    print(f"
+📂 Ingesting from local: {args.path}
+")
    
    # Parse extensions
    extensions = None
@@ -1039,7 +1068,8 @@ def run_local_ingest(args: argparse.Namespace) -> int:
    if args.output:
        with open(args.output, "w") as f:
            f.write(content)
-        print(f"\n💾 Saved to: {args.output}")
+        print(f"
+💾 Saved to: {args.output}")
    
    return 0

@@ -1048,7 +1078,9 @@ def run_frameworks(args: argparse.Namespace) -> int:
    """List available frameworks."""
    from opus_orchestrator.frameworks import FRAMEWORKS
    
-    print("\n📚 AVAILABLE STORY FRAMEWORKS\n")
+    print("
+📚 AVAILABLE STORY FRAMEWORKS
+")
    print("=" * 50)
    
    for framework, info in FRAMEWORKS.items():
@@ -1057,7 +1089,8 @@ def run_frameworks(args: argparse.Namespace) -> int:
        stages = info.get("stages", [])
        beats = info.get("beats", [])
        
-        print(f"\n{name}")
+        print(f"
+{name}")
        print(f"  {desc}")
        
        if stages:
@@ -1074,7 +1107,8 @@ def run_frameworks(args: argparse.Namespace) -> int:
            if len(beats) > 3:
                print(f"    ... and {len(beats) - 3} more")
    
-    print("\n" + "=" * 50)
+    print("
+" + "=" * 50)
    return 0


@@ -1084,37 +1118,45 @@ def run_config(args: argparse.Namespace) -> int:
    
    config = get_config()
    
-    print("\n⚙️  OPUS CONFIGURATION\n")
+    print("
+⚙️  OPUS CONFIGURATION
+")
    print("=" * 40)
    
-    print(f"\n🔹 Agent")
+    print(f"
+🔹 Agent")
    print(f"   Provider: {config.agent.provider}")
    print(f"   Model: {config.agent.model}")
    print(f"   Temperature: {config.agent.temperature}")
    print(f"   Max Tokens: {config.agent.max_tokens or 'None'}")
    
-    print(f"\n🔹 Iteration")
+    print(f"
+🔹 Iteration")
    print(f"   Min Critic Rounds: {config.iteration.min_critic_rounds}")
    print(f"   Max Critic Rounds: {config.iteration.max_critic_rounds}")
    print(f"   Approval Threshold: {config.iteration.approval_threshold}")
    
-    print(f"\n🔹 Output")
+    print(f"
+🔹 Output")
    print(f"   Format: {config.output.format}")
    print(f"   Include TOC: {config.output.include_toc}")
    print(f"   Output Dir: {config.output.output_dir}")
    
-    print(f"\n🔹 Integrations")
+    print(f"
+🔹 Integrations")
    print(f"   GitHub Token: {'✓ Set' if config.github_token else '✗ Not Set'}")
    print(f"   API Key: {'✓ Set' if config.agent.api_key else '✗ Not Set'}")
    
    if args.show_keys:
-        print(f"\n🔹 API Keys (unmasked)")
+        print(f"
+🔹 API Keys (unmasked)")
        print(f"   OPENAI_API_KEY: {os.environ.get('OPENAI_API_KEY', 'Not Set')[:20]}...")
        print(f"   MINIMAX_API_KEY: {os.environ.get('MINIMAX_API_KEY', 'Not Set')[:20]}...")
        print(f"   GITHUB_TOKEN: {os.environ.get('GITHUB_TOKEN', 'Not Set')[:20]}...")
    
    if args.env:
-        print(f"\n📋 ENVIRONMENT VARIABLES NEEDED:")
+        print(f"
+📋 ENVIRONMENT VARIABLES NEEDED:")
        print("-" * 40)
        print("OPENAI_API_KEY=sk-...  # Required for LLM")
        print("GITHUB_TOKEN=ghp_...   # For private repos")
@@ -321,12 +321,12 @@ Generate a detailed outline with:
        content: Optional[RawContent] = None,
        sources: Optional[list[dict]] = None,
    ) -> OpusState:
-        """Ingest raw content from multiple sources.
-        
-        Args:
-            content: Pre-loaded raw content
-            sources: List of source configurations (github, local, s3)
-        """
+        """Ingest raw content from multiple sources."""
+        # Skip if we already have content and weren't given specific new sources/content
+        if self.state and self.state.raw_content and not content and not sources:
+            print("ℹ️  Using existing raw content.")
+            return self.state
+
        if sources:
            from opus_orchestrator.utils.multi_source_ingest import ingest_multiple
            
@@ -335,7 +335,6 @@ Generate a detailed outline with:
            result = await ingest_multiple(
                sources=sources,
                github_token=self.config.github_token,
-                # AWS keys would come from environment
            )
            
            content = RawContent(
@@ -399,29 +398,26 @@ Generate a detailed outline with:
    # =========================================================================

    async def snowflake_stage_1(self) -> str:
-        """Stage 1: One sentence summary.
-        
-        Take your one-paragraph story summary and cut it down to one sentence.
-        """
+        """Stage 1: One sentence summary."""
        print("❄️ SNOWFLAKE STAGE 1: One sentence summary...")
        
        raw_content = self.state.raw_content.text if self.state.raw_content else ""
        
-        user_prompt = f"""Create a ONE SENTENCE summary of this story concept.
+        user_prompt = f"""You are analyzing a collection of source materials to synthesize a new story.
+        
+## SOURCE CONTENT:
+{raw_content}

-The sentence should contain:
- Protagonist's name (or descriptor)
+## TASK:
+Synthesize the core narrative conflict and outcome from the source content into ONE compelling sentence.
+
+The sentence must contain:
+- Protagonist's name or descriptor
 - Their goal
- The conflict/obstacle
+- The central conflict/obstacle
 - The stakes

 Example: "In a world where magic is forbidden, a young mage must master forbidden arts to save her dying brother, even if it means sparking a war with the ruling theocracy."
-
-## Your seed content:
-{raw_content}
-
-## Task:
-Write ONE compelling sentence that captures the entire story.
 """
        response = await self.agents["architect"].call_llm(
            system_prompt="You are an expert story architect. Create concise, compelling summaries.",
@@ -67,7 +67,8 @@ class OpusPydanticAgent:
        # Build system prompt
        system_prompt = self.system_prompt or """You are an expert writer and editor for Opus Orchestrator.
 You produce high-quality, structured output that conforms to the given schema.
-Always follow best practices for the content type you're creating."""
+Always follow best practices for the content type you're creating.
+IMPORTANT: You must respond ONLY in English. Do not use any other language."""
        
        if self.result_type:
            self._agent = Agent(
@@ -19,14 +19,12 @@ class GitHubIngestor:
        self.token = token or os.environ.get("GITHUB_TOKEN")
        
        # Token is optional - only required for private repos
-        # Public repos can be accessed without authentication
        if self.token:
            self.headers = {
                "Authorization": f"token {self.token}",
                "Accept": "application/vnd.github.v3+json",
            }
        else:
-            # No token - use unauthenticated requests (rate limited)
            self.headers = {
                "Accept": "application/vnd.github.v3+json",
            }
@@ -34,34 +32,22 @@ class GitHubIngestor:
        
        self.base_url = "https://api.github.com"
    
-    def get_contents(self, repo: str, path: str = "") -> list[dict]:
-        """Get contents of a directory or file.
-        
-        Args:
-            repo: "owner/repo" format
-            path: directory path (default: root)
-            
-        Returns:
-            List of content items
-        """
+    def get_contents(self, repo: str, path: str = "", branch: Optional[str] = None) -> list[dict]:
+        """Get contents of a directory or file."""
        url = f"{self.base_url}/repos/{repo}/contents/{path}"
+        if branch:
+            url += f"?ref={branch}"
        
        response = requests.get(url, headers=self.headers)
        response.raise_for_status()
        
        return response.json()
    
-    def get_file_content(self, repo: str, path: str) -> str:
-        """Get content of a single file.
-        
-        Args:
-            repo: "owner/repo" format
-            path: file path
-            
-        Returns:
-            Decoded file content
-        """
+    def get_file_content(self, repo: str, path: str, branch: Optional[str] = None) -> str:
+        """Get content of a single file."""
        url = f"{self.base_url}/repos/{repo}/contents/{path}"
+        if branch:
+            url += f"?ref={branch}"
        
        response = requests.get(url, headers=self.headers)
        response.raise_for_status()
@@ -78,43 +64,34 @@ class GitHubIngestor:
    def get_all_files(
        self,
        repo: str,
+        branch: Optional[str] = None,
+        path: str = "",
        extensions: Optional[list[str]] = None,
        exclude_dirs: Optional[list[str]] = None,
        include_all: bool = True,
    ) -> dict[str, str]:
-        """Get all files from a repository - INCLUDING SOURCE CODE.
-        
-        The AI witnesses EVERYTHING and transforms it into documentation.
-        Don't filter what the AI can see - let it decide what's relevant.
-        
-        Args:
-            repo: "owner/repo" format
-            extensions: File extensions to include (None = ALL files!)
-            exclude_dirs: Directories to exclude (build artifacts, etc.)
-            include_all: If True, include ALL files (default True!)
-            
-        Returns:
-            Dictionary mapping file paths to content
-        """
-        # Default: include ALL files - the AI will witness everything!
+        """Get all files from a repository."""
        if include_all:
-            extensions = None  # No extension filter
-            exclude_dirs = exclude_dirs or [".git", "node_modules", "__pycache__", ".github", "dist", "build", "*.egg-info"]
+            extensions = None
+            exclude_dirs = exclude_dirs or [".git", "node_modules", "__pycache__", ".github", "dist", "build"]
        else:
-            extensions = extensions or [".md", ".txt", ".text", ".notes", ".draft", ".rst"]
-            exclude_dirs = exclude_dirs or [".git", "node_modules", "__pycache__", ".github"]
+            extensions = extensions or [".md", ".txt"]
+            exclude_dirs = exclude_dirs or [".git", "node_modules"]
        
        files = {}
        
-        def walk_directory(path: str = ""):
-            contents = self.get_contents(repo, path)
+        def walk_directory(current_path: str = ""):
+            try:
+                contents = self.get_contents(repo, current_path, branch)
+            except Exception as e:
+                print(f"Error walking {current_path}: {e}")
+                return
            
            if isinstance(contents, dict):
-                # Single file
                if contents.get("type") == "file":
                    content_path = contents["path"]
                    if self._should_include(content_path, extensions, exclude_dirs, include_all):
-                        files[content_path] = self.get_file_content(repo, content_path)
+                        files[content_path] = self.get_file_content(repo, content_path, branch)
                return
            
            for item in contents:
@@ -122,14 +99,16 @@ class GitHubIngestor:
                item_type = item.get("type")
                
                if item_type == "dir":
-                    # Check if excluded
                    if not any(excl in item_path for excl in exclude_dirs):
                        walk_directory(item_path)
                elif item_type == "file":
                    if self._should_include(item_path, extensions, exclude_dirs, include_all):
-                        files[item_path] = self.get_file_content(repo, item_path)
+                        try:
+                            files[item_path] = self.get_file_content(repo, item_path, branch)
+                        except Exception as e:
+                            print(f"Error reading {item_path}: {e}")
        
-        walk_directory()
+        walk_directory(path)
        return files
    
    def _should_include(
@@ -139,73 +118,41 @@ class GitHubIngestor:
        exclude_dirs: list[str],
        include_all: bool = True,
    ) -> bool:
-        """Check if file should be included.
-        
-        Args:
-            path: File path to check
-            extensions: List of extensions (None if include_all=True)
-            exclude_dirs: Directories to exclude
-            include_all: Include ALL files (ignore extensions)
-        """
-        # Exclude directories
+        """Check if file should be included."""
        for excl in exclude_dirs:
            if excl in path:
                return False
-        
-        # If include_all, include everything
        if include_all:
            return True
-        
-        # Otherwise check extensions
        if extensions:
            return any(path.endswith(ext) for ext in extensions)
-        
        return True
    
    def extract_text_from_files(self, files: dict[str, str]) -> str:
-        """Combine all file contents into a single text blob.
-        
-        Args:
-            files: Dictionary of filename -> content
-            
-        Returns:
-            Combined text
-        """
+        """Combine all file contents."""
        combined = []
-        
        for filename, content in sorted(files.items()):
            combined.append(f"=== {filename} ===\n")
            combined.append(content)
            combined.append("\n\n")
-        
        return "".join(combined)
    
    def ingest_repo(
        self,
        repo: str,
+        branch: Optional[str] = None,
+        path: str = "",
        include_readme: bool = True,
    ) -> dict[str, Any]:
-        """Ingest a complete repository.
-        
-        Args:
-            repo: "owner/repo" format
-            include_readme: Include README.md files
-            
-        Returns:
-            Dictionary with files, combined_text, and metadata
-        """
-        # Get all markdown and text files
-        files = self.get_all_files(repo)
-        
-        # Optionally exclude README
+        """Ingest a complete repository."""
+        files = self.get_all_files(repo, branch, path)
        if not include_readme:
            files = {k: v for k, v in files.items() if "README" not in k}
-        
-        # Combine into single text
        combined = self.extract_text_from_files(files)
-        
        return {
            "repo": repo,
+            "branch": branch,
+            "path": path,
            "files": files,
            "combined_text": combined,
            "file_count": len(files),
@@ -83,6 +83,9 @@ class LLMClient:
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }
+
+        # STRICT ENGLISH ENFORCEMENT
+        system_prompt += "\n\nIMPORTANT: You must respond ONLY in English. Do not use Chinese characters or any other language under any circumstances."
        
        if self.provider == "minimax":
            return self._complete_minimax_sync(
@@ -110,6 +113,10 @@ class LLMClient:
                "Content-Type": "application/json",
            }
            
+            # STRICT ENGLISH ENFORCEMENT
+            nonlocal system_prompt
+            system_prompt += "\n\nIMPORTANT: You must respond ONLY in English. Do not use Chinese characters or any other language under any circumstances."
+            
            if self.provider == "minimax":
                return await self._complete_minimax_async(
                    system_prompt, user_prompt, temperature, max_tokens, headers
@@ -140,8 +147,8 @@ class LLMClient:
        # Anthropic-compatible format
        payload = {
            "model": self.minimax_model,
+            "system": system_prompt,
            "messages": [
-                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            "temperature": temperature,
@@ -167,8 +174,15 @@ class LLMClient:
        
        # Handle Anthropic-compatible response format
        if "content" in data:
-            # Return the text content
            if isinstance(data["content"], list) and len(data["content"]) > 0:
+                # Look for text content, skip thinking
+                text_parts = []
+                for item in data["content"]:
+                    if item.get("type") == "text":
+                        text_parts.append(item.get("text", ""))
+                if text_parts:
+                    return "".join(text_parts)
+                # If no text found, return first item's text or the item itself
                return data["content"][0].get("text", str(data["content"][0]))
            return str(data["content"])
        else:
@@ -224,8 +238,8 @@ class LLMClient:
        """Call MiniMax API (sync) using Anthropic-compatible endpoint."""
        payload = {
            "model": self.minimax_model,
+            "system": system_prompt,
            "messages": [
-                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            "temperature": temperature,
@@ -5,7 +5,7 @@ Merges and deduplicates content intelligently.
 """

 from dataclasses import dataclass, field
-from typing import Optional, list
+from typing import Optional, List
 from enum import Enum
 import hashlib

@@ -144,7 +144,7 @@ class MultiSourceIngestor:
        
        ingestor = GitHubIngestor(token=self.github_token)
        
-        content = await ingestor.ingest_repo(
+        content = ingestor.ingest_repo(
            repo=source.repo,
            branch=source.branch or "main",
            path=source.path or "",