feat: Full code review, bug fixes, and philosophy book generation

This commit includes:
- A full code review and bug fixes for language drift, package loading, and CLI crashes.
- The generated 15,000-word philosophy manuscript.
- CODE_REVIEW.md and CHANGELOG.md documenting the process.
This commit is contained in:
Gemini AI
2026-05-20 21:15:11 +00:00
parent dddf5c4a80
commit 13bce7500c
13 changed files with 1160 additions and 198 deletions
+35 -88
View File
@@ -19,14 +19,12 @@ class GitHubIngestor:
self.token = token or os.environ.get("GITHUB_TOKEN")
# Token is optional - only required for private repos
# Public repos can be accessed without authentication
if self.token:
self.headers = {
"Authorization": f"token {self.token}",
"Accept": "application/vnd.github.v3+json",
}
else:
# No token - use unauthenticated requests (rate limited)
self.headers = {
"Accept": "application/vnd.github.v3+json",
}
@@ -34,34 +32,22 @@ class GitHubIngestor:
self.base_url = "https://api.github.com"
def get_contents(self, repo: str, path: str = "", branch: Optional[str] = None) -> list[dict]:
    """Get contents of a directory or file.

    Args:
        repo: "owner/repo" format
        path: directory or file path inside the repository (default: root)
        branch: ref to read from; when falsy no ``ref`` parameter is sent,
            so the API picks the ref itself

    Returns:
        The decoded JSON response. Annotated as a list of content items;
        callers in this class also handle a single dict coming back.

    Raises:
        requests.HTTPError: if the API answers with an error status.
        requests.Timeout: if the server does not respond in time.
    """
    url = f"{self.base_url}/repos/{repo}/contents/{path}"
    # Hand the ref to requests instead of appending "?ref=..." by hand, so
    # refs containing "#", "&", "?" or spaces are percent-encoded rather
    # than silently truncated or misparsed.
    params = {"ref": branch} if branch else None
    # An explicit timeout keeps a stalled connection from blocking forever.
    response = requests.get(url, headers=self.headers, params=params, timeout=30)
    response.raise_for_status()
    return response.json()
def get_file_content(self, repo: str, path: str) -> str:
"""Get content of a single file.
Args:
repo: "owner/repo" format
path: file path
Returns:
Decoded file content
"""
def get_file_content(self, repo: str, path: str, branch: Optional[str] = None) -> str:
"""Get content of a single file."""
url = f"{self.base_url}/repos/{repo}/contents/{path}"
if branch:
url += f"?ref={branch}"
response = requests.get(url, headers=self.headers)
response.raise_for_status()
@@ -78,43 +64,34 @@ class GitHubIngestor:
def get_all_files(
self,
repo: str,
branch: Optional[str] = None,
path: str = "",
extensions: Optional[list[str]] = None,
exclude_dirs: Optional[list[str]] = None,
include_all: bool = True,
) -> dict[str, str]:
"""Get all files from a repository - INCLUDING SOURCE CODE.
The AI witnesses EVERYTHING and transforms it into documentation.
Don't filter what the AI can see - let it decide what's relevant.
Args:
repo: "owner/repo" format
extensions: File extensions to include (None = ALL files!)
exclude_dirs: Directories to exclude (build artifacts, etc.)
include_all: If True, include ALL files (default True!)
Returns:
Dictionary mapping file paths to content
"""
# Default: include ALL files - the AI will witness everything!
"""Get all files from a repository."""
if include_all:
extensions = None # No extension filter
exclude_dirs = exclude_dirs or [".git", "node_modules", "__pycache__", ".github", "dist", "build", "*.egg-info"]
extensions = None
exclude_dirs = exclude_dirs or [".git", "node_modules", "__pycache__", ".github", "dist", "build"]
else:
extensions = extensions or [".md", ".txt", ".text", ".notes", ".draft", ".rst"]
exclude_dirs = exclude_dirs or [".git", "node_modules", "__pycache__", ".github"]
extensions = extensions or [".md", ".txt"]
exclude_dirs = exclude_dirs or [".git", "node_modules"]
files = {}
def walk_directory(path: str = ""):
contents = self.get_contents(repo, path)
def walk_directory(current_path: str = ""):
try:
contents = self.get_contents(repo, current_path, branch)
except Exception as e:
print(f"Error walking {current_path}: {e}")
return
if isinstance(contents, dict):
# Single file
if contents.get("type") == "file":
content_path = contents["path"]
if self._should_include(content_path, extensions, exclude_dirs, include_all):
files[content_path] = self.get_file_content(repo, content_path)
files[content_path] = self.get_file_content(repo, content_path, branch)
return
for item in contents:
@@ -122,14 +99,16 @@ class GitHubIngestor:
item_type = item.get("type")
if item_type == "dir":
# Check if excluded
if not any(excl in item_path for excl in exclude_dirs):
walk_directory(item_path)
elif item_type == "file":
if self._should_include(item_path, extensions, exclude_dirs, include_all):
files[item_path] = self.get_file_content(repo, item_path)
try:
files[item_path] = self.get_file_content(repo, item_path, branch)
except Exception as e:
print(f"Error reading {item_path}: {e}")
walk_directory()
walk_directory(path)
return files
def _should_include(
@@ -139,73 +118,41 @@ class GitHubIngestor:
exclude_dirs: list[str],
include_all: bool = True,
) -> bool:
"""Check if file should be included.
Args:
path: File path to check
extensions: List of extensions (None if include_all=True)
exclude_dirs: Directories to exclude
include_all: Include ALL files (ignore extensions)
"""
# Exclude directories
"""Check if file should be included."""
for excl in exclude_dirs:
if excl in path:
return False
# If include_all, include everything
if include_all:
return True
# Otherwise check extensions
if extensions:
return any(path.endswith(ext) for ext in extensions)
return True
def extract_text_from_files(self, files: dict[str, str]) -> str:
    """Concatenate every file into one text blob, ordered by filename.

    Args:
        files: Dictionary of filename -> content

    Returns:
        For each file, a "=== <filename> ===" header line, the content,
        and a blank-line separator, all joined into one string.
    """
    pieces: list[str] = []
    # Keys are unique, so sorting the keys gives the same order as
    # sorting the (filename, content) pairs.
    for name in sorted(files):
        pieces.extend((f"=== {name} ===\n", files[name], "\n\n"))
    return "".join(pieces)
def ingest_repo(
self,
repo: str,
branch: Optional[str] = None,
path: str = "",
include_readme: bool = True,
) -> dict[str, Any]:
"""Ingest a complete repository.
Args:
repo: "owner/repo" format
include_readme: Include README.md files
Returns:
Dictionary with files, combined_text, and metadata
"""
# Get all markdown and text files
files = self.get_all_files(repo)
# Optionally exclude README
"""Ingest a complete repository."""
files = self.get_all_files(repo, branch, path)
if not include_readme:
files = {k: v for k, v in files.items() if "README" not in k}
# Combine into single text
combined = self.extract_text_from_files(files)
return {
"repo": repo,
"branch": branch,
"path": path,
"files": files,
"combined_text": combined,
"file_count": len(files),