Add local file/directory ingestion support

- LocalIngestor class for files and directories
- CLI: opus ingest-local PATH
- Generate from local: opus generate --local ./my-notes/
- Support for file-extension filtering, recursive scanning, and summarization
- Pattern-based exclusion (.git, __pycache__, etc.)
This commit is contained in:
2026-03-13 03:45:43 +00:00
parent a6b43cd623
commit 64cdabd2f8
3 changed files with 368 additions and 1 deletions
+120 -1
View File
@@ -269,6 +269,11 @@ Examples:
"--save-commit-msg",
help="Commit message for GitHub save",
)
gen_parser.add_argument(
"--local",
"-l",
help="Local file or directory to use as source",
)
gen_parser.add_argument(
"--use-crewai",
action="store_true",
@@ -376,6 +381,48 @@ Examples:
help="List objects instead of downloading",
)
# -------------------------------------------------------------------------
# INGEST-LOCAL COMMAND
# -------------------------------------------------------------------------
local_parser = subparsers.add_parser(
"ingest-local",
help="Ingest content from local files/directories",
description="Fetch and analyze content from local files and directories",
)
local_parser.add_argument(
"path",
help="File or directory path to ingest",
)
local_parser.add_argument(
"--extensions", "-e",
help="Comma-separated file extensions (default: txt,md,markdown,notes,draft)",
)
local_parser.add_argument(
"--no-recursive",
action="store_true",
help="Don't scan subdirectories",
)
local_parser.add_argument(
"--output", "-o",
help="Output file for ingested content",
)
local_parser.add_argument(
"--preview",
action="store_true",
help="Show preview of ingested content",
)
local_parser.add_argument(
"--summarize",
action="store_true",
help="Summarize content instead of full ingest",
)
local_parser.add_argument(
"--max-length",
type=int,
default=10000,
help="Max length for summary (default: 10000)",
)
# -------------------------------------------------------------------------
# FRAMEWORKS COMMAND
# -------------------------------------------------------------------------
@@ -491,7 +538,24 @@ async def run_generate(args: argparse.Namespace) -> int:
# Determine the seed concept
seed_concept = args.concept
if args.repo:
if args.local:
# Ingest from local files/directory
from opus_orchestrator import LocalIngestor
print(f"📂 Ingesting from local: {args.local}")
ingestor = LocalIngestor()
result = ingestor.ingest(args.local)
full_text = result["combined_text"]
print(f" ✅ Loaded {len(full_text):,} characters from {result['file_count']} files")
print(f" 📄 Files: {', '.join(list(result['files'].keys())[:5])}")
if result['file_count'] > 5:
print(f" ... and {result['file_count'] - 5} more")
print()
seed_concept = full_text
elif args.repo:
# Ingest from GitHub - use FULL content
print(f"📥 Ingesting from GitHub: {args.repo}")
@@ -800,6 +864,60 @@ def run_s3_ingest(args: argparse.Namespace) -> int:
return 0
def run_local_ingest(args: argparse.Namespace) -> int:
    """Ingest content from local files/directories and report what was loaded.

    Reads these attributes from ``args``:

    * ``path`` -- file or directory handed to ``LocalIngestor.ingest``.
    * ``extensions`` -- optional comma-separated extension list; blank
      entries (e.g. from a trailing comma) are dropped, and ``None`` is
      passed to the ingestor when nothing usable remains.
    * ``no_recursive`` -- when set, ``recursive=False`` is passed on.
    * ``summarize`` / ``max_length`` -- when ``summarize`` is set, the
      combined text is run through ``LocalIngestor.summarize`` with
      ``max_length``.
    * ``preview`` -- print the first 2000 characters of the content.
    * ``output`` -- optional path the content is written to.

    Returns:
        ``0`` once the ingest summary has been printed (and the optional
        preview/output steps have run).
    """
    # Imported lazily so the CLI module does not need the ingestor at import time.
    from opus_orchestrator import LocalIngestor

    print(f"\n📂 Ingesting from local: {args.path}\n")

    # Parse extensions, ignoring empty items such as the one produced by "md,".
    extensions = None
    if args.extensions:
        parsed = [ext.strip() for ext in args.extensions.split(",")]
        extensions = [ext for ext in parsed if ext] or None

    ingestor = LocalIngestor()
    result = ingestor.ingest(
        path=args.path,
        extensions=extensions,
        recursive=not args.no_recursive,
    )

    if args.summarize:
        content = ingestor.summarize(result["combined_text"], args.max_length)
    else:
        content = result["combined_text"]

    print(f"✅ Loaded {result['total_chars']:,} characters")
    print(f" Files: {result['file_count']}")
    print(f" Root: {result['path']}")

    # Only the first ten file names are listed to keep the report short.
    files_list = list(result["files"])
    print(f" File list: {', '.join(files_list[:10])}")
    if len(files_list) > 10:
        print(f" ... and {len(files_list) - 10} more")
    if args.summarize:
        print(f" 📝 Summarized to {args.max_length} characters")
    print()

    if args.preview:
        print("📄 PREVIEW (first 2000 chars):")
        print("-" * 40)
        print(content[:2000])
        print("-" * 40)

    if args.output:
        # Explicit UTF-8: ingested notes may contain characters that the
        # platform's default locale encoding cannot represent.
        with open(args.output, "w", encoding="utf-8") as f:
            f.write(content)
        print(f"\n💾 Saved to: {args.output}")

    return 0
def run_frameworks(args: argparse.Namespace) -> int:
"""List available frameworks."""
from opus_orchestrator.frameworks import FRAMEWORKS
@@ -919,6 +1037,7 @@ async def main_async(args: argparse.Namespace) -> int:
"serve": run_serve,
"ingest": run_ingest,
"ingest-s3": run_s3_ingest,
"ingest-local": run_local_ingest,
"frameworks": run_frameworks,
"config": run_config,
"docs": run_docs,