Files
opus-orchestrator-ai/opus_orchestrator/agents/research.py
T

339 lines
10 KiB
Python

"""Research Agent for Opus Orchestrator.
Enhanced nonfiction agent with live research capabilities.
"""
import os
import re
from typing import Any, Optional

from dotenv import load_dotenv

from opus_orchestrator.agents.base import BaseAgent, AgentResponse
from opus_orchestrator.utils.research import (
    ResearchOrchestrator,
    create_research_orchestrator,
    SearchTool,
    WikipediaTool,
    AcademicSearchTool,
)
# System prompt for the research agent.
#
# ResearchAgent passes this text to BaseAgent.__init__ as ``system_prompt``.
# It frames the model as a researcher with web, Wikipedia and academic search
# at its disposal, and asks for output organised into four sections:
# Findings, Sources, Innovations and Research Gaps.
#
# NOTE: this is runtime text sent to the model -- any edit to the string below
# changes agent behavior, so treat wording changes as code changes.
RESEARCH_AGENT_SYSTEM_PROMPT = """## Role: Research Agent with Live Web Access
You are The Researcher — an AI agent with live access to the internet, academic databases, and research tools.
## Your Capabilities
1. **Web Search** - Search the current web for latest information
2. **Wikipedia** - Access encyclopedic knowledge
3. **Academic Search** - Find peer-reviewed papers (CrossRef, Semantic Scholar)
4. **Innovation Detection** - Identify gaps and new ideas beyond training data
## Your Mission
NOT just verify facts — **DISCOVER new information, trends, and innovations**.
- Find what's NEW since your training cutoff
- Identify research gaps and opportunities
- Connect disparate ideas into novel insights
- Go beyond what you "know" to what you can FIND
## Research Process
1. **Explore** - Broad search on topic
2. **Deep Dive** - Specific searches on subtopics
3. **Cross-Reference** - Find connections between sources
4. **Innovate** - Generate original insights beyond training data
## Output Format
Provide your research in this structure:
```
## Findings (What you discovered)
- [New information 1]
- [New information 2]
- [Latest developments]
## Sources (Where you found it)
- [URL 1]: [Title]
- [URL 2]: [Title]
## Innovations (Original insights beyond training data)
- [Novel connection 1]
- [Novel connection 2]
## Research Gaps (What's not well-covered)
- [Gap 1]
- [Gap 2]
```
## Remember
You're not just fact-checking — you're RESEARCHING. Actively seek new information,
challenge assumptions, and generate original ideas. This keeps the content fresh
and prevents "AI slop" from repetitive training data patterns.
"""
class ResearchAgent(BaseAgent):
    """Enhanced research agent with live web access and innovation detection.

    The agent collects raw material through a research orchestrator (web
    search plus optional Wikipedia and academic lookups), renders it as
    Markdown-style text, and asks the LLM to synthesize that text.
    """

    # Caps applied when rendering results into the LLM prompt; they keep the
    # prompt bounded regardless of how many hits the tools return.
    _MAX_WEB_RESULTS = 5
    _MAX_WIKI_RESULTS = 3
    _MAX_ACADEMIC_RESULTS = 5
    _SNIPPET_CHARS = 200

    def __init__(
        self,
        config=None,
        search_provider: str = "tavily",
        use_wikipedia: bool = True,
        use_academic: bool = True,
    ):
        """Initialize research agent with tools.

        Args:
            config: Agent configuration, forwarded unchanged to ``BaseAgent``.
            search_provider: Search provider (tavily, serper, brave, duckduckgo).
            use_wikipedia: Include Wikipedia search.
            use_academic: Include academic search.
        """
        # Orchestrator used by execute(); the individual tools below are also
        # exposed as attributes for callers that want direct access.
        self.research = create_research_orchestrator(
            search_provider=search_provider,
            use_wikipedia=use_wikipedia,
            use_academic=use_academic,
        )
        self.search_tool = SearchTool(provider=search_provider)
        self.wikipedia = WikipediaTool() if use_wikipedia else None
        self.academic = AcademicSearchTool() if use_academic else None

        super().__init__(
            role="Research Agent",
            description="Live web research with innovation detection",
            system_prompt=RESEARCH_AGENT_SYSTEM_PROMPT,
            config=config,
        )

    async def execute(self, input_data: Any, context: dict[str, Any]) -> AgentResponse:
        """Execute research task with live tools.

        Args:
            input_data: Either a dict with ``query`` and optional
                ``subtopics`` / ``deep_research`` keys, or any other value,
                which is stringified and used as the query.
            context: Additional context, passed to ``build_system_prompt``.

        Returns:
            ``AgentResponse`` whose output holds the raw tool results, the LLM
            synthesis and the query. On a missing query or any failure during
            research, ``success`` is False and ``error`` describes the problem.
        """
        if isinstance(input_data, dict):
            query = input_data.get("query", "")
            # ``or []`` also covers an explicit ``"subtopics": None``.
            subtopics = input_data.get("subtopics") or []
            deep = input_data.get("deep_research", False)
        else:
            # str(None) would yield the literal query "None"; treat a missing
            # input as "no query" instead.
            query = "" if input_data is None else str(input_data)
            subtopics = []
            deep = False

        if not query:
            return AgentResponse(
                success=False,
                output=None,
                error="No research query provided",
                metadata={"role": "Research Agent"},
            )

        try:
            # NOTE(review): the orchestrator calls are not awaited, so they
            # appear to be synchronous and will hold the event loop while the
            # searches run -- confirm whether that is acceptable for callers.
            if deep or subtopics:
                results = self.research.deep_research(query, subtopics)
            else:
                results = self.research.comprehensive_search(query)

            research_summary = self._format_research_for_llm(results)

            user_prompt = (
                "Based on this research data, provide analysis and insights:\n"
                f"{research_summary}\n"
                f"Task: {query}\n"
                "Provide:\n"
                "1. Key findings synthesized\n"
                "2. Most important innovations/discoveries\n"
                "3. How this goes beyond typical training data\n"
                "4. Recommendations for the manuscript"
            )
            synthesis = await self.call_llm(
                system_prompt=self.build_system_prompt(context),
                user_prompt=user_prompt,
            )

            return AgentResponse(
                success=True,
                output={
                    "raw_results": results,
                    "synthesis": synthesis,
                    "query": query,
                },
                metadata={
                    "role": "Research Agent",
                    "search_provider": self.research.search.provider,
                },
            )
        except Exception as e:
            # Agent boundary: report the failure in the response rather than
            # letting a tool or LLM error propagate to the caller.
            return AgentResponse(
                success=False,
                output=None,
                error=f"Research failed: {e}",
                metadata={"role": "Research Agent"},
            )

    @classmethod
    def _snippet(cls, text: Any) -> str:
        """Return the leading characters of *text* followed by ``...``.

        A missing or ``None`` value yields just ``...`` instead of raising,
        because ``dict.get(key, "")`` still returns ``None`` when the key is
        present with a null value.
        """
        return f"{str(text or '')[:cls._SNIPPET_CHARS]}..."

    def _format_research_for_llm(self, results: dict) -> str:
        """Format research results for LLM consumption.

        Args:
            results: Mapping that may contain ``query``, ``timestamp``,
                ``web``, ``wikipedia``, ``academic`` and ``innovations``.
                Absent or empty sections are omitted from the output.

        Returns:
            Newline-joined text with one ``##`` heading per non-empty section.
        """
        lines = [
            f"# Research Query: {results.get('query', '')}",
            f"Timestamp: {results.get('timestamp', '')}",
            "",
        ]

        web_hits = results.get("web", [])
        if web_hits:
            lines.append("## Web Search Results")
            for rank, hit in enumerate(web_hits[: self._MAX_WEB_RESULTS], 1):
                lines.append(f"{rank}. **{hit.get('title', '')}**")
                lines.append(f" URL: {hit.get('url', '')}")
                lines.append(f" {self._snippet(hit.get('content'))}")
            lines.append("")

        wiki_hits = results.get("wikipedia", [])
        if wiki_hits:
            lines.append("## Wikipedia Results")
            for page in wiki_hits[: self._MAX_WIKI_RESULTS]:
                lines.append(
                    f"- {page.get('title', '')}: {self._snippet(page.get('summary'))}"
                )
            lines.append("")

        papers = results.get("academic", [])
        if papers:
            lines.append("## Academic Papers")
            for paper in papers[: self._MAX_ACADEMIC_RESULTS]:
                lines.append(f"- {paper.get('title', '')} ({paper.get('year', 'N/A')})")
                lines.append(f" {paper.get('journal', '')}")
            lines.append("")

        innovations = results.get("innovations", [])
        if innovations:
            lines.append("## Innovations & New Ideas")
            lines.extend(f"- {idea}" for idea in innovations)
            lines.append("")

        return "\n".join(lines)
# Fact-checking with live verification
class VerifiedFactChecker:
    """Fact checker with live source verification.

    Verification is a lexical heuristic, not semantic entailment: a source
    counts as supporting when it shares enough distinct words with the claim,
    and as contradicting when it contains a negation marker as a whole word.
    """

    # A source supports the claim when it contains more than this share of
    # the claim's distinct words.
    _SUPPORT_THRESHOLD = 0.7
    # Matched as whole words so that e.g. "another" or "notable" do not count
    # as the marker "not".
    _NEGATION_WORDS = frozenset({"not", "false", "incorrect"})
    # Word tokens; strips punctuation so "cheese." still matches "cheese".
    _WORD_RE = re.compile(r"\w+")

    def __init__(self, search_provider: str = "tavily"):
        """Initialize verified fact checker.

        Args:
            search_provider: Provider name handed to ``SearchTool``.
        """
        self.search = SearchTool(provider=search_provider)
        self.wikipedia = WikipediaTool()

    @classmethod
    def _tokenize(cls, text) -> set[str]:
        """Return the set of lower-cased word tokens in *text* (None -> empty)."""
        return set(cls._WORD_RE.findall(str(text or "").lower()))

    @staticmethod
    def _source_text(source) -> str:
        """Extract the comparable text from one search hit.

        Web hits are read from ``content``; ``summary`` is used as a fallback
        (presumably the field Wikipedia hits carry -- verify against
        ``WikipediaTool``). Anything that is neither a dict nor a string
        yields an empty string so an unexpected hit shape cannot crash
        verification.
        """
        if isinstance(source, dict):
            return str(source.get("content") or source.get("summary") or "")
        if isinstance(source, str):
            return source
        return ""

    async def verify_claim(
        self,
        claim: str,
        context: str = "",
    ) -> dict:
        """Verify a factual claim against live sources.

        Args:
            claim: The claim to verify.
            context: Additional context (currently unused).

        Returns:
            Dict with the claim, a ``verified`` flag (at least one supporting
            source), ``confidence`` (share of sources that support the claim,
            0.0 when there are none), the supporting / contradicting /
            neutral source lists, and ``needs_citation`` (confidence < 0.8).
        """
        web_hits = self.search.search(claim, num_results=5)
        wiki_hits = self.wikipedia.search(claim, num_results=2)
        # Wikipedia hits are classified alongside web hits instead of being
        # fetched and then discarded.
        evidence = list(web_hits or []) + list(wiki_hits or [])

        # Claim tokens do not depend on the source, so compute them once.
        claim_words = self._tokenize(claim)
        required_overlap = len(claim_words) * self._SUPPORT_THRESHOLD

        supporting = []
        contradicting = []
        neutral = []
        for source in evidence:
            source_words = self._tokenize(self._source_text(source))
            if len(claim_words & source_words) > required_overlap:
                supporting.append(source)
            elif source_words & self._NEGATION_WORDS:
                contradicting.append(source)
            else:
                neutral.append(source)

        total = len(evidence)
        confidence = len(supporting) / total if total else 0.0

        return {
            "claim": claim,
            "verified": len(supporting) > 0,
            "confidence": confidence,
            "supporting_sources": supporting,
            "contradicting_sources": contradicting,
            "neutral_sources": neutral,
            "needs_citation": confidence < 0.8,
        }

    async def verify_batch(
        self,
        claims: list[str],
    ) -> list[dict]:
        """Verify multiple claims.

        Claims are verified one after another, in the order given.

        Args:
            claims: List of claims to verify.

        Returns:
            List of verification results, one per claim, in input order.
        """
        return [await self.verify_claim(claim) for claim in claims]
def create_research_agent(
    search_provider: str = "tavily",
    *,
    config=None,
    use_wikipedia: bool = True,
    use_academic: bool = True,
) -> ResearchAgent:
    """Factory to create a research agent.

    The keyword-only options mirror ``ResearchAgent.__init__`` and use the
    same defaults, so existing ``create_research_agent(provider)`` calls
    behave exactly as before.

    Args:
        search_provider: Search provider.
        config: Agent configuration, forwarded to ``ResearchAgent``.
        use_wikipedia: Include Wikipedia search.
        use_academic: Include academic search.

    Returns:
        Configured ResearchAgent.
    """
    return ResearchAgent(
        config=config,
        search_provider=search_provider,
        use_wikipedia=use_wikipedia,
        use_academic=use_academic,
    )