Files
opus-orchestrator-ai/opus_orchestrator/agents/research.py
T

339 lines
10 KiB
Python
Raw Normal View History

"""Research Agent for Opus Orchestrator.
Enhanced nonfiction agent with live research capabilities.
"""
import os
import re
from typing import Any, Optional

from dotenv import load_dotenv

from opus_orchestrator.agents.base import BaseAgent, AgentResponse
from opus_orchestrator.utils.research import (
    ResearchOrchestrator,
    create_research_orchestrator,
    SearchTool,
    WikipediaTool,
    AcademicSearchTool,
)
# System prompt for the research agent.
# ResearchAgent.__init__ passes this string to BaseAgent as `system_prompt`.
# It is a Markdown document that states the agent's role, lists the tools the
# agent is told it has, outlines a four-step research process, and fixes the
# section layout expected in the reply (Findings / Sources / Innovations /
# Research Gaps). The text is sent to the model verbatim, so any edit here
# changes runtime behavior.
RESEARCH_AGENT_SYSTEM_PROMPT = """## Role: Research Agent with Live Web Access
You are The Researcher — an AI agent with live access to the internet, academic databases, and research tools.
## Your Capabilities
1. **Web Search** - Search the current web for latest information
2. **Wikipedia** - Access encyclopedic knowledge
3. **Academic Search** - Find peer-reviewed papers (CrossRef, Semantic Scholar)
4. **Innovation Detection** - Identify gaps and new ideas beyond training data
## Your Mission
NOT just verify facts — **DISCOVER new information, trends, and innovations**.
- Find what's NEW since your training cutoff
- Identify research gaps and opportunities
- Connect disparate ideas into novel insights
- Go beyond what you "know" to what you can FIND
## Research Process
1. **Explore** - Broad search on topic
2. **Deep Dive** - Specific searches on subtopics
3. **Cross-Reference** - Find connections between sources
4. **Innovate** - Generate original insights beyond training data
## Output Format
Provide your research in this structure:
```
## Findings (What you discovered)
- [New information 1]
- [New information 2]
- [Latest developments]
## Sources (Where you found it)
- [URL 1]: [Title]
- [URL 2]: [Title]
## Innovations (Original insights beyond training data)
- [Novel connection 1]
- [Novel connection 2]
## Research Gaps (What's not well-covered)
- [Gap 1]
- [Gap 2]
```
## Remember
You're not just fact-checking — you're RESEARCHING. Actively seek new information,
challenge assumptions, and generate original ideas. This keeps the content fresh
and prevents "AI slop" from repetitive training data patterns.
"""
class ResearchAgent(BaseAgent):
    """Enhanced research agent with live web access and innovation detection.

    The agent collects raw results through a research orchestrator, renders
    them as a Markdown digest, and asks the LLM to synthesize that digest.
    """

    def __init__(
        self,
        config=None,
        search_provider: str = "tavily",
        use_wikipedia: bool = True,
        use_academic: bool = True,
    ):
        """Initialize research agent with tools.

        Args:
            config: Agent configuration, forwarded to ``BaseAgent.__init__``.
            search_provider: Search provider (tavily, serper, brave, duckduckgo).
            use_wikipedia: Include Wikipedia search.
            use_academic: Include academic search.
        """
        # The tools are attached before BaseAgent.__init__ runs; this ordering
        # is kept unchanged in case the base initializer relies on them.
        self.research = create_research_orchestrator(
            search_provider=search_provider,
            use_wikipedia=use_wikipedia,
            use_academic=use_academic,
        )
        self.search_tool = SearchTool(provider=search_provider)
        self.wikipedia = WikipediaTool() if use_wikipedia else None
        self.academic = AcademicSearchTool() if use_academic else None
        super().__init__(
            role="Research Agent",
            description="Live web research with innovation detection",
            system_prompt=RESEARCH_AGENT_SYSTEM_PROMPT,
            config=config,
        )

    async def execute(self, input_data: Any, context: dict[str, Any]) -> AgentResponse:
        """Execute research task with live tools.

        Args:
            input_data: Either a dict with the keys ``query``, ``subtopics``
                and ``deep_research``, or any other object, which is
                converted with ``str()`` and used as the query.
            context: Additional context, passed to ``build_system_prompt``.

        Returns:
            An ``AgentResponse``. On success ``output`` holds ``raw_results``,
            ``synthesis`` and ``query``. A missing or blank query, or any
            exception raised while researching, yields ``success=False`` with
            an explanatory ``error``.
        """
        if isinstance(input_data, dict):
            query = input_data.get("query", "")
            subtopics = input_data.get("subtopics", [])
            deep = input_data.get("deep_research", False)
        else:
            query = str(input_data)
            subtopics = []
            deep = False

        # Reject empty and whitespace-only queries before spending a search
        # request and an LLM call on them.
        if not query or not str(query).strip():
            return AgentResponse(
                success=False,
                output=None,
                error="No research query provided",
                metadata={"role": "Research Agent"},
            )

        try:
            # NOTE(review): the orchestrator methods are called without
            # ``await``; if they perform blocking network I/O they will stall
            # the event loop for their duration -- confirm against
            # ResearchOrchestrator.
            if deep or subtopics:
                # Deep research with subtopics
                results = self.research.deep_research(query, subtopics)
            else:
                # Quick comprehensive search
                results = self.research.comprehensive_search(query)

            research_summary = self._format_research_for_llm(results)

            # Use LLM to synthesize and provide analysis
            synthesis = await self.call_llm(
                system_prompt=self.build_system_prompt(context),
                user_prompt=f"""Based on this research data, provide analysis and insights:
{research_summary}
Task: {query}
Provide:
1. Key findings synthesized
2. Most important innovations/discoveries
3. How this goes beyond typical training data
4. Recommendations for the manuscript""",
            )
            return AgentResponse(
                success=True,
                output={
                    "raw_results": results,
                    "synthesis": synthesis,
                    "query": query,
                },
                metadata={
                    "role": "Research Agent",
                    "search_provider": self.research.search.provider,
                },
            )
        except Exception as e:
            # Agent boundary: failures are reported through the response
            # object instead of propagating to the caller.
            return AgentResponse(
                success=False,
                output=None,
                error=f"Research failed: {str(e)}",
                metadata={"role": "Research Agent"},
            )

    @staticmethod
    def _excerpt(text: Any, limit: int = 200) -> str:
        """Return the first ``limit`` characters of ``text`` followed by ``...``.

        ``None`` (a key that is present but empty) is treated as an empty
        string instead of raising ``TypeError`` on slicing.
        """
        return f"{str(text or '')[:limit]}..."

    def _format_research_for_llm(self, results: dict) -> str:
        """Format research results for LLM consumption.

        Args:
            results: Mapping read for the keys ``query``, ``timestamp``,
                ``web``, ``wikipedia``, ``academic`` and ``innovations``.
                Missing or empty sections are skipped.

        Returns:
            A Markdown string with at most 5 web results, 3 Wikipedia
            results and 5 academic papers, plus every innovation entry.
        """
        output = [
            f"# Research Query: {results.get('query', '')}",
            f"Timestamp: {results.get('timestamp', '')}",
            "",
        ]

        # Web results
        web = results.get("web", [])
        if web:
            output.append("## Web Search Results")
            for rank, hit in enumerate(web[:5], 1):
                output.append(f"{rank}. **{hit.get('title') or ''}**")
                output.append(f" URL: {hit.get('url') or ''}")
                output.append(f" {self._excerpt(hit.get('content'))}")
            output.append("")

        # Wikipedia
        wiki = results.get("wikipedia", [])
        if wiki:
            output.append("## Wikipedia Results")
            for page in wiki[:3]:
                output.append(
                    f"- {page.get('title') or ''}: {self._excerpt(page.get('summary'))}"
                )
            output.append("")

        # Academic
        academic = results.get("academic", [])
        if academic:
            output.append("## Academic Papers")
            for paper in academic[:5]:
                output.append(
                    f"- {paper.get('title') or ''} ({paper.get('year') or 'N/A'})"
                )
                output.append(f" {paper.get('journal') or ''}")
            output.append("")

        # Innovations
        innovations = results.get("innovations", [])
        if innovations:
            output.append("## Innovations & New Ideas")
            output.extend(f"- {idea}" for idea in innovations)
            output.append("")

        return "\n".join(output)
# Fact-checking with live verification
class VerifiedFactChecker:
    """Fact checker with live source verification.

    Claims are scored with a keyword-overlap heuristic against search
    results; no LLM is involved. The heuristic is coarse: a result that
    repeats most of the claim's words counts as supporting even if it
    negates the claim.
    """

    # Share of the claim's distinct words that must be exceeded by the words
    # found in a result for that result to count as supporting.
    _SUPPORT_THRESHOLD = 0.7
    # Whole words that mark a non-supporting result as contradicting.
    _NEGATION_WORDS = frozenset({"not", "false", "incorrect"})

    def __init__(self, search_provider: str = "tavily"):
        """Initialize verified fact checker.

        Args:
            search_provider: Provider name handed to ``SearchTool``.
        """
        self.search = SearchTool(provider=search_provider)
        self.wikipedia = WikipediaTool()

    @staticmethod
    def _tokenize(text: Any) -> set[str]:
        """Return the distinct lower-cased word tokens of ``text``.

        Punctuation is dropped so that "France." and "france" compare equal;
        ``None`` yields an empty set.
        """
        return set(re.findall(r"\w+", str(text or "").lower()))

    async def verify_claim(
        self,
        claim: str,
        context: str = "",
    ) -> dict:
        """Verify a factual claim against live sources.

        Args:
            claim: The claim to verify.
            context: Additional context. Currently not used by the heuristic.

        Returns:
            Dict with ``claim``, ``verified`` (at least one supporting
            result), ``confidence`` (supporting results divided by all
            results, 0.0 when there are none), the three classified result
            lists, ``wikipedia_sources`` (raw Wikipedia hits, not part of
            the score) and ``needs_citation`` (confidence below 0.8).
        """
        # Search for the claim
        results = self.search.search(claim, num_results=5)
        # Wikipedia hits are returned for reference only; their schema is not
        # visible here, so they do not feed into the confidence score.
        wiki_results = self.wikipedia.search(claim, num_results=2)

        # Claim-side work is loop-invariant, so it is done once up front.
        claim_words = self._tokenize(claim)
        required_overlap = len(claim_words) * self._SUPPORT_THRESHOLD

        supporting = []
        contradicting = []
        neutral = []
        for r in results or []:
            content_words = self._tokenize(r.get("content"))
            if len(claim_words & content_words) > required_overlap:
                supporting.append(r)
            # Whole-word match: a substring test would flag "notable" or
            # "another" as a negation.
            elif content_words & self._NEGATION_WORDS:
                contradicting.append(r)
            else:
                neutral.append(r)

        # Calculate confidence
        total = len(supporting) + len(contradicting) + len(neutral)
        confidence = len(supporting) / total if total else 0.0

        return {
            "claim": claim,
            "verified": len(supporting) > 0,
            "confidence": confidence,
            "supporting_sources": supporting,
            "contradicting_sources": contradicting,
            "neutral_sources": neutral,
            "wikipedia_sources": wiki_results,
            "needs_citation": confidence < 0.8,
        }

    async def verify_batch(
        self,
        claims: list[str],
    ) -> list[dict]:
        """Verify multiple claims.

        Args:
            claims: List of claims to verify.

        Returns:
            List of verification results, in the same order as ``claims``.
        """
        # Claims are checked one after another, preserving input order.
        return [await self.verify_claim(claim) for claim in claims]
def create_research_agent(
    search_provider: str = "tavily",
    *,
    config=None,
    use_wikipedia: bool = True,
    use_academic: bool = True,
) -> ResearchAgent:
    """Factory to create a research agent.

    The keyword-only options mirror ``ResearchAgent.__init__`` and default to
    the same values, so ``create_research_agent()`` and
    ``create_research_agent("brave")`` behave as before.

    Args:
        search_provider: Search provider.
        config: Agent configuration forwarded to the agent.
        use_wikipedia: Include Wikipedia search.
        use_academic: Include academic search.

    Returns:
        Configured ResearchAgent
    """
    return ResearchAgent(
        config=config,
        search_provider=search_provider,
        use_wikipedia=use_wikipedia,
        use_academic=use_academic,
    )