Add live research capabilities with innovation detection
Research Tools:
- SearchTool: Multiple backends (Tavily, Serper, Brave, DuckDuckGo)
- WikipediaTool: Wikipedia lookup
- AcademicSearchTool: CrossRef, Semantic Scholar
- ResearchOrchestrator: Comprehensive multi-source research

ResearchAgent:
- NOT just fact-checking - actively discovers NEW information
- Identifies trends beyond training data cutoff
- Generates innovations from cross-referencing sources
- Deep research with subtopics

VerifiedFactChecker:
- Live claim verification against web sources
- Confidence scoring
- Citation needed detection

Dependencies added: tavily, wikipedia, arxiv, duckduckgo-search
This commit is contained in:
@@ -0,0 +1,339 @@
|
||||
"""Research Agent for Opus Orchestrator.
|
||||
|
||||
Enhanced nonfiction agent with live research capabilities.
|
||||
"""
|
||||
|
||||
import os
|
||||
from typing import Any, Optional
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
from opus_orchestrator.agents.base import BaseAgent, AgentResponse
|
||||
from opus_orchestrator.utils.research import (
|
||||
ResearchOrchestrator,
|
||||
create_research_orchestrator,
|
||||
SearchTool,
|
||||
WikipediaTool,
|
||||
AcademicSearchTool,
|
||||
)
|
||||
|
||||
|
||||
# System prompt for the research agent.
# ResearchAgent passes this constant to BaseAgent.__init__ as ``system_prompt``.
# The text frames the model as a researcher with live tools and prescribes a
# four-section report layout: Findings, Sources, Innovations, Research Gaps.
RESEARCH_AGENT_SYSTEM_PROMPT = """## Role: Research Agent with Live Web Access

You are The Researcher — an AI agent with live access to the internet, academic databases, and research tools.

## Your Capabilities

1. **Web Search** - Search the current web for latest information
2. **Wikipedia** - Access encyclopedic knowledge
3. **Academic Search** - Find peer-reviewed papers (CrossRef, Semantic Scholar)
4. **Innovation Detection** - Identify gaps and new ideas beyond training data

## Your Mission

NOT just verify facts — **DISCOVER new information, trends, and innovations**.

- Find what's NEW since your training cutoff
- Identify research gaps and opportunities
- Connect disparate ideas into novel insights
- Go beyond what you "know" to what you can FIND

## Research Process

1. **Explore** - Broad search on topic
2. **Deep Dive** - Specific searches on subtopics
3. **Cross-Reference** - Find connections between sources
4. **Innovate** - Generate original insights beyond training data

## Output Format

Provide your research in this structure:

```
## Findings (What you discovered)
- [New information 1]
- [New information 2]
- [Latest developments]

## Sources (Where you found it)
- [URL 1]: [Title]
- [URL 2]: [Title]

## Innovations (Original insights beyond training data)
- [Novel connection 1]
- [Novel connection 2]

## Research Gaps (What's not well-covered)
- [Gap 1]
- [Gap 2]
```

## Remember

You're not just fact-checking — you're RESEARCHING. Actively seek new information,
challenge assumptions, and generate original ideas. This keeps the content fresh
and prevents "AI slop" from repetitive training data patterns.
"""
|
||||
|
||||
|
||||
class ResearchAgent(BaseAgent):
    """Research agent that gathers live sources and has the LLM synthesize them.

    The agent runs a search through a research orchestrator, renders the raw
    results as a markdown digest, and sends that digest to the LLM together
    with the original query.

    Attributes:
        research: Orchestrator returned by ``create_research_orchestrator``.
        search_tool: ``SearchTool`` bound to the chosen provider.
        wikipedia: ``WikipediaTool`` instance, or ``None`` when disabled.
        academic: ``AcademicSearchTool`` instance, or ``None`` when disabled.
    """

    def __init__(
        self,
        config=None,
        search_provider: str = "tavily",
        use_wikipedia: bool = True,
        use_academic: bool = True,
    ):
        """Initialize research agent with tools.

        Args:
            config: Agent configuration, forwarded to ``BaseAgent``.
            search_provider: Search provider (tavily, serper, brave, duckduckgo).
            use_wikipedia: Include Wikipedia search.
            use_academic: Include academic search.
        """
        # Tools are attached before BaseAgent.__init__ runs, exactly as before.
        self.research = create_research_orchestrator(
            search_provider=search_provider,
            use_wikipedia=use_wikipedia,
            use_academic=use_academic,
        )

        self.search_tool = SearchTool(provider=search_provider)
        self.wikipedia = WikipediaTool() if use_wikipedia else None
        self.academic = AcademicSearchTool() if use_academic else None

        super().__init__(
            role="Research Agent",
            description="Live web research with innovation detection",
            system_prompt=RESEARCH_AGENT_SYSTEM_PROMPT,
            config=config,
        )

    async def execute(self, input_data: Any, context: dict[str, Any]) -> AgentResponse:
        """Execute research task with live tools.

        Args:
            input_data: Either a dict with the keys ``query`` (required),
                ``subtopics`` (optional list) and ``deep_research`` (optional
                flag), or any other value, which is stringified and used as
                the query.
            context: Additional context, passed to ``build_system_prompt``.

        Returns:
            On success, an ``AgentResponse`` whose output holds
            ``raw_results``, ``synthesis`` and ``query``. When the query is
            empty or any step raises, a failed ``AgentResponse`` carrying the
            error message.
        """
        if isinstance(input_data, dict):
            query = input_data.get("query")
            # ``or []`` also covers an explicit ``"subtopics": None``.
            subtopics = input_data.get("subtopics") or []
            deep = bool(input_data.get("deep_research", False))
        else:
            query = input_data
            subtopics = []
            deep = False

        # None must not become the literal query "None", and a
        # whitespace-only query is treated the same as a missing one.
        query = "" if query is None else str(query).strip()

        if not query:
            return AgentResponse(
                success=False,
                output=None,
                error="No research query provided",
                metadata={"role": "Research Agent"},
            )

        try:
            # NOTE(review): both orchestrator calls are invoked without
            # ``await``, so they presumably block the event loop while the
            # searches run - confirm whether that is acceptable for callers.
            if deep or subtopics:
                results = self.research.deep_research(query, subtopics)
            else:
                results = self.research.comprehensive_search(query)

            research_summary = self._format_research_for_llm(results)

            # Assembled from pieces so the prompt text carries no source
            # indentation; the resulting string matches the original prompt.
            user_prompt = (
                "Based on this research data, provide analysis and insights:\n"
                "\n"
                f"{research_summary}\n"
                "\n"
                f"Task: {query}\n"
                "\n"
                "Provide:\n"
                "1. Key findings synthesized\n"
                "2. Most important innovations/discoveries\n"
                "3. How this goes beyond typical training data\n"
                "4. Recommendations for the manuscript"
            )

            synthesis = await self.call_llm(
                system_prompt=self.build_system_prompt(context),
                user_prompt=user_prompt,
            )

            return AgentResponse(
                success=True,
                output={
                    "raw_results": results,
                    "synthesis": synthesis,
                    "query": query,
                },
                metadata={
                    "role": "Research Agent",
                    "search_provider": self.research.search.provider,
                },
            )

        except Exception as e:
            # Agent boundary: report the failure to the caller instead of
            # propagating it.
            return AgentResponse(
                success=False,
                output=None,
                error=f"Research failed: {e}",
                metadata={"role": "Research Agent"},
            )

    @staticmethod
    def _snippet(text: Any, limit: int = 200) -> str:
        """Return ``text`` cut to ``limit`` characters.

        ``None`` becomes an empty string, and an ellipsis is appended only
        when characters were actually dropped.
        """
        text = "" if text is None else str(text)
        if len(text) <= limit:
            return text
        return f"{text[:limit]}..."

    def _format_research_for_llm(self, results: dict) -> str:
        """Format research results for LLM consumption.

        Args:
            results: Mapping that may contain ``query``, ``timestamp``,
                ``web``, ``wikipedia``, ``academic`` and ``innovations``.
                Missing or empty sections are skipped.

        Returns:
            A markdown digest listing at most 5 web results, 3 Wikipedia
            entries, 5 academic papers, and every innovation.
        """
        output = [
            f"# Research Query: {results.get('query', '')}",
            f"Timestamp: {results.get('timestamp', '')}",
            "",
        ]

        web = results.get("web") or []
        if web:
            output.append("## Web Search Results")
            for index, item in enumerate(web[:5], 1):
                output.append(f"{index}. **{item.get('title', '')}**")
                output.append(f" URL: {item.get('url', '')}")
                output.append(f" {self._snippet(item.get('content'))}")
            output.append("")

        wiki = results.get("wikipedia") or []
        if wiki:
            output.append("## Wikipedia Results")
            for item in wiki[:3]:
                summary = self._snippet(item.get("summary"))
                output.append(f"- {item.get('title', '')}: {summary}")
            output.append("")

        academic = results.get("academic") or []
        if academic:
            output.append("## Academic Papers")
            for item in academic[:5]:
                # ``or`` covers keys that are present but set to None.
                year = item.get("year") or "N/A"
                journal = item.get("journal") or ""
                output.append(f"- {item.get('title', '')} ({year})")
                output.append(f" {journal}")
            output.append("")

        innovations = results.get("innovations") or []
        if innovations:
            output.append("## Innovations & New Ideas")
            output.extend(f"- {idea}" for idea in innovations)
            output.append("")

        return "\n".join(output)
|
||||
|
||||
|
||||
# Fact-checking with live verification
|
||||
class VerifiedFactChecker:
    """Fact checker with live source verification.

    A claim is looked up through the configured web search provider and on
    Wikipedia. Each returned source is then bucketed as supporting,
    contradicting or neutral by word overlap and negation markers. This is a
    lexical heuristic, not semantic verification: a source that repeats the
    claim's words while negating it still counts as supporting.
    """

    # A source supports the claim when it contains more than this share of
    # the claim's distinct words.
    _SUPPORT_THRESHOLD = 0.7

    # Whole words that mark a non-supporting source as contradicting.
    # Matching whole words keeps "notable" or "another" from counting as "not".
    _NEGATION_MARKERS = frozenset(
        {"not", "cannot", "false", "falsely", "incorrect", "incorrectly"}
    )

    # Claims scoring below this confidence are flagged as needing a citation.
    _CITATION_THRESHOLD = 0.8

    def __init__(self, search_provider: str = "tavily"):
        """Initialize verified fact checker.

        Args:
            search_provider: Provider name passed to ``SearchTool``.
        """
        self.search = SearchTool(provider=search_provider)
        self.wikipedia = WikipediaTool()

    @staticmethod
    def _words(text) -> set:
        """Return the distinct lowercase word tokens of ``text``.

        Punctuation is dropped so that "France." matches "france"; ``None``
        yields an empty set.
        """
        import re  # local import: the module does not import ``re`` itself

        return set(re.findall(r"\w+", str(text or "").lower()))

    @staticmethod
    def _source_text(source) -> str:
        """Return the searchable text of one result.

        Web results carry it under ``content`` and Wikipedia results under
        ``summary``; a missing or ``None`` value yields an empty string.
        """
        return source.get("content") or source.get("summary") or ""

    async def verify_claim(
        self,
        claim: str,
        context: str = "",
    ) -> dict:
        """Verify a factual claim against live sources.

        Args:
            claim: The claim to verify.
            context: Additional context. Currently unused; kept for interface
                compatibility.

        Returns:
            Dict with the keys ``claim``, ``verified`` (at least one
            supporting source), ``confidence`` (supporting / all sources, 0.0
            when there are none), ``supporting_sources``,
            ``contradicting_sources``, ``neutral_sources`` and
            ``needs_citation`` (confidence below 0.8).
        """
        supporting = []
        contradicting = []
        neutral = []

        # Tokenize the claim once rather than once per source.
        claim_words = self._words(claim)

        # A claim without any words cannot be matched, so skip the lookups.
        if claim_words:
            # NOTE(review): both tool calls are invoked without ``await``, so
            # they presumably block the event loop - confirm for callers.
            web_results = self.search.search(claim, num_results=5) or []
            wiki_results = self.wikipedia.search(claim, num_results=2) or []

            min_overlap = len(claim_words) * self._SUPPORT_THRESHOLD
            for source in [*web_results, *wiki_results]:
                source_words = self._words(self._source_text(source))
                if len(claim_words & source_words) > min_overlap:
                    supporting.append(source)
                elif source_words & self._NEGATION_MARKERS:
                    contradicting.append(source)
                else:
                    neutral.append(source)

        total = len(supporting) + len(contradicting) + len(neutral)
        confidence = len(supporting) / total if total else 0.0

        return {
            "claim": claim,
            "verified": len(supporting) > 0,
            "confidence": confidence,
            "supporting_sources": supporting,
            "contradicting_sources": contradicting,
            "neutral_sources": neutral,
            "needs_citation": confidence < self._CITATION_THRESHOLD,
        }

    async def verify_batch(
        self,
        claims: list[str],
    ) -> list[dict]:
        """Verify multiple claims.

        Claims are checked one after another, in input order.

        Args:
            claims: List of claims to verify.

        Returns:
            List of verification results, one per claim, in the same order.
        """
        return [await self.verify_claim(claim) for claim in claims]
|
||||
|
||||
|
||||
def create_research_agent(
    search_provider: str = "tavily",
    *,
    config=None,
    use_wikipedia: bool = True,
    use_academic: bool = True,
) -> ResearchAgent:
    """Factory to create a research agent.

    The keyword-only options mirror the ``ResearchAgent`` constructor and use
    the same defaults, so existing ``create_research_agent(provider)`` calls
    behave as before.

    Args:
        search_provider: Search provider.
        config: Agent configuration forwarded to ``ResearchAgent``.
        use_wikipedia: Include Wikipedia search.
        use_academic: Include academic search.

    Returns:
        Configured ResearchAgent.
    """
    return ResearchAgent(
        config=config,
        search_provider=search_provider,
        use_wikipedia=use_wikipedia,
        use_academic=use_academic,
    )
|
||||
Reference in New Issue
Block a user