-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathagent.py
More file actions
169 lines (142 loc) · 5.45 KB
/
agent.py
File metadata and controls
169 lines (142 loc) · 5.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
"""
Web Researcher Agent
Researches topics by gathering information from the web and synthesizing
a comprehensive summary using LLM analysis.
Price: $0.05 per invocation (covers LLM API costs + compute)
"""
from seren_agent import agent
from seren_agent.llm import get_openai_client
# Alternative: Use Seren Publisher routing (no openai package needed, uses SEREN_API_KEY)
# from seren_agent.llm import get_seren_openai_client
# client = get_seren_openai_client() # Routes through seren-models publisher
@agent(
    name="Web Researcher",
    description="Research any topic and get a comprehensive summary with sources. "
    "Provide a query and optionally specify depth (quick, moderate, thorough).",
    price="0.05",
)
def run(input: dict) -> dict:
    """
    Research a topic and return a structured summary.

    Input:
        query: str - The research question or topic (required)
        depth: str - Research depth: "quick" (default), "moderate", or "thorough"
        max_sources: int - Maximum number of sources to include (default: 5)

    Output:
        summary: str - Comprehensive summary of findings
        key_points: list[str] - Bullet points of main findings (at most 7)
        sources: list[dict] - Sources with title, url, relevance
        confidence: str - Confidence level in the research (low, medium, high)
        metadata: dict - depth, model_used, and the original query

        On missing query, returns {"error": ...} instead.
    """
    import json  # stdlib; function-local so the module import block stays untouched

    query = input.get("query")
    if not query:
        return {"error": "Missing required field: query"}

    depth = input.get("depth", "quick")

    # FIX: max_sources was used unvalidated; a non-int value (e.g. "5" or None)
    # would break the slice over the parsed sources below. Coerce and clamp.
    try:
        max_sources = max(1, int(input.get("max_sources", 5)))
    except (TypeError, ValueError):
        max_sources = 5

    # Unknown depth values silently fall back to the cheapest tier.
    if depth not in ("quick", "moderate", "thorough"):
        depth = "quick"

    # Configure research parameters based on depth.
    depth_config = {
        "quick": {"iterations": 1, "model": "gpt-4o-mini"},
        "moderate": {"iterations": 2, "model": "gpt-4o"},
        "thorough": {"iterations": 3, "model": "gpt-4o"},
    }
    config = depth_config[depth]
    client = get_openai_client()

    # For this example, we simulate web research by using the LLM's knowledge.
    # In production, this would integrate with a web search API (e.g., via Seren publisher).
    system_prompt = """You are a research assistant. Your task is to provide comprehensive
information about the given topic. Structure your response as follows:
1. A detailed summary (2-3 paragraphs)
2. Key points (5-7 bullet points)
3. Suggested sources (real, verifiable sources when possible)
4. Confidence assessment
Be factual, cite specific information, and acknowledge limitations in your knowledge."""

    # Perform research.
    response = client.chat.completions.create(
        model=config["model"],
        messages=[
            {"role": "system", "content": system_prompt},
            {
                "role": "user",
                "content": f"Research the following topic thoroughly: {query}",
            },
        ],
        temperature=0.7,
    )
    research_text = response.choices[0].message.content

    # Extract key points (lines starting with a bullet marker).
    # In production, you'd use a more robust parsing approach.
    key_points = []
    for line in research_text.split("\n"):
        line = line.strip()
        if line.startswith(("-", "*", "•")) and len(line) > 3:
            key_points.append(line.lstrip("-*• "))
            if len(key_points) >= 7:
                break

    # Generate source suggestions as JSON via a second, cheaper call.
    source_response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "system",
                "content": "Generate a JSON array of relevant sources for the topic. "
                "Each source should have: title, url (realistic but may be example), "
                "and relevance (brief description). Return only valid JSON.",
            },
            {
                "role": "user",
                "content": f"Topic: {query}\n\nGenerate {max_sources} sources.",
            },
        ],
        temperature=0.5,
    )

    sources = []
    try:
        sources_text = source_response.choices[0].message.content
        # Clean up potential markdown code fences (``` or ```json).
        if "```" in sources_text:
            sources_text = sources_text.split("```")[1]
            if sources_text.startswith("json"):
                sources_text = sources_text[4:]
        parsed = json.loads(sources_text)
        # FIX: the model may return a JSON object rather than an array; slicing
        # a dict previously raised an uncaught TypeError. Accept lists only.
        if not isinstance(parsed, list):
            raise ValueError("expected a JSON array of sources")
        sources = parsed[:max_sources]
    except (ValueError, TypeError, IndexError):
        # json.JSONDecodeError is a ValueError subclass, so it is covered here.
        sources = [
            {
                "title": "General Reference",
                "url": "https://example.com",
                "relevance": "Primary research source",
            }
        ]

    # Heuristic confidence: thorough + enough findings -> high;
    # quick or sparse findings -> low; otherwise medium.
    confidence = "medium"
    if depth == "thorough" and len(key_points) >= 5:
        confidence = "high"
    elif depth == "quick" or len(key_points) < 3:
        confidence = "low"

    return {
        "summary": research_text,
        "key_points": key_points[:7],
        "sources": sources,
        "confidence": confidence,
        "metadata": {
            "depth": depth,
            "model_used": config["model"],
            "query": query,
        },
    }
if __name__ == "__main__":
    # Local smoke test: run the agent once through the testing harness
    # and dump whatever it returns.
    from seren_agent.testing import test_agent

    payload = {
        "query": "What are the latest developments in quantum computing?",
        "depth": "quick",
    }
    print(test_agent(run, payload, env={"OPENAI_API_KEY": "your-key-here"}))