Creating Custom Agents with Continuous Learning - Tutorial

Introduction

This tutorial shows you how to create domain-specific AI agents that improve through continuous learning. Unlike traditional fine-tuning, these agents learn from dozens of examples in minutes, not thousands of examples over days.

We'll build three example agents:

1. Math Tutor Agent - Helps students solve math problems
2. Code Review Agent - Reviews code for bugs and improvements
3. Customer Support Agent - Handles customer inquiries

What is a Custom Agent?

A custom agent consists of:

Base Model + System Prompt + Experience Library + Tools/Functions
     ↓              ↓                 ↓                  ↓
  Reasoning    Personality     Domain Expertise    External Actions

Example: Math Tutor Agent

- Base Model: Qwen3-7B-Instruct or DeepSeek-V3 (via API)
- System Prompt: "You are a patient math tutor..."
- Experience Library: 50+ math problem-solving strategies
- Tools: Calculator, equation solver, graphing

Prerequisites

# Install Gym with agent support
pip install zoo-gym[grpo,agents]

# Additional dependencies
pip install numpy sympy matplotlib

Agent Template Structure

Basic Template

from dataclasses import dataclass
from typing import List, Optional, Callable
from gym.train.grpo.experience_manager import ExperienceManager
from gym.train.grpo.api_model_adapter import DeepSeekAdapter

@dataclass
class AgentConfig:
    """Configuration for a custom agent."""
    name: str
    system_prompt: str
    experience_lib_path: str
    api_key: str
    api_model: str = "deepseek-chat"
    tools: Optional[List[Callable]] = None

class ContinuousLearningAgent:
    """Base class for agents with continuous learning."""

    def __init__(self, config: AgentConfig):
        """Initialize agent."""
        self.config = config
        self.experience_manager = ExperienceManager(
            checkpoint_path=config.experience_lib_path
        )
        self.model = DeepSeekAdapter(
            api_key=config.api_key,
            model=config.api_model
        )
        self.tools = config.tools or []

    def respond(self, query: str, use_experiences: bool = True) -> str:
        """Generate response to query."""
        # Build prompt (the system prompt is passed separately to generate(),
        # so it is not duplicated here)
        full_prompt = ""

        # Add experiences if enabled
        if use_experiences and len(self.experience_manager) > 0:
            experiences = self.experience_manager.format_for_prompt()
            full_prompt += f"Helpful Experiences:\n{experiences}\n\n"

        # Add query
        full_prompt += f"Query: {query}\n\nResponse:"

        # Generate
        response = self.model.generate(
            full_prompt,
            system_prompt=self.config.system_prompt
        )

        return response

    def learn_from_examples(
        self,
        examples: List[dict],
        num_epochs: int = 3,
        group_size: int = 5
    ):
        """
        Learn from training examples.

        Args:
            examples: List of dicts with 'query', optional 'expected_response', and optional 'reward'
            num_epochs: Number of training epochs
            group_size: Rollouts per query
        """
        from gym.train.grpo.semantic_extractor import SemanticExtractor, LLMClient, Trajectory

        # Initialize semantic extractor
        llm = LLMClient(api_key=self.config.api_key)
        extractor = SemanticExtractor(llm, max_operations=3)

        for epoch in range(num_epochs):
            print(f"\nEpoch {epoch+1}/{num_epochs}")

            all_operations = []

            for example in examples:
                # Generate rollouts
                trajectories = []
                experiences = self.experience_manager.format_for_prompt()

                for _ in range(group_size):
                    response = self.model.generate_with_experiences(
                        example['query'],
                        experiences
                    )
                    reward = self._evaluate_response(
                        response,
                        example.get('expected_response'),
                        example.get('reward', 0.5)
                    )

                    traj = Trajectory(
                        query=example['query'],
                        output=response,
                        reward=reward,
                        groundtruth=example.get('expected_response')
                    )
                    trajectories.append(traj)

                # Summarize trajectories
                for traj in trajectories:
                    traj.summary = extractor.summarize_trajectory(traj)

                # Extract advantages
                if len(set(t.reward for t in trajectories)) > 1:
                    ops = extractor.extract_group_advantage(
                        trajectories,
                        experiences,
                        use_groundtruth=True
                    )
                    all_operations.append(ops)

            # Consolidate and apply
            if all_operations:
                final_ops = extractor.consolidate_batch(
                    all_operations,
                    self.experience_manager.format_for_prompt()
                )
                self.experience_manager.apply_operations(final_ops)

            print(f"Experiences: {len(self.experience_manager)}")

        # Save updated experiences
        self.experience_manager.save(self.config.experience_lib_path)

    def _evaluate_response(
        self,
        response: str,
        expected: Optional[str],
        default_reward: float
    ) -> float:
        """Evaluate response quality."""
        if expected is None:
            return default_reward

        # Simple exact match (override for domain-specific evaluation)
        if response.strip().lower() == expected.strip().lower():
            return 1.0

        # Partial match
        if expected.lower() in response.lower():
            return 0.7

        return 0.0

    def save(self, path: str):
        """Save agent state."""
        self.experience_manager.save(path)

    def load(self, path: str):
        """Load agent state."""
        self.experience_manager.load(path)
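
A minimal usage sketch of the template (the API key and paths are placeholders; the experience library file is created on first save):

config = AgentConfig(
    name="General Assistant",
    system_prompt="You are a helpful assistant.",
    experience_lib_path="./agents/general/experiences.json",
    api_key="sk-xxx"
)
agent = ContinuousLearningAgent(config)
print(agent.respond("What can you help me with?"))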

Example 1: Math Tutor Agent

Agent Definition

class MathTutorAgent(ContinuousLearningAgent):
    """Math tutor that helps students solve problems."""

    def __init__(self, api_key: str):
        config = AgentConfig(
            name="Math Tutor",
            system_prompt="""You are a patient and encouraging math tutor.

When helping students:
1. Break down complex problems into steps
2. Explain why each step is necessary
3. Check intermediate results
4. Encourage students to think critically
5. Celebrate correct reasoning

Always show your work clearly and verify solutions.""",
            experience_lib_path="./agents/math_tutor/experiences.json",
            api_key=api_key,
            tools=[self.check_solution, self.plot_function]
        )
        super().__init__(config)

    def check_solution(self, equation: str, solution: str) -> bool:
        """Verify if solution satisfies equation."""
        try:
            import sympy as sp
            # Parse equation and solution
            lhs, rhs = equation.split('=')
            x = sp.Symbol('x')

            # Substitute solution
            lhs_val = sp.sympify(lhs).subs(x, sp.sympify(solution))
            rhs_val = sp.sympify(rhs)

            return sp.simplify(lhs_val - rhs_val) == 0
        except Exception as e:
            print(f"Verification error: {e}")
            return False

    def plot_function(self, expression: str, x_range: tuple = (-10, 10)):
        """Plot mathematical function."""
        import numpy as np
        import matplotlib.pyplot as plt
        import sympy as sp

        x = sp.Symbol('x')
        func = sp.sympify(expression)

        # Generate points
        x_vals = np.linspace(x_range[0], x_range[1], 100)
        y_vals = [float(func.subs(x, val)) for val in x_vals]

        # Plot
        plt.figure(figsize=(8, 6))
        plt.plot(x_vals, y_vals)
        plt.grid(True)
        plt.xlabel('x')
        plt.ylabel('f(x)')
        plt.title(f'y = {expression}')
        plt.savefig('/tmp/function_plot.png')
        plt.close()

        return '/tmp/function_plot.png'

    def _evaluate_response(
        self,
        response: str,
        expected: Optional[str],
        default_reward: float
    ) -> float:
        """Evaluate math response quality."""
        if expected is None:
            return default_reward

        # Check whether the expected answer appears in the response
        if expected.strip().lower() in response.lower():
            return 1.0

        # Check for correct methodology (mentions key concepts)
        math_keywords = ['derivative', 'integral', 'solve', 'substitute', 'factor']
        if any(kw in response.lower() for kw in math_keywords):
            return 0.6

        return 0.0

# Usage
tutor = MathTutorAgent(api_key="sk-xxx")

# Train on example problems
training_examples = [
    {
        "query": "Solve: x² + 5x + 6 = 0",
        "expected_response": "x = -2 or x = -3",
        "reward": 1.0
    },
    {
        "query": "Find the derivative of x³ + 2x",
        "expected_response": "3x² + 2",
        "reward": 1.0
    },
    {
        "query": "What's the integral of 2x?",
        "expected_response": "x² + C",
        "reward": 1.0
    }
]

tutor.learn_from_examples(training_examples, num_epochs=3)

# Use the trained tutor
question = "Solve: x² - 4 = 0"
answer = tutor.respond(question)
print(f"Student: {question}")
print(f"Tutor: {answer}")

Sample Training Data

Create math_problems.json:

[
  {
    "query": "Solve the quadratic equation: 2x² + 7x + 3 = 0",
    "expected_response": "x = -1/2 or x = -3",
    "hints": [
      "Use the quadratic formula",
      "Check discriminant first",
      "Factor if possible"
    ]
  },
  {
    "query": "Find dy/dx if y = x³ + 4x² - 2x + 1",
    "expected_response": "dy/dx = 3x² + 8x - 2",
    "hints": [
      "Apply power rule to each term",
      "Constant term becomes 0"
    ]
  },
  {
    "query": "Calculate: ∫(3x² + 2x) dx",
    "expected_response": "x³ + x² + C",
    "hints": [
      "Use power rule for integration",
      "Don't forget constant of integration"
    ]
  }
]
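
One way to feed this file into learn_from_examples (a sketch; note that the hints field is extra metadata that learn_from_examples does not consume directly):

import json

with open("math_problems.json") as f:
    problems = json.load(f)

training_examples = [
    {
        "query": p["query"],
        "expected_response": p["expected_response"],
        "reward": 1.0
    }
    for p in problems
]
tutor.learn_from_examples(training_examples, num_epochs=3)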

Example 2: Code Review Agent

Agent Definition

class CodeReviewAgent(ContinuousLearningAgent):
    """Agent that reviews code for bugs and improvements."""

    def __init__(self, api_key: str):
        config = AgentConfig(
            name="Code Reviewer",
            system_prompt="""You are an experienced software engineer reviewing code.

Focus on:
1. Correctness - Does the code work as intended?
2. Security - Are there vulnerabilities?
3. Performance - Can it be optimized?
4. Readability - Is it clear and maintainable?
5. Best Practices - Does it follow language conventions?

Provide specific, actionable feedback with examples.""",
            experience_lib_path="./agents/code_reviewer/experiences.json",
            api_key=api_key,
            tools=[self.run_linter, self.run_tests]
        )
        super().__init__(config)

    def run_linter(self, code: str, language: str = "python") -> dict:
        """Run linter on code."""
        if language == "python":
            import subprocess
            import tempfile

            # Write code to temp file
            with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False) as f:
                f.write(code)
                temp_path = f.name

            try:
                # Run pylint
                result = subprocess.run(
                    ['pylint', temp_path],
                    capture_output=True,
                    text=True,
                    timeout=10
                )
                return {
                    "issues": result.stdout,
                    "score": self._extract_pylint_score(result.stdout)
                }
            except Exception as e:
                return {"error": str(e)}
            finally:
                import os
                os.unlink(temp_path)

        return {"error": f"Unsupported language: {language}"}

    def run_tests(self, code: str, tests: str) -> dict:
        """Run unit tests on code."""
        import subprocess
        import tempfile

        # Combine code and tests
        full_code = f"{code}\n\n{tests}"

        with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False) as f:
            f.write(full_code)
            temp_path = f.name

        try:
            result = subprocess.run(
                ['python', '-m', 'pytest', temp_path, '-v'],
                capture_output=True,
                text=True,
                timeout=10
            )
            return {
                "passed": result.returncode == 0,
                "output": result.stdout
            }
        except Exception as e:
            return {"error": str(e)}
        finally:
            import os
            os.unlink(temp_path)

    def _extract_pylint_score(self, output: str) -> float:
        """Extract score from pylint output."""
        import re
        match = re.search(r'Your code has been rated at ([\d.]+)/10', output)
        if match:
            return float(match.group(1))
        return 0.0

    def review_code(self, code: str, context: str = "") -> dict:
        """Perform full code review."""
        # Run linter
        lint_results = self.run_linter(code)

        # Get AI review
        experiences = self.experience_manager.format_for_prompt()
        prompt = f"""Review this code:

```python
{code}
```

Context: {context}

Linter score: {lint_results.get('score', 'N/A')}/10

Provide:
1. Issues found (bugs, security, performance)
2. Suggested improvements
3. Refactored version if needed"""

        ai_review = self.model.generate_with_experiences(prompt, experiences)

        return {
            "lint_score": lint_results.get('score'),
            "lint_issues": lint_results.get('issues'),
            "ai_review": ai_review
        }

Usage

reviewer = CodeReviewAgent(api_key="sk-xxx")

# Train on code examples
training_examples = [
    {
        "query": "Review: def add(a, b): return a + b",
        "expected_response": "Good: Simple and clear. Consider type hints.",
        "reward": 0.8
    },
    {
        "query": "Review: x = eval(user_input)",
        "expected_response": "Security issue: Never use eval() on user input. Use ast.literal_eval() or input validation.",
        "reward": 1.0
    }
]

reviewer.learn_from_examples(training_examples, num_epochs=2)

# Use the reviewer
code_to_review = """
def calculate_average(numbers):
    sum = 0
    for n in numbers:
        sum += n
    return sum / len(numbers)
"""

review = reviewer.review_code(code_to_review, context="Function for student grades")
print(f"Linter Score: {review['lint_score']}/10")
print(f"\nAI Review:\n{review['ai_review']}")

Example 3: Customer Support Agent

Agent Definition

class CustomerSupportAgent(ContinuousLearningAgent):
    """Agent for handling customer support inquiries."""

    def __init__(self, api_key: str, knowledge_base: dict):
        config = AgentConfig(
            name="Customer Support",
            system_prompt="""You are a helpful customer support representative.

Guidelines:
1. Be empathetic and understanding
2. Provide clear, step-by-step solutions
3. Escalate complex issues to human agents
4. Ask clarifying questions when needed
5. Thank customers for their patience

Always maintain a professional and friendly tone.""",
            experience_lib_path="./agents/support/experiences.json",
            api_key=api_key,
            tools=[self.search_kb, self.create_ticket]
        )
        super().__init__(config)
        self.knowledge_base = knowledge_base
        self.escalation_keywords = ['lawsuit', 'lawyer', 'sue', 'complaint']

    def search_kb(self, query: str, top_k: int = 3) -> List[dict]:
        """Search knowledge base for relevant articles."""
        # Simple keyword-based search (use embeddings in production)
        results = []
        query_lower = query.lower()

        for article_id, article in self.knowledge_base.items():
            # Check if query keywords match article
            if any(kw in article['content'].lower() for kw in query_lower.split()):
                results.append({
                    "id": article_id,
                    "title": article['title'],
                    "content": article['content'],
                    "url": article.get('url')
                })

        return results[:top_k]
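
    def search_kb_semantic(self, query: str, top_k: int = 3) -> List[dict]:
        """Sketch of an embedding-based alternative to search_kb.

        Assumes the optional sentence-transformers package and the
        all-MiniLM-L6-v2 model; illustrative, not part of the tutorial's API.
        """
        from sentence_transformers import SentenceTransformer, util

        model = SentenceTransformer("all-MiniLM-L6-v2")
        items = list(self.knowledge_base.items())

        # Embed the query and every article, then rank by cosine similarity
        doc_emb = model.encode([a["content"] for _, a in items], convert_to_tensor=True)
        query_emb = model.encode(query, convert_to_tensor=True)
        scores = util.cos_sim(query_emb, doc_emb)[0].tolist()

        ranked = sorted(zip(items, scores), key=lambda p: p[1], reverse=True)
        return [
            {"id": aid, "title": a["title"], "content": a["content"], "url": a.get("url")}
            for (aid, a), _ in ranked[:top_k]
        ]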

    def create_ticket(self, issue: str, priority: str = "normal") -> str:
        """Create support ticket."""
        import uuid
        ticket_id = f"TICKET-{uuid.uuid4().hex[:8]}"

        # In production, save to database
        print(f"Created ticket {ticket_id}: {issue} (priority: {priority})")

        return ticket_id

    def should_escalate(self, query: str) -> bool:
        """Check if query should be escalated to human."""
        return any(kw in query.lower() for kw in self.escalation_keywords)

    def respond_to_customer(self, query: str) -> dict:
        """Handle customer inquiry."""
        # Check for escalation
        if self.should_escalate(query):
            ticket_id = self.create_ticket(query, priority="high")
            return {
                "response": f"I understand this is important. I've created a high-priority ticket ({ticket_id}) and a senior agent will contact you within 2 hours.",
                "escalated": True,
                "ticket_id": ticket_id
            }

        # Search knowledge base
        kb_results = self.search_kb(query)

        # Build context from KB
        kb_context = "\n".join([
            f"- {r['title']}: {r['content'][:200]}..."
            for r in kb_results
        ])

        # Get experiences
        experiences = self.experience_manager.format_for_prompt()

        # Generate response
        prompt = f"""Customer Query: {query}

Relevant Knowledge Base Articles:
{kb_context}

Helpful Experiences:
{experiences}

Provide a helpful, empathetic response."""

        response = self.model.generate(prompt, system_prompt=self.config.system_prompt)

        return {
            "response": response,
            "escalated": False,
            "kb_articles": kb_results
        }

# Usage
knowledge_base = {
    "kb001": {
        "title": "How to Reset Password",
        "content": "To reset your password: 1. Click 'Forgot Password' 2. Enter email 3. Check inbox for reset link",
        "url": "https://help.example.com/password-reset"
    },
    "kb002": {
        "title": "Shipping Times",
        "content": "Standard shipping: 5-7 business days. Express: 2-3 business days.",
        "url": "https://help.example.com/shipping"
    }
}

agent = CustomerSupportAgent(api_key="sk-xxx", knowledge_base=knowledge_base)

# Train on past interactions
training_examples = [
    {
        "query": "I forgot my password",
        "expected_response": "I can help! Click 'Forgot Password' on the login page...",
        "reward": 1.0
    },
    {
        "query": "Where's my order?",
        "expected_response": "I understand you're eager to receive your order. Let me help track it...",
        "reward": 1.0
    }
]

agent.learn_from_examples(training_examples, num_epochs=2)

# Handle customer query
customer_query = "I can't log in to my account!"
result = agent.respond_to_customer(customer_query)

print(f"Customer: {customer_query}")
print(f"Agent: {result['response']}")
if result['escalated']:
    print(f"[Escalated - Ticket: {result['ticket_id']}]")

Advanced Features

1. Multi-Agent Collaboration

class AgentOrchestrator:
    """Coordinate multiple specialized agents."""

    def __init__(self, agents: dict):
        """
        Args:
            agents: Dict of {agent_name: agent_instance}
        """
        self.agents = agents

    def route_query(self, query: str) -> str:
        """Route query to most appropriate agent."""
        # Simple keyword-based routing (use classifier in production)
        routing_rules = {
            "math": ["equation", "solve", "derivative", "integral"],
            "code": ["function", "code", "bug", "error"],
            "support": ["order", "account", "help", "issue"]
        }

        for agent_name, keywords in routing_rules.items():
            if any(kw in query.lower() for kw in keywords):
                return agent_name

        return "support"  # Default

    def handle_query(self, query: str) -> dict:
        """Handle query using appropriate agent."""
        agent_name = self.route_query(query)
        agent = self.agents.get(agent_name)

        if agent is None:
            return {"error": f"No agent found for: {agent_name}"}

        response = agent.respond(query)

        return {
            "agent": agent_name,
            "response": response
        }

# Usage
orchestrator = AgentOrchestrator({
    "math": MathTutorAgent(api_key="sk-xxx"),
    "code": CodeReviewAgent(api_key="sk-xxx"),
    "support": CustomerSupportAgent(api_key="sk-xxx", knowledge_base={})
})

# Route queries automatically
queries = [
    "Solve: x² + 3x + 2 = 0",
    "Review my Python function",
    "I forgot my password"
]

for q in queries:
    result = orchestrator.handle_query(q)
    print(f"Query: {q}")
    print(f"Routed to: {result['agent']}")
    print(f"Response: {result['response'][:100]}...\n")

2. Agent Memory (Conversation History)

class ConversationalAgent(ContinuousLearningAgent):
    """Agent with conversation memory."""

    def __init__(self, config: AgentConfig, max_history: int = 10):
        super().__init__(config)
        self.conversation_history = []
        self.max_history = max_history

    def respond(self, query: str, use_experiences: bool = True) -> str:
        """Generate response with conversation context."""
        # Build prompt with history
        full_prompt = f"{self.config.system_prompt}\n\n"

        # Add conversation history
        if self.conversation_history:
            full_prompt += "Previous conversation:\n"
            for turn in self.conversation_history[-self.max_history:]:
                full_prompt += f"User: {turn['user']}\n"
                full_prompt += f"Assistant: {turn['assistant']}\n"
            full_prompt += "\n"

        # Add experiences
        if use_experiences and len(self.experience_manager) > 0:
            experiences = self.experience_manager.format_for_prompt()
            full_prompt += f"Helpful Experiences:\n{experiences}\n\n"

        # Add current query
        full_prompt += f"User: {query}\nAssistant:"

        # Generate
        response = self.model.generate(full_prompt)

        # Store in history
        self.conversation_history.append({
            "user": query,
            "assistant": response
        })

        return response

    def clear_history(self):
        """Clear conversation history."""
        self.conversation_history = []
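
Usage sketch (reuses an AgentConfig instance as defined earlier):

chat = ConversationalAgent(config, max_history=5)
print(chat.respond("What is a derivative?"))
print(chat.respond("Can you show an example?"))  # sees the previous turn
chat.clear_history()  # start a fresh conversation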

3. Human-in-the-Loop Feedback

class FeedbackLearningAgent(ContinuousLearningAgent):
    """Agent that learns from human feedback."""

    def __init__(self, config: AgentConfig):
        super().__init__(config)
        self.feedback_buffer = []

    def respond_with_feedback(self, query: str) -> str:
        """Generate response and collect feedback."""
        response = self.respond(query)

        # Display response
        print(f"Agent: {response}\n")

        # Collect feedback
        feedback = input("Feedback (good/bad/suggest:<alternative>): ")

        # Parse feedback
        if feedback.startswith("suggest:"):
            alternative = feedback.split(":", 1)[1].strip()
            reward = 1.0  # Suggestion is ground truth
            groundtruth = alternative
        elif feedback == "good":
            reward = 1.0
            groundtruth = response
        elif feedback == "bad":
            reward = 0.0
            groundtruth = None
        else:
            reward = 0.5
            groundtruth = None

        # Store in buffer
        self.feedback_buffer.append({
            "query": query,
            "response": response,
            "reward": reward,
            "groundtruth": groundtruth
        })

        return response

    def learn_from_feedback(self, batch_size: int = 10):
        """Learn from accumulated feedback."""
        if len(self.feedback_buffer) < batch_size:
            print(f"Not enough feedback ({len(self.feedback_buffer)}/{batch_size})")
            return

        # Use latest feedback
        examples = self.feedback_buffer[-batch_size:]

        # Run continuous learning
        self.learn_from_examples(examples, num_epochs=1)

        # Clear buffer
        self.feedback_buffer = []

        print(f"Learned from {batch_size} feedback examples")

# Usage
agent = FeedbackLearningAgent(config)  # config: an AgentConfig as defined earlier

# Interactive session
for i in range(20):
    query = input("Query: ")
    agent.respond_with_feedback(query)

    # Learn every 10 interactions
    if (i + 1) % 10 == 0:
        agent.learn_from_feedback(batch_size=10)

Deployment

1. Save and Load Agents

import json
from pathlib import Path

def save_agent(agent: ContinuousLearningAgent, directory: str):
    """Save agent to directory."""
    dir_path = Path(directory)
    dir_path.mkdir(parents=True, exist_ok=True)

    # Save experiences
    agent.experience_manager.save(str(dir_path / "experiences.json"))

    # Save config
    config_dict = {
        "name": agent.config.name,
        "system_prompt": agent.config.system_prompt,
        "api_model": agent.config.api_model
    }
    with open(dir_path / "config.json", 'w') as f:
        json.dump(config_dict, f, indent=2)

    print(f"Agent saved to {directory}")

def load_agent(directory: str, api_key: str) -> ContinuousLearningAgent:
    """Load agent from directory."""
    dir_path = Path(directory)

    # Load config
    with open(dir_path / "config.json") as f:
        config_dict = json.load(f)

    # Reconstruct config
    config = AgentConfig(
        name=config_dict["name"],
        system_prompt=config_dict["system_prompt"],
        experience_lib_path=str(dir_path / "experiences.json"),
        api_key=api_key,
        api_model=config_dict["api_model"]
    )

    # Create agent
    agent = ContinuousLearningAgent(config)

    print(f"Agent loaded from {directory}")
    return agent

# Usage
save_agent(tutor, "./deployed_agents/math_tutor")
loaded_tutor = load_agent("./deployed_agents/math_tutor", api_key="sk-xxx")

2. REST API Deployment

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel

app = FastAPI()

# Load agent
agent = load_agent("./deployed_agents/math_tutor", api_key="sk-xxx")

class Query(BaseModel):
    text: str
    use_experiences: bool = True

class Response(BaseModel):
    response: str
    experiences_used: int

@app.post("/query", response_model=Response)
async def handle_query(query: Query):
    """Handle agent query."""
    try:
        response = agent.respond(
            query.text,
            use_experiences=query.use_experiences
        )

        return Response(
            response=response,
            experiences_used=len(agent.experience_manager)
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

@app.get("/experiences")
async def get_experiences():
    """Get current experience library."""
    return {
        "count": len(agent.experience_manager),
        "experiences": agent.experience_manager.experiences
    }

# Run with: uvicorn agent_api:app --reload
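
A quick client-side check against the running server (assumes the requests package and the default local address):

import requests

resp = requests.post(
    "http://127.0.0.1:8000/query",
    json={"text": "Solve: x² - 9 = 0", "use_experiences": True}
)
print(resp.json()["response"])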

Best Practices

1. Start with Strong Base Prompts

# Good system prompt characteristics:
# - Clear role definition
# - Specific guidelines
# - Example patterns
# - Quality standards

GOOD_PROMPT = """You are an expert {domain} assistant.

Guidelines:
1. {guideline_1}
2. {guideline_2}
...

Example patterns:
- When asked about X, do Y
- For Z queries, provide A, B, and C

Quality standards:
- Accuracy > 95%
- Response time < 3s
- Professional tone
"""

2. Domain-Specific Evaluation

# Override _evaluate_response for each domain

class MathAgent(ContinuousLearningAgent):
    def _evaluate_response(self, response, expected, default):
        # Use symbolic math comparison
        # (extract_answer is a domain-specific helper; a sketch follows below)
        try:
            import sympy as sp
            resp_val = sp.sympify(extract_answer(response))
            exp_val = sp.sympify(expected)
            return 1.0 if sp.simplify(resp_val - exp_val) == 0 else 0.0
        except Exception:
            return default

class CodeAgent(ContinuousLearningAgent):
    def _evaluate_response(self, response, expected, default):
        # Run code and compare outputs
        # (run_code_tests is a placeholder for a test harness such as the
        # run_tests tool from the CodeReviewAgent above)
        return run_code_tests(response, expected)
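
A naive sketch of the hypothetical extract_answer helper (real extraction logic depends on how your agent formats its answers):

def extract_answer(response: str) -> str:
    """Naively take the expression after the last '=' in the response."""
    if "=" in response:
        return response.rsplit("=", 1)[1].strip()
    return response.strip()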

3. Incremental Learning

# Don't retrain from scratch - add new examples incrementally

# Week 1: Initial training
agent.learn_from_examples(initial_examples, num_epochs=3)
agent.save("./agent_v1")

# Week 2: Add more examples
agent.load("./agent_v1")
agent.learn_from_examples(new_examples, num_epochs=2)
agent.save("./agent_v2")

# Result: Experiences from both batches

Troubleshooting

Issue: Agent responses are generic or unhelpful

Solution: Improve system prompt and add domain experiences

# Add domain-specific experiences manually
agent.experience_manager.add(
    "When solving equations, always verify solutions by substitution"
)
agent.experience_manager.add(
    "For word problems, identify known/unknown variables first"
)

Issue: Experiences not being used

Solution: Check that experiences are actually injected into the prompt

# Debug: Print full prompt
full_prompt = f"{system_prompt}\n\nExperiences:\n{experiences}\n\nQuery: {query}"
print(full_prompt)  # Verify experiences are included

Issue: Learning too slow

Solution: Increase group size or epochs

agent.learn_from_examples(
    examples,
    num_epochs=5,  # More epochs
    group_size=8   # More rollouts
)

Summary

Custom agents with continuous learning enable:

✅ Domain specialization - in minutes, not days
✅ Learn from real usage - feedback, corrections
✅ Human-readable knowledge - experiences are transparent
✅ Incremental improvement - add new examples anytime
✅ Multi-agent orchestration - specialized experts

Next Steps:

1. Read the Chat-to-Experience Tutorial
2. Check the API Reference
3. Explore the Main Documentation


Tutorial Last Updated: October 28, 2025