
Testing Guide

Best practices for testing agents, workflows, and Azcore applications.

Comprehensive guide for testing Azcore framework applications, including unit tests, integration tests, end-to-end tests, and LLM testing strategies.

Overview

Testing AI agent systems presents unique challenges: LLM outputs can vary between runs, and real API calls are slow and costly. This guide covers testing strategies for agents, workflows, and LLM interactions, along with best practices for keeping tests reliable.

Testing Strategy

Test Pyramid

        /\
       /E2E\      ← Few, slow, expensive (10%)
      /------\
     /  Integ \   ← Some, medium speed (30%)
    /----------\
   /    Unit    \ ← Many, fast, cheap (60%)
  /--------------\
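Each layer of the pyramid maps onto a pytest marker (registered in the conftest.py shown later in this guide), so an entire module can be pinned to one layer and selected or skipped as a group. A minimal sketch, assuming the integration marker from that configuration:

# tests/integration/test_example_layer.py
import pytest

# Tag every test in this module as an integration test;
# the conftest.py below skips these unless pytest is run with --integration.
pytestmark = pytest.mark.integration


def test_placeholder_integration_case():
    """Placeholder showing how layer selection works."""
    assert True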

Test Structure

tests/
├── unit/                  # Unit tests
│   ├── test_agents.py
│   ├── test_workflows.py
│   └── test_utils.py
├── integration/           # Integration tests
│   ├── test_agent_integration.py
│   └── test_workflow_integration.py
├── e2e/                   # End-to-end tests
│   └── test_scenarios.py
├── fixtures/              # Test fixtures
│   ├── mock_llm.py
│   └── sample_data.py
└── conftest.py            # Pytest configuration
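The fixtures package holds data shared across test modules. Its contents are project-specific; a minimal sketch of what tests/fixtures/sample_data.py might contain (the fixture names here are illustrative, not part of Azcore), made available to tests by listing the module in pytest_plugins or importing it from conftest.py:

# tests/fixtures/sample_data.py
import pytest


@pytest.fixture
def sample_task() -> str:
    """A short task string reused across workflow tests."""
    return "Summarize the benefits of unit testing"


@pytest.fixture
def sample_messages() -> list[dict]:
    """A minimal message history in the format agents expect."""
    return [{"role": "user", "content": "test query"}]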

Unit Testing

Testing Agents

# tests/unit/test_agents.py
import pytest
from unittest.mock import Mock, patch
from azcore.agents import ReactAgent
from langchain_openai import ChatOpenAI

class TestReactAgent:
    """Unit tests for ReactAgent."""

    @pytest.fixture
    def mock_llm(self):
        """Mock LLM for testing."""
        llm = Mock(spec=ChatOpenAI)
        llm.invoke.return_value = Mock(
            content="Test response",
            response_metadata={"token_usage": {"prompt_tokens": 10, "completion_tokens": 5}}
        )
        return llm

    @pytest.fixture
    def agent(self, mock_llm):
        """Create agent with mocked LLM."""
        return ReactAgent(
            name="test_agent",
            llm=mock_llm,
            prompt="You are a test assistant"
        )

    def test_agent_initialization(self, agent):
        """Test agent initializes correctly."""
        assert agent.name == "test_agent"
        assert agent.prompt == "You are a test assistant"

    def test_agent_invoke(self, agent, mock_llm):
        """Test agent invoke method."""
        state = {
            "messages": [{"role": "user", "content": "test query"}]
        }

        result = agent.invoke(state)

        # Verify LLM was called
        mock_llm.invoke.assert_called_once()

        # Verify result structure
        assert "messages" in result
        assert len(result["messages"]) > 0

    def test_agent_with_tools(self, mock_llm):
        """Test agent with tools."""
        mock_tool = Mock()
        mock_tool.name = "test_tool"
        mock_tool.description = "A test tool"

        agent = ReactAgent(
            name="tool_agent",
            llm=mock_llm,
            tools=[mock_tool]
        )

        assert len(agent.tools) == 1
        assert agent.tools[0].name == "test_tool"

    def test_agent_error_handling(self, agent, mock_llm):
        """Test agent handles errors gracefully."""
        mock_llm.invoke.side_effect = Exception("API Error")

        state = {"messages": [{"role": "user", "content": "test"}]}

        with pytest.raises(Exception) as exc_info:
            agent.invoke(state)

        assert "API Error" in str(exc_info.value)


class TestAgentPatternRouter:
    """Test AgentPatternRouter."""

    def test_react_pattern(self):
        """Test creating ReAct agent."""
        from azcore.agents import AgentPatternRouter

        router = AgentPatternRouter(
            pattern="react",
            name="test",
            llm=Mock()
        )

        agent = router.create_agent()
        assert agent is not None

    def test_self_consistency_pattern(self):
        """Test creating self-consistency agent."""
        from azcore.agents import AgentPatternRouter

        router = AgentPatternRouter(
            pattern="self-consistency",
            name="test",
            llm=Mock(),
            num_samples=3
        )

        agent = router.create_agent()
        assert agent is not None

Testing Workflows

# tests/unit/test_workflows.py
import pytest
from unittest.mock import Mock, AsyncMock
from azcore.workflows import SequentialWorkflow, ConcurrentWorkflow

class TestSequentialWorkflow:
    """Unit tests for SequentialWorkflow."""

    @pytest.fixture
    def mock_agents(self):
        """Create mock agents."""
        agents = []
        for i in range(3):
            agent = Mock()
            agent.name = f"agent_{i}"
            agent.invoke = Mock(return_value={
                "messages": [{"role": "assistant", "content": f"Response {i}"}]
            })
            agents.append(agent)
        return agents

    def test_workflow_initialization(self, mock_agents):
        """Test workflow initializes correctly."""
        workflow = SequentialWorkflow(
            agents=mock_agents,
            max_loops=1
        )

        assert len(workflow.agents) == 3
        assert workflow.max_loops == 1

    def test_workflow_execution(self, mock_agents):
        """Test workflow executes all agents."""
        workflow = SequentialWorkflow(agents=mock_agents)

        result = workflow.run("test task")

        # Verify all agents were called
        for agent in mock_agents:
            agent.invoke.assert_called_once()

    def test_workflow_state_propagation(self, mock_agents):
        """Test state propagates between agents."""
        workflow = SequentialWorkflow(agents=mock_agents)

        workflow.run("test task")

        # Verify each agent received output from previous
        call_args = [agent.invoke.call_args for agent in mock_agents]

        # First agent gets initial task
        assert "test task" in str(call_args[0])

        # Subsequent agents get previous responses
        for i in range(1, len(call_args)):
            assert f"Response {i-1}" in str(call_args[i])


class TestConcurrentWorkflow:
    """Unit tests for ConcurrentWorkflow."""

    @pytest.fixture
    def mock_agents(self):
        """Create mock agents with async support."""
        agents = []
        for i in range(3):
            agent = Mock()
            agent.name = f"agent_{i}"
            agent.ainvoke = AsyncMock(return_value={
                "messages": [{"role": "assistant", "content": f"Response {i}"}]
            })
            agents.append(agent)
        return agents

    @pytest.mark.asyncio
    async def test_concurrent_execution(self, mock_agents):
        """Test agents execute concurrently."""
        aggregator = Mock()
        aggregator.ainvoke = AsyncMock(return_value={
            "messages": [{"role": "assistant", "content": "Aggregated"}]
        })

        workflow = ConcurrentWorkflow(
            agents=mock_agents,
            aggregator_agent=aggregator
        )

        result = await workflow.arun("test task")

        # Verify all agents were called
        for agent in mock_agents:
            agent.ainvoke.assert_called_once()

        # Verify aggregator was called
        aggregator.ainvoke.assert_called_once()

Testing Utilities

# tests/unit/test_utils.py
import pytest
from azcore.utils.caching import LRUCache, TTLCache
from azcore.utils.helpers import truncate_text

class TestLRUCache:
    """Test LRU cache implementation."""

    def test_cache_basic_operations(self):
        """Test basic cache operations."""
        cache = LRUCache(max_size=2)

        cache.put("key1", "value1")
        cache.put("key2", "value2")

        assert cache.get("key1") == "value1"
        assert cache.get("key2") == "value2"

    def test_cache_eviction(self):
        """Test LRU eviction."""
        cache = LRUCache(max_size=2)

        cache.put("key1", "value1")
        cache.put("key2", "value2")
        cache.put("key3", "value3")  # Should evict key1

        assert cache.get("key1") is None  # Evicted
        assert cache.get("key2") == "value2"
        assert cache.get("key3") == "value3"

    def test_cache_statistics(self):
        """Test cache statistics."""
        cache = LRUCache(max_size=10)

        cache.put("key1", "value1")
        cache.get("key1")  # Hit
        cache.get("key2")  # Miss

        stats = cache.get_stats()

        assert stats["hits"] == 1
        assert stats["misses"] == 1
        assert stats["hit_rate"] == 0.5


class TestTTLCache:
    """Test TTL cache implementation."""

    def test_ttl_expiration(self):
        """Test TTL expiration."""
        import time

        cache = TTLCache(ttl=1.0)  # 1 second TTL

        cache.put("key1", "value1")
        assert cache.get("key1") == "value1"

        time.sleep(1.5)  # Wait for expiration

        assert cache.get("key1") is None  # Expired

Integration Testing

Testing Agent Integrations

# tests/integration/test_agent_integration.py
import pytest
from azcore.agents import ReactAgent
from langchain_openai import ChatOpenAI

@pytest.mark.integration
class TestAgentIntegration:
    """Integration tests with real LLM (use sparingly)."""

    @pytest.fixture
    def real_llm(self):
        """Create real LLM instance."""
        return ChatOpenAI(
            model="gpt-4o-mini",
            temperature=0  # Deterministic for testing
        )

    @pytest.fixture
    def agent(self, real_llm):
        """Create agent with real LLM."""
        return ReactAgent(
            name="test_agent",
            llm=real_llm,
            prompt="You are a helpful assistant. Be concise."
        )

    @pytest.mark.slow
    def test_simple_query(self, agent):
        """Test agent with simple query."""
        state = {
            "messages": [{"role": "user", "content": "What is 2+2?"}]
        }

        result = agent.invoke(state)

        response = result["messages"][-1]["content"]

        # Verify response contains answer
        assert "4" in response

    @pytest.mark.slow
    def test_multi_turn_conversation(self, agent):
        """Test multi-turn conversation."""
        messages = [
            {"role": "user", "content": "My name is Alice"},
        ]

        result = agent.invoke({"messages": messages})

        messages.append(result["messages"][-1])
        messages.append({"role": "user", "content": "What's my name?"})

        result = agent.invoke({"messages": messages})

        response = result["messages"][-1]["content"]

        # Should remember name
        assert "Alice" in response


@pytest.mark.integration
class TestWorkflowIntegration:
    """Integration tests for workflows."""

    @pytest.mark.slow
    def test_sequential_workflow(self):
        """Test sequential workflow with real agents."""
        from azcore.workflows import SequentialWorkflow

        agent1 = ReactAgent(
            name="researcher",
            llm=ChatOpenAI(model="gpt-4o-mini", temperature=0),
            prompt="Generate 3 facts about the topic. Be concise."
        )

        agent2 = ReactAgent(
            name="summarizer",
            llm=ChatOpenAI(model="gpt-4o-mini", temperature=0),
            prompt="Summarize the previous response in one sentence."
        )

        workflow = SequentialWorkflow(agents=[agent1, agent2])

        result = workflow.run("artificial intelligence")

        # Verify output exists
        assert result is not None
        assert len(result) > 0

Testing with Tools

# tests/integration/test_tools.py
import pytest
from langchain.tools import Tool
from azcore.agents import ReactAgent
from langchain_openai import ChatOpenAI

@pytest.mark.integration
class TestAgentWithTools:
    """Test agents with tool usage."""

    @pytest.fixture
    def calculator_tool(self):
        """Create calculator tool."""
        def calculate(expression: str) -> str:
            try:
                result = eval(expression)  # In real code, use safe evaluation
                return str(result)
            except Exception as e:
                return f"Error: {str(e)}"

        return Tool(
            name="calculator",
            description="Calculate mathematical expressions",
            func=calculate
        )

    @pytest.mark.slow
    def test_agent_uses_tool(self, calculator_tool):
        """Test agent uses calculator tool."""
        llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

        agent = ReactAgent(
            name="math_agent",
            llm=llm,
            tools=[calculator_tool],
            prompt="You are a math assistant. Use the calculator tool for calculations."
        )

        state = {
            "messages": [{"role": "user", "content": "What is 123 * 456?"}]
        }

        result = agent.invoke(state)

        response = result["messages"][-1]["content"]

        # Verify correct answer
        assert "56088" in response or "56,088" in response

End-to-End Testing

Testing Complete Scenarios

# tests/e2e/test_scenarios.py
import pytest
from azcore.agents import ReactAgent, AgentPatternRouter
from azcore.workflows import SequentialWorkflow
from langchain_openai import ChatOpenAI

@pytest.mark.e2e
class TestCompleteScenarios:
    """End-to-end tests for complete use cases."""

    @pytest.mark.slow
    def test_research_workflow(self):
        """Test complete research workflow."""

        # Create agents
        researcher = ReactAgent(
            name="researcher",
            llm=ChatOpenAI(model="gpt-4o-mini", temperature=0.3),
            prompt="You are a researcher. Provide detailed information."
        )

        analyzer = ReactAgent(
            name="analyzer",
            llm=ChatOpenAI(model="gpt-4o-mini", temperature=0),
            prompt="You are an analyzer. Analyze the information provided."
        )

        writer = ReactAgent(
            name="writer",
            llm=ChatOpenAI(model="gpt-4o-mini", temperature=0.7),
            prompt="You are a writer. Create a well-structured summary."
        )

        # Create workflow
        workflow = SequentialWorkflow(agents=[researcher, analyzer, writer])

        # Execute
        result = workflow.run("Tell me about machine learning")

        # Verify result
        assert result is not None
        assert len(result) > 100  # Should be substantial

        # Verify it went through all agents
        assert "machine learning" in result.lower()

    @pytest.mark.slow
    def test_multi_pattern_workflow(self):
        """Test workflow with different agent patterns."""

        # Create agents with different patterns
        sc_agent = AgentPatternRouter(
            pattern="self-consistency",
            name="fact_checker",
            llm=ChatOpenAI(model="gpt-4o-mini", temperature=0),
            num_samples=3
        ).create_agent()

        reflexion_agent = AgentPatternRouter(
            pattern="reflexion",
            name="improver",
            llm=ChatOpenAI(model="gpt-4o-mini", temperature=0),
            max_loops=2
        ).create_agent()

        # Test execution
        result1 = sc_agent.invoke({
            "messages": [{"role": "user", "content": "What is the capital of France?"}]
        })

        result2 = reflexion_agent.invoke({
            "messages": result1["messages"]
        })

        # Verify results
        assert "Paris" in result1["messages"][-1]["content"]
        assert result2 is not None

LLM Testing

Mocking LLM Responses

# tests/fixtures/mock_llm.py
from unittest.mock import Mock
from langchain_core.messages import AIMessage
from typing import List, Dict, Any

class MockLLM:
    """Mock LLM for testing."""

    def __init__(self, responses: List[str] = None):
        """
        Initialize mock LLM.

        Args:
            responses: List of canned responses
        """
        self.responses = responses or ["Mock response"]
        self.call_count = 0
        self.calls: List[Dict[str, Any]] = []

    def invoke(self, input, **kwargs):
        """Mock invoke method."""
        # Record call
        self.calls.append({
            "input": input,
            "kwargs": kwargs,
            "call_number": self.call_count
        })

        # Get response
        response = self.responses[self.call_count % len(self.responses)]
        self.call_count += 1

        return AIMessage(
            content=response,
            response_metadata={
                "token_usage": {
                    "prompt_tokens": 10,
                    "completion_tokens": 5,
                    "total_tokens": 15
                }
            }
        )

    async def ainvoke(self, input, **kwargs):
        """Mock async invoke method."""
        return self.invoke(input, **kwargs)

    def get_call_history(self) -> List[Dict[str, Any]]:
        """Get history of calls."""
        return self.calls

    def reset(self):
        """Reset call history."""
        self.call_count = 0
        self.calls = []


# Usage in tests
from azcore.agents import ReactAgent

def test_with_mock_llm():
    """Test using mock LLM."""
    mock_llm = MockLLM(responses=[
        "Response 1",
        "Response 2"
    ])

    agent = ReactAgent(name="test", llm=mock_llm)

    result1 = agent.invoke({"messages": [{"role": "user", "content": "test"}]})
    result2 = agent.invoke({"messages": [{"role": "user", "content": "test"}]})

    # Verify different responses
    assert "Response 1" in result1["messages"][-1]["content"]
    assert "Response 2" in result2["messages"][-1]["content"]

    # Verify call history
    history = mock_llm.get_call_history()
    assert len(history) == 2

LLM Response Fixtures

# tests/fixtures/llm_fixtures.py
import pytest
import json
from pathlib import Path

@pytest.fixture
def sample_llm_responses():
    """Load sample LLM responses from file."""
    fixtures_dir = Path(__file__).parent
    with open(fixtures_dir / "sample_responses.json", "r") as f:
        return json.load(f)


# tests/fixtures/sample_responses.json
{
  "simple_query": {
    "input": "What is 2+2?",
    "output": "2+2 equals 4."
  },
  "complex_query": {
    "input": "Explain quantum computing",
    "output": "Quantum computing is a type of computation that harnesses quantum mechanical phenomena..."
  },
  "error_case": {
    "input": "Invalid query",
    "error": "Invalid input format"
  }
}
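The fixture and the MockLLM class can be combined to replay canned outputs without any network calls. A hedged sketch, assuming MockLLM lives at tests/fixtures/mock_llm.py, the tests directory is importable as a package, and the sample_llm_responses fixture is exposed through conftest.py:

# tests/unit/test_canned_responses.py
from tests.fixtures.mock_llm import MockLLM


def test_simple_query_uses_canned_output(sample_llm_responses):
    """Replay the stored response for the simple query."""
    canned = sample_llm_responses["simple_query"]
    mock_llm = MockLLM(responses=[canned["output"]])

    result = mock_llm.invoke(canned["input"])

    assert "4" in result.content
    assert mock_llm.call_count == 1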

Testing LLM Determinism

# tests/integration/test_llm_determinism.py
import pytest
from langchain_openai import ChatOpenAI

@pytest.mark.integration
@pytest.mark.slow
class TestLLMDeterminism:
    """Test LLM response consistency."""

    def test_zero_temperature_consistency(self):
        """Test responses are consistent with temperature=0."""
        llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

        prompt = "What is the capital of France? Answer with just the city name."

        # Make multiple calls
        responses = [llm.invoke(prompt).content for _ in range(3)]

        # With temperature=0 responses should be (near-)identical, although
        # the OpenAI API does not strictly guarantee determinism, so this
        # check can occasionally be flaky.
        assert len(set(responses)) == 1
        assert "Paris" in responses[0]

    def test_high_temperature_variation(self):
        """Test responses vary with high temperature."""
        llm = ChatOpenAI(model="gpt-4o-mini", temperature=1.5)

        prompt = "Write a creative story opening (one sentence)."

        # Make multiple calls
        responses = [llm.invoke(prompt).content for _ in range(5)]

        # Responses should be different
        assert len(set(responses)) > 1  # At least some variation

Mocks & Fixtures

Pytest Configuration

# tests/conftest.py
import pytest
import os
from unittest.mock import Mock

# Register custom markers; integration/e2e/slow tests are skipped unless
# enabled via the command-line options defined below.
def pytest_configure(config):
    """Configure pytest."""
    config.addinivalue_line(
        "markers", "integration: mark test as integration test"
    )
    config.addinivalue_line(
        "markers", "e2e: mark test as end-to-end test"
    )
    config.addinivalue_line(
        "markers", "slow: mark test as slow"
    )
    config.addinivalue_line(
        "markers", "performance: mark test as a performance test"
    )


def pytest_collection_modifyitems(config, items):
    """Modify test collection."""
    skip_integration = pytest.mark.skip(reason="use --integration to run")
    skip_e2e = pytest.mark.skip(reason="use --e2e to run")
    skip_slow = pytest.mark.skip(reason="use --slow to run")

    for item in items:
        if "integration" in item.keywords and not config.getoption("--integration"):
            item.add_marker(skip_integration)
        if "e2e" in item.keywords and not config.getoption("--e2e"):
            item.add_marker(skip_e2e)
        if "slow" in item.keywords and not config.getoption("--slow"):
            item.add_marker(skip_slow)


def pytest_addoption(parser):
    """Add custom command line options."""
    parser.addoption(
        "--integration",
        action="store_true",
        default=False,
        help="run integration tests"
    )
    parser.addoption(
        "--e2e",
        action="store_true",
        default=False,
        help="run end-to-end tests"
    )
    parser.addoption(
        "--slow",
        action="store_true",
        default=False,
        help="run slow tests"
    )


@pytest.fixture
def mock_openai_api_key(monkeypatch):
    """Set mock OpenAI API key for tests."""
    monkeypatch.setenv("OPENAI_API_KEY", "sk-test-key")


@pytest.fixture
def sample_state():
    """Sample agent state for testing."""
    return {
        "messages": [
            {"role": "user", "content": "test query"}
        ],
        "context": {},
        "metadata": {}
    }
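Fixtures defined in conftest.py are injected by name into any test in the suite, with no imports required. For example (the agent fixture here is assumed to come from the test module itself, as in the unit tests above):

# Usage in any test module
def test_agent_with_shared_fixtures(agent, sample_state, mock_openai_api_key):
    """conftest fixtures are resolved by name, no imports required."""
    result = agent.invoke(sample_state)
    assert "messages" in result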

Test Coverage

Measuring Coverage

# pyproject.toml
[tool.pytest.ini_options]
testpaths = ["tests"]
python_files = ["test_*.py"]
python_classes = ["Test*"]
python_functions = ["test_*"]

# Coverage settings
[tool.coverage.run]
source = ["azcore"]
omit = [
    "*/tests/*",
    "*/test_*.py",
    "*/__pycache__/*",
    "*/site-packages/*"
]

[tool.coverage.report]
precision = 2
show_missing = true
skip_covered = false

exclude_lines = [
    "pragma: no cover",
    "def __repr__",
    "raise AssertionError",
    "raise NotImplementedError",
    "if __name__ == .__main__.:",
    "if TYPE_CHECKING:",
]

Running with Coverage

# Run tests with coverage
pytest --cov=azcore --cov-report=html --cov-report=term

# View coverage report
open htmlcov/index.html

# Generate coverage badge
coverage-badge -o coverage.svg

Coverage Goals

Overall Coverage: > 80%
Core Modules: > 90%
Utils: > 85%
Examples: Can be lower

Continuous Integration

GitHub Actions

# .github/workflows/test.yml
name: Tests

on:
  push:
    branches: [main, develop]
  pull_request:
    branches: [main, develop]

jobs:
  test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.10", "3.11", "3.12"]

    steps:
      - uses: actions/checkout@v3

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -e ".[dev]"

      - name: Run linters
        run: |
          black --check azcore tests
          ruff check azcore tests
          mypy azcore

      - name: Run unit tests
        run: |
          pytest tests/unit -v --cov=azcore --cov-report=xml

      - name: Run integration tests
        if: github.event_name == 'push' && github.ref == 'refs/heads/main'
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        run: |
          pytest tests/integration --integration -v

      - name: Upload coverage
        uses: codecov/codecov-action@v3
        with:
          file: ./coverage.xml
          fail_ci_if_error: true

Performance Testing

Load Testing

# tests/performance/test_load.py
import pytest
import asyncio
import time

@pytest.mark.performance
class TestPerformance:
    """Performance and load tests."""

    def test_agent_throughput(self, agent):
        """Test agent throughput."""
        num_requests = 100
        start_time = time.time()

        for _ in range(num_requests):
            agent.invoke({"messages": [{"role": "user", "content": "test"}]})

        duration = time.time() - start_time
        throughput = num_requests / duration

        print(f"Throughput: {throughput:.2f} requests/sec")

        # Assert minimum throughput
        assert throughput > 1.0  # At least 1 req/sec

    @pytest.mark.asyncio
    async def test_concurrent_load(self, agent):
        """Test concurrent request handling."""
        num_concurrent = 10

        async def make_request():
            return await agent.ainvoke({
                "messages": [{"role": "user", "content": "test"}]
            })

        start_time = time.time()

        # Execute concurrently
        results = await asyncio.gather(*[
            make_request() for _ in range(num_concurrent)
        ])

        duration = time.time() - start_time

        # Verify all completed
        assert len(results) == num_concurrent

        # Verify reasonable performance
        assert duration < num_concurrent * 2  # Should be faster than sequential

    def test_memory_usage(self, agent):
        """Test memory usage under load."""
        import psutil
        import gc

        process = psutil.Process()

        gc.collect()
        initial_memory = process.memory_info().rss / 1024 / 1024  # MB

        # Execute many requests
        for _ in range(100):
            agent.invoke({"messages": [{"role": "user", "content": "test"}]})

        gc.collect()
        final_memory = process.memory_info().rss / 1024 / 1024  # MB

        memory_increase = final_memory - initial_memory

        print(f"Memory increase: {memory_increase:.2f} MB")

        # Assert reasonable memory usage
        assert memory_increase < 500  # Less than 500 MB increase

Best Practices

1. Test Isolation

# Good: Each test is independent
def test_agent_1(agent):
    result = agent.invoke(state)
    assert result is not None

def test_agent_2(agent):
    result = agent.invoke(state)
    assert len(result["messages"]) > 0

# Bad: Tests depend on each other
class TestSequence:
    result = None

    def test_step_1(self, agent):
        TestSequence.result = agent.invoke(state)

    def test_step_2(self):  # Depends on test_step_1
        assert TestSequence.result is not None

2. Use Fixtures

@pytest.fixture
def configured_agent():
    """Reusable agent fixture."""
    llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
    return ReactAgent(
        name="test",
        llm=llm,
        prompt="You are helpful"
    )


def test_with_fixture(configured_agent):
    """Use fixture in test."""
    result = configured_agent.invoke(state)
    assert result is not None

3. Mock External Dependencies

# Mock LLM API calls
@patch('langchain_openai.ChatOpenAI')
def test_without_api_call(mock_llm):
    """Test without making real API calls."""
    mock_llm.return_value.invoke.return_value = Mock(content="test")

    agent = ReactAgent(name="test", llm=mock_llm())
    result = agent.invoke(state)

    assert result is not None

4. Parametrize Tests

@pytest.mark.parametrize("model,expected", [
    ("gpt-4o-mini", "success"),
    ("gpt-4o", "success"),
    ("claude-3-haiku", "success"),
])
def test_different_models(model, expected):
    """Test with different models."""
    llm = ChatOpenAI(model=model, temperature=0)
    agent = ReactAgent(name="test", llm=llm)

    result = agent.invoke(state)
    assert result is not None

5. Test Error Cases

def test_invalid_input(agent):
    """Test agent handles invalid input."""
    with pytest.raises(ValueError):
        agent.invoke({"invalid": "state"})

def test_api_timeout(agent, monkeypatch):
    """Test agent handles timeout."""
    def mock_invoke(*args, **kwargs):
        raise TimeoutError("API timeout")

    monkeypatch.setattr(agent.llm, "invoke", mock_invoke)

    with pytest.raises(TimeoutError):
        agent.invoke(state)

6. Test Boundaries

def test_empty_input(agent):
    """Test with empty input."""
    result = agent.invoke({"messages": []})
    assert result is not None

def test_large_input(agent):
    """Test with large input."""
    large_message = "word " * 10000
    result = agent.invoke({
        "messages": [{"role": "user", "content": large_message}]
    })
    assert result is not None

7. Keep Tests Fast

# Fast: Use mocks for unit tests
def test_fast(mock_llm):
    agent = ReactAgent(name="test", llm=mock_llm)
    result = agent.invoke(state)
    assert result is not None

# Slow: Only for integration tests
@pytest.mark.slow
def test_with_real_llm():
    llm = ChatOpenAI(model="gpt-4o-mini")
    agent = ReactAgent(name="test", llm=llm)
    result = agent.invoke(state)
    assert result is not None

Running Tests

# Run all unit tests (fast)
pytest tests/unit -v

# Run specific test file
pytest tests/unit/test_agents.py -v

# Run specific test
pytest tests/unit/test_agents.py::TestReactAgent::test_agent_invoke -v

# Run with coverage
pytest tests/unit --cov=azcore --cov-report=html

# Run integration tests (requires API keys)
pytest tests/integration --integration -v

# Run slow tests
pytest tests --slow -v

# Run all tests
pytest tests -v --integration --e2e --slow

# Run in parallel (requires the pytest-xdist plugin)
pytest tests/unit -n auto