This guide will help you set up and use Reinforcement Learning in Azcore in under 5 minutes.
🚀 Quick Start
Step 1: Install Dependencies
# Reinforcement-learning support needs sentence-transformers for
# semantic (embedding-based) state matching.
pip install sentence-transformers
# Or install azcore together with every optional RL dependency in one step.
pip install azcore[rl]
Step 2: Create RLManager
from azcore.rl.rl_manager import RLManager

# Build the Q-learning manager that drives tool selection.
rl_manager = RLManager(
    tool_names=["search", "calculate", "weather"],  # candidate tools (actions)
    q_table_path="rl_data/my_agent.pkl",            # where learning state is persisted
    exploration_rate=0.15,                          # 15% of selections explore
    use_embeddings=True,                            # semantic matching of query states
)
Step 3: Create Reward Calculator
from azcore.rl.rewards import HeuristicRewardCalculator

# Rule-based reward signal: positive on success, negative on failure.
reward_calc = HeuristicRewardCalculator(
    success_reward=1.0,
    failure_penalty=-0.5,
)
Step 4: Create RL-Enabled Agent
from azcore.agents.agent_factory import AgentFactory
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4")
factory = AgentFactory(default_llm=llm)

# A ReAct agent wired to the RL manager: tool choice is learned, not fixed.
agent = factory.create_react_agent(
    name="rl_agent",
    tools=[search_tool, calc_tool, weather_tool],
    rl_enabled=True,
    rl_manager=rl_manager,
    reward_calculator=reward_calc,
)
Step 5: Use and Train
from langchain_core.messages import HumanMessage

# The agent updates its Q-table after every interaction -- no explicit
# training call is needed.
queries = [
    "What's 2+2?",
    "Weather in NYC?",
    "Search for Python tutorials",
]
for query in queries:
    # Fix: the loop body below must be indented (it was flattened).
    result = agent.invoke({
        "messages": [HumanMessage(content=query)]
    })
    print(result["messages"][-1].content)
    # RL updates happen automatically!
📊 Check Learning Progress
# Get aggregate learning statistics.
stats = rl_manager.get_statistics()
print(f"States learned: {stats['total_states']}")
print(f"Exploration rate: {stats['exploration_rate']:.2%}")

# Inspect the learned Q-values for one query, best-scoring tool first.
# Fix: the print below must be inside the loop body (indentation was lost).
q_values = rl_manager.get_q_values("What's the weather?")
for tool, value in sorted(q_values.items(), key=lambda x: x[1], reverse=True):
    print(f"{tool}: {value:.3f}")

# Export a human-readable dump of the whole Q-table.
rl_manager.export_readable("rl_data/qtable_readable.txt")
🎯 RL-Enabled Teams
from azcore.agents.team_builder import TeamBuilder

# Build an RL-enabled team; optimal tool selection is learned automatically.
team = (
    TeamBuilder("security_team")
    .with_llm(llm)
    .with_tools([camera_tool, alert_tool, log_tool])
    .with_rl(rl_manager, reward_calc)
    .with_prompt("You are a security team...")
    .build()
)
📈 Training Loop
# Explicit training loop: reward the manager when it picks the expected tool.
# Fix: the loop bodies below must be indented (indentation was lost).
training_data = [
    ("Calculate 15 * 23", "calculate"),
    ("Weather in London", "weather"),
    ("Search for AI news", "search"),
]
for query, expected_tool in training_data:
    # Select tools for this query; also returns the state key to update.
    selected_tools, state_key = rl_manager.select_tools(query, top_n=2)
    # Positive reward if the expected tool was among the selections.
    reward = 1.0 if expected_tool in selected_tools else -0.5
    # Propagate the reward to every selected tool.
    for tool in selected_tools:
        rl_manager.update(state_key, tool, reward)

# Save progress to disk.
rl_manager.force_persist()
🔄 Loading Existing Q-Table
# An existing Q-table at q_table_path is loaded automatically when present,
# so learning continues from where it left off.
rl_manager = RLManager(
    tool_names=tools,
    q_table_path="rl_data/trained_agent.pkl",  # loads if the file exists
)
⚙️ Configuration Options
Basic Configuration
rl_manager = RLManager(
    tool_names=["tool1", "tool2", "tool3"],
    exploration_rate=0.15,   # fraction of selections that explore (15%)
    learning_rate=0.1,       # step size for Q-value updates
    discount_factor=0.99,    # weight given to future rewards
    use_embeddings=True,     # semantic matching of query states
)
Advanced Configuration
from azcore.rl.rl_manager import ExplorationStrategy

rl_manager = RLManager(
    tool_names=tools,
    q_table_path="rl_data/agent.pkl",
    # Exploration: start broad, decay toward a small floor.
    exploration_strategy=ExplorationStrategy.EPSILON_DECAY,
    exploration_rate=0.3,
    min_exploration_rate=0.01,
    epsilon_decay_rate=0.995,
    # Performance: persist asynchronously, batch updates, cache states.
    enable_async_persistence=True,
    batch_update_size=10,
    state_cache_size=1000,
    # Maintenance: prune rarely-visited states from the Q-table.
    enable_q_table_pruning=True,
    prune_threshold=100,
    min_visits_to_keep=5,
)
💡 Tips
- Start with high exploration (0.3-0.5) then decay
- Use semantic embeddings for better generalization
- Monitor Q-values to verify learning progress
- Save Q-tables frequently to preserve knowledge
- Test with diverse queries for robust learning
That's it! Your agent is now learning and optimizing automatically.