TestAgentUnitMocked looks like, and is documented as, a unit test, so I'm refactoring it into a proper one. Its mocking is pretty good as mocking goes; the problem is that tests are mixed in that fail unless an actual agent is available. I think those integration tests should move somewhere else, or we should verify that the existing integration tests already cover this functionality.
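One low-friction way to split them out, sketched below: tag the agent-dependent tests with a pytest marker so the default unit run can deselect them. The `integration` marker name and the `TestAgentIntegration` class are assumptions for illustration, not names that exist in the repo today.

```python
import pytest

from galaxy.agents.router import QueryRouterAgent

# Hypothetical marker; it would need registering, e.g. in pyproject.toml:
#   [tool.pytest.ini_options]
#   markers = ["integration: requires a real agent/LLM backend"]


@pytest.mark.integration
@pytest.mark.asyncio
class TestAgentIntegration:
    """Possible home for the agent-backed tests pulled out of TestAgentUnitMocked."""

    async def test_router_routing_live(self, deps):  # `deps` fixture assumed from existing setup
        router = QueryRouterAgent(deps)
        decision = await router.route_query("Create a BWA tool")
        assert decision.primary_agent == "custom_tool"
```

With that in place, `pytest -m "not integration"` runs only the mocked unit tests, and `pytest -m integration` runs the agent-backed ones where a real backend is configured. The tests currently mixed into TestAgentUnitMocked: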
```python
@pytest.mark.asyncio
async def test_router_agent_routing_decisions(self):
    """Test that router agent makes correct routing decisions."""
    router = QueryRouterAgent(self.deps)

    test_cases = [
        ("Create a BWA tool", "custom_tool"),
        ("Why did my job fail?", "error_analysis"),
        ("What tools can I use for RNA-seq?", "tool_recommendation"),
        ("How do I align sequences?", "tool_recommendation"),
        ("Find a tutorial on RNA-seq", "gtn_training"),
    ]

    for query, expected_agent in test_cases:
        decision = await router.route_query(query)
        assert (
            decision.primary_agent == expected_agent
        ), f"Query '{query}' should route to {expected_agent}, got {decision.primary_agent}"
        # Just verify we got a reasoning string, don't check exact phrasing (LLM variability)
        assert len(decision.reasoning) > 0, f"Query '{query}' should have reasoning"


@pytest.mark.asyncio
async def test_router_orchestration_detection_conservative(self):
    """Test that router orchestration detection is appropriately conservative."""
    router = QueryRouterAgent(self.deps)

    # Test cases that should NOT trigger orchestration (conservative behavior)
    non_orchestration_cases = [
        "What tools can I use for RNA-seq analysis?",  # Single domain: tool_recommendation
        "My BWA job failed with error code 1",  # Single domain: error_analysis
        "How do I create a custom Galaxy tool?",  # Single domain: custom_tool
        "Find me tutorials about RNA-seq",  # Single domain: gtn_training
        "I need help with my dataset format",  # Single domain: dataset_analyzer
        "Tell me about alignment tools",  # Single domain: tool_recommendation
        "My tool crashed",  # Single domain: error_analysis
        "Show me Galaxy tutorials",  # Single domain: gtn_training
    ]

    for query in non_orchestration_cases:
        decision = await router.route_query(query)
        assert (
            decision.primary_agent != "orchestrator"
        ), f"Query '{query}' should NOT trigger orchestration (got {decision.primary_agent})"


@pytest.mark.asyncio
async def test_router_orchestration_scoring_logic(self):
    """Test specific orchestration scoring patterns."""
    from galaxy.agents.router import QueryRouterAgent

    router = QueryRouterAgent(self.deps)

    # Test high-confidence multi-domain queries
    multi_domain_cases = [
        {
            "query": "My FastQC tool failed with memory error and I need alternative quality control tools and tutorials on quality assessment",
            "expected_domains": ["error_analysis", "tool_recommendation", "gtn_training"],
            "should_orchestrate": True,  # 3+ high-confidence domains
            "reason": "3+ high-confidence domains should trigger orchestration",
        },
        {
            "query": "Create a BWA tool and also provide training materials",
            "expected_domains": ["custom_tool", "gtn_training"],
            "should_orchestrate": False,  # Only 2 domains, needs explicit language for orchestration
            "reason": "2 domains without very explicit orchestration language should not trigger",
        },
    ]

    for case in multi_domain_cases:
        decision = await router.route_query(str(case["query"]))
        is_orchestrated = decision.primary_agent == "orchestrator"

        # Log detailed information for debugging
        print(f"\nQuery: {case['query']}")
        print(f"Expected orchestration: {case['should_orchestrate']}")
        print(f"Actual agent: {decision.primary_agent}")
        print(f"Reasoning: {decision.reasoning}")

        # The assertion depends on current conservative tuning
        # This test documents the current behavior rather than enforcing it
        if case["should_orchestrate"]:
            # Might still not orchestrate due to conservative thresholds
            print(
                f"Expected orchestration but got {decision.primary_agent} - this may be due to conservative tuning"
            )
        else:
            assert not is_orchestrated, case["reason"]


@pytest.mark.asyncio
async def test_router_orchestration_detection_explicit_triggers(self):
    """Test orchestration detection with explicit multi-part requests."""
    router = QueryRouterAgent(self.deps)

    # Test cases with explicit orchestration language that SHOULD trigger orchestration
    # These require both multiple domains AND explicit conjunction language
    orchestration_cases = [
        # Explicit conjunctions with multiple domains
        "My RNA-seq tool failed and also help me find alternative tools and show me tutorials",
        "Fix this alignment error and then recommend better tools and provide training materials",
        "I need a complete workflow for variant calling plus also help me troubleshoot any errors",
        # Comprehensive requests spanning multiple domains
        "Provide a full solution for RNA-seq analysis including error handling and tutorial recommendations",
        "Walk me through the entire process from tool selection to error troubleshooting to learning resources",
        # Problem + solution + learning patterns
        "Help me fix this GATK error and teach me how to prevent it in the future",
        "Solve this memory issue and learn about best practices for large datasets",
    ]

    # Note: These might not trigger orchestration if the conservative thresholds are very high
    # The test verifies the behavior is consistent with the current conservative tuning
    for query in orchestration_cases:
        decision = await router.route_query(query)
        # Log the decision for debugging
        print(f"Query: '{query}' -> Agent: {decision.primary_agent}, Reasoning: {decision.reasoning}")


@pytest.mark.asyncio
async def test_router_orchestration_explicit_indicators(self):
    """Test specific orchestration indicator patterns."""
    router = QueryRouterAgent(self.deps)

    # Test explicit orchestration language patterns
    explicit_patterns = [
        "and also help me",
        "and then show me",
        "plus also provide",
        "as well as give me",
        "complete workflow for",
        "full solution including",
        "entire process from",
        "comprehensive help with",
        "step by step workflow",
        "start to finish guide",
        "beginning to end process",
    ]

    # Create test queries that include these patterns
    for pattern in explicit_patterns:
        # Create a query that has the pattern but might not have enough domain signals
        test_query = f"I need help with RNA-seq analysis {pattern} tutorials"
        decision = await router.route_query(test_query)
        # Log the results - these might not trigger orchestration due to conservative scoring
        print(f"Pattern '{pattern}' in query: {decision.primary_agent}")
        # The presence of explicit language should at least contribute to scoring
        # But conservative thresholds might still prevent orchestration


@pytest.mark.asyncio
async def test_gtn_agent_basic(self):
    """Test GTN training agent basic functionality."""
    from galaxy.agents.gtn_training import GTNTrainingAgent

    agent = GTNTrainingAgent(self.deps)

    # Test with a basic query
    response = await agent.process("How do I analyze RNA-seq data?")

    assert response.content is not None
    assert response.agent_type == "gtn_training"
    # Should recommend GTN tutorials
    assert "tutorial" in response.content.lower() or "training" in response.content.lower()
```