Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Agent Branch Tests to Restore #21437

Copy link
Copy link
@jmchilton

Description

@jmchilton
Issue body actions

TestAgentUnitMocked looks like, and is documented as, a unit test, so I'm refactoring it into one. Its mocking is pretty good as mocking goes — but it also has tests mixed in that fail unless an actual agent is available. I think these integration tests should be moved somewhere else, or we should verify that the existing integration tests already cover this functionality.

    @pytest.mark.asyncio
    async def test_router_agent_routing_decisions(self):
        """Each representative query should be routed to its expected specialist agent."""
        router = QueryRouterAgent(self.deps)

        # query -> specialist agent the router is expected to pick
        expected_routing = {
            "Create a BWA tool": "custom_tool",
            "Why did my job fail?": "error_analysis",
            "What tools can I use for RNA-seq?": "tool_recommendation",
            "How do I align sequences?": "tool_recommendation",
            "Find a tutorial on RNA-seq": "gtn_training",
        }

        for query, expected_agent in expected_routing.items():
            decision = await router.route_query(query)
            routed_to = decision.primary_agent
            assert routed_to == expected_agent, (
                f"Query '{query}' should route to {expected_agent}, got {routed_to}"
            )
            # Reasoning wording varies with the LLM, so only require it to be non-empty.
            assert len(decision.reasoning) > 0, f"Query '{query}' should have reasoning"
    @pytest.mark.asyncio
    async def test_router_orchestration_detection_conservative(self):
        """Single-domain queries must never escalate to the orchestrator (conservative routing)."""
        router = QueryRouterAgent(self.deps)

        # Every query below targets exactly one domain, so none of them
        # should be handed to the orchestrator.
        single_domain_queries = (
            "What tools can I use for RNA-seq analysis?",  # tool_recommendation
            "My BWA job failed with error code 1",  # error_analysis
            "How do I create a custom Galaxy tool?",  # custom_tool
            "Find me tutorials about RNA-seq",  # gtn_training
            "I need help with my dataset format",  # dataset_analyzer
            "Tell me about alignment tools",  # tool_recommendation
            "My tool crashed",  # error_analysis
            "Show me Galaxy tutorials",  # gtn_training
        )

        for query in single_domain_queries:
            decision = await router.route_query(query)
            chosen = decision.primary_agent
            assert chosen != "orchestrator", (
                f"Query '{query}' should NOT trigger orchestration (got {chosen})"
            )
    @pytest.mark.asyncio
    async def test_router_orchestration_scoring_logic(self):
        """Test specific orchestration scoring patterns.

        Documents (rather than strictly enforces) the router's conservative
        multi-domain scoring: cases expected to orchestrate are only logged,
        while cases that should stay single-agent are asserted not to
        orchestrate.
        """
        router = QueryRouterAgent(self.deps)

        # High-confidence multi-domain queries as
        # (query, expected_domains, should_orchestrate, reason) tuples.
        # Tuples keep each field's type precise, so no str() coercion is
        # needed when passing the query to the router.
        multi_domain_cases = [
            (
                "My FastQC tool failed with memory error and I need alternative quality control tools and tutorials on quality assessment",
                ["error_analysis", "tool_recommendation", "gtn_training"],
                True,  # 3+ high-confidence domains
                "3+ high-confidence domains should trigger orchestration",
            ),
            (
                "Create a BWA tool and also provide training materials",
                ["custom_tool", "gtn_training"],
                False,  # only 2 domains, needs explicit language for orchestration
                "2 domains without very explicit orchestration language should not trigger",
            ),
        ]

        for query, _expected_domains, should_orchestrate, reason in multi_domain_cases:
            decision = await router.route_query(query)

            is_orchestrated = decision.primary_agent == "orchestrator"

            # Log detailed information for debugging
            print(f"\nQuery: {query}")
            print(f"Expected orchestration: {should_orchestrate}")
            print(f"Actual agent: {decision.primary_agent}")
            print(f"Reasoning: {decision.reasoning}")

            # The assertion depends on current conservative tuning.
            # This test documents the current behavior rather than enforcing it.
            if should_orchestrate:
                # Might still not orchestrate due to conservative thresholds
                print(
                    f"Expected orchestration but got {decision.primary_agent} - this may be due to conservative tuning"
                )
            else:
                assert not is_orchestrated, reason
    @pytest.mark.asyncio
    async def test_router_orchestration_detection_explicit_triggers(self):
        """Log routing decisions for explicit multi-part requests.

        These queries combine multiple domains with explicit conjunction
        language, which is the pattern that SHOULD trigger orchestration.
        No assertion is made: conservative thresholds may legitimately keep
        them single-agent, so this test only records the observed behavior.
        """
        router = QueryRouterAgent(self.deps)

        orchestration_cases = (
            # Explicit conjunctions spanning multiple domains.
            "My RNA-seq tool failed and also help me find alternative tools and show me tutorials",
            "Fix this alignment error and then recommend better tools and provide training materials",
            "I need a complete workflow for variant calling plus also help me troubleshoot any errors",
            # "Comprehensive" requests touching several domains at once.
            "Provide a full solution for RNA-seq analysis including error handling and tutorial recommendations",
            "Walk me through the entire process from tool selection to error troubleshooting to learning resources",
            # Problem + solution + learning combinations.
            "Help me fix this GATK error and teach me how to prevent it in the future",
            "Solve this memory issue and learn about best practices for large datasets",
        )

        for query in orchestration_cases:
            decision = await router.route_query(query)
            # Record each decision so behavior changes are easy to spot in test output.
            print(f"Query: '{query}' -> Agent: {decision.primary_agent}, Reasoning: {decision.reasoning}")
    @pytest.mark.asyncio
    async def test_router_orchestration_explicit_indicators(self):
        """Log how each explicit orchestration phrase influences routing.

        Embeds each indicator phrase into an otherwise weak-signal query and
        records where the router sends it. No assertion is made because the
        conservative scoring may outweigh the explicit language.
        """
        router = QueryRouterAgent(self.deps)

        # Phrases the router treats as explicit orchestration language.
        explicit_patterns = (
            "and also help me",
            "and then show me",
            "plus also provide",
            "as well as give me",
            "complete workflow for",
            "full solution including",
            "entire process from",
            "comprehensive help with",
            "step by step workflow",
            "start to finish guide",
            "beginning to end process",
        )

        for pattern in explicit_patterns:
            # The pattern is present, but the surrounding query carries few
            # domain signals — the explicit language alone must fight the
            # conservative thresholds.
            probe = f"I need help with RNA-seq analysis {pattern} tutorials"

            decision = await router.route_query(probe)

            # Record the outcome; orchestration may or may not trigger.
            print(f"Pattern '{pattern}' in query: {decision.primary_agent}")
    @pytest.mark.asyncio
    async def test_gtn_agent_basic(self):
        """Smoke-test the GTN training agent on a simple training question."""
        from galaxy.agents.gtn_training import GTNTrainingAgent

        agent = GTNTrainingAgent(self.deps)

        reply = await agent.process("How do I analyze RNA-seq data?")

        assert reply.content is not None
        assert reply.agent_type == "gtn_training"
        # A GTN response should point at tutorials or training material.
        lowered = reply.content.lower()
        assert "tutorial" in lowered or "training" in lowered

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions

      Morty Proxy This is a proxified and sanitized view of the page, visit original site.