# cubbi/tests/test_integration.py

"""Integration tests for cubbi images with different model combinations."""

import subprocess
import pytest
from typing import Dict


# Images exercised by the parametrized image/model test matrix.
IMAGES = [
    "goose",
    "aider",
    "opencode",
    "crush",
]

# Model identifiers every image in IMAGES is tested against.
MODELS = [
    "anthropic/claude-sonnet-4-20250514",
    "openai/gpt-4o",
    "openrouter/openai/gpt-4o",
    "litellm/gpt-oss:120b",
]

# Command templates for each tool (based on research). The `{prompt}`
# placeholder is filled in with str.format before the command is run.
COMMANDS: Dict[str, str] = dict(
    goose="goose run -t '{prompt}' --no-session --quiet",
    aider=(
        "aider --message '{prompt}' "
        "--yes-always --no-fancy-input --no-check-update --no-auto-commits"
    ),
    opencode="opencode run '{prompt}'",
    crush="crush run -q '{prompt}'",
)


def _find_project_root() -> str:
    """Return the nearest ancestor directory of this file holding pyproject.toml.

    Falls back to this file's own directory when no ancestor contains a
    pyproject.toml.
    """
    from pathlib import Path  # local import: keeps this block self-contained

    here = Path(__file__).resolve().parent
    for candidate in (here, *here.parents):
        if (candidate / "pyproject.toml").is_file():
            return str(candidate)
    return str(here)


def run_cubbi_command(
    image: str, model: str, command: str, timeout: int = 20
) -> subprocess.CompletedProcess:
    """Run a cubbi command with specified image, model, and command.

    Invokes ``uv run -m cubbi.cli session create`` with ``--no-connect``,
    ``--no-shell`` and ``--run <command>``.

    Args:
        image: Value passed to the CLI's ``-i`` option.
        model: Value passed to the CLI's ``-m`` option.
        command: Command string passed to the CLI's ``--run`` option.
        timeout: Seconds to wait for the subprocess before giving up.

    Returns:
        The completed process, with stdout and stderr captured as text.

    Raises:
        subprocess.TimeoutExpired: If the process runs longer than ``timeout``.
    """
    full_command = [
        "uv",
        "run",
        "-m",
        "cubbi.cli",
        "session",
        "create",
        "-i",
        image,
        "-m",
        model,
        "--no-connect",
        "--no-shell",
        "--run",
        command,
    ]

    return subprocess.run(
        full_command,
        capture_output=True,
        text=True,
        timeout=timeout,
        # Run from the project that contains this file rather than from a
        # developer-specific absolute path, so the tests work on any checkout.
        cwd=_find_project_root(),
    )


def is_successful_response(result: subprocess.CompletedProcess) -> bool:
    """Return True when the cubbi run exited with 0 and printed both completion markers."""
    # A non-zero exit status is a failure regardless of what was printed.
    if result.returncode != 0:
        return False

    # Both markers must appear in the captured stdout.
    required_markers = (
        "Initial command finished (exit code: 0)",
        "Command execution complete",
    )
    captured = result.stdout
    return all(marker in captured for marker in required_markers)


@pytest.mark.integration
@pytest.mark.parametrize("image", IMAGES)
@pytest.mark.parametrize("model", MODELS)
def test_image_model_combination(image: str, model: str):
    """Test each image with each model using appropriate command syntax.

    Fills the image's command template with a fixed prompt, runs it through
    cubbi and fails if the run times out or does not report success.
    """
    prompt = "What is 2+2?"
    timeout = 20  # seconds; shared by the call and the timeout failure message

    # No template in COMMANDS has a `{model}` placeholder (the model is handed
    # to run_cubbi_command separately), so only the prompt is substituted and
    # every image is formatted the same way.
    command = COMMANDS[image].format(prompt=prompt)

    # Run the test with timeout handling
    try:
        result = run_cubbi_command(image, model, command, timeout=timeout)
    except subprocess.TimeoutExpired:
        pytest.fail(f"Test timed out after {timeout}s for {image} with {model}")

    # Check if the command was successful
    assert is_successful_response(result), (
        f"Failed to run {image} with {model}. "
        f"Return code: {result.returncode}\n"
        f"Stdout: {result.stdout}\n"
        f"Stderr: {result.stderr}"
    )


@pytest.mark.integration
def test_all_images_available():
    """Test that all required images are available for testing.

    Runs ``uv run -m cubbi.cli image list`` and checks that every name in
    IMAGES appears in its output.
    """
    from pathlib import Path  # local import: leaves the file's import block untouched

    # Anchor the working directory to the project containing this file instead
    # of a developer-specific absolute path: take the nearest ancestor that
    # holds a pyproject.toml, falling back to this file's own directory.
    here = Path(__file__).resolve().parent
    project_root = next(
        (p for p in (here, *here.parents) if (p / "pyproject.toml").is_file()),
        here,
    )

    # Run image list command
    result = subprocess.run(
        ["uv", "run", "-m", "cubbi.cli", "image", "list"],
        capture_output=True,
        text=True,
        timeout=30,
        cwd=str(project_root),
    )

    assert result.returncode == 0, f"Failed to list images: {result.stderr}"

    # Substring match against the raw listing output.
    for image in IMAGES:
        assert image in result.stdout, f"Image {image} not found in available images"


@pytest.mark.integration
def test_claudecode():
    """Test Claude Code with a single Anthropic model.

    Unlike the other images, this one is not parametrized over MODELS since
    Claude Code only supports Anthropic; the first entry of MODELS is used.
    """
    command = "claude -p hello"
    timeout = 20  # seconds; shared by the call and the timeout failure message

    try:
        result = run_cubbi_command("claudecode", MODELS[0], command, timeout=timeout)
    except subprocess.TimeoutExpired:
        # Name the command that actually ran (it is a prompt, not a help call).
        pytest.fail(f"Claude Code command {command!r} timed out after {timeout}s")

    assert is_successful_response(result), (
        f"Failed to run Claude Code command {command!r}. "
        f"Return code: {result.returncode}\n"
        f"Stdout: {result.stdout}\n"
        f"Stderr: {result.stderr}"
    )


if __name__ == "__main__":
    # Allow running the test file directly for development. pytest.main returns
    # an exit code; propagate it so the process status reflects the test
    # outcome instead of always exiting with 0.
    raise SystemExit(pytest.main([__file__, "-v", "-m", "integration"]))