Files
cubbi/tests/test_integration.py

136 lines
3.9 KiB
Python

"""Integration tests for cubbi images with different model combinations."""
import subprocess
import pytest
from typing import Dict
IMAGES = ["goose", "aider", "opencode", "crush"]
MODELS = [
"anthropic/claude-sonnet-4-20250514",
"openai/gpt-4o",
"openrouter/openai/gpt-4o",
"litellm/gpt-oss:120b",
]
# Command templates for each tool (based on research)
COMMANDS: Dict[str, str] = {
"goose": "goose run -t '{prompt}' --no-session --quiet",
"aider": "aider --message '{prompt}' --yes-always --no-fancy-input --no-check-update --no-auto-commits",
"opencode": "opencode run '{prompt}'",
"crush": "crush run -q '{prompt}'",
}
def run_cubbi_command(
image: str, model: str, command: str, timeout: int = 20
) -> subprocess.CompletedProcess:
"""Run a cubbi command with specified image, model, and command."""
full_command = [
"uv",
"run",
"-m",
"cubbi.cli",
"session",
"create",
"-i",
image,
"-m",
model,
"--no-connect",
"--no-shell",
"--run",
command,
]
return subprocess.run(
full_command,
capture_output=True,
text=True,
timeout=timeout,
cwd="/home/tito/code/monadical/cubbi",
)
def is_successful_response(result: subprocess.CompletedProcess) -> bool:
"""Check if the cubbi command completed successfully."""
# Check for successful completion markers
return (
result.returncode == 0
and "Initial command finished (exit code: 0)" in result.stdout
and "Command execution complete" in result.stdout
)
@pytest.mark.integration
@pytest.mark.parametrize("image", IMAGES)
@pytest.mark.parametrize("model", MODELS)
def test_image_model_combination(image: str, model: str):
"""Test each image with each model using appropriate command syntax."""
prompt = "What is 2+2?"
# Get the command template for this image
command_template = COMMANDS[image]
# For opencode, we need to substitute the model in the command
if image == "opencode":
command = command_template.format(prompt=prompt, model=model)
else:
command = command_template.format(prompt=prompt)
# Run the test with timeout handling
try:
result = run_cubbi_command(image, model, command)
except subprocess.TimeoutExpired:
pytest.fail(f"Test timed out after 20s for {image} with {model}")
# Check if the command was successful
assert is_successful_response(result), (
f"Failed to run {image} with {model}. "
f"Return code: {result.returncode}\n"
f"Stdout: {result.stdout}\n"
f"Stderr: {result.stderr}"
)
@pytest.mark.integration
def test_all_images_available():
"""Test that all required images are available for testing."""
# Run image list command
result = subprocess.run(
["uv", "run", "-m", "cubbi.cli", "image", "list"],
capture_output=True,
text=True,
timeout=30,
cwd="/home/tito/code/monadical/cubbi",
)
assert result.returncode == 0, f"Failed to list images: {result.stderr}"
for image in IMAGES:
assert image in result.stdout, f"Image {image} not found in available images"
@pytest.mark.integration
def test_claudecode():
"""Test Claude Code without model preselection since it only supports Anthropic."""
command = "claude -p hello"
try:
result = run_cubbi_command("claudecode", MODELS[0], command, timeout=20)
except subprocess.TimeoutExpired:
pytest.fail("Claude Code help command timed out after 20s")
assert is_successful_response(result), (
f"Failed to run Claude Code help command. "
f"Return code: {result.returncode}\n"
f"Stdout: {result.stdout}\n"
f"Stderr: {result.stderr}"
)
if __name__ == "__main__":
# Allow running the test file directly for development
pytest.main([__file__, "-v", "-m", "integration"])