mirror of
https://github.com/Monadical-SAS/cubbi.git
synced 2025-12-20 12:19:07 +00:00
136 lines
3.9 KiB
Python
136 lines
3.9 KiB
Python
"""Integration tests for cubbi images with different model combinations."""
|
|
|
|
import subprocess
|
|
import pytest
|
|
from typing import Dict
|
|
|
|
|
|
IMAGES = ["goose", "aider", "opencode", "crush"]
|
|
|
|
MODELS = [
|
|
"anthropic/claude-sonnet-4-20250514",
|
|
"openai/gpt-4o",
|
|
"openrouter/openai/gpt-4o",
|
|
"litellm/gpt-oss:120b",
|
|
]
|
|
|
|
# Command templates for each tool (based on research)
|
|
COMMANDS: Dict[str, str] = {
|
|
"goose": "goose run -t '{prompt}' --no-session --quiet",
|
|
"aider": "aider --message '{prompt}' --yes-always --no-fancy-input --no-check-update --no-auto-commits",
|
|
"opencode": "opencode run '{prompt}'",
|
|
"crush": "crush run -q '{prompt}'",
|
|
}
|
|
|
|
|
|
def run_cubbi_command(
|
|
image: str, model: str, command: str, timeout: int = 20
|
|
) -> subprocess.CompletedProcess:
|
|
"""Run a cubbi command with specified image, model, and command."""
|
|
full_command = [
|
|
"uv",
|
|
"run",
|
|
"-m",
|
|
"cubbi.cli",
|
|
"session",
|
|
"create",
|
|
"-i",
|
|
image,
|
|
"-m",
|
|
model,
|
|
"--no-connect",
|
|
"--no-shell",
|
|
"--run",
|
|
command,
|
|
]
|
|
|
|
return subprocess.run(
|
|
full_command,
|
|
capture_output=True,
|
|
text=True,
|
|
timeout=timeout,
|
|
cwd="/home/tito/code/monadical/cubbi",
|
|
)
|
|
|
|
|
|
def is_successful_response(result: subprocess.CompletedProcess) -> bool:
|
|
"""Check if the cubbi command completed successfully."""
|
|
# Check for successful completion markers
|
|
return (
|
|
result.returncode == 0
|
|
and "Initial command finished (exit code: 0)" in result.stdout
|
|
and "Command execution complete" in result.stdout
|
|
)
|
|
|
|
|
|
@pytest.mark.integration
|
|
@pytest.mark.parametrize("image", IMAGES)
|
|
@pytest.mark.parametrize("model", MODELS)
|
|
def test_image_model_combination(image: str, model: str):
|
|
"""Test each image with each model using appropriate command syntax."""
|
|
prompt = "What is 2+2?"
|
|
|
|
# Get the command template for this image
|
|
command_template = COMMANDS[image]
|
|
|
|
# For opencode, we need to substitute the model in the command
|
|
if image == "opencode":
|
|
command = command_template.format(prompt=prompt, model=model)
|
|
else:
|
|
command = command_template.format(prompt=prompt)
|
|
|
|
# Run the test with timeout handling
|
|
try:
|
|
result = run_cubbi_command(image, model, command)
|
|
except subprocess.TimeoutExpired:
|
|
pytest.fail(f"Test timed out after 20s for {image} with {model}")
|
|
|
|
# Check if the command was successful
|
|
assert is_successful_response(result), (
|
|
f"Failed to run {image} with {model}. "
|
|
f"Return code: {result.returncode}\n"
|
|
f"Stdout: {result.stdout}\n"
|
|
f"Stderr: {result.stderr}"
|
|
)
|
|
|
|
|
|
@pytest.mark.integration
|
|
def test_all_images_available():
|
|
"""Test that all required images are available for testing."""
|
|
# Run image list command
|
|
result = subprocess.run(
|
|
["uv", "run", "-m", "cubbi.cli", "image", "list"],
|
|
capture_output=True,
|
|
text=True,
|
|
timeout=30,
|
|
cwd="/home/tito/code/monadical/cubbi",
|
|
)
|
|
|
|
assert result.returncode == 0, f"Failed to list images: {result.stderr}"
|
|
|
|
for image in IMAGES:
|
|
assert image in result.stdout, f"Image {image} not found in available images"
|
|
|
|
|
|
@pytest.mark.integration
|
|
def test_claudecode():
|
|
"""Test Claude Code without model preselection since it only supports Anthropic."""
|
|
command = "claude -p hello"
|
|
|
|
try:
|
|
result = run_cubbi_command("claudecode", MODELS[0], command, timeout=20)
|
|
except subprocess.TimeoutExpired:
|
|
pytest.fail("Claude Code help command timed out after 20s")
|
|
|
|
assert is_successful_response(result), (
|
|
f"Failed to run Claude Code help command. "
|
|
f"Return code: {result.returncode}\n"
|
|
f"Stdout: {result.stdout}\n"
|
|
f"Stderr: {result.stderr}"
|
|
)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
# Allow running the test file directly for development
|
|
pytest.main([__file__, "-v", "-m", "integration"])
|