Files
cubbi/test.sh
Mathieu Virbel bae951cf7c feat: comprehensive configuration system and environment variable forwarding (#29)
* feat: migrate container configuration from env vars to YAML config files

- Replace environment variable-based configuration with structured YAML config files
- Add Pydantic models for type-safe configuration management in cubbi_init.py
- Update container.py to generate /cubbi/config.yaml and mount into containers
- Simplify goose plugin to extract provider from default model format
- Remove complex environment variable handling in favor of direct config access
- Maintain backward compatibility while enabling cleaner plugin architecture
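
For illustration only, a generated /cubbi/config.yaml could look like the sketch below. The field names and layout are assumptions for this example, not the actual schema emitted by container.py:

```yaml
# Hypothetical sketch of /cubbi/config.yaml -- illustrative field names only
default_model: anthropic/claude-sonnet-4-20250514
providers:
  anthropic:
    api_key: sk-ant-...        # placeholder
  openai:
    api_key: sk-...            # placeholder
  litellm:
    base_url: http://litellm.local:4000   # custom providers carry a base URL
    api_key: sk-litellm-...    # placeholder
```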

* feat: optimize goose plugin to only pass required API key for selected model

- Update goose plugin to set only the API key for the provider of the selected model
- Add selective API key configuration for anthropic, openai, google, and openrouter
- Update README.md with comprehensive automated testing documentation
- Add litellm/gpt-oss:120b to test.sh model matrix (now 5 images × 4 models = 20 tests)
- Include single prompt command syntax for each tool in the documentation

* feat: add comprehensive integration tests with pytest parametrization

- Create tests/test_integration.py with parametrized tests for 5 images × 4 models (20 combinations)
- Add pytest configuration to exclude integration tests by default
- Add integration marker for selective test running
- Include help command tests and image availability tests
- Document test usage in tests/README_integration.md

Integration tests cover:
- goose, aider, claudecode, opencode, crush images
- anthropic/claude-sonnet-4-20250514, openai/gpt-4o, openrouter/openai/gpt-4o, litellm/gpt-oss:120b models
- Proper command syntax for each tool
- Success validation with exit codes and completion markers

Usage:
- pytest (regular tests only)
- pytest -m integration (integration tests only)
- pytest -m integration -k "goose" (specific image)

* feat: update OpenCode plugin with perfect multi-provider configuration

- Add global STANDARD_PROVIDERS constant for maintainability
- Support custom providers (with baseURL) vs standard providers
- Custom providers: include npm package, name, baseURL, apiKey, models
- Standard providers: include only apiKey and empty models
- Use direct API key values from cubbi config instead of env vars
- Only add default model to the provider that matches the default model
- Use @ai-sdk/openai-compatible for OpenAI-compatible providers
- Preserve model names without transformation
- All providers get required empty models{} section per OpenCode spec

This ensures OpenCode can properly recognize and use both native
providers (anthropic, openai, google, openrouter) and custom
providers (litellm, etc.) with correct configuration format.

* refactor: model is now a combination of provider/model
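
A minimal shell sketch of splitting such a combined string (assuming the provider is always the first path segment, while the model part may itself contain slashes, as the openrouter entries above do):

```shell
#!/bin/bash
# Sketch: split a "provider/model" string on the first slash.
# Assumes the provider name never contains a slash; the model part may
# (e.g. "openrouter/openai/gpt-4o" nests a vendor/model path).
model_spec="openrouter/openai/gpt-4o"
provider="${model_spec%%/*}"   # everything before the first '/'
model="${model_spec#*/}"       # everything after the first '/'
echo "provider=$provider model=$model"
```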

* feat: add separate integration test for Claude Code without model config

Claude Code is Anthropic-specific and doesn't require model selection like other tools.
Created dedicated test that verifies basic functionality without model preselection.

* feat: update Claude Code and Crush plugins to use new config system

- Claude Code plugin now uses cubbi_config.providers to get Anthropic API key
- Crush plugin updated to use cubbi_config.providers for provider configuration
- Both plugins maintain backwards compatibility with environment variables
- Consistent plugin structure across all cubbi images

* feat: add environments_to_forward support for images

- Add environments_to_forward field to ImageConfig and Image models
- Update container creation logic to forward specified environment variables from host
- Add environments_to_forward to claudecode cubbi_image.yaml to ensure Anthropic API key is always available
- Claude Code now gets required environment variables regardless of model selection
- This ensures Claude Code works properly even when other models are specified

Fixes the issue where Claude Code couldn't access Anthropic API key when using different model configurations.
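
A hedged sketch of what such an entry in claudecode/cubbi_image.yaml could look like; ANTHROPIC_API_KEY is named in this log, while the other variables and surrounding fields are assumptions for illustration:

```yaml
# Hypothetical excerpt from claudecode/cubbi_image.yaml
name: claudecode
environments_to_forward:
  - ANTHROPIC_API_KEY
  - HTTP_PROXY
  - HTTPS_PROXY
```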

* refactor: remove unused environment field from cubbi_image.yaml files

The 'environment' field was loaded but never processed at runtime.
Only 'environments_to_forward' is actually used to pass environment
variables from host to container.

Cleaned up configuration files by removing:
- 72 lines from aider/cubbi_image.yaml
- 42 lines from claudecode/cubbi_image.yaml
- 28 lines from crush/cubbi_image.yaml
- 16 lines from goose/cubbi_image.yaml
- Empty environment: [] from opencode/cubbi_image.yaml

This makes the configuration files cleaner: they now contain only
fields that are actually used by the system.

* feat: implement environment variable forwarding for aider

Updates aider to automatically receive all relevant environment variables
from the host, similar to how opencode works.

Changes:
- Added environments_to_forward field to aider/cubbi_image.yaml with
  comprehensive list of API keys, configuration, and proxy variables
- Updated aider_plugin.py to use cubbi_config system for provider/model setup
- Environment variables now forwarded automatically during container creation
- Maintains backward compatibility with legacy environment variables

Environment variables forwarded:
- API Keys: OPENAI_API_KEY, ANTHROPIC_API_KEY, DEEPSEEK_API_KEY, etc.
- Configuration: AIDER_MODEL, GIT_* variables, HTTP_PROXY, etc.
- Timezone: TZ for proper log timestamps

Tested: All aider tests pass, environment variables confirmed forwarded.

* refactor: remove unused volumes and init fields from cubbi_image.yaml files

Both 'volumes' and 'init' fields were loaded but never processed at runtime.
These were incomplete implementations that didn't affect container behavior.

Removed from all 5 images:
- volumes: List with mountPath: /app (incomplete, missing host paths)
- init: pre_command and command fields (unused during container creation)

The cubbi_image.yaml files now only contain fields that are actually used:
- Basic metadata (name, description, version, maintainer, image)
- persistent_configs (working functionality)
- environments_to_forward (working functionality where present)

This makes the configuration files cleaner and eliminates confusion
about what functionality is actually implemented.

* refactor: remove unused ImageInit and VolumeMount models

These models were only referenced in the Image model definition but
never used at runtime since we removed all init: and volumes: fields
from cubbi_image.yaml files.

Removed:
- VolumeMount class (mountPath, description fields)
- ImageInit class (pre_command, command fields)
- init: Optional[ImageInit] field from Image model
- volumes: List[VolumeMount] field from Image model

The Image model now only contains fields that are actually used:
- Basic metadata (name, description, version, maintainer, image)
- environment (loaded but unused - kept for future cleanup)
- persistent_configs (working functionality)
- environments_to_forward (working functionality)

This makes the data model cleaner and eliminates dead code.

* feat: add interactive configuration command

Adds `cubbi configure` command for interactive setup of LLM providers
and models through a user-friendly questionnaire interface.

New features:
- Interactive provider configuration (OpenAI, Anthropic, OpenRouter, etc.)
- API key management with environment variable references
- Model selection with provider/model format validation
- Default settings configuration (image, ports, volumes, etc.)
- Added questionary dependency for interactive prompts

Changes:
- Added cubbi/configure.py with full interactive configuration logic
- Added configure command to cubbi/cli.py
- Updated uv.lock with questionary and prompt-toolkit dependencies

Usage: `cubbi configure`

* refactor: update integration tests for current functionality

Updates integration tests to reflect current cubbi functionality:

test_integration.py:
- Simplified image list (removed crush temporarily)
- Updated model list with current supported models
- Removed outdated help command tests that were timing out
- Simplified claudecode test to basic functionality test
- Updated command templates for current tool versions

test_integration_docker.py:
- Cleaned up container management tests
- Fixed formatting and improved readability
- Updated assertion formatting for better error messages

These changes align the tests with the current state of the codebase
and remove tests that were causing timeouts or failures.

* fix: fix temporary file chmod
2025-08-06 21:27:26 -06:00

#!/bin/bash
# Comprehensive test script for all cubbi images with different model combinations
# Tests single prompt/response functionality for each tool
set -e
# Configuration
TIMEOUT="180s"
TEST_PROMPT="What is 2+2?"
LOG_FILE="test_results.log"
TEMP_DIR="/tmp/cubbi_test_$$"
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
# Test matrix
declare -a IMAGES=("goose" "aider" "claudecode" "opencode" "crush")
declare -a MODELS=(
    "anthropic/claude-sonnet-4-20250514"
    "openai/gpt-4o"
    "openrouter/openai/gpt-4o"
    "litellm/gpt-oss:120b"
)
# Command templates for each tool (based on research)
declare -A COMMANDS=(
    ["goose"]="goose run -t '$TEST_PROMPT' --no-session --quiet"
    ["aider"]="aider --message '$TEST_PROMPT' --yes-always --no-fancy-input --no-check-update --no-auto-commits"
    ["claudecode"]="claude -p '$TEST_PROMPT'"
    ["opencode"]="opencode run -m %MODEL% '$TEST_PROMPT'"
    ["crush"]="crush run '$TEST_PROMPT'"
)
# Initialize results tracking
declare -A RESULTS
TOTAL_TESTS=0
PASSED_TESTS=0
FAILED_TESTS=0
# Setup
echo -e "${BLUE}=== Cubbi Plugin Configuration Test Suite ===${NC}"
echo "Starting comprehensive test at $(date)"
echo "Test prompt: '$TEST_PROMPT'"
echo "Timeout: $TIMEOUT"
echo ""
mkdir -p "$TEMP_DIR"
> "$LOG_FILE"
# Function to log with timestamp
log() {
    echo "$(date '+%Y-%m-%d %H:%M:%S') $1" >> "$LOG_FILE"
}
# Function to run a single test
run_test() {
    local image="$1"
    local model="$2"
    local command="$3"

    # Replace the %MODEL% placeholder in the command template
    command="${command//%MODEL%/$model}"
    local test_name="${image}_${model//\//_}"
    local log_file="${TEMP_DIR}/${test_name}.log"

    echo -ne "Testing ${BLUE}$image${NC} with ${YELLOW}$model${NC}... "
    log "Starting test: $test_name"
    log "Command: $command"

    # Run the test with timeout
    local start_time=$(date +%s)
    if timeout "$TIMEOUT" uv run -m cubbi.cli session create \
        -i "$image" \
        -m "$model" \
        --no-connect \
        --no-shell \
        --run "$command" > "$log_file" 2>&1; then
        local end_time=$(date +%s)
        local duration=$((end_time - start_time))

        # Check if we got a meaningful response
        if grep -q "Initial command finished (exit code: 0)" "$log_file" &&
           grep -q "Command execution complete" "$log_file"; then
            echo -e "${GREEN}PASS${NC} (${duration}s)"
            RESULTS["$test_name"]="PASS"
            # Use assignment instead of ((VAR++)): the arithmetic form returns
            # exit status 1 when the old value is 0, which aborts under set -e
            PASSED_TESTS=$((PASSED_TESTS + 1))
            log "Test passed in ${duration}s"
        else
            echo -e "${RED}FAIL${NC} (no valid output)"
            RESULTS["$test_name"]="FAIL_NO_OUTPUT"
            FAILED_TESTS=$((FAILED_TESTS + 1))
            log "Test failed - no valid output"
        fi
    else
        local end_time=$(date +%s)
        local duration=$((end_time - start_time))
        echo -e "${RED}FAIL${NC} (timeout/error after ${duration}s)"
        RESULTS["$test_name"]="FAIL_TIMEOUT"
        FAILED_TESTS=$((FAILED_TESTS + 1))
        log "Test failed - timeout or error after ${duration}s"
    fi
    TOTAL_TESTS=$((TOTAL_TESTS + 1))

    # Save detailed log
    log "=== Test output for $test_name ==="
    cat "$log_file" >> "$LOG_FILE"
    log "=== End test output ==="
    log ""
}
# Function to print test matrix header
print_matrix_header() {
    echo ""
    echo -e "${BLUE}=== Test Results Matrix ===${NC}"
    printf "%-15s" "Image/Model"
    for model in "${MODELS[@]}"; do
        # Shorten model name for display (keep only the last path segment)
        short_model=$(echo "$model" | sed 's/.*\///')
        printf "%-20s" "$short_model"
    done
    echo ""
    printf "%-15s" "==============="
    for model in "${MODELS[@]}"; do
        printf "%-20s" "===================="
    done
    echo ""
}
# Function to print test matrix row
print_matrix_row() {
    local image="$1"
    printf "%-15s" "$image"
    for model in "${MODELS[@]}"; do
        local test_name="${image}_${model//\//_}"
        local result="${RESULTS[$test_name]}"
        case "$result" in
            "PASS")
                printf "%-20s" "$(echo -e "${GREEN}PASS${NC}")"
                ;;
            "FAIL_NO_OUTPUT")
                printf "%-20s" "$(echo -e "${RED}FAIL (no output)${NC}")"
                ;;
            "FAIL_TIMEOUT")
                printf "%-20s" "$(echo -e "${RED}FAIL (timeout)${NC}")"
                ;;
            *)
                printf "%-20s" "$(echo -e "${YELLOW}UNKNOWN${NC}")"
                ;;
        esac
    done
    echo ""
}
# Main test execution
echo -e "${YELLOW}Running ${#IMAGES[@]} images × ${#MODELS[@]} models = $((${#IMAGES[@]} * ${#MODELS[@]})) total tests${NC}"
echo ""
for image in "${IMAGES[@]}"; do
    echo -e "${BLUE}--- Testing $image ---${NC}"
    for model in "${MODELS[@]}"; do
        command="${COMMANDS[$image]}"
        run_test "$image" "$model" "$command"
    done
    echo ""
done
# Print results summary
print_matrix_header
for image in "${IMAGES[@]}"; do
    print_matrix_row "$image"
done
echo ""
echo -e "${BLUE}=== Final Summary ===${NC}"
echo "Total tests: $TOTAL_TESTS"
echo -e "Passed: ${GREEN}$PASSED_TESTS${NC}"
echo -e "Failed: ${RED}$FAILED_TESTS${NC}"
if [ $FAILED_TESTS -eq 0 ]; then
    echo -e "${GREEN}All tests passed! 🎉${NC}"
    exit_code=0
else
    echo -e "${RED}$FAILED_TESTS tests failed${NC}"
    exit_code=1
fi
echo ""
echo "Detailed logs saved to: $LOG_FILE"
echo "Test completed at $(date)"
# Cleanup
rm -rf "$TEMP_DIR"
log "Test suite completed. Total: $TOTAL_TESTS, Passed: $PASSED_TESTS, Failed: $FAILED_TESTS"
exit $exit_code