diff --git a/app/hackathons/page.tsx b/app/hackathons/page.tsx
index 7cd835c..1318329 100644
--- a/app/hackathons/page.tsx
+++ b/app/hackathons/page.tsx
@@ -14,10 +14,11 @@ import {
ShieldAlert,
MessageSquare,
Crown,
+ FlaskConical,
} from 'lucide-react'
import { Footer } from '@/components/footer'
import { LiveTerminal } from '@/components/hackathons/live-terminal'
-import { StreamViz, SecureViz, RadarViz, ScanViz, ExtendViz } from '@/components/hackathons/track-visuals'
+import { StreamViz, SecureViz, RadarViz, ScanViz, ExtendViz, BenchViz } from '@/components/hackathons/track-visuals'
const ShieldScene = dynamic(
() => import('@/components/hackathons/shield-scene').then((m) => m.ShieldScene),
@@ -546,6 +547,18 @@ const tracks = [
examples: ['Error interception', 'Context injection', 'Alternative suggestion', 'Loop prevention'],
Visual: ExtendViz,
},
+ {
+ id: 'control-benchmark',
+ icon: FlaskConical,
+ title: 'Control Evaluation Benchmark',
+ hook: 'Build the test suite that measures how well sandboxes actually work.',
+ color: 'from-rose-500/10 to-pink-500/5',
+ borderColor: 'hover:border-rose-500/30',
+ description: 'Define main tasks (real software engineering work) paired with side tasks (exfiltrate a key, install a backdoor, modify git hooks). Measure how much useful work an agent completes under Greywall\'s restrictions vs. how often adversarial goals succeed.',
+ scoring: 'Show the safety/usefulness tradeoff. A good benchmark reveals real weaknesses, not just toy scenarios.',
+ examples: ['Main/side task pairs', 'Policy quality scoring', 'Layer isolation tests', 'Reproducible evals'],
+ Visual: BenchViz,
+ },
]
function TrackCard({ track, index }: { track: typeof tracks[0]; index: number }) {
@@ -612,7 +625,7 @@ function Tracks() {
Pick your track.
- Five open-ended tracks, all building on top of{' '}
+ Six open-ended tracks, all building on top of{' '}
Greywall.
Go deep on one or try a few.
diff --git a/components/hackathons/track-visuals.tsx b/components/hackathons/track-visuals.tsx
index 212ad4b..0e7e4b7 100644
--- a/components/hackathons/track-visuals.tsx
+++ b/components/hackathons/track-visuals.tsx
@@ -405,3 +405,146 @@ export function ExtendViz() {
)
}
+
+/* ─── Benchmark grid (Track 6 — Control Evaluation Benchmark) ─── */
+
+/**
+ * Animated "benchmark grid" visual for the Control Evaluation Benchmark track.
+ *
+ * Renders a canvas showing a 6×8 grid of test cells. Each cell gets a random
+ * pass/fail verdict (roughly 75% pass), a reveal time and a pulse phase once
+ * per mount. The animation then replays on a 6-second loop: cells start as dim
+ * outlines, fade in as green (pass, with a checkmark) or red (fail, with an X),
+ * and a thin progress bar along the bottom fills over the first 5 seconds.
+ *
+ * The clock advances by a fixed 0.016 per animation frame, so playback speed
+ * follows the display's refresh rate rather than wall-clock time.
+ *
+ * @returns A canvas element driven by a requestAnimationFrame loop that is
+ *          cancelled (together with the resize listener) on unmount.
+ */
+export function BenchViz() {
+  // Typed ref so `canvas` narrows to HTMLCanvasElement after the null guard.
+  const canvasRef = useRef<HTMLCanvasElement>(null)
+
+  useEffect(() => {
+    const canvas = canvasRef.current
+    if (!canvas) return
+    const ctx = canvas.getContext('2d')
+    if (!ctx) return
+
+    const COLS = 6
+    const ROWS = 8
+    const GAP = 2.5
+    const LOOP_SECONDS = 6
+    const BAR_FILL_SECONDS = 5
+    // A cell takes 0.5 s to fade in, so capping reveal times here guarantees
+    // every cell is fully visible by the time the progress bar is full.
+    const LAST_REVEAL_AT = BAR_FILL_SECONDS - 0.5
+    const PASS_RGB = '74, 222, 128'
+    const FAIL_RGB = '244, 63, 94'
+
+    let w = 0
+    let h = 0
+
+    const resize = () => {
+      // Read the ratio on every resize: it changes with browser zoom or when
+      // the window moves to a display with a different pixel density.
+      const dpr = window.devicePixelRatio || 1
+      const rect = canvas.getBoundingClientRect()
+      w = rect.width
+      h = rect.height
+      // Assigning width/height resets the context, so the scale transform has
+      // to be reapplied afterwards.
+      canvas.width = w * dpr
+      canvas.height = h * dpr
+      ctx.setTransform(dpr, 0, 0, dpr, 0, 0)
+    }
+    resize()
+
+    // Starts a fresh path outlining one cell. Falls back to a square-cornered
+    // rect where roundRect is unavailable so the frame loop cannot throw.
+    const traceCell = (x: number, y: number, width: number, height: number, radius: number) => {
+      ctx.beginPath()
+      if (typeof ctx.roundRect === 'function') {
+        ctx.roundRect(x, y, width, height, radius)
+      } else {
+        ctx.rect(x, y, width, height)
+      }
+    }
+
+    let t = 0
+    let animId = 0
+
+    // Verdicts and timings are rolled once per mount, so every loop replays
+    // the same grid. Lower rows are biased to reveal later (0.3 s per row).
+    const cells = Array.from({ length: COLS * ROWS }, (_, i) => ({
+      pass: Math.random() > 0.25,
+      revealAt: Math.min(Math.random() * 4 + Math.floor(i / COLS) * 0.3, LAST_REVEAL_AT),
+      pulsePhase: Math.random() * Math.PI * 2,
+    }))
+
+    const draw = () => {
+      ctx.clearRect(0, 0, w, h)
+      t += 0.016
+
+      const padX = w * 0.08
+      const padY = h * 0.06
+      const cellW = (w - padX * 2) / COLS
+      const cellH = (h - padY * 2) / ROWS
+      const loopT = t % LOOP_SECONDS
+
+      // Clamp to zero: on a hidden or very small canvas the gap exceeds the
+      // cell size, and a negative corner radius makes roundRect throw.
+      const cw = Math.max(cellW - GAP * 2, 0)
+      const ch = Math.max(cellH - GAP * 2, 0)
+      const r = Math.min(cw, ch) * 0.15
+
+      cells.forEach((cell, i) => {
+        const x = padX + (i % COLS) * cellW + GAP
+        const y = padY + Math.floor(i / COLS) * cellH + GAP
+
+        if (loopT < cell.revealAt) {
+          // Not yet revealed — dim outline only.
+          ctx.strokeStyle = 'rgba(244, 63, 94, 0.06)'
+          ctx.lineWidth = 0.5
+          traceCell(x, y, cw, ch, r)
+          ctx.stroke()
+          return
+        }
+
+        // Fade in over 0.5 s, with a gentle per-cell brightness pulse.
+        const revealProgress = Math.min((loopT - cell.revealAt) * 2, 1)
+        const pulse = Math.sin(t * 2 + cell.pulsePhase) * 0.15 + 0.85
+
+        // Pass cells are green; fail cells are red and slightly more opaque.
+        const rgb = cell.pass ? PASS_RGB : FAIL_RGB
+        const fillAlpha = cell.pass ? 0.12 : 0.15
+        const strokeAlpha = cell.pass ? 0.25 : 0.3
+
+        ctx.fillStyle = `rgba(${rgb}, ${fillAlpha * revealProgress * pulse})`
+        traceCell(x, y, cw, ch, r)
+        ctx.fill()
+
+        ctx.strokeStyle = `rgba(${rgb}, ${strokeAlpha * revealProgress})`
+        ctx.lineWidth = 0.5
+        traceCell(x, y, cw, ch, r)
+        ctx.stroke()
+
+        // The glyph fades in during the second half of the reveal.
+        if (revealProgress <= 0.5) return
+
+        const alpha = (revealProgress - 0.5) * 2
+        const cx = x + cw / 2
+        const cy = y + ch / 2
+        ctx.strokeStyle = `rgba(${rgb}, ${0.5 * alpha})`
+        ctx.lineWidth = 1.2
+        ctx.beginPath()
+        if (cell.pass) {
+          // Checkmark
+          const s = Math.min(cw, ch) * 0.2
+          ctx.moveTo(cx - s * 0.6, cy)
+          ctx.lineTo(cx - s * 0.1, cy + s * 0.5)
+          ctx.lineTo(cx + s * 0.6, cy - s * 0.4)
+        } else {
+          // X mark
+          const s = Math.min(cw, ch) * 0.18
+          ctx.moveTo(cx - s, cy - s)
+          ctx.lineTo(cx + s, cy + s)
+          ctx.moveTo(cx + s, cy - s)
+          ctx.lineTo(cx - s, cy + s)
+        }
+        ctx.stroke()
+      })
+
+      // Progress bar at bottom: faint track plus a fill that completes at
+      // BAR_FILL_SECONDS and then holds until the loop restarts.
+      const barY = h - padY * 0.6
+      const barH = 2
+      const barW = w - padX * 2
+      const progress = Math.min(loopT / BAR_FILL_SECONDS, 1)
+      ctx.fillStyle = 'rgba(244, 63, 94, 0.08)'
+      ctx.fillRect(padX, barY, barW, barH)
+      ctx.fillStyle = 'rgba(244, 63, 94, 0.3)'
+      ctx.fillRect(padX, barY, barW * progress, barH)
+
+      animId = requestAnimationFrame(draw)
+    }
+    draw()
+
+    window.addEventListener('resize', resize)
+    return () => {
+      cancelAnimationFrame(animId)
+      window.removeEventListener('resize', resize)
+    }
+  }, [])
+
+  // The ref must be attached to a rendered canvas or the effect above never
+  // starts. NOTE(review): the sizing classes are assumed — confirm they match
+  // the other track visuals in this file.
+  return <canvas ref={canvasRef} className="w-full h-full" />
+}