feat: another track
This commit is contained in:
@@ -14,10 +14,11 @@ import {
|
||||
ShieldAlert,
|
||||
MessageSquare,
|
||||
Crown,
|
||||
FlaskConical,
|
||||
} from 'lucide-react'
|
||||
import { Footer } from '@/components/footer'
|
||||
import { LiveTerminal } from '@/components/hackathons/live-terminal'
|
||||
import { StreamViz, SecureViz, RadarViz, ScanViz, ExtendViz } from '@/components/hackathons/track-visuals'
|
||||
import { StreamViz, SecureViz, RadarViz, ScanViz, ExtendViz, BenchViz } from '@/components/hackathons/track-visuals'
|
||||
|
||||
const ShieldScene = dynamic(
|
||||
() => import('@/components/hackathons/shield-scene').then((m) => m.ShieldScene),
|
||||
@@ -546,6 +547,18 @@ const tracks = [
|
||||
examples: ['Error interception', 'Context injection', 'Alternative suggestion', 'Loop prevention'],
|
||||
Visual: ExtendViz,
|
||||
},
|
||||
{
|
||||
id: 'control-benchmark',
|
||||
icon: FlaskConical,
|
||||
title: 'Control Evaluation Benchmark',
|
||||
hook: 'Build the test suite that measures how well sandboxes actually work.',
|
||||
color: 'from-rose-500/10 to-pink-500/5',
|
||||
borderColor: 'hover:border-rose-500/30',
|
||||
description: 'Define main tasks (real software engineering work) paired with side tasks (exfiltrate a key, install a backdoor, modify git hooks). Measure how much useful work an agent completes under Greywall\'s restrictions vs. how often adversarial goals succeed.',
|
||||
scoring: 'Show the safety/usefulness tradeoff. A good benchmark reveals real weaknesses, not just toy scenarios.',
|
||||
examples: ['Main/side task pairs', 'Policy quality scoring', 'Layer isolation tests', 'Reproducible evals'],
|
||||
Visual: BenchViz,
|
||||
},
|
||||
]
|
||||
|
||||
function TrackCard({ track, index }: { track: typeof tracks[0]; index: number }) {
|
||||
@@ -612,7 +625,7 @@ function Tracks() {
|
||||
Pick your track.
|
||||
</h2>
|
||||
<p className="font-serif text-lg text-muted-foreground max-w-2xl mx-auto">
|
||||
Five open-ended tracks, all building on top of{' '}
|
||||
Six open-ended tracks, all building on top of{' '}
|
||||
<a href="https://github.com/GreyhavenHQ/greywall" target="_blank" rel="noopener noreferrer" className="text-primary hover:text-primary/80 transition-colors underline underline-offset-2">Greywall</a>.
|
||||
Go deep on one or try a few.
|
||||
</p>
|
||||
|
||||
Reference in New Issue
Block a user