Skip to content

Commit 5084ac6

Browse files
authored
Merge branch 'main' into main
2 parents c36ef29 + 50d6b43 commit 5084ac6

File tree

266 files changed

+5590
-2980
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

266 files changed

+5590
-2980
lines changed

.changeset/big-sloths-dig.md

Lines changed: 0 additions & 5 deletions
This file was deleted.

.github/workflows/evals.yml

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
name: Evals

# Builds the evals runner image, starts Postgres and Redis through docker
# compose, applies database migrations and then runs the evals CLI in CI mode.

on:
  pull_request:
    types: [labeled]
  workflow_dispatch:

env:
  DOCKER_BUILDKIT: 1
  COMPOSE_DOCKER_CLI_BUILD: 1

jobs:
  evals:
    # Run if triggered manually or if the label that was just added is exactly 'evals'.
    # An exact comparison is used on purpose: contains() on a string is a substring
    # match, so it would also fire for labels such as 'skip-evals'.
    if: github.event_name == 'workflow_dispatch' || github.event.label.name == 'evals'
    runs-on: blacksmith-16vcpu-ubuntu-2404
    timeout-minutes: 45

    # Applies to `run` steps only; the `uses` steps below are unaffected.
    defaults:
      run:
        working-directory: packages/evals

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      # Writes the two env files into packages/evals. A placeholder API key is
      # used when the OPENROUTER_API_KEY secret is not available.
      - name: Create environment
        run: |
          cat > .env.local << EOF
          OPENROUTER_API_KEY=${{ secrets.OPENROUTER_API_KEY || 'test-key-for-build' }}
          EOF

          cat > .env.development << EOF
          NODE_ENV=development
          DATABASE_URL=postgresql://postgres:password@db:5432/evals_development
          REDIS_URL=redis://redis:6379
          HOST_EXECUTION_METHOD=docker
          EOF

      # The image is only loaded into the local Docker daemon (push: false,
      # load: true); layers are cached through the GitHub Actions cache.
      - name: Build image
        uses: docker/build-push-action@v5
        with:
          context: .
          file: packages/evals/Dockerfile.runner
          tags: evals-runner:latest
          cache-from: type=gha
          cache-to: type=gha,mode=max
          push: false
          load: true

      - name: Tag image
        run: docker tag evals-runner:latest evals-runner

      # Waits up to 60s each for Postgres and Redis to answer, then checks that
      # the runner container can reach both services and can run `docker ps`.
      - name: Start containers
        run: |
          docker compose up -d db redis
          timeout 60 bash -c 'until docker compose exec -T db pg_isready -U postgres; do sleep 2; done'
          timeout 60 bash -c 'until docker compose exec -T redis redis-cli ping | grep -q PONG; do sleep 2; done'
          docker compose run --rm runner sh -c 'nc -z db 5432 && echo "✓ Runner -> Database connection successful"'
          docker compose run --rm runner sh -c 'nc -z redis 6379 && echo "✓ Runner -> Redis connection successful"'
          docker compose run --rm runner docker ps

      - name: Run database migrations
        run: docker compose run --rm runner pnpm --filter @roo-code/evals db:migrate

      - name: Run evals
        run: docker compose run --rm runner pnpm --filter @roo-code/evals cli --ci

      # Always tear the stack down, including volumes, even if an earlier step failed.
      - name: Cleanup
        if: always()
        run: docker compose down -v --remove-orphans

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,3 +43,4 @@ logs
4343
.idea/
4444
.qodo/
4545
.vercel
46+
.roo/mcp.json

CHANGELOG.md

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,47 @@
11
# kilo-code
22

3+
## [v4.36.0]
4+
5+
- [#690](https://github.com/Kilo-Org/kilocode/pull/690) [`9b1451a`](https://github.com/Kilo-Org/kilocode/commit/9b1451a47bd2bc567646a4a0c2a12b42826ab9d1) Thanks [@kevinvandijk](https://github.com/kevinvandijk)! - Include changes from Roo Code v3.19.7:
6+
7+
- Fix McpHub sidebar focus behavior to prevent unwanted focus grabbing
8+
- Disable checkpoint functionality when nested git repositories are detected to prevent conflicts
9+
- Remove unused Storybook components and dependencies to reduce bundle size
10+
- Add data-testid ESLint rule for improved testing standards (thanks @elianiva!)
11+
- Update development dependencies including eslint, knip, @types/node, i18next, fast-xml-parser, and @google/genai
12+
- Improve CI infrastructure with GitHub Actions and Blacksmith runner migrations
13+
- Replace explicit caching with implicit caching to reduce latency for Gemini models
14+
- Clarify that the default concurrent file read limit is 15 files (thanks @olearycrew!)
15+
- Fix copy button logic (thanks @samhvw8!)
16+
- Fade buttons on history preview if no interaction in progress (thanks @sachasayan!)
17+
- Allow MCP server refreshing, fix state changes in MCP server management UI view (thanks @taylorwilsdon!)
18+
- Remove unnecessary npx usage in some npm scripts (thanks @user202729!)
19+
- Bug fix for trailing slash error when using LiteLLM provider (thanks @kcwhite!)
20+
- Fix Gemini 2.5 Pro Preview thinking budget bug
21+
- Add Gemini Pro 06-05 model support (thanks @daniel-lxs and @shariqriazz!)
22+
- Fix reading PDF, DOCX, and IPYNB files in read_file tool (thanks @samhvw8!)
23+
- Fix Mermaid CSP errors with enhanced bundling strategy (thanks @KJ7LNW!)
24+
- Improve model info detection for custom Bedrock ARNs (thanks @adamhill!)
25+
- Add OpenAI Compatible embedder for codebase indexing (thanks @SannidhyaSah!)
26+
- Fix multiple memory leaks in ChatView component (thanks @kiwina!)
27+
- Fix WorkspaceTracker resource leaks by disposing FileSystemWatcher (thanks @kiwina!)
28+
- Fix RooTips setTimeout cleanup to prevent state updates on unmounted components (thanks @kiwina!)
29+
- Fix FileSystemWatcher leak in RooIgnoreController (thanks @kiwina!)
30+
- Fix clipboard memory leak by clearing setTimeout in useCopyToClipboard (thanks @kiwina!)
31+
- Fix ClineProvider instance cleanup (thanks @xyOz-dev!)
32+
- Enforce codebase_search as primary tool for code understanding tasks (thanks @hannesrudolph!)
33+
- Improve Docker setup for evals
34+
- Move evals into pnpm workspace, switch from SQLite to Postgres
35+
- Refactor MCP to use getDefaultEnvironment for stdio client transport (thanks @samhvw8!)
36+
- Remove the "partial" component from names that refer to messages which are not necessarily partial (thanks @wkordalski!)
37+
- Improve feature request template (thanks @elianiva!)
38+
39+
- [#592](https://github.com/Kilo-Org/kilocode/pull/592) [`68c3d6e`](https://github.com/Kilo-Org/kilocode/commit/68c3d6e7a1250e08e2bd2b9cbbbd6b4312bad045) Thanks [@chrarnoldus](https://github.com/chrarnoldus)! - Workflow and rules configuration screen added
40+
41+
### Patch Changes
42+
43+
- [#697](https://github.com/Kilo-Org/kilocode/pull/697) [`9514f22`](https://github.com/Kilo-Org/kilocode/commit/9514f22a9d77b2d838ddcb97b5f2c5909aaea68a) Thanks [@kevinvandijk](https://github.com/kevinvandijk)! - Add correct path to walkthrough files to show walkthrough on first load (thanks for the report @adamhill!)
44+
345
## [v4.35.1]
446

547
- [#695](https://github.com/Kilo-Org/kilocode/pull/695) [`a7910eb`](https://github.com/Kilo-Org/kilocode/commit/a7910eba54a4ede296bfa82beddae71a1d9f77c5) Thanks [@kevinvandijk](https://github.com/kevinvandijk)! - Fix: Feedback button overlaps new mode creation dialog

apps/web-evals/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
"scripts": {
66
"lint": "next lint",
77
"check-types": "tsc -b",
8-
"dev": "scripts/check-services.sh && next dev --turbopack",
8+
"dev": "scripts/check-services.sh && next dev",
99
"format": "prettier --write src",
1010
"build": "next build",
1111
"start": "next start"
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
"use server"
2+
3+
import * as path from "path"
4+
import { fileURLToPath } from "url"
5+
6+
import { exerciseLanguages, listDirectories } from "@roo-code/evals"
7+
8+
const __dirname = path.dirname(fileURLToPath(import.meta.url)) // <repo>/apps/web-evals/src/actions
9+
10+
const EVALS_REPO_PATH = path.resolve(__dirname, "../../../../../evals")
11+
12+
/**
 * Collects the exercises of every language in `exerciseLanguages`.
 *
 * For each language the directory `<EVALS_REPO_PATH>/<language>` is handed to
 * `listDirectories` (together with this module's `__dirname`); all languages
 * are queried concurrently.
 *
 * @returns A single flat array of `"<language>/<exercise>"` identifiers.
 */
export const getExercises = async () => {
  const lookups = exerciseLanguages.map(async (language) => {
    const names = await listDirectories(__dirname, path.join(EVALS_REPO_PATH, language))
    return names.map((name) => `${language}/${name}`)
  })

  const perLanguage = await Promise.all(lookups)
  return perLanguage.flat()
}
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
"use server"
2+
3+
import { redisClient } from "@/lib/server/redis"
4+
5+
/**
 * Reads the heartbeat entry stored for a run.
 *
 * @param runId - Numeric id of the run; the Redis key is `heartbeat:<runId>`.
 * @returns Whatever the Redis client's `get` yields for that key.
 */
export const getHeartbeat = async (runId: number) => {
  const client = await redisClient()
  const key = `heartbeat:${runId}`
  return client.get(key)
}

apps/web-evals/src/lib/server/runners.ts renamed to apps/web-evals/src/actions/runners.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
"use server"
22

3-
import { redisClient } from "./redis"
3+
import { redisClient } from "@/lib/server/redis"
44

55
export const getRunners = async (runId: number) => {
66
const redis = await redisClient()

apps/web-evals/src/lib/server/runs.ts renamed to apps/web-evals/src/actions/runs.ts

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
"use server"
22

3-
import { spawn } from "child_process"
3+
import * as path from "path"
44
import fs from "fs"
5+
import { fileURLToPath } from "url"
6+
import { spawn } from "child_process"
57

68
import { revalidatePath } from "next/cache"
79
import pMap from "p-map"
@@ -12,11 +14,12 @@ import {
1214
createRun as _createRun,
1315
deleteRun as _deleteRun,
1416
createTask,
17+
getExercisesForLanguage,
1518
} from "@roo-code/evals"
1619

1720
import { CreateRun } from "@/lib/schemas"
1821

19-
import { getExercisesForLanguage } from "./exercises"
22+
const EVALS_REPO_PATH = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "../../../../../evals")
2023

2124
// eslint-disable-next-line @typescript-eslint/no-unused-vars
2225
export async function createRun({ suite, exercises = [], systemPrompt, ...values }: CreateRun) {
@@ -37,9 +40,9 @@ export async function createRun({ suite, exercises = [], systemPrompt, ...values
3740
}
3841
} else {
3942
for (const language of exerciseLanguages) {
40-
const exercises = await getExercisesForLanguage(language)
43+
const exercises = await getExercisesForLanguage(EVALS_REPO_PATH, language)
4144

42-
await pMap(exercises, (exercise) => createTask({ ...values, runId: run.id, language, exercise }), {
45+
await pMap(exercises, (exercise) => createTask({ runId: run.id, language, exercise }), {
4346
concurrency: 10,
4447
})
4548
}
@@ -52,9 +55,10 @@ export async function createRun({ suite, exercises = [], systemPrompt, ...values
5255

5356
const dockerArgs = [
5457
`--name evals-controller-${run.id}`,
55-
"--rm",
58+
// "--rm",
5659
"--network evals_default",
5760
"-v /var/run/docker.sock:/var/run/docker.sock",
61+
"-v /tmp/evals:/var/log/evals",
5862
"-e HOST_EXECUTION_METHOD=docker",
5963
]
6064

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
import { NextResponse } from "next/server"
2+
3+
/**
 * Health-check route handler.
 *
 * Responds with HTTP 200 and a JSON body holding `status: "healthy"`, the
 * current ISO timestamp, the process uptime and the environment name
 * (`NODE_ENV`, falling back to `"production"` when it is unset or empty).
 * If building that response throws, responds with HTTP 503 and a JSON body
 * holding `status: "unhealthy"`, the timestamp and the error message.
 */
export async function GET() {
  try {
    const healthy = {
      status: "healthy",
      timestamp: new Date().toISOString(),
      uptime: process.uptime(),
      environment: process.env.NODE_ENV || "production",
    }

    return NextResponse.json(healthy, { status: 200 })
  } catch (error) {
    const unhealthy = {
      status: "unhealthy",
      timestamp: new Date().toISOString(),
      // Non-Error throwables carry no reliable message, so use a fixed label.
      error: error instanceof Error ? error.message : "Unknown error",
    }

    return NextResponse.json(unhealthy, { status: 503 })
  }
}

apps/web-evals/src/app/home.tsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ import { Ellipsis, Rocket } from "lucide-react"
77

88
import type { Run, TaskMetrics } from "@roo-code/evals"
99

10-
import { deleteRun } from "@/lib/server/runs"
10+
import { deleteRun } from "@/actions/runs"
1111
import { formatCurrency, formatDuration, formatTokens, formatToolUsageSuccessRate } from "@/lib/formatters"
1212
import {
1313
Button,

apps/web-evals/src/app/runs/[id]/connection-status.tsx

Lines changed: 0 additions & 50 deletions
This file was deleted.
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
"use client"
2+
3+
import type { RunStatus as _RunStatus } from "@/hooks/use-run-status"
4+
import { cn } from "@/lib/utils"
5+
6+
/**
 * Shows the live status of a run as three rows:
 *
 * - "Task Stream": the `sseStatus` text plus a dot that is green when
 *   connected, amber when waiting and rose on error.
 * - "Task Controller": the `heartbeat` value (or "dead" when it is null or
 *   undefined) plus a dot that is green when a heartbeat is present and rose
 *   otherwise.
 * - "Task Runners": the runner names joined with ", ", rendered only when the
 *   list is non-empty.
 *
 * Each dot is drawn twice: a translucent pinging halo and a solid dot on top.
 */
export const RunStatus = ({ runStatus: { sseStatus, heartbeat, runners = [] } }: { runStatus: _RunStatus }) => {
  // Base classes shared by both indicators.
  const haloClasses = "absolute size-2.5 rounded-full opacity-50 animate-ping"
  const dotClasses = "size-2.5 rounded-full"

  // Color toggles; the same map is used for the halo and the solid dot.
  const streamColors = {
    "bg-green-500": sseStatus === "connected",
    "bg-amber-500": sseStatus === "waiting",
    "bg-rose-500": sseStatus === "error",
  }
  const controllerColors = {
    "bg-green-500": !!heartbeat,
    "bg-rose-500": !heartbeat,
  }

  return (
    <div>
      <div className="flex items-center gap-2">
        <div className="flex items-center gap-2">
          <div>Task Stream:</div>
          <div className="font-mono text-sm text-muted-foreground">{sseStatus}</div>
        </div>
        <div className="relative">
          <div className={cn(haloClasses, streamColors)} />
          <div className={cn(dotClasses, streamColors)} />
        </div>
      </div>
      <div className="flex items-center gap-2">
        <div className="flex items-center gap-2">
          <div>Task Controller:</div>
          <div className="font-mono text-sm text-muted-foreground">{heartbeat ?? "dead"}</div>
        </div>
        <div className="relative">
          <div className={cn(haloClasses, controllerColors)} />
          <div className={cn(dotClasses, controllerColors)} />
        </div>
      </div>
      <div className="flex items-center gap-2">
        <div>Task Runners:</div>
        {runners.length > 0 && <div className="font-mono text-sm text-muted-foreground">{runners.join(", ")}</div>}
      </div>
    </div>
  )
}

0 commit comments

Comments
 (0)