diff --git a/packages/opencode/src/tool/read.ts b/packages/opencode/src/tool/read.ts index fbca89ad7..44bf43d98 100644 --- a/packages/opencode/src/tool/read.ts +++ b/packages/opencode/src/tool/read.ts @@ -1,6 +1,7 @@ import z from "zod" import * as fs from "fs" import * as path from "path" +import { createInterface } from "readline" import { Tool } from "./tool" import { LSP } from "../lsp" import { FileTime } from "../file/time" @@ -11,7 +12,9 @@ import { InstructionPrompt } from "../session/instruction" const DEFAULT_READ_LIMIT = 2000 const MAX_LINE_LENGTH = 2000 +const MAX_LINE_SUFFIX = `... (line truncated to ${MAX_LINE_LENGTH} chars)` const MAX_BYTES = 50 * 1024 +const MAX_BYTES_LABEL = `${MAX_BYTES / 1024} KB` export const ReadTool = Tool.define("read", { description: DESCRIPTION, @@ -134,27 +137,53 @@ export const ReadTool = Tool.define("read", { } } - const isBinary = await isBinaryFile(filepath, file) + const isBinary = await isBinaryFile(filepath, stat.size) if (isBinary) throw new Error(`Cannot read binary file: ${filepath}`) + const stream = fs.createReadStream(filepath, { encoding: "utf8" }) + const rl = createInterface({ + input: stream, + // Note: we use the crlfDelay option to recognize all instances of CR LF + // ('\r\n') in file as a single line break. + crlfDelay: Infinity, + }) + const limit = params.limit ?? DEFAULT_READ_LIMIT const offset = params.offset ?? 1 const start = offset - 1 - const lines = await file.text().then((text) => text.split("\n")) - if (start >= lines.length) throw new Error(`Offset ${offset} is out of range for this file (${lines.length} lines)`) - const raw: string[] = [] let bytes = 0 + let lines = 0 let truncatedByBytes = false - for (let i = start; i < Math.min(lines.length, start + limit); i++) { - const line = lines[i].length > MAX_LINE_LENGTH ? lines[i].substring(0, MAX_LINE_LENGTH) + "..." : lines[i] - const size = Buffer.byteLength(line, "utf-8") + (raw.length > 0 ? 1 : 0) - if (bytes + size > MAX_BYTES) { - truncatedByBytes = true - break + let hasMoreLines = false + try { + for await (const text of rl) { + lines += 1 + if (lines <= start) continue + + if (raw.length >= limit) { + hasMoreLines = true + continue + } + + const line = text.length > MAX_LINE_LENGTH ? text.substring(0, MAX_LINE_LENGTH) + MAX_LINE_SUFFIX : text + const size = Buffer.byteLength(line, "utf-8") + (raw.length > 0 ? 1 : 0) + if (bytes + size > MAX_BYTES) { + truncatedByBytes = true + hasMoreLines = true + break + } + + raw.push(line) + bytes += size } - raw.push(line) - bytes += size + } finally { + rl.close() + stream.destroy() + } + + if (lines < offset && !(lines === 0 && offset === 1)) { + throw new Error(`Offset ${offset} is out of range for this file (${lines} lines)`) } const content = raw.map((line, index) => { @@ -165,15 +194,15 @@ export const ReadTool = Tool.define("read", { let output = [`${filepath}`, `file`, ""].join("\n") output += content.join("\n") - const totalLines = lines.length + const totalLines = lines const lastReadLine = offset + raw.length - 1 - const hasMoreLines = totalLines > lastReadLine + const nextOffset = lastReadLine + 1 const truncated = hasMoreLines || truncatedByBytes if (truncatedByBytes) { - output += `\n\n(Output truncated at ${MAX_BYTES} bytes. Use 'offset' parameter to read beyond line ${lastReadLine})` + output += `\n\n(Output capped at ${MAX_BYTES_LABEL}. Showing lines ${offset}-${lastReadLine}. Use offset=${nextOffset} to continue.)` } else if (hasMoreLines) { - output += `\n\n(File has more lines. Use 'offset' parameter to read beyond line ${lastReadLine})` + output += `\n\n(Showing lines ${offset}-${lastReadLine} of ${totalLines}. Use offset=${nextOffset} to continue.)` } else { output += `\n\n(End of file - total ${totalLines} lines)` } @@ -199,7 +228,7 @@ export const ReadTool = Tool.define("read", { }, }) -async function isBinaryFile(filepath: string, file: Bun.BunFile): Promise { +async function isBinaryFile(filepath: string, fileSize: number): Promise { const ext = path.extname(filepath).toLowerCase() // binary check for common non-text extensions switch (ext) { @@ -236,22 +265,25 @@ async function isBinaryFile(filepath: string, file: Bun.BunFile): Promise 13 && bytes[i] < 32)) { - nonPrintableCount++ + let nonPrintableCount = 0 + for (let i = 0; i < result.bytesRead; i++) { + if (bytes[i] === 0) return true + if (bytes[i] < 9 || (bytes[i] > 13 && bytes[i] < 32)) { + nonPrintableCount++ + } } + // If >30% non-printable characters, consider it binary + return nonPrintableCount / result.bytesRead > 0.3 + } finally { + await fh.close() } - // If >30% non-printable characters, consider it binary - return nonPrintableCount / bytes.length > 0.3 } diff --git a/packages/opencode/test/tool/read.test.ts b/packages/opencode/test/tool/read.test.ts index ba3e22edc..cc9d1a33e 100644 --- a/packages/opencode/test/tool/read.test.ts +++ b/packages/opencode/test/tool/read.test.ts @@ -211,8 +211,8 @@ describe("tool.read truncation", () => { const read = await ReadTool.init() const result = await read.execute({ filePath: path.join(tmp.path, "large.json") }, ctx) expect(result.metadata.truncated).toBe(true) - expect(result.output).toContain("Output truncated at") - expect(result.output).toContain("bytes") + expect(result.output).toContain("Output capped at") + expect(result.output).toContain("Use offset=") }, }) }) @@ -230,7 +230,8 @@ describe("tool.read truncation", () => { const read = await ReadTool.init() const result = await read.execute({ filePath: path.join(tmp.path, "many-lines.txt"), limit: 10 }, ctx) expect(result.metadata.truncated).toBe(true) - expect(result.output).toContain("File has more lines") + expect(result.output).toContain("Showing lines 1-10 of 100") + expect(result.output).toContain("Use offset=11") expect(result.output).toContain("line0") expect(result.output).toContain("line9") expect(result.output).not.toContain("line10") @@ -267,6 +268,10 @@ describe("tool.read truncation", () => { fn: async () => { const read = await ReadTool.init() const result = await read.execute({ filePath: path.join(tmp.path, "offset.txt"), offset: 10, limit: 5 }, ctx) + expect(result.output).toContain("10: line10") + expect(result.output).toContain("14: line14") + expect(result.output).not.toContain("9: line10") + expect(result.output).not.toContain("15: line15") expect(result.output).toContain("line10") expect(result.output).toContain("line14") expect(result.output).not.toContain("line0") @@ -293,6 +298,40 @@ describe("tool.read truncation", () => { }) }) + test("allows reading empty file at default offset", async () => { + await using tmp = await tmpdir({ + init: async (dir) => { + await Bun.write(path.join(dir, "empty.txt"), "") + }, + }) + await Instance.provide({ + directory: tmp.path, + fn: async () => { + const read = await ReadTool.init() + const result = await read.execute({ filePath: path.join(tmp.path, "empty.txt") }, ctx) + expect(result.metadata.truncated).toBe(false) + expect(result.output).toContain("End of file - total 0 lines") + }, + }) + }) + + test("throws when offset > 1 for empty file", async () => { + await using tmp = await tmpdir({ + init: async (dir) => { + await Bun.write(path.join(dir, "empty.txt"), "") + }, + }) + await Instance.provide({ + directory: tmp.path, + fn: async () => { + const read = await ReadTool.init() + await expect(read.execute({ filePath: path.join(tmp.path, "empty.txt"), offset: 2 }, ctx)).rejects.toThrow( + "Offset 2 is out of range for this file (0 lines)", + ) + }, + }) + }) + test("does not mark final directory page as truncated", async () => { await using tmp = await tmpdir({ init: async (dir) => { @@ -324,7 +363,7 @@ describe("tool.read truncation", () => { fn: async () => { const read = await ReadTool.init() const result = await read.execute({ filePath: path.join(tmp.path, "long-line.txt") }, ctx) - expect(result.output).toContain("...") + expect(result.output).toContain("(line truncated to 2000 chars)") expect(result.output.length).toBeLessThan(3000) }, }) @@ -425,3 +464,40 @@ describe("tool.read loaded instructions", () => { }) }) }) + +describe("tool.read binary detection", () => { + test("rejects text extension files with null bytes", async () => { + await using tmp = await tmpdir({ + init: async (dir) => { + const bytes = Buffer.from([0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00, 0x77, 0x6f, 0x72, 0x6c, 0x64]) + await Bun.write(path.join(dir, "null-byte.txt"), bytes) + }, + }) + await Instance.provide({ + directory: tmp.path, + fn: async () => { + const read = await ReadTool.init() + await expect(read.execute({ filePath: path.join(tmp.path, "null-byte.txt") }, ctx)).rejects.toThrow( + "Cannot read binary file", + ) + }, + }) + }) + + test("rejects known binary extensions", async () => { + await using tmp = await tmpdir({ + init: async (dir) => { + await Bun.write(path.join(dir, "module.wasm"), "not really wasm") + }, + }) + await Instance.provide({ + directory: tmp.path, + fn: async () => { + const read = await ReadTool.init() + await expect(read.execute({ filePath: path.join(tmp.path, "module.wasm") }, ctx)).rejects.toThrow( + "Cannot read binary file", + ) + }, + }) + }) +})