fix: make read tool more mem efficient (#14009)

2026-02-17 15:36:45 -06:00
parent 2a2437bf22
commit c1b03b728a
2 changed files with 142 additions and 34 deletions
--- a/packages/opencode/src/tool/read.ts
+++ b/packages/opencode/src/tool/read.ts
@@ -1,6 +1,7 @@
 import z from "zod"
 import * as fs from "fs"
 import * as path from "path"
+import { createInterface } from "readline"
 import { Tool } from "./tool"
 import { LSP } from "../lsp"
 import { FileTime } from "../file/time"
@@ -11,7 +12,9 @@ import { InstructionPrompt } from "../session/instruction"

 const DEFAULT_READ_LIMIT = 2000
 const MAX_LINE_LENGTH = 2000
+const MAX_LINE_SUFFIX = `... (line truncated to ${MAX_LINE_LENGTH} chars)`
 const MAX_BYTES = 50 * 1024
+const MAX_BYTES_LABEL = `${MAX_BYTES / 1024} KB`

 export const ReadTool = Tool.define("read", {
  description: DESCRIPTION,
@@ -134,27 +137,53 @@ export const ReadTool = Tool.define("read", {
      }
    }

-    const isBinary = await isBinaryFile(filepath, file)
+    const isBinary = await isBinaryFile(filepath, stat.size)
    if (isBinary) throw new Error(`Cannot read binary file: ${filepath}`)

+    const stream = fs.createReadStream(filepath, { encoding: "utf8" })
+    const rl = createInterface({
+      input: stream,
+      // Note: we use the crlfDelay option to recognize all instances of CR LF
+      // ('\r\n') in file as a single line break.
+      crlfDelay: Infinity,
+    })
+
    const limit = params.limit ?? DEFAULT_READ_LIMIT
    const offset = params.offset ?? 1
    const start = offset - 1
-    const lines = await file.text().then((text) => text.split("\n"))
-    if (start >= lines.length) throw new Error(`Offset ${offset} is out of range for this file (${lines.length} lines)`)
-
    const raw: string[] = []
    let bytes = 0
+    let lines = 0
    let truncatedByBytes = false
-    for (let i = start; i < Math.min(lines.length, start + limit); i++) {
-      const line = lines[i].length > MAX_LINE_LENGTH ? lines[i].substring(0, MAX_LINE_LENGTH) + "..." : lines[i]
-      const size = Buffer.byteLength(line, "utf-8") + (raw.length > 0 ? 1 : 0)
-      if (bytes + size > MAX_BYTES) {
-        truncatedByBytes = true
-        break
+    let hasMoreLines = false
+    try {
+      for await (const text of rl) {
+        lines += 1
+        if (lines <= start) continue
+
+        if (raw.length >= limit) {
+          hasMoreLines = true
+          continue
+        }
+
+        const line = text.length > MAX_LINE_LENGTH ? text.substring(0, MAX_LINE_LENGTH) + MAX_LINE_SUFFIX : text
+        const size = Buffer.byteLength(line, "utf-8") + (raw.length > 0 ? 1 : 0)
+        if (bytes + size > MAX_BYTES) {
+          truncatedByBytes = true
+          hasMoreLines = true
+          break
+        }
+
+        raw.push(line)
+        bytes += size
      }
-      raw.push(line)
-      bytes += size
+    } finally {
+      rl.close()
+      stream.destroy()
+    }
+
+    if (lines < offset && !(lines === 0 && offset === 1)) {
+      throw new Error(`Offset ${offset} is out of range for this file (${lines} lines)`)
    }

    const content = raw.map((line, index) => {
@@ -165,15 +194,15 @@ export const ReadTool = Tool.define("read", {
    let output = [`<path>${filepath}</path>`, `<type>file</type>`, "<content>"].join("\n")
    output += content.join("\n")

-    const totalLines = lines.length
+    const totalLines = lines
    const lastReadLine = offset + raw.length - 1
-    const hasMoreLines = totalLines > lastReadLine
+    const nextOffset = lastReadLine + 1
    const truncated = hasMoreLines || truncatedByBytes

    if (truncatedByBytes) {
-      output += `\n\n(Output truncated at ${MAX_BYTES} bytes. Use 'offset' parameter to read beyond line ${lastReadLine})`
+      output += `\n\n(Output capped at ${MAX_BYTES_LABEL}. Showing lines ${offset}-${lastReadLine}. Use offset=${nextOffset} to continue.)`
    } else if (hasMoreLines) {
-      output += `\n\n(File has more lines. Use 'offset' parameter to read beyond line ${lastReadLine})`
+      output += `\n\n(Showing lines ${offset}-${lastReadLine} of ${totalLines}. Use offset=${nextOffset} to continue.)`
    } else {
      output += `\n\n(End of file - total ${totalLines} lines)`
    }
@@ -199,7 +228,7 @@ export const ReadTool = Tool.define("read", {
  },
 })

-async function isBinaryFile(filepath: string, file: Bun.BunFile): Promise<boolean> {
+async function isBinaryFile(filepath: string, fileSize: number): Promise<boolean> {
  const ext = path.extname(filepath).toLowerCase()
  // binary check for common non-text extensions
  switch (ext) {
@@ -236,22 +265,25 @@ async function isBinaryFile(filepath: string, file: Bun.BunFile): Promise<boolea
      break
  }

-  const stat = await file.stat()
-  const fileSize = stat.size
  if (fileSize === 0) return false

-  const bufferSize = Math.min(4096, fileSize)
-  const buffer = await file.arrayBuffer()
-  if (buffer.byteLength === 0) return false
-  const bytes = new Uint8Array(buffer.slice(0, bufferSize))
+  const fh = await fs.promises.open(filepath, "r")
+  try {
+    const sampleSize = Math.min(4096, fileSize)
+    const bytes = Buffer.alloc(sampleSize)
+    const result = await fh.read(bytes, 0, sampleSize, 0)
+    if (result.bytesRead === 0) return false

-  let nonPrintableCount = 0
-  for (let i = 0; i < bytes.length; i++) {
-    if (bytes[i] === 0) return true
-    if (bytes[i] < 9 || (bytes[i] > 13 && bytes[i] < 32)) {
-      nonPrintableCount++
+    let nonPrintableCount = 0
+    for (let i = 0; i < result.bytesRead; i++) {
+      if (bytes[i] === 0) return true
+      if (bytes[i] < 9 || (bytes[i] > 13 && bytes[i] < 32)) {
+        nonPrintableCount++
+      }
    }
+    // If >30% non-printable characters, consider it binary
+    return nonPrintableCount / result.bytesRead > 0.3
+  } finally {
+    await fh.close()
  }
-  // If >30% non-printable characters, consider it binary
-  return nonPrintableCount / bytes.length > 0.3
 }
--- a/packages/opencode/test/tool/read.test.ts
+++ b/packages/opencode/test/tool/read.test.ts
@@ -211,8 +211,8 @@ describe("tool.read truncation", () => {
        const read = await ReadTool.init()
        const result = await read.execute({ filePath: path.join(tmp.path, "large.json") }, ctx)
        expect(result.metadata.truncated).toBe(true)
-        expect(result.output).toContain("Output truncated at")
-        expect(result.output).toContain("bytes")
+        expect(result.output).toContain("Output capped at")
+        expect(result.output).toContain("Use offset=")
      },
    })
  })
@@ -230,7 +230,8 @@ describe("tool.read truncation", () => {
        const read = await ReadTool.init()
        const result = await read.execute({ filePath: path.join(tmp.path, "many-lines.txt"), limit: 10 }, ctx)
        expect(result.metadata.truncated).toBe(true)
-        expect(result.output).toContain("File has more lines")
+        expect(result.output).toContain("Showing lines 1-10 of 100")
+        expect(result.output).toContain("Use offset=11")
        expect(result.output).toContain("line0")
        expect(result.output).toContain("line9")
        expect(result.output).not.toContain("line10")
@@ -267,6 +268,10 @@ describe("tool.read truncation", () => {
      fn: async () => {
        const read = await ReadTool.init()
        const result = await read.execute({ filePath: path.join(tmp.path, "offset.txt"), offset: 10, limit: 5 }, ctx)
+        expect(result.output).toContain("10: line10")
+        expect(result.output).toContain("14: line14")
+        expect(result.output).not.toContain("9: line10")
+        expect(result.output).not.toContain("15: line15")
        expect(result.output).toContain("line10")
        expect(result.output).toContain("line14")
        expect(result.output).not.toContain("line0")
@@ -293,6 +298,40 @@ describe("tool.read truncation", () => {
    })
  })

+  test("allows reading empty file at default offset", async () => {
+    await using tmp = await tmpdir({
+      init: async (dir) => {
+        await Bun.write(path.join(dir, "empty.txt"), "")
+      },
+    })
+    await Instance.provide({
+      directory: tmp.path,
+      fn: async () => {
+        const read = await ReadTool.init()
+        const result = await read.execute({ filePath: path.join(tmp.path, "empty.txt") }, ctx)
+        expect(result.metadata.truncated).toBe(false)
+        expect(result.output).toContain("End of file - total 0 lines")
+      },
+    })
+  })
+
+  test("throws when offset > 1 for empty file", async () => {
+    await using tmp = await tmpdir({
+      init: async (dir) => {
+        await Bun.write(path.join(dir, "empty.txt"), "")
+      },
+    })
+    await Instance.provide({
+      directory: tmp.path,
+      fn: async () => {
+        const read = await ReadTool.init()
+        await expect(read.execute({ filePath: path.join(tmp.path, "empty.txt"), offset: 2 }, ctx)).rejects.toThrow(
+          "Offset 2 is out of range for this file (0 lines)",
+        )
+      },
+    })
+  })
+
  test("does not mark final directory page as truncated", async () => {
    await using tmp = await tmpdir({
      init: async (dir) => {
@@ -324,7 +363,7 @@ describe("tool.read truncation", () => {
      fn: async () => {
        const read = await ReadTool.init()
        const result = await read.execute({ filePath: path.join(tmp.path, "long-line.txt") }, ctx)
-        expect(result.output).toContain("...")
+        expect(result.output).toContain("(line truncated to 2000 chars)")
        expect(result.output.length).toBeLessThan(3000)
      },
    })
@@ -425,3 +464,40 @@ describe("tool.read loaded instructions", () => {
    })
  })
 })
+
+describe("tool.read binary detection", () => {
+  test("rejects text extension files with null bytes", async () => {
+    await using tmp = await tmpdir({
+      init: async (dir) => {
+        const bytes = Buffer.from([0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00, 0x77, 0x6f, 0x72, 0x6c, 0x64])
+        await Bun.write(path.join(dir, "null-byte.txt"), bytes)
+      },
+    })
+    await Instance.provide({
+      directory: tmp.path,
+      fn: async () => {
+        const read = await ReadTool.init()
+        await expect(read.execute({ filePath: path.join(tmp.path, "null-byte.txt") }, ctx)).rejects.toThrow(
+          "Cannot read binary file",
+        )
+      },
+    })
+  })
+
+  test("rejects known binary extensions", async () => {
+    await using tmp = await tmpdir({
+      init: async (dir) => {
+        await Bun.write(path.join(dir, "module.wasm"), "not really wasm")
+      },
+    })
+    await Instance.provide({
+      directory: tmp.path,
+      fn: async () => {
+        const read = await ReadTool.init()
+        await expect(read.execute({ filePath: path.join(tmp.path, "module.wasm") }, ctx)).rejects.toThrow(
+          "Cannot read binary file",
+        )
+      },
+    })
+  })
+})