Skip to content

Commit 765528e

Browse files
committed
fix: use chunked streaming for search to avoid memory blowup
Replace fs.readFile with chunked streaming in searchInArtifact() to keep memory usage bounded for large command outputs. Instead of loading the entire file into memory, the search now reads the file in 64KB chunks and processes lines as they are encountered. This addresses the concern that loading 100MB+ build logs into memory defeats the purpose of the persisted output feature.
1 parent b0ec581 commit 765528e

File tree

2 files changed

+85
-33
lines changed

2 files changed

+85
-33
lines changed

src/core/tools/ReadCommandOutputTool.ts

Lines changed: 59 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -268,10 +268,14 @@ export class ReadCommandOutputTool extends BaseTool<"read_command_output"> {
268268
}
269269

270270
/**
271-
* Search artifact content for lines matching a pattern.
271+
* Search artifact content for lines matching a pattern using chunked streaming.
272272
*
273-
* Performs grep-like searching through the artifact file. The pattern
274-
* is treated as a case-insensitive regex. If the pattern is invalid
273+
* Performs grep-like searching through the artifact file using bounded memory.
274+
* Instead of loading the entire file into memory, this reads in fixed-size chunks
275+
* and processes lines as they are encountered. This keeps memory usage predictable
276+
* even for very large command outputs (e.g., 100MB+ build logs).
277+
*
278+
* The pattern is treated as a case-insensitive regex. If the pattern is invalid
275279
* regex syntax, it's escaped and treated as a literal string.
276280
*
277281
* Results are limited by the byte limit to prevent excessive output.
@@ -289,9 +293,7 @@ export class ReadCommandOutputTool extends BaseTool<"read_command_output"> {
289293
totalSize: number,
290294
limit: number,
291295
): Promise<string> {
292-
// Read the entire file for search (we need all content to search)
293-
const content = await fs.readFile(artifactPath, "utf8")
294-
const lines = content.split("\n")
296+
const CHUNK_SIZE = 64 * 1024 // 64KB chunks for bounded memory
295297

296298
// Create case-insensitive regex for search
297299
let regex: RegExp
@@ -302,23 +304,65 @@ export class ReadCommandOutputTool extends BaseTool<"read_command_output"> {
302304
regex = new RegExp(this.escapeRegExp(pattern), "i")
303305
}
304306

305-
// Find matching lines with their line numbers
307+
const fileHandle = await fs.open(artifactPath, "r")
306308
const matches: Array<{ lineNumber: number; content: string }> = []
307309
let totalMatchBytes = 0
310+
let lineNumber = 0
311+
let partialLine = "" // Holds incomplete line from previous chunk
312+
let bytesRead = 0
313+
let hitLimit = false
308314

309-
for (let i = 0; i < lines.length; i++) {
310-
if (regex.test(lines[i])) {
311-
const lineContent = lines[i]
312-
const lineBytes = Buffer.byteLength(lineContent, "utf8")
315+
try {
316+
while (bytesRead < totalSize && !hitLimit) {
317+
const chunkSize = Math.min(CHUNK_SIZE, totalSize - bytesRead)
318+
const buffer = Buffer.alloc(chunkSize)
319+
const result = await fileHandle.read(buffer, 0, chunkSize, bytesRead)
313320

314-
// Stop if we've exceeded the byte limit
315-
if (totalMatchBytes + lineBytes > limit) {
321+
if (result.bytesRead === 0) {
316322
break
317323
}
318324

319-
matches.push({ lineNumber: i + 1, content: lineContent })
320-
totalMatchBytes += lineBytes
325+
const chunk = buffer.slice(0, result.bytesRead).toString("utf8")
326+
bytesRead += result.bytesRead
327+
328+
// Combine with partial line from previous chunk
329+
const combined = partialLine + chunk
330+
const lines = combined.split("\n")
331+
332+
// Last element may be incomplete (no trailing newline), save for next iteration
333+
partialLine = lines.pop() ?? ""
334+
335+
// Process complete lines
336+
for (const line of lines) {
337+
lineNumber++
338+
339+
if (regex.test(line)) {
340+
const lineBytes = Buffer.byteLength(line, "utf8")
341+
342+
// Stop if we've exceeded the byte limit
343+
if (totalMatchBytes + lineBytes > limit) {
344+
hitLimit = true
345+
break
346+
}
347+
348+
matches.push({ lineNumber, content: line })
349+
totalMatchBytes += lineBytes
350+
}
351+
}
352+
}
353+
354+
// Process any remaining partial line at end of file
355+
if (!hitLimit && partialLine.length > 0) {
356+
lineNumber++
357+
if (regex.test(partialLine)) {
358+
const lineBytes = Buffer.byteLength(partialLine, "utf8")
359+
if (totalMatchBytes + lineBytes <= limit) {
360+
matches.push({ lineNumber, content: partialLine })
361+
}
362+
}
321363
}
364+
} finally {
365+
await fileHandle.close()
322366
}
323367

324368
const artifactId = path.basename(artifactPath)

src/core/tools/__tests__/ReadCommandOutputTool.test.ts

Lines changed: 26 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -276,13 +276,31 @@ describe("ReadCommandOutputTool", () => {
276276
})
277277

278278
describe("Search filtering", () => {
279+
// Helper to setup file handle mock for search (which now uses streaming)
280+
const setupSearchMock = (content: string) => {
281+
const buffer = Buffer.from(content)
282+
const fileSize = buffer.length
283+
vi.mocked(fs.stat).mockResolvedValue({ size: fileSize } as any)
284+
285+
// Mock streaming read - return entire content in one chunk (simulates small file)
286+
mockFileHandle.read.mockImplementation(
287+
(buf: Buffer, bufOffset: number, length: number, position: number | null) => {
288+
const pos = position ?? 0
289+
if (pos >= fileSize) {
290+
return Promise.resolve({ bytesRead: 0 })
291+
}
292+
const bytesToRead = Math.min(length, fileSize - pos)
293+
buffer.copy(buf, 0, pos, pos + bytesToRead)
294+
return Promise.resolve({ bytesRead: bytesToRead })
295+
},
296+
)
297+
}
298+
279299
it("should filter lines matching pattern", async () => {
280300
const artifactId = "cmd-1706119234567.txt"
281301
const content = "Line 1: error occurred\nLine 2: success\nLine 3: error found\nLine 4: complete\n"
282-
const fileSize = Buffer.byteLength(content, "utf8")
283302

284-
vi.mocked(fs.stat).mockResolvedValue({ size: fileSize } as any)
285-
vi.mocked(fs.readFile).mockResolvedValue(content)
303+
setupSearchMock(content)
286304

287305
await tool.execute({ artifact_id: artifactId, search: "error" }, mockTask, mockCallbacks)
288306

@@ -296,10 +314,8 @@ describe("ReadCommandOutputTool", () => {
296314
it("should use case-insensitive matching", async () => {
297315
const artifactId = "cmd-1706119234567.txt"
298316
const content = "ERROR: Something bad\nwarning: minor issue\nERROR: Another problem\n"
299-
const fileSize = Buffer.byteLength(content, "utf8")
300317

301-
vi.mocked(fs.stat).mockResolvedValue({ size: fileSize } as any)
302-
vi.mocked(fs.readFile).mockResolvedValue(content)
318+
setupSearchMock(content)
303319

304320
await tool.execute({ artifact_id: artifactId, search: "error" }, mockTask, mockCallbacks)
305321

@@ -311,10 +327,8 @@ describe("ReadCommandOutputTool", () => {
311327
it("should show match count and line numbers", async () => {
312328
const artifactId = "cmd-1706119234567.txt"
313329
const content = "Line 1\nError on line 2\nLine 3\nError on line 4\n"
314-
const fileSize = Buffer.byteLength(content, "utf8")
315330

316-
vi.mocked(fs.stat).mockResolvedValue({ size: fileSize } as any)
317-
vi.mocked(fs.readFile).mockResolvedValue(content)
331+
setupSearchMock(content)
318332

319333
await tool.execute({ artifact_id: artifactId, search: "Error" }, mockTask, mockCallbacks)
320334

@@ -327,10 +341,8 @@ describe("ReadCommandOutputTool", () => {
327341
it("should handle empty search results gracefully", async () => {
328342
const artifactId = "cmd-1706119234567.txt"
329343
const content = "Line 1\nLine 2\nLine 3\n"
330-
const fileSize = Buffer.byteLength(content, "utf8")
331344

332-
vi.mocked(fs.stat).mockResolvedValue({ size: fileSize } as any)
333-
vi.mocked(fs.readFile).mockResolvedValue(content)
345+
setupSearchMock(content)
334346

335347
await tool.execute({ artifact_id: artifactId, search: "NOTFOUND" }, mockTask, mockCallbacks)
336348

@@ -341,10 +353,8 @@ describe("ReadCommandOutputTool", () => {
341353
it("should handle regex patterns in search", async () => {
342354
const artifactId = "cmd-1706119234567.txt"
343355
const content = "test123\ntest456\nabc789\ntest000\n"
344-
const fileSize = Buffer.byteLength(content, "utf8")
345356

346-
vi.mocked(fs.stat).mockResolvedValue({ size: fileSize } as any)
347-
vi.mocked(fs.readFile).mockResolvedValue(content)
357+
setupSearchMock(content)
348358

349359
await tool.execute({ artifact_id: artifactId, search: "test\\d+" }, mockTask, mockCallbacks)
350360

@@ -358,10 +368,8 @@ describe("ReadCommandOutputTool", () => {
358368
it("should handle invalid regex patterns by treating as literal", async () => {
359369
const artifactId = "cmd-1706119234567.txt"
360370
const content = "Line with [brackets]\nLine without\n"
361-
const fileSize = Buffer.byteLength(content, "utf8")
362371

363-
vi.mocked(fs.stat).mockResolvedValue({ size: fileSize } as any)
364-
vi.mocked(fs.readFile).mockResolvedValue(content)
372+
setupSearchMock(content)
365373

366374
// Invalid regex but valid as literal string
367375
await tool.execute({ artifact_id: artifactId, search: "[" }, mockTask, mockCallbacks)

0 commit comments

Comments
 (0)