fix: use chunked streaming for search to avoid memory blowup

hannesrudolph · hannesrudolph · commit 765528eae1af · 2026-01-27T12:05:36.000-07:00
Replace fs.readFile with chunked streaming in searchInArtifact() to keep
memory usage bounded for large command outputs. Instead of loading the
entire file into memory, the search now reads the file in 64KB chunks and
processes lines as they are encountered.

This addresses the concern that loading 100MB+ build logs into memory
defeats the purpose of the persisted output feature.
diff --git a/src/core/tools/ReadCommandOutputTool.ts b/src/core/tools/ReadCommandOutputTool.ts
@@ -268,10 +268,14 @@ export class ReadCommandOutputTool extends BaseTool<"read_command_output"> {
 	}
 
 	/**
-	 * Search artifact content for lines matching a pattern.
+	 * Search artifact content for lines matching a pattern using chunked streaming.
 	 *
-	 * Performs grep-like searching through the artifact file. The pattern
-	 * is treated as a case-insensitive regex. If the pattern is invalid
+	 * Performs grep-like searching through the artifact file using bounded memory.
+	 * Instead of loading the entire file into memory, this reads in fixed-size chunks
+	 * and processes lines as they are encountered. This keeps memory usage predictable
+	 * even for very large command outputs (e.g., 100MB+ build logs).
+	 *
+	 * The pattern is treated as a case-insensitive regex. If the pattern is invalid
 	 * regex syntax, it's escaped and treated as a literal string.
 	 *
 	 * Results are limited by the byte limit to prevent excessive output.
@@ -289,9 +293,7 @@ export class ReadCommandOutputTool extends BaseTool<"read_command_output"> {
 		totalSize: number,
 		limit: number,
 	): Promise<string> {
-		// Read the entire file for search (we need all content to search)
-		const content = await fs.readFile(artifactPath, "utf8")
-		const lines = content.split("\n")
+		const CHUNK_SIZE = 64 * 1024 // 64KB chunks for bounded memory
 
 		// Create case-insensitive regex for search
 		let regex: RegExp
@@ -302,23 +304,65 @@ export class ReadCommandOutputTool extends BaseTool<"read_command_output"> {
 			regex = new RegExp(this.escapeRegExp(pattern), "i")
 		}
 
-		// Find matching lines with their line numbers
+		const fileHandle = await fs.open(artifactPath, "r")
 		const matches: Array<{ lineNumber: number; content: string }> = []
 		let totalMatchBytes = 0
+		let lineNumber = 0
+		let partialLine = "" // Holds incomplete line from previous chunk
+		let bytesRead = 0
+		let hitLimit = false
 
-		for (let i = 0; i < lines.length; i++) {
-			if (regex.test(lines[i])) {
-				const lineContent = lines[i]
-				const lineBytes = Buffer.byteLength(lineContent, "utf8")
+		try {
+			while (bytesRead < totalSize && !hitLimit) {
+				const chunkSize = Math.min(CHUNK_SIZE, totalSize - bytesRead)
+				const buffer = Buffer.alloc(chunkSize)
+				const result = await fileHandle.read(buffer, 0, chunkSize, bytesRead)
 
-				// Stop if we've exceeded the byte limit
-				if (totalMatchBytes + lineBytes > limit) {
+				if (result.bytesRead === 0) {
 					break
 				}
 
-				matches.push({ lineNumber: i + 1, content: lineContent })
-				totalMatchBytes += lineBytes
+				const chunk = buffer.slice(0, result.bytesRead).toString("utf8")
+				bytesRead += result.bytesRead
+
+				// Combine with partial line from previous chunk
+				const combined = partialLine + chunk
+				const lines = combined.split("\n")
+
+				// Last element may be incomplete (no trailing newline), save for next iteration
+				partialLine = lines.pop() ?? ""
+
+				// Process complete lines
+				for (const line of lines) {
+					lineNumber++
+
+					if (regex.test(line)) {
+						const lineBytes = Buffer.byteLength(line, "utf8")
+
+						// Stop if we've exceeded the byte limit
+						if (totalMatchBytes + lineBytes > limit) {
+							hitLimit = true
+							break
+						}
+
+						matches.push({ lineNumber, content: line })
+						totalMatchBytes += lineBytes
+					}
+				}
+			}
+
+			// Process any remaining partial line at end of file
+			if (!hitLimit && partialLine.length > 0) {
+				lineNumber++
+				if (regex.test(partialLine)) {
+					const lineBytes = Buffer.byteLength(partialLine, "utf8")
+					if (totalMatchBytes + lineBytes <= limit) {
+						matches.push({ lineNumber, content: partialLine })
+					}
+				}
 			}
+		} finally {
+			await fileHandle.close()
 		}
 
 		const artifactId = path.basename(artifactPath)
diff --git a/src/core/tools/__tests__/ReadCommandOutputTool.test.ts b/src/core/tools/__tests__/ReadCommandOutputTool.test.ts
@@ -276,13 +276,31 @@ describe("ReadCommandOutputTool", () => {
 	})
 
 	describe("Search filtering", () => {
+		// Helper to setup file handle mock for search (which now uses streaming)
+		const setupSearchMock = (content: string) => {
+			const buffer = Buffer.from(content)
+			const fileSize = buffer.length
+			vi.mocked(fs.stat).mockResolvedValue({ size: fileSize } as any)
+
+			// Mock streaming read - return entire content in one chunk (simulates small file)
+			mockFileHandle.read.mockImplementation(
+				(buf: Buffer, bufOffset: number, length: number, position: number | null) => {
+					const pos = position ?? 0
+					if (pos >= fileSize) {
+						return Promise.resolve({ bytesRead: 0 })
+					}
+					const bytesToRead = Math.min(length, fileSize - pos)
+					buffer.copy(buf, 0, pos, pos + bytesToRead)
+					return Promise.resolve({ bytesRead: bytesToRead })
+				},
+			)
+		}
+
 		it("should filter lines matching pattern", async () => {
 			const artifactId = "cmd-1706119234567.txt"
 			const content = "Line 1: error occurred\nLine 2: success\nLine 3: error found\nLine 4: complete\n"
-			const fileSize = Buffer.byteLength(content, "utf8")
 
-			vi.mocked(fs.stat).mockResolvedValue({ size: fileSize } as any)
-			vi.mocked(fs.readFile).mockResolvedValue(content)
+			setupSearchMock(content)
 
 			await tool.execute({ artifact_id: artifactId, search: "error" }, mockTask, mockCallbacks)
 
@@ -296,10 +314,8 @@ describe("ReadCommandOutputTool", () => {
 		it("should use case-insensitive matching", async () => {
 			const artifactId = "cmd-1706119234567.txt"
 			const content = "ERROR: Something bad\nwarning: minor issue\nERROR: Another problem\n"
-			const fileSize = Buffer.byteLength(content, "utf8")
 
-			vi.mocked(fs.stat).mockResolvedValue({ size: fileSize } as any)
-			vi.mocked(fs.readFile).mockResolvedValue(content)
+			setupSearchMock(content)
 
 			await tool.execute({ artifact_id: artifactId, search: "error" }, mockTask, mockCallbacks)
 
@@ -311,10 +327,8 @@ describe("ReadCommandOutputTool", () => {
 		it("should show match count and line numbers", async () => {
 			const artifactId = "cmd-1706119234567.txt"
 			const content = "Line 1\nError on line 2\nLine 3\nError on line 4\n"
-			const fileSize = Buffer.byteLength(content, "utf8")
 
-			vi.mocked(fs.stat).mockResolvedValue({ size: fileSize } as any)
-			vi.mocked(fs.readFile).mockResolvedValue(content)
+			setupSearchMock(content)
 
 			await tool.execute({ artifact_id: artifactId, search: "Error" }, mockTask, mockCallbacks)
 
@@ -327,10 +341,8 @@ describe("ReadCommandOutputTool", () => {
 		it("should handle empty search results gracefully", async () => {
 			const artifactId = "cmd-1706119234567.txt"
 			const content = "Line 1\nLine 2\nLine 3\n"
-			const fileSize = Buffer.byteLength(content, "utf8")
 
-			vi.mocked(fs.stat).mockResolvedValue({ size: fileSize } as any)
-			vi.mocked(fs.readFile).mockResolvedValue(content)
+			setupSearchMock(content)
 
 			await tool.execute({ artifact_id: artifactId, search: "NOTFOUND" }, mockTask, mockCallbacks)
 
@@ -341,10 +353,8 @@ describe("ReadCommandOutputTool", () => {
 		it("should handle regex patterns in search", async () => {
 			const artifactId = "cmd-1706119234567.txt"
 			const content = "test123\ntest456\nabc789\ntest000\n"
-			const fileSize = Buffer.byteLength(content, "utf8")
 
-			vi.mocked(fs.stat).mockResolvedValue({ size: fileSize } as any)
-			vi.mocked(fs.readFile).mockResolvedValue(content)
+			setupSearchMock(content)
 
 			await tool.execute({ artifact_id: artifactId, search: "test\\d+" }, mockTask, mockCallbacks)
 
@@ -358,10 +368,8 @@ describe("ReadCommandOutputTool", () => {
 		it("should handle invalid regex patterns by treating as literal", async () => {
 			const artifactId = "cmd-1706119234567.txt"
 			const content = "Line with [brackets]\nLine without\n"
-			const fileSize = Buffer.byteLength(content, "utf8")
 
-			vi.mocked(fs.stat).mockResolvedValue({ size: fileSize } as any)
-			vi.mocked(fs.readFile).mockResolvedValue(content)
+			setupSearchMock(content)
 
 			// Invalid regex but valid as literal string
 			await tool.execute({ artifact_id: artifactId, search: "[" }, mockTask, mockCallbacks)