Skip to content

Commit 6e54d4d

Browse files
committed
fix: address PR docker#1889 review feedback for vision support
- Guard against decompression bombs in ResizeImage by rejecting images whose decoded width or height exceeds 20000 pixels before further processing
- Fix image stripping for models with unknown capabilities: only strip images when the model's input modalities are explicitly known and do not include image
- Check file existence before calling IsImageFile in the read_file handler to provide clearer errors and avoid type detection on missing files

Assisted-By: cagent
1 parent 93b9555 commit 6e54d4d

File tree

3 files changed

+30
-7
lines changed

3 files changed

+30
-7
lines changed

pkg/chat/image.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@ const (
2222
// MaxImageBytes is the maximum file size for images sent to LLM providers (4.5MB,
2323
// below Anthropic's 5MB limit).
2424
MaxImageBytes = 4_500_000
25+
// maxDecodedDimension is the absolute upper bound, in pixels, on each side of a decoded image.
26+
// Images exceeding this are rejected before processing to guard against
27+
// decompression bombs (small files that expand to huge pixel buffers).
28+
maxDecodedDimension = 20_000
2529
// jpegQuality is the default JPEG encoding quality.
2630
jpegQuality = 80
2731
)
@@ -75,6 +79,13 @@ func ResizeImage(data []byte, mimeType string) (*ImageResizeResult, error) {
7579
bounds := img.Bounds()
7680
origW, origH := bounds.Dx(), bounds.Dy()
7781

82+
// Guard against decompression bombs: reject images whose decoded
83+
// dimensions are absurdly large. A small compressed file can expand
84+
// to over a gigabyte in memory (e.g. 20000×20000×4 bytes ≈ 1.6 GB).
85+
if origW > maxDecodedDimension || origH > maxDecodedDimension {
86+
return nil, fmt.Errorf("image dimensions too large: %dx%d (max %d)", origW, origH, maxDecodedDimension)
87+
}
88+
7889
// If the image already fits within all limits, return unchanged.
7990
if origW <= MaxImageDimension && origH <= MaxImageDimension && len(data) <= MaxImageBytes {
8091
return &ImageResizeResult{

pkg/runtime/runtime.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1121,7 +1121,7 @@ func (r *LocalRuntime) RunStream(ctx context.Context, sess *session.Session) <-c
11211121
// Strip image content from messages if the model doesn't support image input.
11221122
// This prevents API errors when conversation history contains images (e.g. from
11231123
// tool results or user attachments) but the current model is text-only.
1124-
if m != nil && !slices.Contains(m.Modalities.Input, "image") {
1124+
if m != nil && len(m.Modalities.Input) > 0 && !slices.Contains(m.Modalities.Input, "image") {
11251125
messages = stripImageContent(messages)
11261126
}
11271127

pkg/tools/builtin/filesystem.go

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -522,12 +522,8 @@ func (t *FilesystemTool) handleListDirectory(_ context.Context, args ListDirecto
522522
func (t *FilesystemTool) handleReadFile(_ context.Context, args ReadFileArgs) (*tools.ToolCallResult, error) {
523523
resolvedPath := t.resolvePath(args.Path)
524524

525-
// Check if the file is an image
526-
if chat.IsImageFile(resolvedPath) {
527-
return t.readImageFile(resolvedPath, args.Path)
528-
}
529-
530-
content, err := os.ReadFile(resolvedPath)
525+
// Check if the file exists before any type detection.
526+
info, err := os.Stat(resolvedPath)
531527
if err != nil {
532528
var errMsg string
533529
if os.IsNotExist(err) {
@@ -545,6 +541,22 @@ func (t *FilesystemTool) handleReadFile(_ context.Context, args ReadFileArgs) (*
545541
}, nil
546542
}
547543

544+
// Only check for image files on regular files (not directories, etc.)
545+
if info.Mode().IsRegular() && chat.IsImageFile(resolvedPath) {
546+
return t.readImageFile(resolvedPath, args.Path)
547+
}
548+
549+
content, err := os.ReadFile(resolvedPath)
550+
if err != nil {
551+
return &tools.ToolCallResult{
552+
Output: err.Error(),
553+
IsError: true,
554+
Meta: ReadFileMeta{
555+
Error: err.Error(),
556+
},
557+
}, nil
558+
}
559+
548560
return &tools.ToolCallResult{
549561
Output: string(content),
550562
Meta: ReadFileMeta{

0 commit comments

Comments
 (0)