Skip to content

Commit e277635

Browse files
committed
feat: add content-based MIME detection and image resizing
MIME detection: DetectMimeType now sniffs the file content first (magic bytes via http.DetectContentType + manual WebP check) and falls back to extension matching only when the content cannot be classified. This correctly identifies images even when they have wrong or missing extensions.

Image resizing: New ResizeImage/ResizeImageBase64 functions ensure images stay within provider limits (max 2000x2000 pixels, max 4.5MB). Uses CatmullRom (bicubic) interpolation via golang.org/x/image/draw. Progressive fallback: tries PNG vs JPEG, then decreasing JPEG quality (70/55/40), then reduced dimensions (75%/50%/35%/25%). FormatDimensionNote generates coordinate mapping notes so models can translate between resized and original image coordinates.

Both image ingestion points now resize:
- read_file tool (filesystem.go): resizes before base64 encoding
- @file attachments (app.go): reads, resizes, and inlines as base64 data URL (MessagePartTypeImageURL) instead of MessagePartTypeFile, making image attachments work across all providers, not just Anthropic

Signed-off-by: Djordje Lukic <djordje.lukic@docker.com>
1 parent febde9b commit e277635

File tree

10 files changed

+599
-73
lines changed

10 files changed

+599
-73
lines changed

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ require (
6060
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.40.0
6161
go.opentelemetry.io/otel/sdk v1.40.0
6262
go.opentelemetry.io/otel/trace v1.40.0
63+
golang.org/x/image v0.36.0
6364
golang.org/x/net v0.51.0
6465
golang.org/x/oauth2 v0.35.0
6566
golang.org/x/sync v0.19.0

go.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -529,6 +529,8 @@ golang.org/x/crypto v0.48.0 h1:/VRzVqiRSggnhY7gNRxPauEQ5Drw9haKdM0jqfcCFts=
529529
golang.org/x/crypto v0.48.0/go.mod h1:r0kV5h3qnFPlQnBSrULhlsRfryS2pmewsg+XfMgkVos=
530530
golang.org/x/exp v0.0.0-20251023183803-a4bb9ffd2546 h1:mgKeJMpvi0yx/sU5GsxQ7p6s2wtOnGAHZWCHUM4KGzY=
531531
golang.org/x/exp v0.0.0-20251023183803-a4bb9ffd2546/go.mod h1:j/pmGrbnkbPtQfxEe5D0VQhZC6qKbfKifgD0oM7sR70=
532+
golang.org/x/image v0.36.0 h1:Iknbfm1afbgtwPTmHnS2gTM/6PPZfH+z2EFuOkSbqwc=
533+
golang.org/x/image v0.36.0/go.mod h1:YsWD2TyyGKiIX1kZlu9QfKIsQ4nAAK9bdgdrIsE7xy4=
532534
golang.org/x/mod v0.33.0 h1:tHFzIWbBifEmbwtGz65eaWyGiGZatSrT9prnU8DbVL8=
533535
golang.org/x/mod v0.33.0/go.mod h1:swjeQEj+6r7fODbD2cqrnje9PnziFuw4bmLbBZFrQ5w=
534536
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=

pkg/app/app.go

Lines changed: 36 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package app
22

33
import (
44
"context"
5+
"encoding/base64"
56
"fmt"
67
"log/slog"
78
"os"
@@ -396,13 +397,41 @@ func (a *App) processFileAttachment(ctx context.Context, att messages.Attachment
396397
textBuilder.WriteString(content)
397398

398399
case chat.IsSupportedMimeType(mimeType):
399-
*binaryParts = append(*binaryParts, chat.MessagePart{
400-
Type: chat.MessagePartTypeFile,
401-
File: &chat.MessageFile{
402-
Path: absPath,
403-
MimeType: mimeType,
404-
},
405-
})
400+
if chat.IsImageMimeType(mimeType) {
401+
// Read, resize if needed, and inline as base64 data URL.
402+
// This works across all providers (not just Anthropic's File API).
403+
imgData, readErr := os.ReadFile(absPath)
404+
if readErr != nil {
405+
slog.Warn("skipping attachment: failed to read image", "path", absPath, "error", readErr)
406+
a.sendEvent(ctx, runtime.Warning(fmt.Sprintf("Skipped attachment %s: failed to read image", att.Name), ""))
407+
return
408+
}
409+
resized, resizeErr := chat.ResizeImage(imgData, mimeType)
410+
if resizeErr != nil {
411+
slog.Warn("image resize failed, sending original", "path", absPath, "error", resizeErr)
412+
resized = &chat.ImageResizeResult{Data: imgData, MimeType: mimeType}
413+
}
414+
dataURL := fmt.Sprintf("data:%s;base64,%s", resized.MimeType, base64.StdEncoding.EncodeToString(resized.Data))
415+
*binaryParts = append(*binaryParts, chat.MessagePart{
416+
Type: chat.MessagePartTypeImageURL,
417+
ImageURL: &chat.MessageImageURL{
418+
URL: dataURL,
419+
Detail: chat.ImageURLDetailAuto,
420+
},
421+
})
422+
if note := chat.FormatDimensionNote(resized); note != "" {
423+
textBuilder.WriteString("\n" + note)
424+
}
425+
} else {
426+
// Non-image supported types (e.g. PDF) use the file upload path.
427+
*binaryParts = append(*binaryParts, chat.MessagePart{
428+
Type: chat.MessagePartTypeFile,
429+
File: &chat.MessageFile{
430+
Path: absPath,
431+
MimeType: mimeType,
432+
},
433+
})
434+
}
406435

407436
default:
408437
slog.Warn("skipping attachment: unsupported file type", "path", absPath, "mime_type", mimeType)

pkg/chat/chat.go

Lines changed: 31 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -173,32 +173,40 @@ type MessageStream interface {
173173
Close()
174174
}
175175

176-
// DetectMimeType returns the MIME type for a file based on its extension.
177-
// This is the canonical implementation used across all packages for consistency.
178-
// For binary file types (images, PDF), returns the specific MIME type.
179-
// For text-based files, returns "text/plain".
180-
// Unrecognized extensions return "application/octet-stream".
176+
// DetectMimeType returns the MIME type for a file by reading its first 512
177+
// bytes and inspecting the content (magic bytes). For text-based files that
178+
// http.DetectContentType cannot distinguish (e.g. source code vs plain text),
179+
// it falls back to extension matching. This is the canonical implementation
180+
// used across all packages for consistency.
181181
func DetectMimeType(filePath string) string {
182-
ext := strings.ToLower(filepath.Ext(filePath))
183-
switch ext {
184-
// Images
185-
case ".jpg", ".jpeg":
186-
return "image/jpeg"
187-
case ".png":
188-
return "image/png"
189-
case ".gif":
190-
return "image/gif"
191-
case ".webp":
192-
return "image/webp"
193-
// PDF
194-
case ".pdf":
195-
return "application/pdf"
196-
default:
197-
if isTextExtension(ext) {
198-
return "text/plain"
199-
}
182+
// Content sniffing — reliably detects images, PDF, etc.
183+
if ct := detectMimeTypeFromFile(filePath); ct != "application/octet-stream" {
184+
return ct
185+
}
186+
187+
// http.DetectContentType returns "application/octet-stream" for text
188+
// files it can't classify, so fall back to extension for those.
189+
if isTextExtension(strings.ToLower(filepath.Ext(filePath))) {
190+
return "text/plain"
191+
}
192+
return "application/octet-stream"
193+
}
194+
195+
// detectMimeTypeFromFile reads the first 512 bytes of a file and uses
196+
// content-based detection (magic bytes) to determine the MIME type.
197+
func detectMimeTypeFromFile(filePath string) string {
198+
f, err := os.Open(filePath)
199+
if err != nil {
200+
return "application/octet-stream"
201+
}
202+
defer f.Close()
203+
204+
buf := make([]byte, 512)
205+
n, _ := f.Read(buf)
206+
if n == 0 {
200207
return "application/octet-stream"
201208
}
209+
return DetectMimeTypeByContent(buf[:n])
202210
}
203211

204212
// IsImageMimeType returns true if the MIME type is a supported image type.

pkg/chat/chat_test.go

Lines changed: 49 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,7 @@ func TestDetectMimeType(t *testing.T) {
1515
path string
1616
expected string
1717
}{
18-
// Images
19-
{"photo.jpg", "image/jpeg"},
20-
{"photo.jpeg", "image/jpeg"},
21-
{"photo.png", "image/png"},
22-
{"photo.gif", "image/gif"},
23-
{"photo.webp", "image/webp"},
24-
// PDF
25-
{"document.pdf", "application/pdf"},
26-
// Text files - all map to text/plain
18+
// Text files (detected by extension fallback) - all map to text/plain
2719
{"readme.txt", "text/plain"},
2820
{"readme.md", "text/plain"},
2921
{"readme.markdown", "text/plain"},
@@ -46,7 +38,7 @@ func TestDetectMimeType(t *testing.T) {
4638
{"query.graphql", "text/plain"},
4739
{"icon.svg", "text/plain"},
4840
{"changes.diff", "text/plain"},
49-
// Unknown binary
41+
// Unknown binary (no file to sniff, unknown extension)
5042
{"archive.tar.gz", "application/octet-stream"},
5143
{"program.exe", "application/octet-stream"},
5244
{"movie.mp4", "application/octet-stream"},
@@ -169,3 +161,50 @@ func TestReadFileForInline_NotFound(t *testing.T) {
169161
_, err := ReadFileForInline("/nonexistent/file.txt")
170162
assert.Error(t, err)
171163
}
164+
165+
func TestDetectMimeType_ContentSniffing(t *testing.T) {
166+
t.Parallel()
167+
168+
tests := []struct {
169+
name string
170+
filename string
171+
content []byte
172+
want string
173+
}{
174+
{
175+
name: "png with unknown extension",
176+
filename: "image.bin",
177+
content: []byte{0x89, 'P', 'N', 'G', 0x0D, 0x0A, 0x1A, 0x0A, 0, 0, 0, 0},
178+
want: "image/png",
179+
},
180+
{
181+
name: "jpeg with unknown extension",
182+
filename: "photo.dat",
183+
content: []byte{0xFF, 0xD8, 0xFF, 0xE0, 0, 0, 0, 0},
184+
want: "image/jpeg",
185+
},
186+
{
187+
name: "webp with unknown extension",
188+
filename: "anim.unknown",
189+
content: append([]byte("RIFF"), append([]byte{0, 0, 0, 0}, []byte("WEBP")...)...),
190+
want: "image/webp",
191+
},
192+
{
193+
name: "plain text with unknown extension",
194+
filename: "data.xyz",
195+
content: []byte("just some text content here"),
196+
want: "text/plain; charset=utf-8",
197+
},
198+
}
199+
200+
for _, tt := range tests {
201+
t.Run(tt.name, func(t *testing.T) {
202+
t.Parallel()
203+
dir := t.TempDir()
204+
path := filepath.Join(dir, tt.filename)
205+
require.NoError(t, os.WriteFile(path, tt.content, 0o644))
206+
got := DetectMimeType(path)
207+
assert.Equal(t, tt.want, got)
208+
})
209+
}
210+
}

0 commit comments

Comments
 (0)