Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions docs/agent.md
Original file line number Diff line number Diff line change
Expand Up @@ -241,9 +241,9 @@ The processor builds an aggregated result containing all accumulated data for th

The system uses two separate mechanisms to inform the agent about available files:

- **User attachments**: The `_AttachmentFilter` (used in `AssistantInvoker`) appends text metadata to user message
content (e.g., `Attachment X, of type Y, url Z`). This is simple, direct, and preserves the natural conversation
flow.
- **Attachments**: The `_AttachmentFilter` (used in `AssistantInvoker`) appends structured XML metadata
(`<attachments>`) to message content. Each attachment is represented as an `<attachment>` element with
`<title>`, `<type>`, `<url>`, and optionally `<reference_url>` sub-elements.
- **Admin context files**: The Attachment Notification Injector uses synthetic tool call/result messages via the
`available_context` internal tool. This provides structured metadata without modifying user messages.

Expand Down
32 changes: 24 additions & 8 deletions src/quickapp/agent/_attachment_filter.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import copy
import logging
from xml.sax.saxutils import escape

from aidial_sdk.chat_completion import Message, Role
from aidial_sdk.chat_completion import Attachment, Message, Role

from quickapp.common.utils import matches_type

Expand All @@ -24,23 +25,38 @@ def filter_attachments(self, messages: list[Message]) -> list[Message]:
for item in messages
]

def _filter(self, message: Message):
@staticmethod
def _build_attachment_xml(attachments: list[Attachment]) -> str:
    """Render attachment metadata as an ``<attachments>`` XML fragment.

    Every attachment becomes one ``<attachment>`` element holding
    ``<title>``, ``<type>`` and ``<url>``; ``<reference_url>`` is emitted
    only when the attachment's ``reference_url`` is not ``None``. All
    values are passed through ``xml.sax.saxutils.escape`` and the lines
    are joined with ``"\\n"``.
    """

    def _text(value: object) -> str:
        # Falsy values (None, "") collapse to an empty element body.
        return escape(str(value or ""))

    lines = ["<attachments>"]
    for item in attachments:
        lines += [
            " <attachment>",
            f" <title>{_text(item.title)}</title>",
            f" <type>{_text(item.type)}</type>",
            f" <url>{_text(item.url)}</url>",
        ]
        if item.reference_url is not None:
            reference = escape(str(item.reference_url))
            lines.append(f" <reference_url>{reference}</reference_url>")
        lines.append(" </attachment>")
    lines.append("</attachments>")
    return "\n".join(lines)

def _filter(self, message: Message) -> Message:
updated_attachments = []
if message.content is None:
message.content = ""
content = message.content if isinstance(message.content, str) else str(message.content)
if self._has_attachments(message):
all_attachments: list[Attachment] = []
for attachment in message.custom_content.attachments: # type: ignore[union-attr]
if message.role == Role.USER and matches_type(
attachment.type, self.SUPPORTED_ATTACHMENTS
):
updated_attachments.append(attachment)
# Inform agent that message had contained some attachment.
# As adapter would resolve the actual bytes and URL would be lost.
content += (
f"\r\nAttachment {attachment.title}, of type {attachment.type}, "
f"url {attachment.url}, reference_url {attachment.reference_url}\r\n"
)
all_attachments.append(attachment)
# Inform agent that message had contained some attachment.
# As adapter would resolve the actual bytes and URL would be lost.
content += "\n" + self._build_attachment_xml(all_attachments)
message.custom_content.attachments = updated_attachments # type: ignore[union-attr]
message.content = content

Expand Down
6 changes: 6 additions & 0 deletions src/quickapp/attachment_processing/_context_entries.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@ class ContextEntry(BaseModel):

class AvailableContextToolResponse(BaseModel):
    # Response model carrying a list of context entries plus a fixed
    # disclaimer about what those entries cover.

    # Context entries included in the response; declared without a default.
    entries: list[ContextEntry] = Field()
    # Default note stating that the entries describe only admin-configured
    # files and say nothing about user or tool-result attachments.
    disclaimer: str = Field(
        default=(
            "This information is related only to the files configured by admin. It does not contain any information "
            "on attachments from user or from tool results."
        )
    )


def build_context_entries(
Expand Down
6 changes: 4 additions & 2 deletions src/quickapp/attachment_processing/_tool_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,10 @@
function=OpenAiToolFunction(
name=f"{INTERNAL_TOOL_NAME_PREFIX}available_context",
description=(
"Returns metadata about admin-configured context files"
" attached to this application."
"Returns metadata about admin-configured context files."
" **IMPORTANT**: this tool is not applicable to user-attached files or files from tool results, "
"and will not return any information about them. If you see file in <attachments> section of user "
"message, it means that the file was attached by the user, and is available for you to use."
),
parameters=OpenAiToolFunctionParameters(
type=JsonTypeEnum.object,
Expand Down
152 changes: 142 additions & 10 deletions src/tests/unit_tests/agent_tests/test_attachment_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,30 @@
from quickapp.agent._attachment_filter import _AttachmentFilter


def _user_msg(content: str = "", attachments: list[Attachment] | None = None) -> Message:
msg = Message(role=Role.USER, content=content)
def _msg(
    role: Role, content: str | None = "", attachments: list[Attachment] | None = None
) -> Message:
    """Build a ``Message`` for ``role``, adding ``CustomContent`` only when attachments are given."""
    message = Message(role=role, content=content)
    if not attachments:
        # None or an empty list: custom_content is left as constructed.
        return message
    message.custom_content = CustomContent(attachments=attachments)
    return message


def _attachment(title: str, url: str, mime_type: str) -> Attachment:
def _user_msg(content: str = "", attachments: list[Attachment] | None = None) -> Message:
    """Shortcut for ``_msg`` with the role fixed to ``Role.USER``."""
    return _msg(role=Role.USER, content=content, attachments=attachments)


def _attachment(
    title: str,
    url: str,
    mime_type: str,
    reference_url: str | None = None,
) -> Attachment:
    """Create an ``Attachment``; ``mime_type`` is passed as the ``type`` field."""
    fields = {
        "title": title,
        "url": url,
        "type": mime_type,
        "reference_url": reference_url,
    }
    return Attachment(**fields)


Expand All @@ -38,7 +50,7 @@ def test_non_image_attachments_removed(self):
result = transformer.filter_attachments([msg])
assert len(result[0].custom_content.attachments) == 0

def test_text_metadata_injected_for_attachments(self):
def test_xml_metadata_injected_for_attachments(self):
transformer = _AttachmentFilter()
msg = _user_msg(
"original content",
Expand All @@ -49,12 +61,13 @@ def test_text_metadata_injected_for_attachments(self):
)
result = transformer.filter_attachments([msg])
content = str(result[0].content)
assert "Attachment doc.pdf" in content
assert "application/pdf" in content
assert "<attachments>" in content
assert "<title>doc.pdf</title>" in content
assert "<type>application/pdf</type>" in content
assert "<title>photo.png</title>" in content
assert "<type>image/png</type>" in content

# Image attachments are kept inline AND get text metadata injected
assert "Attachment photo.png" in content
assert "image/png" in content
# Image attachments are kept inline AND get XML metadata injected
assert result[0].custom_content.attachments[0].type == "image/png"
assert result[0].custom_content.attachments[0].title == "photo.png"

Expand Down Expand Up @@ -105,4 +118,123 @@ def test_filter_idempotent_on_repeated_calls(self):
second_content = str(second_pass[0].content)

assert first_content == second_content
assert first_content.count("Attachment doc.pdf") == 1
assert first_content.count("<title>doc.pdf</title>") == 1

# --- Multi-message tests ---

def test_multi_message_each_filtered_independently(self):
    """Each message in a batch is filtered against its own attachments."""
    pdf = _attachment("doc.pdf", "/files/doc.pdf", "application/pdf")
    png = _attachment("photo.png", "/files/photo.png", "image/png")
    csv = _attachment("data.csv", "/files/data.csv", "text/csv")
    batch = [_user_msg("first", [pdf, png]), _user_msg("second", [csv])]

    filtered = _AttachmentFilter().filter_attachments(batch)

    # First message keeps only the image inline, but both files are described.
    kept_first = filtered[0].custom_content.attachments
    assert len(kept_first) == 1
    assert kept_first[0].type == "image/png"
    first_text = str(filtered[0].content)
    for title in ("doc.pdf", "photo.png"):
        assert f"<title>{title}</title>" in first_text

    # Second message loses its csv attachment, which is still described.
    assert len(filtered[1].custom_content.attachments) == 0
    second_text = str(filtered[1].content)
    assert "<title>data.csv</title>" in second_text

def test_multi_message_non_attachment_messages_unchanged(self):
    """A message without attachments comes back as the very same object."""
    text_only = _user_msg("just text")
    with_file = _user_msg(
        "with file",
        [_attachment("doc.pdf", "/files/doc.pdf", "application/pdf")],
    )

    filtered = _AttachmentFilter().filter_attachments([text_only, with_file])

    # Identity check: the plain message must not have been copied.
    assert filtered[0] is text_only
    assert filtered[0].content == "just text"

    # The message carrying a file gets the metadata injected.
    assert "<title>doc.pdf</title>" in str(filtered[1].content)

# --- Role-based tests ---

def test_assistant_message_image_attachments_stripped(self):
    """Images on assistant messages are not kept inline, only described."""
    image = _attachment("photo.png", "/files/photo.png", "image/png")
    assistant_msg = _msg(Role.ASSISTANT, "response", [image])

    filtered = _AttachmentFilter().filter_attachments([assistant_msg])

    # Only USER messages keep supported attachments inline.
    assert len(filtered[0].custom_content.attachments) == 0
    rendered = str(filtered[0].content)
    assert "<title>photo.png</title>" in rendered

def test_tool_message_attachments_stripped(self):
    """Attachments on tool messages are removed but still described."""
    image = _attachment("result.png", "/files/result.png", "image/png")
    tool_msg = _msg(Role.TOOL, "tool output", [image])

    filtered = _AttachmentFilter().filter_attachments([tool_msg])

    assert len(filtered[0].custom_content.attachments) == 0
    rendered = str(filtered[0].content)
    assert "<title>result.png</title>" in rendered

# --- Edge case tests ---

def test_empty_message_list(self):
    """Filtering an empty list yields an empty list."""
    filtered = _AttachmentFilter().filter_attachments([])
    assert filtered == []

def test_content_none_with_attachments(self):
    """A message whose content is None still receives the XML metadata."""
    pdf = _attachment("doc.pdf", "/files/doc.pdf", "application/pdf")
    empty_body_msg = _msg(Role.USER, None, [pdf])

    filtered = _AttachmentFilter().filter_attachments([empty_body_msg])

    rendered = str(filtered[0].content)
    for fragment in ("<attachments>", "<title>doc.pdf</title>"):
        assert fragment in rendered

def test_reference_url_conditional_absent(self):
    """No ``<reference_url>`` element appears when none was supplied."""
    pdf = _attachment("doc.pdf", "/files/doc.pdf", "application/pdf")

    filtered = _AttachmentFilter().filter_attachments([_user_msg("test", [pdf])])

    # reference_url defaults to None, so the element must be omitted.
    rendered = str(filtered[0].content)
    assert "<reference_url>" not in rendered

def test_reference_url_conditional_present(self):
    """A supplied reference URL is rendered as a ``<reference_url>`` element."""
    pdf = _attachment(
        "doc.pdf",
        "/files/doc.pdf",
        "application/pdf",
        reference_url="/refs/doc.pdf",
    )

    filtered = _AttachmentFilter().filter_attachments([_user_msg("test", [pdf])])

    rendered = str(filtered[0].content)
    assert "<reference_url>/refs/doc.pdf</reference_url>" in rendered
Loading