Skip to content

Commit 260e16f

Browse files
committed
fix: Don't reprocess files in Advanced Mode
1 parent 04b4af6 commit 260e16f

File tree

2 files changed: +26 additions, -5 deletions (lines changed)

2 files changed: +26 additions, -5 deletions (lines changed)

src/lfx/src/lfx/base/data/base_file.py

Lines changed: 17 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -239,15 +239,28 @@ def load_files_base(self) -> list[Data]:
239239
file.path.unlink()
240240

241241
def load_files_core(self) -> list[Data]:
242-
"""Load files and return as Data objects.
242+
"""Load files and return as Data objects, with per-instance caching.
243+
244+
Results are cached keyed by the ``markdown`` attribute so that multiple
245+
output methods that share the same processing parameters (e.g.
246+
``load_files_message`` and ``load_files_dataframe`` when both run with
247+
``markdown=False``) do not trigger redundant file processing.
243248
244249
Returns:
245250
list[Data]: List of Data objects from all files
246251
"""
252+
# Use the markdown flag (default False) as the cache key so that
253+
# structured and markdown outputs are cached independently.
254+
markdown_flag = getattr(self, "markdown", False)
255+
cache_attr = f"_load_files_core_cache_{markdown_flag}"
256+
257+
if hasattr(self, cache_attr):
258+
return getattr(self, cache_attr)
259+
247260
data_list = self.load_files_base()
248-
if not data_list:
249-
return [Data()]
250-
return data_list
261+
result = data_list if data_list else [Data()]
262+
setattr(self, cache_attr, result)
263+
return result
251264

252265
def _extract_file_metadata(self, data_item) -> dict:
253266
"""Extract metadata from a data item with file_path."""

src/lfx/src/lfx/components/files_and_knowledge/file.py

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -316,7 +316,15 @@ class EmptySchema(BaseModel):
316316
async def read_files_tool() -> str:
317317
"""Read the content of uploaded files."""
318318
try:
319-
result = self.load_files_message()
319+
if getattr(self, "advanced_mode", False):
320+
# In advanced mode, use the markdown output path so that the
321+
# tool shares the same Docling processing as the advanced
322+
# outputs rather than triggering a second subprocess via
323+
# load_files_message.
324+
self.markdown = True
325+
result = self.load_files_markdown()
326+
else:
327+
result = self.load_files_message()
320328
if hasattr(result, "get_text"):
321329
return result.get_text()
322330
if hasattr(result, "text"):

0 commit comments

Comments
 (0)