fix: Don't reprocess files in Advanced Mode

erichare · erichare · commit 260e16fc38d7 · 2026-03-20T09:25:47.000-07:00
diff --git a/src/lfx/src/lfx/base/data/base_file.py b/src/lfx/src/lfx/base/data/base_file.py
@@ -239,15 +239,28 @@ def load_files_base(self) -> list[Data]:
                         file.path.unlink()
 
     def load_files_core(self) -> list[Data]:
-        """Load files and return as Data objects.
+        """Load files and return as Data objects, with per-instance caching.
+
+        The result is memoized on ``self`` in an attribute named after the
+        current ``markdown`` flag (``False`` if unset), so output methods that
+        run with the same flag share one ``load_files_base`` call. A cache hit
+        returns the same list object that the first call returned.
 
         Returns:
             list[Data]: List of Data objects from all files
         """
+        # Key the cache by the markdown flag (default False) so each mode keeps its
+        # own result. NOTE(review): nothing here invalidates the cache -- confirm.
+        markdown_flag = getattr(self, "markdown", False)
+        cache_attr = f"_load_files_core_cache_{markdown_flag}"
+
+        if hasattr(self, cache_attr):
+            return getattr(self, cache_attr)
+
         data_list = self.load_files_base()
-        if not data_list:
-            return [Data()]
-        return data_list
+        result = data_list if data_list else [Data()]
+        setattr(self, cache_attr, result)
+        return result
 
     def _extract_file_metadata(self, data_item) -> dict:
         """Extract metadata from a data item with file_path."""
diff --git a/src/lfx/src/lfx/components/files_and_knowledge/file.py b/src/lfx/src/lfx/components/files_and_knowledge/file.py
@@ -316,7 +316,15 @@ class EmptySchema(BaseModel):
         async def read_files_tool() -> str:
             """Read the content of uploaded files."""
             try:
-                result = self.load_files_message()
+                if getattr(self, "advanced_mode", False):
+                    # In advanced mode, use the markdown output path so that the
+                    # tool shares the same Docling processing as the advanced
+                    # outputs rather than triggering a second subprocess via
+                    # load_files_message.
+                    self.markdown = True
+                    result = self.load_files_markdown()
+                else:
+                    result = self.load_files_message()
                 if hasattr(result, "get_text"):
                     return result.get_text()
                 if hasattr(result, "text"):