Skip to content

Commit 200052a

Browse files
authored
fix: fallback to other parsers if the doc is too big for MinerU API (#1382)
1 parent 1e2372e commit 200052a

File tree

1 file changed: 3 additions, 0 deletions

1 file changed: 3 additions, 0 deletions

aperag/docparser/mineru_parser.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -127,6 +127,9 @@ def parse_file(self, path: Path, metadata: dict[str, Any], **kwargs) -> list[Par
127127
return self._download_and_process_zip(zip_url, metadata, is_pdf_input)
128128
elif state == "failed":
129129
error_message = task_status.get("err_msg", "Unknown error")
130+
# "number of pages exceeds limit" or "file size exceeds limit"
131+
if "exceeds limit" in error_message:
132+
raise FallbackError(error_message)
130133
raise RuntimeError(f"Mineru parsing failed for batch {batch_id}: {error_message}")
131134

132135
except requests.exceptions.RequestException as e:

0 commit comments

Comments
 (0)