Skip to content

Commit b86f139

Browse files
author
Ashok
committed
Fix: handle missing w:styleId in DOCX to prevent KeyError
1 parent 8a9d8f1 commit b86f139

File tree

1 file changed

+12
-4
lines changed

1 file changed

+12
-4
lines changed

packages/markitdown/src/markitdown/converters/_docx_converter.py

Lines changed: 12 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -84,7 +84,15 @@ def convert(
84 84

85 85
style_map = kwargs.get("style_map", None)
86 86
pre_process_stream = pre_process_docx(file_stream)
87-
return self._html_converter.convert_string(
88-
mammoth.convert_to_html(pre_process_stream, style_map=style_map).value,
89-
**kwargs,
90-
)
87+
88+
# Patch: handle missing styleId safely
89+
try:
90+
html = mammoth.convert_to_html(pre_process_stream, style_map=style_map).value
91+
except KeyError as e:
92+
if str(e) == "'w:styleId'":
93+
# Ignore missing style IDs and convert anyway
94+
html = mammoth.convert_to_html(pre_process_stream, style_map=style_map, ignore_empty_styles=True).value
95+
else:
96+
raise
97+
98+
return self._html_converter.convert_string(html, **kwargs)

0 commit comments

Comments
 (0)