feat: Allow disabling spellcheck in specific regions using HTML comments

empwilli · web-flow · commit 17e4ff307c9b · 2025-12-01T15:27:33.000+01:00
In some instances, users want to disable spell checking for sections of a document instead of enabling/disabling it for the whole document. This introduces spell checking guards in the form of special HTML comments. For everything enclosed in a `<!-- mkdocs-spellcheck-off -->` ... `<!-- mkdocs-spellcheck-on -->` block, no spell checking is performed. Issue #33: #33 PR-34: #34
diff --git a/README.md b/README.md
@@ -78,3 +78,18 @@ The builtin dictionaries are:
     to be typos in other contexts (such as `uint`)
 - `names` for valid proper names that might be typos
 - `en-GB_to_en-US` for corrections from `en-GB` to `en-US`
+
+### Disabling spell checking for document regions
+
+In some situations it can be useful to temporarily disable spell checking for a region of a document.
+To this end, MkDocs SpellCheck recognizes special HTML comment guards `mkdocs-spellcheck-{on,off}`:
+
+```md
+Here MkDocs SpellCheck checks for correct spelling.
+
+<!-- mkdocs-spellcheck-off -->
+In this block it doesn't.
+<!-- mkdocs-spellcheck-on -->
+
+Here spelling checks are performed again.
+```
diff --git a/src/mkdocs_spellcheck/_internal/words.py b/src/mkdocs_spellcheck/_internal/words.py
@@ -8,6 +8,9 @@
 from html.parser import HTMLParser
 from io import StringIO
 
+_spell_check_guard_on = "mkdocs-spellcheck-on"
+_spell_check_guard_off = "mkdocs-spellcheck-off"
+
 
 class _MLStripper(HTMLParser):
     def __init__(self, ignore_code: bool = True) -> None:  # noqa: FBT001,FBT002
@@ -18,6 +21,7 @@ def __init__(self, ignore_code: bool = True) -> None:  # noqa: FBT001,FBT002
         self.text = StringIO()
         self.ignore_code = ignore_code
         self.in_code_tag = False
+        self.in_guard = False
 
     def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:  # noqa: ARG002
         if tag == "code":
@@ -28,15 +32,29 @@ def handle_endtag(self, tag: str) -> None:
         if tag == "code":
             self.in_code_tag = False
 
+    def handle_comment(self, data: str) -> None:
+        data = data.strip()
+
+        if not self.in_code_tag:
+            if data == _spell_check_guard_off:
+                self.in_guard = True
+            elif data == _spell_check_guard_on:
+                self.in_guard = False
+
     def handle_data(self, data: str) -> None:
-        if not (self.ignore_code and self.in_code_tag):
-            self.text.write(data)
+        if self.ignore_code and self.in_code_tag:
+            return
+
+        if self.in_guard:
+            return
+
+        self.text.write(data)
 
     def get_data(self) -> str:
         return self.text.getvalue()
 
 
-def _strip_tags(html: str, ignore_code: bool) -> str:  # noqa: FBT001
+def _strip(html: str, ignore_code: bool) -> str:  # noqa: FBT001
     stripper = _MLStripper(ignore_code)
     stripper.feed(html)
     return stripper.get_data()
@@ -94,6 +112,6 @@ def get_words(
     """
     known_words = known_words or set()
     keep = partial(_keep_word, min_length=min_length, max_capital=max_capital)
-    filtered = filter(keep, _normalize(_strip_tags(html, ignore_code), allow_unicode).split("-"))
+    filtered = filter(keep, _normalize(_strip(html, ignore_code), allow_unicode).split("-"))
     words = {word.lower() for word in filtered}
     return sorted(words - known_words)
diff --git a/tests/test_words.py b/tests/test_words.py
@@ -27,6 +27,43 @@ def test_remove_single_tags() -> None:
     assert "img" not in words
 
 
+def test_remove_guarded_blocks() -> None:
+    """Assert guarded text blocks are removed from HTML text."""
+    html = """\
+before
+<!-- mkdocs-spellcheck-off -->
+between
+<!-- mkdocs-spellcheck-on -->
+after
+"""
+    words = get_words(html, min_length=1)
+    assert "before" in words
+    assert "between" not in words
+    assert "after" in words
+
+
+@pytest.mark.parametrize(
+    ("ignore_code", "expected"),
+    [
+        (True, {"before", "after"}),
+        (False, {"before", "some", "guarded", "text", "after"}),
+    ],
+)
+def test_guarded_blocks_disabled_in_code_blocks(ignore_code: bool, expected: set[str]) -> None:
+    """Assert guarded blocks are disabled in code blocks."""
+    html = """\
+before
+<code>
+<!-- mkdocs-spellcheck-off -->
+some guarded text
+<!-- mkdocs-spellcheck-on -->
+</code>
+after
+"""
+    words = get_words(html, ignore_code=ignore_code)
+    assert set(words) == expected
+
+
 @pytest.mark.parametrize(
     ("text", "known_words", "expected"),
     [