Skip to content

Commit 17e4ff3

Browse files
authored
feat: Allow disabling spellcheck in specific regions thanks to HTML comments
In some instances users want to disable spell checking for sections of a document, instead of enabling/disabling it for the whole document. This introduces spell checking guards in the form of special HTML comments. For everything enclosed in a `<!-- mkdocs-spellcheck-off -->` ... `<!-- mkdocs-spellcheck-on -->` block, no spell checking is performed. Issue #33: #33 PR-34: #34
1 parent 0099ffa commit 17e4ff3

File tree

3 files changed

+74
-4
lines changed

3 files changed

+74
-4
lines changed

README.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,3 +78,18 @@ The builtin dictionaries are:
7878
to be typos in other contexts (such as `uint`)
7979
- `names` for valid proper names that might be typos
8080
- `en-GB_to_en-US` for corrections from `en-GB` to `en-US`
81+
82+
### Disabling spell checking for document regions
83+
84+
In some situations it can be useful to disable spell checking for only part of a document, rather than for the whole document.
85+
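To this end, MkDocs SpellCheck recognizes two special HTML comments as guards: `<!-- mkdocs-spellcheck-off -->` turns spell checking off, and `<!-- mkdocs-spellcheck-on -->` turns it back on: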
86+
87+
```md
88+
Here MkDocs SpellCheck checks for correct spelling.
89+
90+
<!-- mkdocs-spellcheck-off -->
91+
In this block it doesn't.
92+
<!-- mkdocs-spellcheck-on -->
93+
94+
Here spelling checks are performed again.
95+
```

src/mkdocs_spellcheck/_internal/words.py

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@
88
from html.parser import HTMLParser
99
from io import StringIO
1010

11+
_spell_check_guard_on = "mkdocs-spellcheck-on"
12+
_spell_check_guard_off = "mkdocs-spellcheck-off"
13+
1114

1215
class _MLStripper(HTMLParser):
1316
def __init__(self, ignore_code: bool = True) -> None: # noqa: FBT001,FBT002
@@ -18,6 +21,7 @@ def __init__(self, ignore_code: bool = True) -> None: # noqa: FBT001,FBT002
1821
self.text = StringIO()
1922
self.ignore_code = ignore_code
2023
self.in_code_tag = False
24+
self.in_guard = False
2125

2226
def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None: # noqa: ARG002
2327
if tag == "code":
@@ -28,15 +32,29 @@ def handle_endtag(self, tag: str) -> None:
2832
if tag == "code":
2933
self.in_code_tag = False
3034

35+
def handle_comment(self, data: str) -> None:
36+
data = data.strip()
37+
38+
if not self.in_code_tag:
39+
if data == _spell_check_guard_off:
40+
self.in_guard = True
41+
elif data == _spell_check_guard_on:
42+
self.in_guard = False
43+
3144
def handle_data(self, data: str) -> None:
32-
if not (self.ignore_code and self.in_code_tag):
33-
self.text.write(data)
45+
if self.ignore_code and self.in_code_tag:
46+
return
47+
48+
if self.in_guard:
49+
return
50+
51+
self.text.write(data)
3452

3553
def get_data(self) -> str:
3654
return self.text.getvalue()
3755

3856

39-
def _strip_tags(html: str, ignore_code: bool) -> str: # noqa: FBT001
57+
def _strip(html: str, ignore_code: bool) -> str: # noqa: FBT001
4058
stripper = _MLStripper(ignore_code)
4159
stripper.feed(html)
4260
return stripper.get_data()
@@ -94,6 +112,6 @@ def get_words(
94112
"""
95113
known_words = known_words or set()
96114
keep = partial(_keep_word, min_length=min_length, max_capital=max_capital)
97-
filtered = filter(keep, _normalize(_strip_tags(html, ignore_code), allow_unicode).split("-"))
115+
filtered = filter(keep, _normalize(_strip(html, ignore_code), allow_unicode).split("-"))
98116
words = {word.lower() for word in filtered}
99117
return sorted(words - known_words)

tests/test_words.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,43 @@ def test_remove_single_tags() -> None:
2727
assert "img" not in words
2828

2929

30+
def test_remove_guarded_blocks() -> None:
31+
"""Assert guarded text blocks are removed from HTML text."""
32+
html = """\
33+
before
34+
<!-- mkdocs-spellcheck-off -->
35+
between
36+
<!-- mkdocs-spellcheck-on -->
37+
after
38+
"""
39+
words = get_words(html, min_length=1)
40+
assert "before" in words
41+
assert "between" not in words
42+
assert "after" in words
43+
44+
45+
@pytest.mark.parametrize(
46+
("ignore_code", "expected"),
47+
[
48+
(True, {"before", "after"}),
49+
(False, {"before", "some", "guarded", "text", "after"}),
50+
],
51+
)
52+
def test_guarded_blocks_disabled_in_code_blocks(ignore_code: bool, expected: set[str]) -> None:
53+
"""Assert guarded blocks are disabled in code blocks."""
54+
html = """\
55+
before
56+
<code>
57+
<!-- mkdocs-spellcheck-off -->
58+
some guarded text
59+
<!-- mkdocs-spellcheck-on -->
60+
</code>
61+
after
62+
"""
63+
words = get_words(html, ignore_code=ignore_code)
64+
assert set(words) == expected
65+
66+
3067
@pytest.mark.parametrize(
3168
("text", "known_words", "expected"),
3269
[

0 commit comments

Comments
 (0)