44from __future__ import annotations
55
66import re
7- from typing import Optional
7+ from functools import lru_cache
88
99from pythainlp import thai_consonants , thai_tonemarks
1010from pythainlp .corpus import thai_words
1313from pythainlp .util import remove_tonemark
1414
1515kv : KhaveeVerifier = KhaveeVerifier ()
16- all_thai_words_dict : Optional [list [str ]] = None
1716
1817
18+ @lru_cache (maxsize = None )
19+ def _single_syllable_thai_words () -> list [str ]:
20+ """Collect, on first call, the thai_words() entries that syllable_tokenize() keeps as a single token; later calls reuse the cached list."""
21+ return [entry for entry in thai_words () if len (syllable_tokenize (entry )) == 1 ]
22+
23+
24+ @lru_cache (maxsize = 1024 )
1925def rhyme (word : str ) -> list [str ]:
2026 """Find Thai rhyme
2127
@@ -31,16 +37,9 @@ def rhyme(word: str) -> list[str]:
3137 print(rhyme("จีบ"))
3238 # output: ['กลีบ', 'กีบ', 'ครีบ', ...]
3339 """
34- global all_thai_words_dict
35- list_sumpus = []
36- if all_thai_words_dict is None :
37- all_thai_words_dict = [
38- i for i in list (thai_words ()) if len (syllable_tokenize (i )) == 1
39- ]
40- for i in all_thai_words_dict :
41- if kv .is_sumpus (word , i ) and i != word :
42- list_sumpus .append (i )
43- return sorted (list_sumpus )
40+ return sorted (
41+ i for i in _single_syllable_thai_words () if kv .is_sumpus (word , i ) and i != word
42+ )
4443
4544
4645_vowel_str : str = "" .join (
@@ -130,31 +129,18 @@ def tone_to_spelling(t: str) -> str:
130129 return t
131130
132131
133- def spelling (word : str ) -> list [str ]:
134- """Thai word to spelling
135-
136- This funnction support Thai root word only.
137-
138- :param str word: A Thai word
139- :return: spelling
140- :rtype: List[str]
141-
142- :Example:
143- ::
144-
145- from pythainlp.util import spelling
146-
147- print(spelling("เรียน"))
148- # output: ['รอ', 'เอีย', 'นอ', 'เรียน']
149-
150- print(spelling("เฝ้า)
151- # output: ['ฝอ', 'เอา', 'เฝา', 'ไม้โท', 'เฝ้า']
152- """
153- if not word or not isinstance (word , str ):
154- return []
155- thai_vowel_tokenizer = Tokenizer (
132+ @lru_cache (maxsize = None )
133+ def _spelling_tokenizer () -> Tokenizer :
134+ """Build on first call, then reuse, the Tokenizer (custom_dict = thai_vowel + thai_consonants, engine "longest") that _spelling_impl() fetches."""
135+ return Tokenizer (
156136 custom_dict = thai_vowel + list (thai_consonants ), engine = "longest"
157137 )
138+
139+
140+ @lru_cache (maxsize = 1024 )
141+ def _spelling_impl (word : str ) -> list [str ]:
142+ """Cached implementation of spelling() for valid string inputs."""
143+ thai_vowel_tokenizer = _spelling_tokenizer ()
158144 word_pre = remove_tonemark (word ).replace ("็" , "" )
159145 tone = [tone_to_spelling (i ) for i in word if i in thai_tonemarks ]
160146 word_output = word_pre
@@ -179,3 +165,28 @@ def spelling(word: str) -> list[str]:
179165 return output + [word ]
180166 else :
181167 return output + [word_pre , word ]
168+
169+
170+ def spelling (word : str ) -> list [str ]:
171+ """Thai word to spelling
172+
173+ This function supports Thai root words only; empty or non-str input gives [].
174+
175+ :param str word: A Thai word
176+ :return: spelling, as a fresh list on every call (safe for the caller to mutate)
177+ :rtype: List[str]
178+
179+ :Example:
180+ ::
181+
182+ from pythainlp.util import spelling
183+
184+ print(spelling("เรียน"))
185+ # output: ['รอ', 'เอีย', 'นอ', 'เรียน']
186+
187+ print(spelling("เฝ้า"))
188+ # output: ['ฝอ', 'เอา', 'เฝา', 'ไม้โท', 'เฝ้า']
189+ """
190+ if not word or not isinstance (word , str ):
191+ return []
192+ return list (_spelling_impl (word )) # copy, so callers cannot mutate the list held in _spelling_impl's lru_cache
0 commit comments