-
Notifications
You must be signed in to change notification settings - Fork 35
Expand file tree
/
Copy pathabuse.go
More file actions
164 lines (138 loc) · 3.67 KB
/
abuse.go
File metadata and controls
164 lines (138 loc) · 3.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
package masker
import (
"strings"
"unicode"
)
// TrieNode is a single node of the trie. A node maps the next rune of a
// word to the child node reached by that rune.
type TrieNode struct {
	// children holds the outgoing edges, keyed by the next rune.
	children map[rune]*TrieNode
	// isEnd is true when a stored word ends exactly at this node.
	isEnd bool
}

// AbuseTrie is a rune-keyed trie that stores a set of abuse words.
// Words are stored trimmed of surrounding whitespace and lowercased.
type AbuseTrie struct {
	root *TrieNode
}

// NewAbuseTrie returns an empty trie whose root node is ready to accept
// insertions.
func NewAbuseTrie() *AbuseTrie {
	return &AbuseTrie{
		root: &TrieNode{
			children: make(map[rune]*TrieNode),
		},
	}
}

// Insert adds word to the trie after trimming surrounding whitespace and
// lowercasing it. Input that is empty once trimmed is ignored.
func (t *AbuseTrie) Insert(word string) {
	// Normalize before the emptiness check: a whitespace-only word must be
	// rejected here, otherwise the root itself would be flagged as a word end.
	word = strings.ToLower(strings.TrimSpace(word))
	if word == "" {
		return
	}
	// Allow a zero-value AbuseTrie to be used without NewAbuseTrie.
	if t.root == nil {
		t.root = &TrieNode{children: make(map[rune]*TrieNode)}
	}

	node := t.root
	for _, r := range word {
		next := node.children[r]
		if next == nil {
			next = &TrieNode{children: make(map[rune]*TrieNode)}
			node.children[r] = next
		}
		node = next
	}
	node.isEnd = true
}

// InsertAll adds every word in words to the trie, applying the same
// normalization as Insert to each one.
func (t *AbuseTrie) InsertAll(words []string) {
	for _, word := range words {
		t.Insert(word)
	}
}

// Contains reports whether word, after trimming surrounding whitespace and
// lowercasing, was previously inserted as a complete word. Prefixes of stored
// words do not match. Empty or whitespace-only input always yields false.
func (t *AbuseTrie) Contains(word string) bool {
	word = strings.ToLower(strings.TrimSpace(word))
	if word == "" || t.root == nil {
		return false
	}

	node := t.root
	for _, r := range word {
		node = node.children[r]
		if node == nil {
			return false
		}
	}
	return node.isEnd
}
// findAbuseWords scans text token by token (tokens are separated by
// whitespace) and collects every token whose cleaned form is stored in the
// trie. Tokens are returned in the order they appear and exactly as they
// appear in text, i.e. with any punctuation and original casing intact.
// The result is nil when nothing matches.
func (t *AbuseTrie) findAbuseWords(text string) []string {
	var matches []string
	for _, token := range strings.Fields(text) {
		// Compare on the cleaned form, but report the raw token.
		normalized := cleanWord(token)
		if normalized == "" || !t.Contains(normalized) {
			continue
		}
		matches = append(matches, token)
	}
	return matches
}
// cleanWord returns word reduced to its letters and digits, each lowercased.
// Every other rune (punctuation, symbols, whitespace) is dropped, so the
// result may be empty.
func cleanWord(word string) string {
	keepAlnumLower := func(r rune) rune {
		if unicode.IsLetter(r) || unicode.IsDigit(r) {
			return unicode.ToLower(r)
		}
		// A negative return tells strings.Map to drop the rune.
		return -1
	}
	return strings.Map(keepAlnumLower, word)
}
// AbuseMasker masks abuse words in text. The words it recognizes are the
// ones stored in its trie field; matching against input text is delegated
// to that trie.
type AbuseMasker struct {
trie *AbuseTrie
}
// NewAbuseMasker returns a masker backed by a fresh, empty trie. Until words
// are added, it recognizes no abuse words.
func NewAbuseMasker() *AbuseMasker {
	return &AbuseMasker{trie: NewAbuseTrie()}
}
// NewAbuseMaskerWithWords returns a masker whose trie has been pre-populated
// with the given words.
func NewAbuseMaskerWithWords(words []string) *AbuseMasker {
	masker := &AbuseMasker{trie: NewAbuseTrie()}
	masker.trie.InsertAll(words)
	return masker
}
// AddWords registers every entry of words as an abuse word by inserting
// them into the masker's trie via InsertAll.
func (m *AbuseMasker) AddWords(words []string) {
m.trie.InsertAll(words)
}
// AddWord registers a single abuse word by inserting it into the masker's
// trie via Insert.
func (m *AbuseMasker) AddWord(word string) {
m.trie.Insert(word)
}
// Marshal returns text with every abusive token replaced by a mask.
//
// The text is split into whitespace-separated tokens. Each token for which
// ContainsAbuse reports true is replaced, as a whole (including any
// punctuation attached to it), by strLoop(maskChar, n), where n is the
// token's length in runes. All other tokens and all whitespace are copied
// through unchanged. An empty text is returned as is.
//
// Masking is done per token rather than by a global string replacement, so
// an abuse word that merely occurs inside a longer, non-abusive token (for
// example "bad" inside "badge") is left intact.
//
// Example:
//
//	abuseMasker := NewAbuseMaskerWithWords([]string{"bad", "terrible"})
//	abuseMasker.Marshal("*", "This is a bad word and terrible") // returns "This is a *** word and ********"
func (m *AbuseMasker) Marshal(maskChar string, text string) string {
	if text == "" {
		return text
	}

	var out strings.Builder
	out.Grow(len(text))

	rest := text // suffix of text that has not been copied to out yet
	for _, token := range strings.Fields(text) {
		// rest starts with at most a run of whitespace followed by this
		// token, and tokens never contain whitespace, so the first match is
		// exactly this token's position.
		at := strings.Index(rest, token)
		out.WriteString(rest[:at]) // keep the original whitespace verbatim

		if m.ContainsAbuse(token) {
			out.WriteString(strLoop(maskChar, len([]rune(token))))
		} else {
			out.WriteString(token)
		}
		rest = rest[at+len(token):]
	}
	out.WriteString(rest) // trailing whitespace, if any

	return out.String()
}
// ContainsAbuse reports whether at least one whitespace-separated token of
// text is recognized as an abuse word by the masker's trie.
func (m *AbuseMasker) ContainsAbuse(text string) bool {
	return len(m.trie.findAbuseWords(text)) != 0
}
// GetAbuseWords returns the abuse words found in text, as produced by the
// trie's findAbuseWords: the matching whitespace-separated tokens, in order
// of appearance.
func (m *AbuseMasker) GetAbuseWords(text string) []string {
return m.trie.findAbuseWords(text)
}