Skip to content

Commit 02da34d

Browse files
authored
Phrase match: enforce word boundary option (#256)
1 parent ccfdf76 commit 02da34d

File tree

15 files changed

+392
-13
lines changed

15 files changed

+392
-13
lines changed
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
{
2+
"scenario": "phrase_match_matcher.enforce_word_boundary",
3+
"ruleset": {
4+
"rules": [
5+
{
6+
"id": "crs-913-110",
7+
"name": "Acunetix",
8+
"tags": {
9+
"type": "commercial_scanner",
10+
"category": "attack_attempt"
11+
},
12+
"conditions": [
13+
{
14+
"parameters": {
15+
"inputs": [
16+
{
17+
"address": "server.request.headers.no_cookies"
18+
}
19+
],
20+
"list": [
21+
"acunetix-product",
22+
"(acunetix web vulnerability scanner",
23+
"acunetix-scanning-agreement",
24+
"acunetix-user-agreement",
25+
"md5(acunetix_wvs_security_test)"
26+
],
27+
"options": {
28+
"enforce_word_boundary": true
29+
}
30+
},
31+
"operator": "phrase_match"
32+
}
33+
],
34+
"transformers": []
35+
}
36+
]
37+
},
38+
"fixtures": {
39+
"eval.valid": {
40+
"server.request.headers.no_cookies": [
41+
"acunetix-product",
42+
"(acunetix web vulnerability scanner",
43+
"acunetix-scanning-agreement",
44+
"acunetix-user-agreement",
45+
"md5(acunetix_wvs_security_test)"
46+
]
47+
}
48+
}
49+
}

fuzzing/scripts/build_corpus.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -391,6 +391,9 @@ def get_random_condition(i):
391391

392392
if operator == "phrase_match":
393393
result["parameters"]["list"] = [get_random_value(addresses) for _ in range(randint(1, 200))]
394+
result["parameters"]["options"] = {
395+
"enforce_word_boundary": choice((True, False))
396+
}
394397
elif operator == "match_regex":
395398
temp = choice(self.regexs)
396399

src/matcher/phrase_match.cpp

Lines changed: 28 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,19 @@
1212

1313
namespace ddwaf::matcher {
1414

15-
phrase_match::phrase_match(std::vector<const char *> pattern, std::vector<uint32_t> lengths)
15+
namespace {
16+
bool is_bounded_word(std::string_view pattern, std::size_t begin, std::size_t end)
17+
{
18+
return ((end + 1 >= pattern.size()) || isboundary(pattern[end]) ||
19+
isboundary(pattern[end + 1])) &&
20+
(begin == 0 || (isboundary(pattern[begin]) || isboundary(pattern[begin - 1])));
21+
}
22+
23+
} // namespace
24+
25+
phrase_match::phrase_match(
26+
std::vector<const char *> pattern, std::vector<uint32_t> lengths, bool enforce_word_boundary)
27+
: enforce_word_boundary_(enforce_word_boundary)
1628
{
1729
if (pattern.size() != lengths.size()) {
1830
throw std::invalid_argument("inconsistent pattern and lengths array size");
@@ -33,22 +45,27 @@ std::pair<bool, std::string> phrase_match::match_impl(std::string_view pattern)
3345
return {false, {}};
3446
}
3547

36-
const ac_result_t result =
37-
ac_match(acStructure, pattern.data(), static_cast<uint32_t>(pattern.size()));
48+
auto u32_size = static_cast<uint32_t>(pattern.size());
49+
ac_result_t result;
50+
if (!enforce_word_boundary_) {
51+
result = ac_match(acStructure, pattern.data(), u32_size);
52+
} else {
53+
result = ac_match_longest_l(acStructure, pattern.data(), u32_size);
54+
}
55+
56+
auto begin = static_cast<std::size_t>(result.match_begin);
57+
auto end = static_cast<std::size_t>(result.match_end);
3858

39-
const bool didMatch =
40-
result.match_begin >= 0 && result.match_end >= 0 && result.match_begin < result.match_end;
41-
if (!didMatch) {
59+
if (result.match_begin < 0 || result.match_end < 0 || begin >= end ||
60+
(enforce_word_boundary_ && !is_bounded_word(pattern, begin, end))) {
4261
return {false, {}};
4362
}
4463

45-
std::string matched_value;
46-
if (pattern.size() > static_cast<std::size_t>(result.match_end)) {
47-
matched_value =
48-
pattern.substr(result.match_begin, (result.match_end - result.match_begin + 1));
64+
if (pattern.size() <= end) [[unlikely]] {
65+
return {true, {}};
4966
}
5067

51-
return {true, matched_value};
68+
return {true, std::string{pattern.substr(begin, (end - begin + 1))}};
5269
}
5370

5471
} // namespace ddwaf::matcher

src/matcher/phrase_match.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@ namespace ddwaf::matcher {
1515

1616
class phrase_match : public base_impl<phrase_match> {
1717
public:
18-
phrase_match(std::vector<const char *> pattern, std::vector<uint32_t> lengths);
18+
phrase_match(std::vector<const char *> pattern, std::vector<uint32_t> lengths,
19+
bool enforce_word_boundary = false);
1920
~phrase_match() override = default;
2021
phrase_match(const phrase_match &) = delete;
2122
phrase_match(phrase_match &&) noexcept = default;
@@ -29,6 +30,7 @@ class phrase_match : public base_impl<phrase_match> {
2930

3031
[[nodiscard]] std::pair<bool, std::string> match_impl(std::string_view pattern) const;
3132

33+
bool enforce_word_boundary_{false};
3234
std::unique_ptr<ac_t, void (*)(void *)> ac{nullptr, nullptr};
3335

3436
friend class base_impl<phrase_match>;

src/parser/parser_v2.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ std::pair<std::string, std::unique_ptr<matcher::base>> parse_matcher(
5050

5151
if (name == "phrase_match") {
5252
auto list = at<parameter::vector>(params, "list");
53+
options = at<parameter::map>(params, "options", options);
54+
auto word_boundary = at<bool>(options, "enforce_word_boundary", false);
5355

5456
std::vector<const char *> patterns;
5557
std::vector<uint32_t> lengths;
@@ -66,7 +68,7 @@ std::pair<std::string, std::unique_ptr<matcher::base>> parse_matcher(
6668
lengths.push_back((uint32_t)pattern.nbEntries);
6769
}
6870

69-
matcher = std::make_unique<matcher::phrase_match>(patterns, lengths);
71+
matcher = std::make_unique<matcher::phrase_match>(patterns, lengths, word_boundary);
7072
} else if (name == "match_regex") {
7173
auto regex = at<std::string>(params, "regex");
7274
options = at<parameter::map>(params, "options", options);

src/utils.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ inline bool isspace(char c)
112112
inline bool isupper(char c) { return static_cast<unsigned>(c) - 'A' < 26; }
113113
inline bool islower(char c) { return static_cast<unsigned>(c) - 'a' < 26; }
114114
inline bool isalnum(char c) { return isalpha(c) || isdigit(c); }
115+
// Returns true when `c` can delimit a word, i.e. when it is neither
// alphanumeric (as reported by isalnum) nor an underscore.
inline bool isboundary(char c)
{
    const bool is_word_char = isalnum(c) || c == '_';
    return !is_word_char;
}
115116
inline char tolower(char c) { return isupper(c) ? static_cast<char>(c | 32) : c; }
116117
inline uint8_t from_hex(char c)
117118
{

tests/integration/matchers/test.cpp

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,4 +161,89 @@ TEST(TestIntegrationOperation, FloatEquals)
161161
ddwaf_context_destroy(context);
162162
ddwaf_destroy(handle);
163163
}
164+
165+
// Exercises rule "1" of phrase_match.yaml (phrase_match without any options):
// an input equal to one of the listed phrases must produce a match whose
// highlight is the phrase itself.
TEST(TestIntegrationOperation, PhraseMatch)
{
    // Load the ruleset and build the WAF instance; the parsed ruleset object
    // is released right after the handle has been validated.
    auto ruleset = read_file("phrase_match.yaml", base_dir);
    ASSERT_TRUE(ruleset.type != DDWAF_OBJ_INVALID);
    ddwaf_handle waf = ddwaf_init(&ruleset, nullptr, nullptr);
    ASSERT_NE(waf, nullptr);
    ddwaf_object_free(&ruleset);

    ddwaf_context ctx = ddwaf_context_init(waf);
    ASSERT_NE(ctx, nullptr);

    // Input data: { "input1": "string00" }
    ddwaf_object root = DDWAF_OBJECT_MAP;
    ddwaf_object phrase;
    ddwaf_object_string(&phrase, "string00");
    ddwaf_object_map_add(&root, "input1", &phrase);

    ddwaf_result res;
    const auto code = ddwaf_run(ctx, &root, nullptr, &res, LONG_TIME);
    ASSERT_EQ(code, DDWAF_MATCH);
    EXPECT_FALSE(res.timeout);
    EXPECT_EVENTS(res, {.id = "1",
                           .name = "rule1-phrase-match",
                           .tags = {{"type", "flow"}, {"category", "category"}},
                           .matches = {{.op = "phrase_match",
                               .address = "input1",
                               .value = "string00",
                               .highlight = "string00"}}});

    // Tear down in reverse order of creation.
    ddwaf_result_free(&res);
    ddwaf_context_destroy(ctx);
    ddwaf_destroy(waf);
}
196+
197+
// Exercises rule "2" of phrase_match.yaml (phrase_match with
// enforce_word_boundary enabled) using a fresh context per scenario:
//  - "string01;" : the phrase is followed by ';', so the rule matches and the
//                  highlight is the phrase without the trailing ';'.
//  - "string010" : the phrase is followed by a digit, so nothing matches.
TEST(TestIntegrationOperation, PhraseMatchWordBound)
{
    auto ruleset = read_file("phrase_match.yaml", base_dir);
    ASSERT_TRUE(ruleset.type != DDWAF_OBJ_INVALID);
    ddwaf_handle waf = ddwaf_init(&ruleset, nullptr, nullptr);
    ASSERT_NE(waf, nullptr);
    ddwaf_object_free(&ruleset);

    // Scenario 1: phrase terminated by a boundary character.
    {
        ddwaf_context ctx = ddwaf_context_init(waf);
        ASSERT_NE(ctx, nullptr);

        ddwaf_object root = DDWAF_OBJECT_MAP;
        ddwaf_object phrase;
        ddwaf_object_string(&phrase, "string01;");
        ddwaf_object_map_add(&root, "input2", &phrase);

        ddwaf_result res;
        const auto code = ddwaf_run(ctx, &root, nullptr, &res, LONG_TIME);
        ASSERT_EQ(code, DDWAF_MATCH);
        EXPECT_FALSE(res.timeout);
        EXPECT_EVENTS(res, {.id = "2",
                               .name = "rule2-phrase-match-word-bound",
                               .tags = {{"type", "flow"}, {"category", "category"}},
                               .matches = {{.op = "phrase_match",
                                   .address = "input2",
                                   .value = "string01;",
                                   .highlight = "string01"}}});

        ddwaf_result_free(&res);
        ddwaf_context_destroy(ctx);
    }

    // Scenario 2: phrase immediately followed by a word character.
    {
        ddwaf_context ctx = ddwaf_context_init(waf);
        ASSERT_NE(ctx, nullptr);

        ddwaf_object root = DDWAF_OBJECT_MAP;
        ddwaf_object phrase;
        ddwaf_object_string(&phrase, "string010");
        ddwaf_object_map_add(&root, "input2", &phrase);

        ddwaf_result res;
        const auto code = ddwaf_run(ctx, &root, nullptr, &res, LONG_TIME);
        ASSERT_EQ(code, DDWAF_OK);
        EXPECT_FALSE(res.timeout);

        ddwaf_result_free(&res);
        ddwaf_context_destroy(ctx);
    }

    ddwaf_destroy(waf);
}
248+
164249
} // namespace
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
version: '2.1'
2+
rules:
3+
- id: 1
4+
name: rule1-phrase-match
5+
tags:
6+
type: flow
7+
category: category
8+
conditions:
9+
- operator: phrase_match
10+
parameters:
11+
inputs:
12+
- address: input1
13+
list:
14+
- string00
15+
- string01
16+
- id: 2
17+
name: rule2-phrase-match-word-bound
18+
tags:
19+
type: flow
20+
category: category
21+
conditions:
22+
- operator: phrase_match
23+
parameters:
24+
inputs:
25+
- address: input2
26+
list:
27+
- string00
28+
- string01
29+
options:
30+
enforce_word_boundary: true

tests/matcher/phrase_match_test.cpp

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,89 @@ TEST(TestPhraseMatch, TestComplex)
9595
run("nonsense", nullptr);
9696
}
9797

98+
// Unit test for phrase_match constructed with enforce_word_boundary = true.
// Four phrases are registered, covering every combination of a boundary
// character ('$') at the start and/or end of the phrase:
//   "banana"  - word characters on both ends
//   "$apple"  - boundary character at the start
//   "orange$" - boundary character at the end
//   "$pear$"  - boundary characters on both ends
TEST(TestPhraseMatch, TestWordBoundary)
{
    std::vector<const char *> phrases{"banana", "$apple", "orange$", "$pear$"};

    std::vector<uint32_t> phrase_lengths;
    phrase_lengths.reserve(phrases.size());
    for (const char *phrase : phrases) {
        phrase_lengths.push_back(static_cast<uint32_t>(strlen(phrase)));
    }

    phrase_match matcher(phrases, phrase_lengths, true);

    // Expects `input` to match and the reported highlight to equal
    // `expected_highlight`.
    const auto expect_match = [&matcher](const char *input, const char *expected_highlight) {
        ddwaf_object param;
        ddwaf_object_string(&param, input);
        const auto [matched, highlight] = matcher.match(param);
        EXPECT_TRUE(matched);
        EXPECT_STREQ(highlight.c_str(), expected_highlight);
        ddwaf_object_free(&param);
    };

    // Expects `input` not to match at all.
    const auto expect_no_match = [&matcher](const char *input) {
        ddwaf_object param;
        ddwaf_object_string(&param, input);
        EXPECT_FALSE(matcher.match(param).first);
        ddwaf_object_free(&param);
    };

    // "banana": needs a boundary character or the end of the input on each side.
    expect_match("banana", "banana");
    expect_match(" banana", "banana");
    expect_match("banana ", "banana");
    expect_match("word banana word", "banana");
    expect_match("word ;banana/ word", "banana");

    // Partial phrase, or adjacent letters / underscores: rejected.
    expect_no_match("banan");
    expect_no_match("abanana");
    expect_no_match("bananaa");
    expect_no_match("abananaa");
    expect_no_match("banana_");
    expect_no_match("_banana");
    expect_no_match("_banana_");
    expect_no_match(" _banana ");
    expect_no_match(" banana_ ");
    expect_no_match(" _banana_ ");

    // "$apple": the leading '$' delimits the left side, only the right side
    // depends on the surrounding input.
    expect_match("$apple", "$apple");
    expect_match("s$apple", "$apple");
    expect_match(";$apple", "$apple");
    expect_match(";$apple;", "$apple");
    expect_match("$apple;", "$apple");
    expect_match("word $apple word", "$apple");

    expect_no_match("apple");
    expect_no_match("$applea");
    expect_no_match("a$applea");
    expect_no_match("$apple_");
    expect_no_match("_$apple_");
    expect_no_match(" $apple_ ");
    expect_no_match(" _$apple_ ");

    // "orange$": the trailing '$' delimits the right side, only the left side
    // depends on the surrounding input.
    expect_match("orange$", "orange$");
    expect_match("orange$s", "orange$");
    expect_match(";orange$", "orange$");
    expect_match(";orange$;", "orange$");
    expect_match("orange$;", "orange$");
    expect_match("word orange$word", "orange$");

    expect_no_match("orange");
    expect_no_match("aorange$");
    expect_no_match("aorange$a");
    expect_no_match("_orange$");
    expect_no_match("_orange$_");
    expect_no_match(" _orange$ ");
    expect_no_match(" _orange$_ ");

    // "$pear$": delimited on both sides by its own characters, so it matches
    // regardless of what surrounds it.
    expect_match("$pear$", "$pear$");
    expect_match("$pear$s", "$pear$");
    expect_match("s$pear$", "$pear$");
    expect_match("s$pear$s", "$pear$");
    expect_match(";$pear$", "$pear$");
    expect_match(";$pear$;", "$pear$");
    expect_match("$pear$;", "$pear$");
    expect_match("word$pear$word", "$pear$");
    expect_match("word $pear$ word", "$pear$");

    // Incomplete phrases never match.
    expect_no_match("pear$");
    expect_no_match("$pear");
}
180+
98181
TEST(TestPhraseMatch, TestInvalidInput)
99182
{
100183
std::vector<const char *> strings{"aaaa", "bbbb", "cccc"};

tests/utils_test.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,17 @@ TEST(TestUtils, IsAlnum)
9090
}
9191
}
9292

93+
// Checks isboundary() against an explicit definition of a word character
// (ASCII digit, ASCII letter or underscore) for every char in
// [char_min, char_max); char_max itself is not visited by this loop.
TEST(TestUtils, IsBoundary)
{
    const auto within = [](char value, char low, char high) {
        return value >= low && value <= high;
    };

    for (char c = char_min; c < char_max; ++c) {
        const bool is_word_char =
            within(c, '0', '9') || within(c, 'A', 'Z') || within(c, 'a', 'z') || c == '_';

        // A character is a boundary exactly when it is not a word character.
        EXPECT_EQ(isboundary(c), !is_word_char);
    }
}
93104
TEST(TestUtils, ToLower)
94105
{
95106
std::unordered_map<char, char> mapping{{'A', 'a'}, {'B', 'b'}, {'C', 'c'}, {'D', 'd'},

0 commit comments

Comments
 (0)