Skip to content

Commit 6b6d886

Browse files
authored
Merge pull request #21288 from microsoft/azure_python_sanitizer_upstream2
Azure python sanitizer upstream2
2 parents df35f9f + a1eaf42 commit 6b6d886

11 files changed

+921
-412
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
category: minorAnalysis
3+
---
4+
* Added new full SSRF sanitization barrier from the new AntiSSRF library.

python/ql/lib/semmle/python/security/dataflow/ServerSideRequestForgeryCustomizations.qll

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,4 +176,36 @@ module ServerSideRequestForgery {
176176
strNode = [call.getArg(0), call.getArgByName("string")]
177177
)
178178
}
179+
180+
/** A validation of a URI using the `AntiSSRF` library, considered as a full-ssrf sanitizer. */
181+
private class UriValidator extends FullUrlControlSanitizer {
182+
UriValidator() { this = DataFlow::BarrierGuard<uri_validator/3>::getABarrierNode() }
183+
}
184+
185+
import semmle.python.dataflow.new.internal.DataFlowPublic
186+
187+
private predicate uri_validator(DataFlow::GuardNode g, ControlFlowNode node, boolean branch) {
188+
exists(DataFlow::CallCfgNode call, string funcs |
189+
funcs in ["in_domain", "in_azure_keyvault_domain", "in_azure_storage_domain"] and
190+
call = API::moduleImport("AntiSSRF").getMember("URIValidator").getMember(funcs).getACall() and
191+
call.getArg(0).asCfgNode() = node
192+
|
193+
// validator call directly (e.g., if URIValidator.in_domain(...) )
194+
g = call.asCfgNode() and
195+
branch = true
196+
or
197+
// validator used in a comparison
198+
exists(Cmpop op, Node n, ControlFlowNode l |
199+
n.getALocalSource() = call and g.(CompareNode).operands(n.asCfgNode(), op, l)
200+
|
201+
// validator == true or validator == false or validator is True or validator is False
202+
(op instanceof Eq or op instanceof Is) and
203+
branch = l.getNode().(BooleanLiteral).booleanValue()
204+
or
205+
// validator != false or validator != true or validator is not True or validator is not False
206+
(op instanceof NotEq or op instanceof IsNot) and
207+
branch = l.getNode().(BooleanLiteral).booleanValue().booleanNot()
208+
)
209+
)
210+
}
179211
}

python/ql/test/query-tests/Security/CWE-918-ServerSideRequestForgery/FullServerSideRequestForgery.expected

Lines changed: 237 additions & 130 deletions
Large diffs are not rendered by default.
Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,4 @@
1-
Security/CWE-918/FullServerSideRequestForgery.ql
1+
query: Security/CWE-918/FullServerSideRequestForgery.ql
2+
postprocess:
3+
- utils/test/InlineExpectationsTestQuery.ql
4+
- utils/test/PrettyPrintModels.ql

python/ql/test/query-tests/Security/CWE-918-ServerSideRequestForgery/PartialServerSideRequestForgery.expected

Lines changed: 384 additions & 215 deletions
Large diffs are not rendered by default.
Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,4 @@
1-
Security/CWE-918/PartialServerSideRequestForgery.ql
1+
query: Security/CWE-918/PartialServerSideRequestForgery.ql
2+
postprocess:
3+
- utils/test/InlineExpectationsTestQuery.ql
4+
- utils/test/PrettyPrintModels.ql

python/ql/test/query-tests/Security/CWE-918-ServerSideRequestForgery/full_partial_test.py

Lines changed: 48 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from flask import request
1+
from flask import request # $ Source
22

33
import requests
44
import re
@@ -7,20 +7,24 @@ def full_ssrf():
77
user_input = request.args['untrusted_input']
88
query_val = request.args['query_val']
99

10-
requests.get(user_input) # NOT OK -- user has full control
10+
# NOT OK -- user has full control
11+
requests.get(user_input) # $ Alert[py/full-ssrf]
1112

1213
url = "https://" + user_input
13-
requests.get(url) # NOT OK -- user has full control
14+
# NOT OK -- user has full control
15+
requests.get(url) # $ Alert[py/full-ssrf]
1416

1517
# although the path `/foo` is added here, this can be circumvented such that the
1618
# final URL is `https://evil.com/#/foo" -- since the fragment (#) is not sent to the
1719
# server.
1820
url = "https://" + user_input + "/foo"
19-
requests.get(url) # NOT OK -- user has full control
21+
# NOT OK -- user has full control
22+
requests.get(url) # $ Alert[py/full-ssrf]
2023

2124
# this might seem like a dummy test, but it serves to check how our sanitizers work.
2225
url = "https://" + user_input + "/foo?key=" + query_val
23-
requests.get(url) # NOT OK -- user has full control
26+
# NOT OK -- user has full control
27+
requests.get(url) # $ Alert[py/full-ssrf]
2428

2529
# taint-steps are added as `fromNode -> toNode`, but when adding a sanitizer it's
2630
# currently only possible to so on either `fromNode` or `toNode` (either all edges in
@@ -39,72 +43,87 @@ def full_ssrf_format():
3943

4044
# using .format
4145
url = "https://{}".format(user_input)
42-
requests.get(url) # NOT OK -- user has full control
46+
# NOT OK -- user has full control
47+
requests.get(url) # $ Alert[py/full-ssrf]
4348

4449
url = "https://{}/foo".format(user_input)
45-
requests.get(url) # NOT OK -- user has full control
50+
# NOT OK -- user has full control
51+
requests.get(url) # $ Alert[py/full-ssrf]
4652

4753
url = "https://{}/foo?key={}".format(user_input, query_val)
48-
requests.get(url) # NOT OK -- user has full control
54+
# NOT OK -- user has full control
55+
requests.get(url) # $ Alert[py/full-ssrf]
4956

5057
url = "https://{x}".format(x=user_input)
51-
requests.get(url) # NOT OK -- user has full control
58+
# NOT OK -- user has full control
59+
requests.get(url) # $ Alert[py/full-ssrf]
5260

5361
url = "https://{1}".format(0, user_input)
54-
requests.get(url) # NOT OK -- user has full control
62+
# NOT OK -- user has full control
63+
requests.get(url) # $ Alert[py/full-ssrf]
5564

5665
def full_ssrf_percent_format():
5766
user_input = request.args['untrusted_input']
5867
query_val = request.args['query_val']
5968

6069
# using %-formatting
6170
url = "https://%s" % user_input
62-
requests.get(url) # NOT OK -- user has full control
71+
# NOT OK -- user has full control
72+
requests.get(url) # $ Alert[py/full-ssrf]
6373

6474
url = "https://%s/foo" % user_input
65-
requests.get(url) # NOT OK -- user has full control
75+
# NOT OK -- user has full control
76+
requests.get(url) # $ Alert[py/full-ssrf]
6677

6778
url = "https://%s/foo/key=%s" % (user_input, query_val)
68-
requests.get(url) # NOT OK -- user has full control
79+
# NOT OK -- user has full and partial control
80+
requests.get(url) # $ Alert[py/partial-ssrf] $ MISSING: Alert[py/full-ssrf]
6981

7082
def full_ssrf_f_strings():
7183
user_input = request.args['untrusted_input']
7284
query_val = request.args['query_val']
7385

7486
# using f-strings
7587
url = f"https://{user_input}"
76-
requests.get(url) # NOT OK -- user has full control
88+
# NOT OK -- user has full control
89+
requests.get(url) # $ Alert[py/full-ssrf]
7790

7891
url = f"https://{user_input}/foo"
79-
requests.get(url) # NOT OK -- user has full control
92+
# NOT OK -- user has full control
93+
requests.get(url) # $ Alert[py/full-ssrf]
8094

8195
url = f"https://{user_input}/foo?key={query_val}"
82-
requests.get(url) # NOT OK -- user has full control
96+
# NOT OK -- user has full control
97+
requests.get(url) # $ Alert[py/full-ssrf]
8398

8499

85100
def partial_ssrf_1():
86101
user_input = request.args['untrusted_input']
87102

88103
url = "https://example.com/foo?" + user_input
89-
requests.get(url) # NOT OK -- user controls query parameters
104+
# NOT OK -- user controls query parameters
105+
requests.get(url) # $ Alert[py/partial-ssrf]
90106

91107
def partial_ssrf_2():
92108
user_input = request.args['untrusted_input']
93109

94110
url = "https://example.com/" + user_input
95-
requests.get(url) # NOT OK -- user controls path
111+
# NOT OK -- user controls path
112+
requests.get(url) # $ Alert[py/partial-ssrf]
96113

97114
def partial_ssrf_3():
98115
user_input = request.args['untrusted_input']
99116

100117
url = "https://example.com/" + user_input
101-
requests.get(url) # NOT OK -- user controls path
118+
# NOT OK -- user controls path
119+
requests.get(url) # $ Alert[py/partial-ssrf]
102120

103121
def partial_ssrf_4():
104122
user_input = request.args['untrusted_input']
105123

106124
url = "https://example.com/foo#{}".format(user_input)
107-
requests.get(url) # NOT OK -- user contollred fragment
125+
# NOT OK -- user controlled fragment
126+
requests.get(url) # $ Alert[py/partial-ssrf]
108127

109128
def partial_ssrf_5():
110129
user_input = request.args['untrusted_input']
@@ -113,20 +132,22 @@ def partial_ssrf_5():
113132
# controlled
114133

115134
url = "https://example.com/foo#%s" % user_input
116-
requests.get(url) # NOT OK -- user contollred fragment
135+
# NOT OK -- user controlled fragment
136+
requests.get(url) # $ Alert[py/partial-ssrf]
117137

118138
def partial_ssrf_6():
119139
user_input = request.args['untrusted_input']
120140

121141
url = f"https://example.com/foo#{user_input}"
122-
requests.get(url) # NOT OK -- user only controlled fragment
142+
# NOT OK -- user only controlled fragment
143+
requests.get(url) # $ Alert[py/partial-ssrf]
123144

124145
def partial_ssrf_7():
125146
user_input = request.args['untrusted_input']
126147

127148
if user_input.isalnum():
128149
url = f"https://example.com/foo#{user_input}"
129-
requests.get(url) # OK - user input can only contain alphanumerical characters
150+
requests.get(url) # OK - user input can only contain alphanumerical characters
130151

131152
if user_input.isalpha():
132153
url = f"https://example.com/foo#{user_input}"
@@ -154,7 +175,8 @@ def partial_ssrf_7():
154175

155176
if re.fullmatch(r'.*[a-zA-Z0-9]+.*', user_input):
156177
url = f"https://example.com/foo#{user_input}"
157-
requests.get(url) # NOT OK, but NOT FOUND - user input can contain arbitrary characters
178+
# NOT OK, but NOT FOUND - user input can contain arbitrary characters
179+
requests.get(url) # $ MISSING: Alert[py/partial-ssrf]
158180

159181

160182
if re.match(r'^[a-zA-Z0-9]+$', user_input):
@@ -163,7 +185,8 @@ def partial_ssrf_7():
163185

164186
if re.match(r'[a-zA-Z0-9]+', user_input):
165187
url = f"https://example.com/foo#{user_input}"
166-
requests.get(url) # NOT OK, but NOT FOUND - user input can contain arbitrary character as a suffix.
188+
# NOT OK, but NOT FOUND - user input can contain arbitrary character as a suffix.
189+
requests.get(url) # $ MISSING: Alert[py/partial-ssrf]
167190

168191
reg = re.compile(r'^[a-zA-Z0-9]+$')
169192

python/ql/test/query-tests/Security/CWE-918-ServerSideRequestForgery/test_azure_client.py

Lines changed: 11 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,7 @@
33
from azure.keyvault.keys import KeyClient
44
from azure.storage.blob import ContainerClient
55
from azure.storage.blob import download_blob_from_url
6-
7-
from flask import request
6+
from flask import request # $ Source
87

98
def azure_sdk_test(credential, output_path):
109
user_input = request.args['untrusted_input']
@@ -13,24 +12,14 @@ def azure_sdk_test(credential, output_path):
1312
url = f"https://example.com/foo#{user_input}"
1413
full_url = f"https://{user_input2}"
1514
# Testing Azure sink
16-
c = SecretClient(vault_url=url, credential=credential)# NOT OK -- user only controlled fragment
17-
c = SecretClient(vault_url=full_url, credential=credential) # NOT OK -- user has full control
18-
c = ShareFileClient.from_file_url(url) # NOT OK -- user only controlled fragment
19-
c = ShareFileClient.from_file_url(full_url) # NOT OK -- user has full control
20-
c = KeyClient(url, credential)# NOT OK -- user only controlled fragment
21-
c = KeyClient(full_url, credential) # NOT OK -- user has full control
22-
c = ContainerClient.from_container_url(container_url=url, credential=credential) # NOT OK -- user only controlled fragment
23-
c = ContainerClient.from_container_url(container_url=full_url, credential=credential) # NOT OK -- user has full control
15+
SecretClient(vault_url=url, credential=credential) # $ Alert[py/partial-ssrf]
16+
SecretClient(vault_url=full_url, credential=credential) # $ Alert[py/full-ssrf]
17+
ShareFileClient.from_file_url(url) # $ Alert[py/partial-ssrf]
18+
ShareFileClient.from_file_url(full_url) # $ Alert[py/full-ssrf]
19+
KeyClient(url, credential) # $ Alert[py/partial-ssrf]
20+
KeyClient(full_url, credential) # $ Alert[py/full-ssrf]
21+
ContainerClient.from_container_url(container_url=url, credential=credential) # $ Alert[py/partial-ssrf]
22+
ContainerClient.from_container_url(container_url=full_url, credential=credential) # $ Alert[py/full-ssrf]
2423

25-
download_blob_from_url(
26-
blob_url=url, # NOT OK -- user only controlled fragment
27-
output=output_path,
28-
credential=credential,
29-
overwrite=True
30-
)
31-
download_blob_from_url(
32-
blob_url=full_url, # NOT OK -- user has full control
33-
output=output_path,
34-
credential=credential,
35-
overwrite=True
36-
)
24+
download_blob_from_url(blob_url=url, output=output_path, credential=credential, overwrite=True ) # $ Alert[py/partial-ssrf]
25+
download_blob_from_url(blob_url=full_url, output=output_path, credential=credential, overwrite=True) # $ Alert[py/full-ssrf]
Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
from flask import Flask, request
2-
1+
from flask import request # $ Source
2+
from flask import Flask
33
from http.client import HTTPConnection
44

55
app = Flask(__name__)
@@ -10,28 +10,35 @@ def ssrf_test():
1010
unsafe_path = request.args["path"]
1111
user_input = request.args['untrusted_input']
1212

13-
conn = HTTPConnection(unsafe_host)
14-
conn.request("GET", unsafe_path) # NOT OK -- user has full control
13+
conn = HTTPConnection(unsafe_host) # $ Sink[py/full-ssrf]
14+
# NOT OK -- user has full control
15+
conn.request("GET", unsafe_path) # $ Alert[py/full-ssrf]
1516

16-
# Full SSRF variant, where there is ALSO made a request with fixed URL on the same
17+
# Full SSRF variant, where there is also a request with fixed URL on the same
1718
# connection later on. This should not change anything on the overall SSRF alerts.
18-
conn = HTTPConnection(unsafe_host)
19-
conn.request("GET", unsafe_path) # NOT OK -- user has full control
19+
conn = HTTPConnection(unsafe_host) # $ Sink
20+
# NOT OK -- user has full control
21+
conn.request("GET", unsafe_path) # $ Alert[py/full-ssrf]
2022

2123
# partial SSRF on SAME connection
22-
conn.request("GET", "/foo") # NOT OK -- user has control of host
24+
# NOT OK -- user has control of host
25+
conn.request("GET", "/foo") # $ Alert[py/partial-ssrf]
2326

2427
# the rest are partial SSRF
25-
conn = HTTPConnection(unsafe_host)
26-
conn.request("GET", "/foo") # NOT OK -- user controlled domain
28+
conn = HTTPConnection(unsafe_host) # $ Sink[py/partial-ssrf]
29+
# NOT OK -- user controlled domain
30+
conn.request("GET", "/foo") # $ Alert[py/partial-ssrf]
2731

2832
conn = HTTPConnection("example.com")
29-
conn.request("GET", unsafe_path) # NOT OK -- user controlled path
33+
# NOT OK -- user controlled path
34+
conn.request("GET", unsafe_path) # $ Alert[py/partial-ssrf]
3035

3136
path = "foo?" + user_input
3237
conn = HTTPConnection("example.com")
33-
conn.request("GET", path) # NOT OK -- user controlled query parameters
38+
# NOT OK -- user controlled query parameters
39+
conn.request("GET", path) # $ Alert[py/partial-ssrf]
3440

3541
path = "foo#" + user_input
3642
conn = HTTPConnection("example.com")
37-
conn.request("GET", path) # NOT OK -- user controlled fragment
43+
# NOT OK -- user controlled fragment
44+
conn.request("GET", path) # $ Alert[py/partial-ssrf]

0 commit comments

Comments
 (0)