Skip to content

Commit e6a675a

Browse files
refactor(trufflehog): simplify exclusions to a single regex file
Replace the multi-file build pipeline (prefixes.txt, build_exclude_file.py, update-excludes.sh, global-exclude.txt) and the inline Python helper with a single exclude-paths.txt containing Go regexes, which is passed directly to `trufflehog --exclude-paths`. To change the exclusions, add a pattern to that file and merge to main.

Made-with: Cursor
1 parent f97f3e9 commit e6a675a

File tree

6 files changed

+46
-295
lines changed

6 files changed

+46
-295
lines changed

.github/workflows/reusable-trufflehog.yml

Lines changed: 15 additions & 188 deletions
Original file line numberDiff line numberDiff line change
@@ -56,61 +56,20 @@ jobs:
5656
run: |
5757
DEST=/tmp/trufflehog-exclude.txt
5858
REPO=grafana/security-github-actions
59-
REF=main
60-
# Source of truth: trufflehog/prefixes.txt (+ static rules in build_exclude_file.py). CI builds the exclude file at runtime.
61-
RAW_BASE="https://raw.githubusercontent.com/grafana/security-github-actions/${REF}/trufflehog"
62-
RAW_URL="${RAW_BASE}/global-exclude.txt"
63-
RAW_BUILD="${RAW_BASE}/build_exclude_file.py"
64-
RAW_PREFIXES="${RAW_BASE}/prefixes.txt"
65-
if gh api "repos/${REPO}/contents/trufflehog/build_exclude_file.py?ref=${REF}" \
66-
-H "Accept: application/vnd.github.v3.raw" -o /tmp/trufflehog_build_exclude_file.py 2>/dev/null \
67-
&& gh api "repos/${REPO}/contents/trufflehog/prefixes.txt?ref=${REF}" \
68-
-H "Accept: application/vnd.github.v3.raw" -o /tmp/trufflehog_prefixes.txt 2>/dev/null \
69-
&& python3 /tmp/trufflehog_build_exclude_file.py /tmp/trufflehog_prefixes.txt > "${DEST}" 2>/dev/null \
70-
&& [[ -s "${DEST}" ]]; then
71-
echo "Built exclude file from prefixes.txt + build_exclude_file.py (${REPO}@${REF}, GitHub API)"
72-
elif curl -fsSL "${RAW_BUILD}" -o /tmp/trufflehog_build_exclude_file.py 2>/dev/null \
73-
&& curl -fsSL "${RAW_PREFIXES}" -o /tmp/trufflehog_prefixes.txt 2>/dev/null \
74-
&& python3 /tmp/trufflehog_build_exclude_file.py /tmp/trufflehog_prefixes.txt > "${DEST}" 2>/dev/null \
75-
&& [[ -s "${DEST}" ]]; then
76-
echo "Built exclude file from prefixes.txt + build_exclude_file.py (raw GitHub)"
77-
elif gh api "repos/${REPO}/contents/trufflehog/global-exclude.txt?ref=${REF}" \
59+
FILE_PATH=trufflehog/exclude-paths.txt
60+
61+
if gh api "repos/${REPO}/contents/${FILE_PATH}?ref=main" \
7862
-H "Accept: application/vnd.github.v3.raw" -o "${DEST}" 2>/dev/null && [[ -s "${DEST}" ]]; then
79-
echo "Loaded pre-built global-exclude.txt from ${REPO}@${REF} (GitHub API)"
80-
elif curl -fsSL "${RAW_URL}" -o "${DEST}" 2>/dev/null && [[ -s "${DEST}" ]]; then
81-
echo "Loaded pre-built global-exclude.txt from raw.githubusercontent.com (${REF})"
63+
echo "Loaded exclude patterns from ${REPO}@main (GitHub API)"
64+
elif curl -fsSL "https://raw.githubusercontent.com/${REPO}/main/${FILE_PATH}" \
65+
-o "${DEST}" 2>/dev/null && [[ -s "${DEST}" ]]; then
66+
echo "Loaded exclude patterns from raw.githubusercontent.com"
8267
else
83-
echo "::warning::Could not fetch or rebuild TruffleHog excludes from ${REPO}@${REF}. Using last-resort bundled file — merge to main or fix token access."
84-
# Last resort only: must match stdout of python3 trufflehog/build_exclude_file.py (same commit).
85-
cat > "${DEST}" <<'EOF'
86-
# Built from prefixes.txt + this script (CI does this on every run).
87-
# Optional local copy: ./trufflehog/update-excludes.sh > trufflehog/global-exclude.txt
88-
#
89-
# --- directory prefixes (from prefixes.txt) ---
90-
# prefix: content/grafana/dashboards
91-
(^|\./|[/\\])content/grafana/dashboards([/\\]|$)
92-
93-
# --- static path patterns ---
94-
# Lock files and checksums (contain hashes, not secrets)
95-
path:go\.sum$
96-
path:go\.mod$
97-
98-
# Dependency manifests (contain URLs that trigger false positives)
99-
path:package\.json$
100-
path:package-lock\.json$
101-
path:pnpm-lock\.yaml$
102-
path:yarn\.lock$
103-
path:poetry\.lock$
104-
path:Pipfile\.lock$
105-
path:uv\.lock$
106-
path:Cargo\.lock$
107-
path:Gemfile\.lock$
108-
109-
# Grafana plugin metadata
110-
path:grafana\.json$
111-
EOF
68+
echo "::warning::Could not fetch TruffleHog exclude patterns from ${REPO}. Scanning without exclusions."
69+
touch "${DEST}"
11270
fi
113-
echo "--- effective exclude file ---"
71+
72+
echo "--- effective exclude patterns ---"
11473
cat "${DEST}"
11574
11675
- name: Install TruffleHog
@@ -136,147 +95,17 @@ jobs:
13695
set +e
13796
echo "[]" > results.json
13897
139-
# Classify paths vs /tmp/trufflehog-exclude.txt (Python re ~ TruffleHog Go regexp for our patterns).
140-
cat > /tmp/trufflehog_exclude_helpers.py <<'ENDHELPER'
141-
import re
142-
import subprocess
143-
import sys
144-
from pathlib import Path
145-
146-
MANIFEST = frozenset({
147-
"go.sum", "go.mod", "package.json", "package-lock.json", "pnpm-lock.yaml",
148-
"yarn.lock", "poetry.lock", "Pipfile.lock", "uv.lock", "Cargo.lock",
149-
"Gemfile.lock", "grafana.json",
150-
})
151-
152-
_PATS = None
153-
154-
def load_patterns():
155-
global _PATS
156-
if _PATS is not None:
157-
return _PATS
158-
pats = []
159-
p = Path("/tmp/trufflehog-exclude.txt")
160-
if not p.is_file():
161-
_PATS = pats
162-
return pats
163-
for line in p.read_text().splitlines():
164-
line = line.strip()
165-
if not line or line.startswith("#"):
166-
continue
167-
try:
168-
pats.append(re.compile(line))
169-
except re.error:
170-
print(f"::warning::Invalid regex in exclude file (ignored for skip log): {line!r}", file=sys.stderr)
171-
_PATS = pats
172-
return pats
173-
174-
def vendor_skip(path: str) -> bool:
175-
return (
176-
path.startswith("vendor/")
177-
or path.startswith("./vendor/")
178-
or "/vendor/" in path
179-
)
180-
181-
def manifest_skip(path: str) -> bool:
182-
return Path(path).name in MANIFEST
183-
184-
def exclude_match(path: str):
185-
for r in load_patterns():
186-
if r.search(path):
187-
return r.pattern
188-
return None
189-
190-
def main():
191-
cmd = sys.argv[1]
192-
if cmd == "match":
193-
path = sys.argv[2]
194-
m = exclude_match(path)
195-
if m:
196-
print(m)
197-
sys.exit(0)
198-
sys.exit(1)
199-
if cmd == "report-pr":
200-
cfp = Path("changed-files.txt")
201-
lines = [ln.strip() for ln in cfp.read_text().splitlines() if ln.strip()] if cfp.is_file() else []
202-
sk_v, sk_m, sk_e, scan = [], [], [], []
203-
for f in lines:
204-
if vendor_skip(f):
205-
sk_v.append(f)
206-
elif manifest_skip(f):
207-
sk_m.append(f)
208-
else:
209-
pat = exclude_match(f)
210-
if pat:
211-
sk_e.append((f, pat))
212-
else:
213-
scan.append(f)
214-
def block(title, items, detail=False):
215-
print(f"{title} ({len(items)}):")
216-
for x in items[:500]:
217-
if detail:
218-
print(f" - {x[0]} (regex: {x[1]})")
219-
else:
220-
print(f" - {x}")
221-
if len(items) > 500:
222-
print(f" ... and {len(items) - 500} more")
223-
224-
print("")
225-
print("=== TruffleHog exclusion report (PR changed files) ===")
226-
block("Skipped — Go vendor/", sk_v)
227-
block("Skipped — manifest / lock basename", sk_m)
228-
block("Skipped — exclude file regex", sk_e, detail=True)
229-
block("Will run TruffleHog on", scan)
230-
print("=== End exclusion report ===")
231-
print("")
232-
return
233-
if cmd == "report-push":
234-
r = subprocess.run(["git", "ls-files"], capture_output=True, text=True, check=False)
235-
paths = [ln.strip() for ln in r.stdout.splitlines() if ln.strip()]
236-
matched = [(p, exclude_match(p)) for p in paths if exclude_match(p)]
237-
print("")
238-
print(f"=== Sample: tracked paths matching exclude regexes ({len(matched)} total, show first 200) ===")
239-
for p, pat in matched[:200]:
240-
print(f" - {p} (regex: {pat})")
241-
if len(matched) > 200:
242-
print(f" ... and {len(matched) - 200} more")
243-
print("(Untracked files can also match; TruffleHog still applies excludes on full scan.)")
244-
print("=== End exclude sample ===")
245-
print("")
246-
return
247-
print("unknown command", cmd, file=sys.stderr)
248-
sys.exit(1)
249-
250-
if __name__ == "__main__":
251-
main()
252-
ENDHELPER
98+
# Extract non-comment, non-blank patterns for the shell pre-filter.
99+
grep -vE '^\s*#|^\s*$' /tmp/trufflehog-exclude.txt > /tmp/exclude-regexes.txt 2>/dev/null || true
253100
254101
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
255-
# PR: Scan only changed files (using two-dot diff with explicit base SHA)
256102
echo "Scanning changed files in PR..."
257103
git diff --name-only ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }} > changed-files.txt
258104
259105
if [[ -s changed-files.txt ]]; then
260-
python3 /tmp/trufflehog_exclude_helpers.py report-pr
261106
while IFS= read -r file; do
262-
if [[ "$file" == vendor/* || "$file" == */vendor/* || "$file" == ./vendor/* ]]; then
263-
echo "Skipping: ${file} (Go vendor directory)"
264-
continue
265-
fi
266-
267-
# Get just the filename
268-
filename=$(basename "$file")
269-
270-
# Skip excluded files (use case statement for cleaner matching)
271-
case "$filename" in
272-
go.sum|go.mod|package.json|package-lock.json|pnpm-lock.yaml|yarn.lock|poetry.lock|Pipfile.lock|uv.lock|Cargo.lock|Gemfile.lock|grafana.json)
273-
echo "Skipping: ${file} (excluded manifest/lock file)"
274-
continue
275-
;;
276-
esac
277-
278-
if pat=$(python3 /tmp/trufflehog_exclude_helpers.py match "$file" 2>/dev/null); then
279-
echo "Skipping: ${file} (exclude file regex: ${pat})"
107+
if [[ -s /tmp/exclude-regexes.txt ]] && echo "$file" | grep -qEf /tmp/exclude-regexes.txt 2>/dev/null; then
108+
echo "Skipping: ${file} (matches exclude pattern)"
280109
continue
281110
fi
282111
@@ -289,9 +118,7 @@ jobs:
289118
echo "No files changed"
290119
fi
291120
else
292-
# Push to main: Scan current filesystem
293121
echo "Scanning current filesystem..."
294-
python3 /tmp/trufflehog_exclude_helpers.py report-push
295122
trufflehog filesystem . --exclude-paths /tmp/trufflehog-exclude.txt --concurrency 16 --json --no-update --results=verified,unverified > results.ndjson || true
296123
fi
297124

trufflehog/build_exclude_file.py

Lines changed: 0 additions & 67 deletions
This file was deleted.

trufflehog/exclude-paths.txt

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# TruffleHog path exclusions — one Go regex per line.
2+
# Edit this file and merge to main. CI fetches it at runtime and passes
3+
# it directly to `trufflehog --exclude-paths`.
4+
#
5+
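# Syntax: Go regexp (https://pkg.go.dev/regexp/syntax). The PR pre-filter in reusable-trufflehog.yml also matches these lines with `grep -E`, so use only syntax valid in both Go regexp and POSIX ERE.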
6+
# A file is excluded when ANY pattern matches anywhere in its path (patterns are unanchored unless they use ^ or $).
7+
# Lines starting with # and blank lines are ignored.
8+
9+
# Go vendor directory (third-party code, not repo secrets)
10+
vendor/
11+
12+
# Lock files and checksums (contain hashes, not secrets)
13+
go\.sum$
14+
go\.mod$
15+
16+
# Dependency manifests (contain URLs / hashes that trigger false positives)
17+
package\.json$
18+
package-lock\.json$
19+
pnpm-lock\.yaml$
20+
yarn\.lock$
21+
poetry\.lock$
22+
Pipfile\.lock$
23+
uv\.lock$
24+
Cargo\.lock$
25+
Gemfile\.lock$
26+
27+
# Grafana plugin metadata
28+
grafana\.json$
29+
30+
# Grafana dashboards (user-supplied site content, full of base64/hashes)
31+
content/grafana/dashboards

trufflehog/global-exclude.txt

Lines changed: 0 additions & 25 deletions
This file was deleted.

trufflehog/prefixes.txt

Lines changed: 0 additions & 9 deletions
This file was deleted.

trufflehog/update-excludes.sh

Lines changed: 0 additions & 6 deletions
This file was deleted.

0 commit comments

Comments
 (0)