Skip to content

Commit 09b01b9

Browse files
authored
Merge pull request #1348 from DDMAL/weekly-job
ci: convert daily e2e test to weekly
2 parents e05e94c + 85bbcde commit 09b01b9

File tree

2 files changed

+92
-99
lines changed

2 files changed

+92
-99
lines changed
Lines changed: 91 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
name: Check for Broken Links
2-
on: [push, pull_request]
2+
on:
3+
pull_request:
4+
push:
5+
branches:
6+
- main
7+
- master
8+
- develop
9+
310
jobs:
411
build_and_check:
512
runs-on: ubuntu-latest
@@ -33,115 +40,101 @@ jobs:
3340
TEMPORARY_WEBSITE_URL: 'http://127.0.0.1:8080'
3441
ACTUAL_WEBSITE_URL: 'https://ddmal.ca/Neon/'
3542
run: |
36-
# Function to retry URLs with retryable errors
37-
retry_urls() {
38-
local urls="$1"
39-
while IFS= read -r url; do
40-
[ -z "$url" ] && continue
41-
echo "🔄 Retrying: $url"
42-
43-
for attempt in 1 2 3; do
44-
echo " Attempt $attempt/3..."
45-
http_code=$(curl -L -s -o /dev/null -w "%{http_code}" \
46-
-H "User-Agent: Mozilla/5.0 (compatible; BrokenLinkChecker)" \
47-
--connect-timeout 30 --max-time 60 "$url" 2>/dev/null)
48-
49-
if echo "$http_code" | grep -E "^(200|301|302|303)$" > /dev/null; then
50-
echo " ✅ Success! HTTP $http_code"
51-
echo "RETRY_SUCCESS:$url" >> /tmp/retry_results
52-
break
53-
elif [ $attempt -eq 3 ]; then
54-
echo " ❌ Failed after 3 attempts (HTTP $http_code)"
55-
echo "RETRY_FAILED:$url" >> /tmp/retry_results
56-
else
57-
echo " ⏳ Failed with HTTP $http_code, retrying in 5 seconds..."
58-
sleep 5
59-
fi
60-
done
61-
echo ""
62-
done <<< "$urls"
63-
}
64-
65-
# Initialize retry results file
66-
> /tmp/retry_results
67-
68-
# Run broken link checker and filter output
69-
echo "Running broken link check..."
70-
output=$(blc $TEMPORARY_WEBSITE_URL --filter-level=3 | \
71-
grep -v -E '├───OK───|└───OK───' | \
72-
awk '
73-
BEGIN { buf="" }
74-
/^Getting links from:/ { buf=$0; next }
75-
/^Finished!.*0 broken\./ {
76-
if (length(buf)>0) { buf=""; next }
77-
}
78-
{
79-
if(length(buf)>0) print buf
80-
if (NF > 0) print
81-
buf=""
82-
}
83-
/^Finished!/ { print "" }
84-
' | sed "s|$TEMPORARY_WEBSITE_URL|$ACTUAL_WEBSITE_URL|g")
85-
86-
echo "Initial link check results:"
87-
echo "$output"
88-
89-
# Handle retryable errors
90-
retryable_urls=$(echo "$output" | grep -E "(BLC_UNKNOWN|HTTP_429)" | \
91-
sed -n 's/.*├─BROKEN─ \(https\?:\/\/[^[:space:]]*\).*/\1/p')
92-
93-
if [ -n "$retryable_urls" ]; then
94-
echo ""
95-
echo "🔄 Found URLs with retryable errors, starting retry process..."
96-
retry_urls "$retryable_urls"
43+
echo "Running broken link check with rate limiting..."
44+
45+
# Run blc with CLI options to avoid rate limiting
46+
# --filter-level 3: Check all link types including metadata
47+
# --ordered: Check links sequentially (helps avoid rate limiting)
48+
# --get: Use GET requests instead of HEAD (more compatible)
49+
# --user-agent: Use realistic browser user agent
50+
# --host-requests 1: Limit to 1 concurrent request per host (key for avoiding 429)
51+
set +e # Don't exit on blc failure, we'll handle it
52+
blc $TEMPORARY_WEBSITE_URL \
53+
--filter-level 3 \
54+
--ordered \
55+
--get \
56+
--user-agent "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" \
57+
--host-requests 1 \
58+
--recursive \
59+
--verbose \
60+
> /tmp/blc_output.txt 2>&1
61+
blc_exit_code=$?
62+
set -e
63+
64+
# Display the output
65+
cat /tmp/blc_output.txt
66+
67+
# Get all broken links
68+
all_broken_links=$(grep -E "├─BROKEN─" /tmp/blc_output.txt || true)
9769
98-
# Show retry summary
99-
success_count=$(grep -c "^RETRY_SUCCESS:" /tmp/retry_results 2>/dev/null || echo "0")
100-
failed_count=$(grep -c "^RETRY_FAILED:" /tmp/retry_results 2>/dev/null || echo "0")
101-
echo "📊 Retry Summary: $success_count succeeded, $failed_count failed"
70+
echo ""
71+
echo "=== Broken Links Found by blc ==="
72+
if [ -n "$all_broken_links" ]; then
73+
echo "$all_broken_links"
74+
else
75+
echo "None"
10276
fi
10377
104-
# Determine final status
105-
has_errors=false
78+
# Function to verify links with curl
79+
verify_with_curl() {
80+
local url="$1"
81+
echo " 🔄 Verifying: $url"
82+
83+
# Use temp file instead of /dev/null to avoid truncation errors on retry
84+
temp_body=$(mktemp)
85+
86+
http_code=$(curl -L -s -o "$temp_body" -w "%{http_code}" \
87+
-H "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" \
88+
--connect-timeout 30 --max-time 60 --insecure \
89+
--retry 3 --retry-delay 5 --retry-all-errors \
90+
"$url" 2>/dev/null || echo "000")
91+
92+
rm -f "$temp_body"
93+
94+
if echo "$http_code" | grep -E "^(200|301|302|303)$" > /dev/null; then
95+
echo " ✅ Success: HTTP $http_code"
96+
return 0
97+
elif [ "$http_code" = "429" ]; then
98+
echo " ⚠️ HTTP 429 (rate limited)"
99+
rate_limited_urls="${rate_limited_urls}${url}\n"
100+
return 0
101+
else
102+
echo " ❌ Failed: HTTP $http_code"
103+
failed_http_code="$http_code"
104+
return 1
105+
fi
106+
}
106107
107-
# Check for 4xx errors not resolved by retries
108-
if echo "$output" | grep -Eq 'HTTP_4[0-9]{2}'; then
109-
successful_urls=$(grep "^RETRY_SUCCESS:" /tmp/retry_results 2>/dev/null | cut -d: -f2- || echo "")
108+
# Verify all broken links with curl
109+
verified_failures=""
110+
rate_limited_urls=""
110111
111-
unresolved_4xx=$(echo "$output" | grep 'HTTP_4[0-9]{2}' | while read -r line; do
112-
url=$(echo "$line" | sed -n 's/.*├─BROKEN─ \(https\?:\/\/[^[:space:]]*\).*/\1/p')
113-
if [ -n "$url" ] && ! echo "$successful_urls" | grep -Fxq "$url"; then
114-
echo "$line"
115-
fi
116-
done)
112+
if [ -n "$all_broken_links" ]; then
113+
echo ""
114+
echo "=== Verifying Links with curl ==="
117115
118-
if [ -n "$unresolved_4xx" ]; then
119-
echo ""
120-
echo "❌ Unresolved HTTP 4xx errors:"
121-
echo "$unresolved_4xx"
122-
has_errors=true
123-
fi
116+
# Extract URLs and verify them
117+
urls_to_verify=$(echo "$all_broken_links" | sed -n 's/.*├─BROKEN─ \(https\?:\/\/[^[:space:]]*\).*/\1/p')
124118
125-
# Check for failed retries
126-
if grep -q "^RETRY_FAILED:" /tmp/retry_results 2>/dev/null; then
127-
echo ""
128-
echo "❌ URLs that failed after retries:"
129-
grep "^RETRY_FAILED:" /tmp/retry_results | cut -d: -f2-
130-
has_errors=true
131-
fi
119+
while IFS= read -r url; do
120+
[ -z "$url" ] && continue
121+
if ! verify_with_curl "$url"; then
122+
verified_failures="${verified_failures}${url} (HTTP ${failed_http_code})\n"
123+
fi
124+
done <<< "$urls_to_verify"
132125
fi
133126
134-
# Final result
127+
# Final results
135128
echo ""
136-
if [ "$has_errors" = true ]; then
137-
echo "❌ Broken links found that could not be resolved."
129+
if [ -n "$verified_failures" ]; then
130+
echo "❌ CI Failed: The following links failed:"
131+
echo -e "$verified_failures"
138132
exit 1
139133
else
140-
if grep -q "^RETRY_SUCCESS:" /tmp/retry_results 2>/dev/null; then
141-
echo "✅ All broken links resolved via retries! Successfully fixed:"
142-
grep "^RETRY_SUCCESS:" /tmp/retry_results | cut -d: -f2- | sed 's/^/ - /'
143-
else
144-
echo "✅ No broken links found."
134+
if [ -n "$rate_limited_urls" ]; then
135+
echo "⚠️ Note: These links returned HTTP 429 (rate limited, not broken):"
136+
echo -e "$rate_limited_urls"
145137
fi
138+
echo "✅ CI Passed: All links verified successfully"
146139
exit 0
147140
fi

.github/workflows/cypress_prod.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name: Scheduled E2E on Chrome
22
on:
33
schedule:
4-
- cron: '0 7 * * *' # Runs at 07:00 UTC every day (02:00 AM EST in winter, 03:00 AM EDT in summer)
4+
- cron: '0 7 * * 0' # Runs every Sunday at 02:00 Montreal time (EST) / 03:00 Montreal time (EDT)
55
jobs:
66
cypress-run:
77
runs-on: ubuntu-latest

0 commit comments

Comments
 (0)