# ci: convert daily e2e test to weekly (#643)
---
name: Check for Broken Links
on:
  pull_request:
  push:
    branches:
      - main
      - master
      - develop
jobs:
  build_and_check:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v2
      - name: Install Node.js
        uses: actions/setup-node@v2
        with:
          node-version: 16
      - name: Install dependencies
        run: yarn install
      - name: Build website
        run: yarn build
      - name: Setup Simple HTTP Server
        # Background the dev server with nohup so the step completes
        # while the server keeps serving the built site.
        run: |
          nohup yarn start &
      - name: Check HTTP Server status
        run: sleep 5 && curl -I http://127.0.0.1:8080
      - name: Install Broken Link Checker
        run: npm install -g broken-link-checker
      - name: Execute Link Checker and Show Broken Links
        env:
          TEMPORARY_WEBSITE_URL: 'http://127.0.0.1:8080'
          ACTUAL_WEBSITE_URL: 'https://ddmal.ca/Neon/'
        run: |
          echo "Running broken link check with rate limiting..."
          # Run blc with CLI options to avoid rate limiting
          # --filter-level 3: Check all link types including metadata
          # --ordered: Check links sequentially (helps avoid rate limiting)
          # --get: Use GET requests instead of HEAD (more compatible)
          # --user-agent: Use realistic browser user agent
          # --host-requests 1: Limit to 1 concurrent request per host (key for avoiding 429)
          set +e # Don't exit on blc failure, we'll handle it
          blc "$TEMPORARY_WEBSITE_URL" \
            --filter-level 3 \
            --ordered \
            --get \
            --user-agent "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" \
            --host-requests 1 \
            --recursive \
            --verbose \
            > /tmp/blc_output.txt 2>&1
          blc_exit_code=$?
          set -e
          # Display the output
          cat /tmp/blc_output.txt
          # Get all broken links
          all_broken_links=$(grep -E "├─BROKEN─" /tmp/blc_output.txt || true)
          echo ""
          echo "=== Broken Links Found by blc ==="
          if [ -n "$all_broken_links" ]; then
            echo "$all_broken_links"
          else
            echo "None"
          fi
          # Re-check a URL reported broken by blc; returns 0 on
          # success/rate-limit, 1 on a verified failure (and records
          # the status in failed_http_code for the caller).
          verify_with_curl() {
            local url="$1"
            # BUGFIX: curl_stderr was previously read but never assigned,
            # so 2>"$curl_stderr" redirected to an empty filename and the
            # function aborted. Allocate a scratch file for curl's stderr.
            local curl_stderr
            curl_stderr=$(mktemp)
            echo " 🔄 Verifying: $url"
            # BUGFIX: the old `|| echo "000"` appended a second line to
            # curl's own "000" -w output on failure, yielding "000\n000"
            # and breaking the string comparisons below. curl -w already
            # prints 000 when the transfer fails; just guard an empty capture.
            http_code=$(curl -L -s -o /dev/null -w "%{http_code}" \
              -H "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" \
              --connect-timeout 30 --max-time 60 --insecure \
              --retry 3 --retry-delay 5 --retry-all-errors \
              "$url" 2>"$curl_stderr" || true)
            [ -n "$http_code" ] || http_code="000"
            # If 000, show curl error details
            if [ "$http_code" = "000" ]; then
              echo " ⚠️ Curl failed to connect. Error details:"
              cat "$curl_stderr" | head -5
            fi
            rm -f "$curl_stderr"
            if echo "$http_code" | grep -E "^(200|301|302|303)$" > /dev/null; then
              echo " ✅ Success: HTTP $http_code"
              return 0
            elif [ "$http_code" = "429" ]; then
              echo " ⚠️ HTTP 429 (rate limited)"
              rate_limited_urls="${rate_limited_urls}${url}\n"
              return 0
            else
              echo " ❌ Failed: HTTP $http_code"
              failed_http_code="$http_code"
              return 1
            fi
          }
          # Verify all broken links with curl
          verified_failures=""
          rate_limited_urls=""
          if [ -n "$all_broken_links" ]; then
            echo ""
            echo "=== Verifying Links with curl ==="
            # Extract URLs and verify them
            urls_to_verify=$(echo "$all_broken_links" | sed -n 's/.*├─BROKEN─ \(https\?:\/\/[^[:space:]]*\).*/\1/p')
            # Here-string keeps the loop in the current shell so the
            # accumulator variables survive each iteration.
            while IFS= read -r url; do
              [ -z "$url" ] && continue
              if ! verify_with_curl "$url"; then
                verified_failures="${verified_failures}${url} (HTTP ${failed_http_code})\n"
              fi
            done <<< "$urls_to_verify"
          fi
          # Final results: fail only on curl-confirmed breakage; 429s are
          # reported but treated as rate limiting, not broken links.
          echo ""
          if [ -n "$verified_failures" ]; then
            echo "❌ CI Failed: The following links failed:"
            echo -e "$verified_failures"
            exit 1
          else
            if [ -n "$rate_limited_urls" ]; then
              echo "⚠️ Note: These links returned HTTP 429 (rate limited, not broken):"
              echo -e "$rate_limited_urls"
            fi
            echo "✅ CI Passed: All links verified successfully"
            exit 0
          fi