Update links.yml #890
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

# Continuous Integration (CI) GitHub Actions workflow that checks websites for broken links using https://github.com/lycheeverse/lychee
# The following status codes are accepted (not reported as broken links) to reduce false positives:
#   - 401 (Vimeo, 'unauthorized')
#   - 403 (OpenVINO, 'forbidden')
#   - 429 (Instagram, 'too many requests')
#   - 500 (Zenodo, 'cached')
#   - 502 (Zenodo, 'bad gateway')
#   - 999 (LinkedIn, 'unknown status code')
name: Check Website links

# Triggers: manual dispatch, every pull request, and a nightly schedule.
on:
  workflow_dispatch:
  pull_request:
  schedule:
    - cron: "0 0 * * *" # runs at 00:00 UTC every day

# Least-privilege GITHUB_TOKEN: the workflow only reads (the token is passed to
# lychee for GitHub API requests); nothing in it writes to the repository.
permissions:
  contents: read
jobs:
  # One job per website in the matrix. Each job:
  #   1. installs lychee,
  #   2. collects page URLs from the site's sitemap(s),
  #   3. downloads the pages with wget,
  #   4. spell-checks the downloaded pages with codespell,
  #   5. checks the downloaded HTML for broken links with lychee,
  #   6. reports to the job summary and (scheduled runs only) to Slack.
  Links:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false # This ensures that if one job fails, the others will still run
      matrix:
        website:
          - www.ultralytics.com
          - docs.ultralytics.com
          - community.ultralytics.com
          - handbook.ultralytics.com
    steps:
      - name: Download and install lychee
        run: |
          # Resolve the latest Linux x86_64 release tarball through the GitHub API
          LYCHEE_URL=$(curl -fsSL https://api.github.com/repos/lycheeverse/lychee/releases/latest | grep "browser_download_url" | grep "x86_64-unknown-linux-gnu.tar.gz" | cut -d '"' -f 4 | head -n 1)
          # Fail with a clear message when the lookup returned nothing (e.g. API rate limit)
          if [ -z "$LYCHEE_URL" ]; then
            echo "Failed to resolve lychee download URL"
            exit 1
          fi
          curl -fsSL "$LYCHEE_URL" | tar xz -C /usr/local/bin

      - name: Get Website URLs
        run: |
          # Function to parse sitemap URLs: reads sitemap XML on stdin, prints one <loc> value per line
          parse_sitemap() {
            tr '\n' ' ' | sed 's/<loc>/\n<loc>/g' | grep -oP '(?<=<loc>).*?(?=</loc>)' || true
          }

          # Download initial sitemap and process
          echo "Downloading sitemap..."
          SITEMAP=$(wget -qO- "https://${{ matrix.website }}/sitemap.xml") || { echo "Failed to download sitemap"; exit 1; }
          echo "$SITEMAP" | parse_sitemap > urls.txt

          # Process any subsitemaps if they exist (entries whose URL contains 'sitemap')
          if grep -q 'sitemap' urls.txt; then
            echo "Found subsitemaps, processing..."
            grep 'sitemap' urls.txt > subsitemaps.txt
            # '|| true': grep exits non-zero when every entry is a subsitemap
            grep -v 'sitemap' urls.txt > urls.tmp || true
            while read -r submap; do
              echo "Processing submap: $submap"
              # A failed subsitemap is skipped; the remaining ones are still processed
              SUBMAP_CONTENT=$(wget -qO- "$submap") || { echo "Failed to download submap: $submap"; continue; }
              echo "$SUBMAP_CONTENT" | parse_sitemap >> urls.tmp
            done < subsitemaps.txt
            mv urls.tmp urls.txt || true
          fi

          # Count URLs
          total_urls=$(wc -l < urls.txt)
          echo "Total URLs to be downloaded: $total_urls"

      - name: Download Website
        run: |
          # Set higher wait seconds for discourse community to avoid 429 rate limit errors
          if [ "${{ matrix.website }}" = "community.ultralytics.com" ]; then
            WAIT=1
          else
            WAIT=0.001
          fi

          # Download all URLs (images and .txt files are rejected; pages are stored under a per-host directory)
          wget \
            --adjust-extension \
            --reject "*.jpg*,*.jpeg*,*.png*,*.gif*,*.webp*,*.svg*,*.txt" \
            --input-file=urls.txt \
            --no-clobber \
            --no-parent \
            --wait="$WAIT" \
            --random-wait \
            --tries=3 \
            --no-verbose \
            --force-directories

      - name: Run codespell on downloaded pages
        id: codespell
        # continue-on-error: true # Still needed to skip to later steps on errors
        # Outputs:
        #   CODESPELL_FAILED - 'true' when codespell exited non-zero, otherwise 'false'
        #   CODESPELL_ERRORS - the codespell report as a single JSON-escaped line (only set on failure)
        run: |
          pip install codespell

          # Run codespell once and keep its report in a file so that the log, the job
          # summary and the Slack message all show the same findings.
          if codespell \
            --ignore-words-list "crate,nd,ned,strack,dota,ane,segway,fo,gool,winn,commend,bloc,nam,afterall,skelton,goin,referer,pre,uint,dto,linkedin,webp,webgl,href,onclick,github,api,http,png,svg,gif,jpg,jpeg,href,js" \
            --skip "*.pt,*.pth,*.torchscript,*.onnx,*.tflite,*.pb,*.bin,*.param,*.mlmodel,*.engine,*.npy,*.data*,*.csv,*pnnx*,*venv*,*translat*,*lock*,__pycache__*,*.ico,*.jpg,*.png,*.mp4,*.mov,/runs,/.git,./docs/??/*.md,./docs/mkdocs_??.yml" \
            --check-filenames \
            --check-hidden \
            ${{ matrix.website }} > codespell_report.txt 2>&1; then
            echo "No spelling errors found ✅"
            echo "CODESPELL_FAILED=false" >> "$GITHUB_OUTPUT"
          else
            cat codespell_report.txt
            echo "Spelling errors found ⚠️"
            echo "CODESPELL_FAILED=true" >> "$GITHUB_OUTPUT"

            # Add spelling errors to GitHub Summary
            {
              echo "## 📝 Spelling Errors"
              echo '```'
              cat codespell_report.txt
              echo '```'
            } >> "$GITHUB_STEP_SUMMARY"

            # Prepare a summary for Slack: escape backslashes and double quotes, then join
            # the lines with a literal \n so the value is valid inside a JSON string.
            CODESPELL_SUMMARY=$(sed 's/\\/\\\\/g; s/"/\\"/g' codespell_report.txt | awk '{printf "%s\\n", $0}')
            {
              echo "CODESPELL_ERRORS<<EOF"
              echo "$CODESPELL_SUMMARY"
              echo "EOF"
            } >> "$GITHUB_OUTPUT"
          fi

      - name: Run Broken Link Checks on Website
        id: lychee
        uses: ultralytics/actions/retry@main
        with:
          timeout_minutes: 60
          retry_delay_seconds: 900
          retries: 3
          run: |
            # Count successfully downloaded files
            downloaded_files=$(find ${{ matrix.website }} -type f | wc -l)
            echo "Scanning $downloaded_files downloaded pages for broken links..."

            # Create summary.txt with the total page count
            echo "*Results for $downloaded_files pages in https://${{ matrix.website }}*" > summary.txt
            echo "" >> summary.txt

            rm -rf .lycheecache
            lychee \
              --scheme 'https' \
              --timeout 60 \
              --insecure \
              --accept 401,403,429,500,502,999 \
              --exclude-all-private \
              --exclude 'https?://(www\.)?(linkedin\.com|twitter\.com|instagram\.com|kaggle\.com|tiktok\.com|fonts\.gstatic\.com|fonts\.googleapis\.com|url\.com|tesla\.com|wellfound\.com|.*\.cloudfunctions\.net|0\.0\.0\.0:5543/predict/from_files)' \
              --exclude-path '**/ci.yaml' \
              --github-token ${{ secrets.GITHUB_TOKEN }} \
              --header "User-Agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.6478.183 Safari/537.36" \
              --header "Accept=*/*" \
              --header "Accept-Language=*" \
              --header "Accept-Encoding=*" \
              './${{ matrix.website }}/**/*.html' | tee -a summary.txt

            # Add the summary to GitHub step summary
            cat summary.txt >> "$GITHUB_STEP_SUMMARY"

            # Prepare the summary for Slack (escape newlines, remove [], remove .html, and escape special characters)
            # Ignore lines starting with [TIMEOUT] on the next line or keep them in the following line
            # ESCAPED_SUMMARY=$(awk '!/^\[TIMEOUT\]/ {printf "%s\\n", $0}' summary.txt | sed 's/\[//g; s/\]//g; s/\.html//g; s/"/\\"/g')
            ESCAPED_SUMMARY=$(awk '{printf "%s\\n", $0}' summary.txt | sed 's/\[//g; s/\]//g; s/\.html//g; s/"/\\"/g')
            echo "SUMMARY<<EOF" >> "$GITHUB_ENV"
            echo "$ESCAPED_SUMMARY" >> "$GITHUB_ENV"
            echo "EOF" >> "$GITHUB_ENV"

            # Check if lychee found any broken links.
            # The count must be exactly 0: a plain "0 Errors" match would also accept "10 Errors", "20 Errors", ...
            if grep -qE '(^|[^0-9])0 Errors' summary.txt; then
              echo "No broken links found."
              exit 0
            else
              echo "Broken links found."
              exit 1
            fi

      - name: Add spelling errors to GitHub Summary
        if: always() && steps.codespell.outputs.CODESPELL_FAILED == 'true'
        env:
          # Passed through the environment instead of being interpolated into the script text
          CODESPELL_ERRORS: ${{ steps.codespell.outputs.CODESPELL_ERRORS }}
        run: |
          # %b expands any \n escape sequences contained in the step output
          printf '%b\n' "$CODESPELL_ERRORS" >> "$GITHUB_STEP_SUMMARY"

      - name: Notify Slack for broken links
        if: always() && steps.lychee.outcome == 'failure' && github.event_name == 'schedule' && github.run_attempt == '1'
        # NOTE(review): the version tag was unreadable in the reviewed copy of this file; v2.0.0 is
        # assumed because the `webhook-type` input below is v2 syntax - confirm the intended pin.
        uses: slackapi/slack-github-action@v2.0.0
        with:
          webhook-type: incoming-webhook
          webhook: ${{ matrix.website == 'www.ultralytics.com' && secrets.SLACK_WEBHOOK_URL_WEBSITE || secrets.SLACK_WEBHOOK_URL_YOLO }}
          payload: |
            text: "GitHub Actions: Errors found in ${{ github.workflow }} for ${{ matrix.website }} ❌\n\n\n*Repository:* https://github.com/${{ github.repository }}\n*Action:* https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}\n*Author:* ${{ github.actor }}\n*Event:* ${{ github.event_name }}\n\n\n${{ env.SUMMARY }}\n"

      - name: Notify Slack for spelling errors
        if: always() && steps.codespell.outputs.CODESPELL_FAILED == 'true' && github.event_name == 'schedule' && github.run_attempt == '1'
        # NOTE(review): the version tag was unreadable in the reviewed copy of this file; v2.0.0 is
        # assumed because the `webhook-type` input below is v2 syntax - confirm the intended pin.
        uses: slackapi/slack-github-action@v2.0.0
        with:
          webhook-type: incoming-webhook
          webhook: ${{ matrix.website == 'www.ultralytics.com' && secrets.SLACK_WEBHOOK_URL_WEBSITE || secrets.SLACK_WEBHOOK_URL_YOLO }}
          payload: |
            {
              "text": "GitHub Actions: Spelling errors found in ${{ github.workflow }} for ${{ matrix.website }} ❌\n\n\n*Repository:* https://github.com/${{ github.repository }}\n*Action:* https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}\n*Author:* ${{ github.actor }}\n*Event:* ${{ github.event_name }}\n\n\n*Misspelled words:*\n${{ steps.codespell.outputs.CODESPELL_ERRORS }}\n"
            }