From e348bf9c50e2c84ff4d26d5f343585f091d97da0 Mon Sep 17 00:00:00 2001
From: Daniele Nicolodi
Date: Sat, 11 Jan 2025 17:13:14 +0100
Subject: [PATCH] docs: Add a test to validate URLs in markdown/Users.md

Avoid piling up dead URLs.
---
 .github/workflows/website.yml |  2 +-
 docs/meson.build              |  4 +++-
 docs/validatelinks.py         | 42 +++++++++++++++++++++++++++++++++++
 3 files changed, 46 insertions(+), 2 deletions(-)
 create mode 100644 docs/validatelinks.py

diff --git a/.github/workflows/website.yml b/.github/workflows/website.yml
index fdb7d1400919..13c690207a91 100644
--- a/.github/workflows/website.yml
+++ b/.github/workflows/website.yml
@@ -45,7 +45,7 @@ jobs:
       - name: Install package
         run: |
           sudo apt-get -y install python3-pip ninja-build libjson-glib-dev
-          pip install hotdoc chevron strictyaml
+          pip install hotdoc chevron strictyaml aiohttp
 
       - uses: actions/cache/save@v4
         with:
diff --git a/docs/meson.build b/docs/meson.build
index 3ad12b7fd71f..0ce884062e96 100644
--- a/docs/meson.build
+++ b/docs/meson.build
@@ -1,7 +1,7 @@
 project('Meson documentation', version: '1.0')
 
 yaml_modname = get_option('unsafe_yaml') ? 'yaml' : 'strictyaml'
-py = import('python').find_installation('python3', modules: [yaml_modname], required: false)
+py = import('python').find_installation('python3', modules: [yaml_modname, 'aiohttp'], required: false)
 if not py.found()
   error(f'Cannot build documentation without yaml support')
 endif
@@ -145,3 +145,5 @@ run_target('upload',
   ],
   depends: documentation,
 )
+
+test('validate_links', find_program('./validatelinks.py'), args: meson.current_source_dir() / 'markdown' / 'Users.md')
diff --git a/docs/validatelinks.py b/docs/validatelinks.py
new file mode 100644
index 000000000000..69544ab0e98b
--- /dev/null
+++ b/docs/validatelinks.py
@@ -0,0 +1,42 @@
+#!/usr/bin/env python3
+
+# SPDX-License-Identifier: Apache-2.0
+# Copyright 2025 The Meson development team
+
+import sys
+import re
+import aiohttp
+import asyncio
+
+LINK = re.compile(r'\[(?P<name>[A-Za-z0-9 ]+)\]\((?P<url>.*?)\)')
+
+
+async def fetch(session, name, url, timeout):
+    try:
+        async with session.get(url, timeout=timeout) as r:
+            if not r.ok:
+                return (name, url, r.status)
+    except Exception as e:
+        return (name, url, str(e))
+
+
+async def main(filename):
+    with open(filename) as f:
+        text = f.read()
+    timeout = aiohttp.ClientTimeout(total=60)
+    async with aiohttp.ClientSession() as session:
+        tasks = []
+        for link in LINK.finditer(text):
+            name, url = link.groups()
+            task = asyncio.ensure_future(fetch(session, name, url, timeout))
+            tasks.append(task)
+        responses = asyncio.gather(*tasks)
+        errors = [r for r in await responses if r is not None]
+    for name, url, result in errors:
+        print(f'"{name}" {url} {result}')
+    if errors:
+        sys.exit(1)
+
+
+if __name__ == '__main__':
+    asyncio.run(main(sys.argv[1]))