#!/usr/bin/env python
#
# Checks that all links in the readme markdown files are valid
#
# SPDX-FileCopyrightText: 2020-2025 Espressif Systems (Shanghai) CO LTD
# SPDX-License-Identifier: Apache-2.0
#
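# Usage (assuming the IDF_PATH environment variable points to the ESP-IDF checkout root):
#   python check_readme_links.py [--skip-weburl]
#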

import argparse
import concurrent.futures
import os
import re
import sys
import urllib.error
import urllib.request
from collections import defaultdict
from collections import namedtuple
from pathlib import Path
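
# A parsed markdown link: the README file it was found in and the URL it points to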
Link = namedtuple('Link', ['file', 'url'])


class ReadmeLinkError(Exception):
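    """Base class for broken-link errors; stores the offending file and URL."""
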
    def __init__(self, file, url: str) -> None:
        # `file` may be a single path or, for URL errors, a list of paths
        self.file = file
        self.url = url


class RelativeLinkError(ReadmeLinkError):
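    """Raised when a relative link points to a file that does not exist."""
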
    def __str__(self) -> str:
        return f'Relative link error, file - {self.url} not found, linked from {self.file}'


class UrlLinkError(ReadmeLinkError):
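    """Raised when a web URL returns HTTP 404."""
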
    def __init__(self, file: list, url: str, error_code: str) -> None:
        self.error_code = error_code
        super().__init__(file, url)

    def __str__(self) -> str:
        files = [str(f) for f in self.file]
        files_str = ', '.join(files)
        return (
            f'URL error, url - {self.url} in files - {files_str} is not accessible, request returned {self.error_code}'
        )


# We do not want a failed test just due to bad network conditions; for non-404 errors we simply print a warning.
def check_url(url: str, files: list, timeout: float) -> None:
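    """Open the URL; raise UrlLinkError on HTTP 404 and only warn on other failures."""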
    try:
        with urllib.request.urlopen(url, timeout=timeout):
            return
    except urllib.error.HTTPError as e:
        if e.code == 404:
            raise UrlLinkError(files, url, str(e))
        else:
            print(f'Unable to access {url}, err = {str(e)}')
    except Exception as e:
        print(f'Unable to access {url}, err = {str(e)}')


def check_web_links(web_links: defaultdict) -> list:
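    """Check all web URLs in parallel and return the UrlLinkErrors encountered."""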
    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
        errors = []
        future_to_url = {
            executor.submit(check_url, url, files, timeout=30): (url, files) for url, files in web_links.items()
        }
        for future in concurrent.futures.as_completed(future_to_url):
            try:
                future.result()
            except UrlLinkError as e:
                errors.append(e)

        return errors


def check_file_links(file_links: list) -> list:
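    """Check that every relative link resolves to an existing file on disk."""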
    errors = []

    for link in file_links:
        link_path = link.file.parent / link.url

        if not link_path.exists():
            errors.append(RelativeLinkError(link.file, link.url))

    print(f'Found {len(errors)} errors with relative links')
    return errors


def get_md_links(folder: str) -> list:
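    """Collect all markdown links from *.md files under IDF_PATH/<folder>."""
    # Group 1 captures the link target; the optional second group strips a trailing '#anchor' from it.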
    MD_LINK_RE = r'\[.+?\]\((.+?)(#.+)?\)'

    idf_path_str = os.getenv('IDF_PATH')
    if idf_path_str is None:
        raise RuntimeError("Environment variable 'IDF_PATH' wasn't set.")
    idf_path = Path(idf_path_str)
    links = []

    for path in (idf_path / folder).rglob('*.md'):
        with path.open(encoding='utf8') as f:
            content = f.read()

        for url in re.findall(MD_LINK_RE, content):
            link = Link(path, url[0].lstrip())
            # Ignore "local" links that only point to an anchor in the same file
            if not link.url.startswith('#'):
                links.append(link)

    return links


def check_readme_links(args: argparse.Namespace) -> int:
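    """Validate all links in the example READMEs and return a process exit code."""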
    links = get_md_links('examples')
    print(f'Found {len(links)} links')

    errors = []

    web_links = defaultdict(list)
    file_links = []

    # Sort links into file and web links; web links are grouped by URL so each URL is checked only once
    for link in links:
        if link.url.startswith('http'):
            web_links[link.url].append(link.file)
        else:
            file_links.append(link)

    errors.extend(check_file_links(file_links))

    if not args.skip_weburl:
        errors.extend(check_web_links(web_links))

    print(f'Found {len(errors)} errors:')
    for e in errors:
        print(e)

    return 1 if len(errors) > 0 else 0


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='check_readme_links.py: Checks for dead links in example READMEs', prog='check_readme_links.py'
    )
    parser.add_argument(
        '--skip-weburl', '-w', action='store_true', help='Skip checking of web URLs, only check links to local files'
    )
    args = parser.parse_args()

    sys.exit(check_readme_links(args))