mirror of
https://github.com/wolfSSL/wolfssl.git
synced 2026-07-05 13:40:49 +02:00
6d1d750ad3
- Reject the config names "aux" and "test": build-aux/ is autotools' aux-script dir and build-test/ a legacy build dir, neither the script's to wipe and rebuild over. - Add type hints throughout. - Reword the shard-partition comment (the LPT bound was unparseable) and replace the zip-over-pool.map result pairing with a run_one() helper so the pool returns complete result rows.
505 lines
22 KiB
Python
Executable File
505 lines
22 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
# Build and "make check" a set of configurations, each in its own out-of-tree
|
|
# (VPATH) build directory, on a pool of worker threads (default: one per
|
|
# CPU); each thread takes the next pending config as soon as it is free.
|
|
# The final summary reports how efficiently the pool used the machine
|
|
# (thread occupancy and CPU utilization).
|
|
#
|
|
# The configurations come from a JSON file ("-" for stdin): a list of
|
|
# objects, one per configuration. Recognized keys, all optional except
|
|
# "name" (unknown keys are an error, so typos do not pass silently):
|
|
#
|
|
# name unique identifier; the config builds in build-<name>/
|
|
# ("aux" and "test" are reserved: build-aux/, build-test/)
|
|
# configure list of extra ./configure arguments
|
|
# cc compiler passed to configure as CC=, overriding --cc
|
|
# ("" leaves CC entirely to configure / the environment)
|
|
# cflags CFLAGS for make, overriding --cflags
|
|
# ldflags LDFLAGS for make, overriding --ldflags
|
|
# minutes expected duration, from the Minutes column of a previous
|
|
# run's summary (default 1.0). Schedule weight only - configs
|
|
# run longest-first and --shard balances shards by it; a stale
|
|
# value just packs the schedule a little worse.
|
|
# user_settings header staged as <builddir>/user_settings.h before
|
|
# configure (path relative to the source root); pair it with
|
|
# --enable-usersettings in "configure"
|
|
# check false skips the make-check phase entirely (default true)
|
|
# prepare list of argv lists run in the build dir before configure
|
|
# run list of argv lists run in the build dir after the build and
|
|
# checks, e.g. [["wolfcrypt/test/testwolfcrypt"]]
|
|
# comment ignored; JSON has no comment syntax, so notes go here
|
|
#
|
|
# For example:
|
|
#
|
|
# [
|
|
# {"name": "default"},
|
|
# {"name": "all-asan", "configure": ["--enable-all"],
|
|
# "cflags": "-fsanitize=address", "ldflags": "-fsanitize=address"}
|
|
# ]
|
|
#
|
|
# Driven by CI workflows, which keep their config lists next to the
|
|
# invocation (see .github/workflows/smoke-test.yml), but also runnable
|
|
# locally - copy the JSON block out of the workflow into a file:
|
|
#
|
|
# .github/scripts/parallel-make-check.py configs.json # all configs
|
|
# .github/scripts/parallel-make-check.py configs.json default all-asan
|
|
# .github/scripts/parallel-make-check.py --list configs.json
|
|
#
|
|
# Concurrent "make check" runs are safe because the test scripts re-exec
|
|
# themselves under "bwrap --unshare-net" when bubblewrap is installed (one
|
|
# network namespace each) and the remaining test outputs land in the build
|
|
# directory; see --private-dir for the exception.
|
|
#
|
|
# The first failing config aborts the others (pending configs are skipped,
|
|
# in-flight ones get SIGTERM, then SIGKILL after a 10 s grace period) so CI
|
|
# fails fast; pass --no-fail-fast to run everything and report every
|
|
# failure.
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import json
|
|
import os
|
|
import shutil
|
|
import signal
|
|
import subprocess
|
|
import sys
|
|
import threading
|
|
import time
|
|
from collections.abc import Callable
|
|
from concurrent.futures import ThreadPoolExecutor
|
|
from dataclasses import dataclass, field
|
|
from pathlib import Path
|
|
from typing import NoReturn
|
|
|
|
# cflags/ldflags are applied at make time only (never to ./configure) so
|
|
# autoconf feature detection is not poisoned by benign warnings in
|
|
# conftest probes. They are omitted entirely when empty so a plain config
|
|
# keeps the configure-chosen defaults.
|
|
@dataclass
class Config:
    """One build configuration, as loaded from an entry of the JSON list."""

    # Unique identifier; the config builds in build-<name>/.
    name: str
    # Extra ./configure arguments.
    configure: list[str] = field(default_factory=list)
    # Compiler passed to configure as CC=; "" adds no CC= argument at all.
    cc: str = ""
    # CFLAGS / LDFLAGS passed on the make command line; omitted when empty.
    cflags: str = ""
    ldflags: str = ""
    # Expected duration in minutes; only a scheduling / sharding weight.
    minutes: float = 1.0
    # Header staged as <builddir>/user_settings.h before configure (path
    # relative to the source root); "" stages nothing.
    user_settings: str = ""
    # False skips the make-check phase.
    check: bool = True
    # argv lists run in the build dir before configure.
    prepare: list[list[str]] = field(default_factory=list)
    # argv lists run in the build dir after the build and checks.
    run: list[list[str]] = field(default_factory=list)
|
|
|
|
# Source root: two directory levels above the directory holding this script.
SRCDIR = Path(__file__).resolve().parents[2]
# True under GitHub Actions; selects the ::group:: / ::error:: output forms.
ON_GITHUB = os.environ.get("GITHUB_ACTIONS") == "true"
# Held while a worker prints its config's result, so outputs do not interleave.
print_lock = threading.Lock()

# Fail-fast state: the first failure sets stop_event (under fail_lock, so
# exactly one config is reported as the origin) and kills the other
# workers' in-flight process groups.
stop_event = threading.Event()
fail_lock = threading.Lock()
# Subprocesses currently being waited on by the workers; guarded by
# procs_lock.
live_procs: set[subprocess.Popen] = set()
procs_lock = threading.Lock()
|
|
|
|
|
|
def kill_group(p: subprocess.Popen, sig: signal.Signals) -> None:
    """Send sig to p's whole process group, falling back to p alone.

    Every subprocess is started in its own session, so its pid is also its
    process-group id and signalling the group takes down the make/test tree
    below it.  A process (group) that is already gone is not an error.
    """
    try:
        os.killpg(p.pid, sig)
        return
    except (ProcessLookupError, PermissionError):
        pass
    # The group could not be signalled: try the direct child on its own.
    try:
        p.send_signal(sig)
    except ProcessLookupError:
        pass
|
|
|
|
|
|
def abort_others() -> None:
    """Terminate every in-flight subprocess: SIGTERM, then SIGKILL.

    Works on a snapshot of live_procs taken under procs_lock.  Returns as
    soon as all of them have exited; whatever is still running 10 s after
    the SIGTERM gets SIGKILL, so fail-fast cannot hang behind a test that
    traps or ignores SIGTERM.
    """
    with procs_lock:
        victims = tuple(live_procs)
    for proc in victims:
        kill_group(proc, signal.SIGTERM)
    give_up_at = time.monotonic() + 10
    while True:
        survivors = [proc for proc in victims if proc.poll() is None]
        if not survivors:
            return
        if time.monotonic() > give_up_at:
            break
        time.sleep(0.2)
    # Grace period over: re-check each survivor right before killing it.
    for proc in survivors:
        if proc.poll() is None:
            kill_group(proc, signal.SIGKILL)
|
|
|
|
|
|
def nproc() -> int:
    """Return the CPU count like nproc(1).

    That is the number of CPUs this process may run on where the platform
    exposes os.sched_getaffinity, else the number of online CPUs (at least 1).
    """
    affinity = getattr(os, "sched_getaffinity", None)
    if affinity is None:
        return os.cpu_count() or 1
    return len(affinity(0))
|
|
|
|
|
|
def load_configs(opts: argparse.Namespace,
                 error: Callable[[str], NoReturn]) -> list[Config]:
    """Parse and validate the JSON config list named by opts.json.

    opts supplies json (a path, or "-" for stdin) and the cc / cflags /
    ldflags defaults for configs that do not set their own.  error is called
    with a message for the first problem found and must not return (main()
    passes argparse's parser.error).

    Returns the configs in file order; an empty list is an error.
    """
    try:
        if opts.json == "-":
            entries = json.load(sys.stdin)
        else:
            entries = json.loads(Path(opts.json).read_text())
    except (OSError, ValueError) as e:
        error(f"{opts.json}: {e}")
    if not isinstance(entries, list):
        error(f"{opts.json}: expected a JSON list of config objects")
    known = {"name", "configure", "cc", "cflags", "ldflags", "minutes",
             "user_settings", "check", "prepare", "run", "comment"}
    configs: list[Config] = []
    seen: set[str] = set()
    for entry in entries:
        if not isinstance(entry, dict):
            error(f"{opts.json}: config entries must be objects: {entry!r}")
        unknown = set(entry) - known
        if unknown:
            error(f"{opts.json}: unknown key(s) in {entry.get('name', entry)!r}: "
                  f"{' '.join(sorted(unknown))}")
        name = entry.get("name")
        if not isinstance(name, str) or not name or "/" in name:
            error(f"{opts.json}: every config needs a \"name\" usable as a "
                  f"directory suffix: {entry!r}")
        # build-<name> dirs that are not ours to wipe: build-aux/ is
        # autotools' aux-script dir (autogen.sh), build-test/ a legacy
        # build dir (.gitignore).
        if name in ("aux", "test"):
            error(f"{opts.json}: reserved config name {name!r}: build-{name}/ "
                  f"belongs to other tooling")
        if name in seen:
            error(f"{opts.json}: duplicate config name {name!r}")
        seen.add(name)
        configure = entry.get("configure", [])
        if not (isinstance(configure, list)
                and all(isinstance(a, str) for a in configure)):
            error(f"{opts.json}: \"configure\" must be a list of argument "
                  f"strings in {name!r}")
        for key in ("cflags", "ldflags"):
            if not isinstance(entry.get(key, ""), str):
                error(f"{opts.json}: \"{key}\" must be a string in {name!r}")
        minutes = entry.get("minutes", 1.0)
        # Python's json accepts the NaN / Infinity literals and integers of
        # any size.  NaN compares false against everything, so the range
        # test is written to reject it; the upper bound rejects Infinity
        # and integers that float() could not represent.  None of these is
        # a usable schedule weight (NaN would scramble the longest-first
        # sort and the shard balancing).
        if isinstance(minutes, bool) or not isinstance(minutes, (int, float)) \
                or not 0 <= minutes <= sys.float_info.max:
            error(f"{opts.json}: \"minutes\" must be a finite non-negative "
                  f"number in {name!r}")
        user_settings = entry.get("user_settings", "")
        if not isinstance(user_settings, str):
            error(f"{opts.json}: \"user_settings\" must be a path string "
                  f"in {name!r}")
        check = entry.get("check", True)
        if not isinstance(check, bool):
            error(f"{opts.json}: \"check\" must be a boolean in {name!r}")
        # A config's own "cc" (even "") wins over --cc.
        cc = entry.get("cc", opts.cc or "")
        if not isinstance(cc, str):
            error(f"{opts.json}: \"cc\" must be a string in {name!r}")
        for key in ("prepare", "run"):
            cmds = entry.get(key, [])
            if not (isinstance(cmds, list)
                    and all(isinstance(cmd, list) and cmd
                            and all(isinstance(a, str) for a in cmd)
                            for cmd in cmds)):
                error(f"{opts.json}: \"{key}\" must be a list of argv lists "
                      f"in {name!r}")
        configs.append(Config(name, list(configure), cc,
                              entry.get("cflags", opts.cflags),
                              entry.get("ldflags", opts.ldflags),
                              float(minutes), user_settings, check,
                              list(entry.get("prepare", [])),
                              list(entry.get("run", []))))
    if not configs:
        error(f"{opts.json}: no configs")
    return configs
|
|
|
|
|
|
def privatize_dirs(bdir: Path, dirs: list[str]) -> None:
    """Turn build-tree symlinks into private copies of their source dirs.

    For each name in dirs, if bdir/name is a symlink it is removed and
    replaced by a copy of SRCDIR/name (symlinks inside are kept as
    symlinks); anything that is not a symlink is left alone.  Tests that
    write into these directories would otherwise write through the symlink
    into the shared source tree and race with the other parallel checks.
    Meant to run after the build steps, so that build rules which
    (re)create the symlinks have already run.
    """
    for rel in dirs:
        link = bdir / rel
        if not link.is_symlink():
            continue
        link.unlink()
        shutil.copytree(SRCDIR / rel, link, symlinks=True)
|
|
|
|
|
|
def dump(title: str, path: Path) -> None:
    """Print the file at path to stdout under a title.

    On GitHub Actions the contents are wrapped in a ::group:: / ::endgroup::
    pair; elsewhere a "==== title ====" banner precedes them.  Undecodable
    bytes are replaced, and a file that cannot be read is reported by
    printing the error in place of the contents.
    """
    banner = f"::group::{title}" if ON_GITHUB else f"==== {title} ===="
    print(banner)
    try:
        text = path.read_text(errors="replace")
        sys.stdout.write(text)
    except OSError as err:
        print(err)
    if ON_GITHUB:
        print("::endgroup::")
    sys.stdout.flush()
|
|
|
|
|
|
def run_config(cfg: Config, opts: argparse.Namespace) -> tuple[str | None,
                                                               float]:
    """Build and check one config in a fresh build-<name>/ directory.

    Runs, in order: staging of user_settings.h, the "prepare" commands,
    configure, make, (unless cfg.check is false) a prebuild of the check
    programs, the --private-dir copies and "make check", and finally the
    "run" commands.  Subprocess output goes to build-<name>/make-check.log.
    The result line (or, on failure, the log) is printed under print_lock.

    Returns (failed, minutes): failed is None on success, the label of the
    failing step, or "aborted" when fail-fast stopped this config; minutes
    is the wall time spent (0.0 if the config never started).
    """
    if opts.fail_fast and stop_event.is_set():
        return "aborted", 0.0
    bdir = SRCDIR / f"build-{cfg.name}"
    if bdir.exists():
        shutil.rmtree(bdir)
    bdir.mkdir()
    configure = [str(SRCDIR / "configure")] + cfg.configure
    if cfg.cc:
        configure.append(f"CC={cfg.cc}")
    flags = [f"CFLAGS={cfg.cflags}"] if cfg.cflags else []
    flags += [f"LDFLAGS={cfg.ldflags}"] if cfg.ldflags else []
    # No -j here: wolfSSL's configure enables make's jobserver by default
    # (AX_AM_JOBSERVER adds AM_MAKEFLAGS += -j<nproc+1>), and that explicit
    # -j on every automake sub-make overrides whatever the top-level make
    # was given, so a -j here would only schedule the outermost recursion
    # hop.  Measured across this pool, the jobserver default also utilizes
    # the CPUs better than a capped -j (configs' serial phases - configure,
    # link - get backfilled by other configs' compile jobs).
    make = ["make"] + flags
    # Each step is (label, argv list to spawn | callable to invoke).
    steps: list[tuple[str, list[str] | Callable[[], object]]] = []
    if cfg.user_settings:
        # Staged before configure; --enable-usersettings builds pick it up
        # from the build dir via the default include path.
        steps.append((f"stage {cfg.user_settings}",
                      lambda: shutil.copy(SRCDIR / cfg.user_settings,
                                          bdir / "user_settings.h")))
    steps += [(" ".join(cmd), cmd) for cmd in cfg.prepare]
    steps += [("configure", configure), ("make", make)]
    if cfg.check:
        steps += [
            # Prebuild the check programs without running any tests so
            # "make check" below is pure test execution.
            ("make check TESTS=", make + ["check", "TESTS="]),
            ("private dirs", lambda: privatize_dirs(bdir, opts.private_dir)),
            ("make check", make + ["check"]),
        ]
    steps += [(" ".join(cmd), cmd) for cmd in cfg.run]
    failed: str | None = None
    start = time.monotonic()
    log = bdir / "make-check.log"

    def record_failure(step: str) -> str:
        # Classify a failed step, doing the fail-fast bookkeeping: the
        # first failure wins and aborts everyone else; any failure after
        # the abort began is reported as aborted instead.
        if not opts.fail_fast:
            return step
        with fail_lock:
            label = "aborted" if stop_event.is_set() else step
            stop_event.set()
        if label != "aborted":
            abort_others()
        return label

    with open(log, "w") as logf:
        for step, cmd in steps:
            if opts.fail_fast and stop_event.is_set():
                failed = "aborted"
                break
            if callable(cmd):
                try:
                    cmd()
                except Exception as e:  # one config's bug, not the run's
                    print(f"+ {step}: {e!r}", file=logf, flush=True)
                    failed = record_failure(step)
                    break
                continue
            print(f"+ {' '.join(cmd)}", file=logf, flush=True)
            # stdin=DEVNULL so a test that reads stdin sees EOF (as in CI)
            # instead of blocking forever on an interactive/socket stdin.
            try:
                proc = subprocess.Popen(cmd, cwd=bdir, stdout=logf,
                                        stderr=subprocess.STDOUT,
                                        stdin=subprocess.DEVNULL,
                                        start_new_session=True)
            except OSError as e:
                # The program could not be started at all (e.g. a
                # "prepare"/"run" command that is missing or not
                # executable).  Like a failing callable step, that is this
                # config's failure; letting it escape would take down the
                # whole run with a traceback.
                print(f"{step}: {e!r}", file=logf, flush=True)
                failed = record_failure(step)
                break
            with procs_lock:
                live_procs.add(proc)
            if opts.fail_fast and stop_event.is_set():
                # Close the race with abort_others(): if its sweep ran
                # between our stop_event check above and the registration
                # just now, this process escaped the sweep - kill it
                # ourselves (the wait() below then reaps it), escalating
                # like the sweep does if SIGTERM is ignored.
                kill_group(proc, signal.SIGTERM)
                try:
                    proc.wait(timeout=10)
                except subprocess.TimeoutExpired:
                    kill_group(proc, signal.SIGKILL)
            try:
                rc = proc.wait()
            finally:
                with procs_lock:
                    live_procs.discard(proc)
            if rc != 0:
                failed = record_failure(step)
                break
    minutes = (time.monotonic() - start) / 60
    with print_lock:
        if failed == "aborted":
            print(f"{cfg.name}: aborted (fail-fast) [{minutes:.1f} min]")
            sys.stdout.flush()
        elif not failed:
            # One line per passing config; the full logs would bloat the CI
            # log (they stay in build-<name>/make-check.log).
            print(f"{cfg.name}: pass [{minutes:.1f} min]")
            sys.stdout.flush()
        else:
            dump(f"{cfg.name}: FAIL ({failed}) [{minutes:.1f} min]", log)
            if failed == "configure":
                dump(f"{cfg.name}: config.log", bdir / "config.log")
            elif failed == "make check":
                dump(f"{cfg.name}: test-suite.log", bdir / "test-suite.log")
    return failed, minutes
|
|
|
|
|
|
def summarize(results: list[tuple[Config, str | None, float]],
              wall_min: float, cpu_min: float, nthreads: int) -> None:
    """Print the per-config result table and the pool-efficiency line.

    results holds one (config, failure label or None, minutes) row per
    config; wall_min is the elapsed wall time of the whole run, cpu_min the
    CPU time consumed by the child processes, nthreads the pool size.  The
    same Markdown is appended to the file named by $GITHUB_STEP_SUMMARY
    when that variable is set.
    """

    def verdict(failed: str | None) -> str:
        # Table cell for one config's outcome.
        if failed == "aborted":
            return ":heavy_minus_sign: aborted (fail-fast)"
        if failed:
            return f":x: FAIL ({failed})"
        return ":white_check_mark: pass"

    rows = ["| Config | Result | Minutes |", "|---|---|---|"]
    rows.extend(f"| {cfg.name} | {verdict(failed)} | {minutes:.1f} |"
                for cfg, failed, minutes in results)
    # Two views of how efficiently the pool used the machine: thread
    # occupancy is the time the workers spent running configs out of the
    # thread-minutes available (a long config left for last idles the other
    # workers and drags it down); CPU utilization is the CPU time the build
    # and test children actually consumed out of the CPU-minutes available
    # (serial configure/link/test phases show up here).
    busy_min = sum(minutes for _, _, minutes in results)
    ncpu = nproc()
    thread_min = wall_min * nthreads
    cpu_avail_min = wall_min * ncpu
    efficiency = (
        f"{len(results)} configs in {wall_min:.1f} min on {nthreads} "
        f"threads / {ncpu} CPUs: "
        f"thread occupancy {100 * busy_min / thread_min:.0f}% "
        f"({busy_min:.1f} of {thread_min:.1f} thread-min), "
        f"CPU utilization {100 * cpu_min / cpu_avail_min:.0f}% "
        f"({cpu_min:.1f} of {cpu_avail_min:.1f} CPU-min)"
    )
    table = "\n".join(rows + ["", efficiency])
    print(table)
    summary = os.environ.get("GITHUB_STEP_SUMMARY")
    if not summary:
        return
    with open(summary, "a") as f:
        print(f"### make check\n\n{table}", file=f)
|
|
|
|
|
|
def main() -> int:
    """Parse the command line, run the selected configs, report the results.

    Returns the process exit status: 0 when every selected config passed
    (or there was nothing to run, or --list was given), 1 when any config
    failed or was aborted by fail-fast.
    """
    p = argparse.ArgumentParser(
        description="Build and make check every configuration from a JSON "
                    "file in its own out-of-tree build directory, in "
                    "parallel.")
    p.add_argument("json", metavar="CONFIGS.json",
                   help="JSON list of configs (see the script header for "
                        "the format), or - for stdin")
    p.add_argument("configs", nargs="*", metavar="NAME",
                   help="configs to run (default: all)")
    p.add_argument("--list", action="store_true", help="list configs")
    p.add_argument("--threads", type=int, default=nproc(),
                   help="worker threads; each takes the next pending config "
                        "when it is free (default: nproc)")
    p.add_argument("--shard", metavar="K/N",
                   help="run only the K-th (1-based) of N shards; configs "
                        "are dealt to shards greedily by descending "
                        "\"minutes\" so the shards' totals come out even")
    p.add_argument("--fail-fast", action=argparse.BooleanOptionalAction,
                   default=True,
                   help="abort everything after the first failing config: "
                        "pending configs are skipped and in-flight ones "
                        "killed (--no-fail-fast runs everything and "
                        "reports every failure)")
    p.add_argument("--cc", default="ccache gcc" if shutil.which("ccache")
                   else None,
                   help="compiler passed to configure as CC= for configs "
                        "that do not set their own \"cc\"")
    p.add_argument("--cflags", default="",
                   help="CFLAGS for configs that do not set their own")
    p.add_argument("--ldflags", default="",
                   help="LDFLAGS for configs that do not set their own")
    p.add_argument("--private-dir", action="append", default=[],
                   metavar="DIR",
                   help="give each build dir a private copy of this "
                        "symlinked source directory before make check, for "
                        "tests that write into it (repeatable)")
    opts = p.parse_args()

    all_configs = load_configs(opts, p.error)
    selected = all_configs
    if opts.configs:
        by_name = {cfg.name: cfg for cfg in all_configs}
        unknown = [n for n in opts.configs if n not in by_name]
        if unknown:
            p.error(f"unknown config(s): {' '.join(unknown)}")
        # A name given twice must still run once: two workers on the same
        # config would wipe and rebuild the same build-<name>/ directory
        # under each other.  dict.fromkeys keeps first-occurrence order.
        selected = [by_name[n] for n in dict.fromkeys(opts.configs)]

    # Longest first, so the heavyweights never straggle on an otherwise
    # idle machine.  Stable: configs without "minutes" keep list order.
    selected = sorted(selected, key=lambda cfg: -cfg.minutes)
    if opts.shard:
        try:
            k, n = map(int, opts.shard.split("/"))
        except ValueError:
            k = n = 0
        if not 1 <= k <= n:
            p.error(f"--shard: expected K/N with 1 <= K <= N, "
                    f"got {opts.shard!r}")
        # Greedy multiway partition: longest first into the least-loaded
        # shard.  Deterministic; if the "minutes" are accurate, the worst
        # shard ends up within about one config's minutes of optimal.
        shards: list[list[Config]] = [[] for _ in range(n)]
        loads = [0.0] * n
        for cfg in selected:
            i = loads.index(min(loads))
            shards[i].append(cfg)
            loads[i] += cfg.minutes
        selected = shards[k - 1]

    if opts.list:
        for cfg in selected:
            print(f"{cfg.name} [{cfg.minutes:g} min]: "
                  f"{' '.join(cfg.configure)}")
        return 0
    if not selected:
        print(f"shard {opts.shard}: no configs to run")
        return 0

    # A fresh checkout has no configure script yet; generate it once, up
    # front, before the workers start.
    if not (SRCDIR / "configure").exists():
        subprocess.run(["./autogen.sh"], cwd=SRCDIR, check=True)

    nthreads = max(1, min(opts.threads, len(selected)))
    wall_start = time.monotonic()
    cpu_start = os.times()

    def run_one(cfg: Config) -> tuple[Config, str | None, float]:
        # Pair each result with its config so the pool returns full rows.
        failed, minutes = run_config(cfg, opts)
        return cfg, failed, minutes

    with ThreadPoolExecutor(max_workers=nthreads) as pool:
        results = list(pool.map(run_one, selected))
    wall_min = (time.monotonic() - wall_start) / 60
    cpu_end = os.times()
    # os.times() child counters cover the waited-for configure/make
    # subprocesses of every worker thread.
    cpu_min = (cpu_end.children_user - cpu_start.children_user
               + cpu_end.children_system - cpu_start.children_system) / 60
    summarize(results, wall_min, cpu_min, nthreads)
    failed = [cfg.name for cfg, failure, _ in results
              if failure and failure != "aborted"]
    aborted = sum(1 for _, failure, _ in results if failure == "aborted")
    if failed or aborted:
        msg = f"make check failed for: {' '.join(failed)}" if failed \
            else "aborted without a recorded failure"
        if aborted:
            msg += f" ({aborted} config(s) aborted by fail-fast)"
        print(f"::error::{msg}" if ON_GITHUB else msg)
        return 1
    return 0
|
|
|
|
|
|
# Script entry point: main()'s return value becomes the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|