mirror of
https://github.com/wolfSSL/wolfssl.git
synced 2026-07-05 14:00:48 +02:00
eb59a12b36
Two fixes from the second Copilot review round:

1. A process spawned between the live_procs snapshot taken by abort_others() and its own registration escaped the kill sweep, leaving that build/check running to completion after fail-fast had begun. run_config() now re-checks stop_event right after registering the process and SIGTERMs its process group if the abort has already started: either the registration happened before the sweep's snapshot (the sweep kills it) or it happened after stop_event was set (the re-check sees it), so the window is closed.

2. Exceptions from callable steps (user_settings staging, private-dir copies) used to escape the worker thread and crash the whole script with no summary. They are now recorded as that config's step failure with the exception written to its make-check.log, e.g. a bad "user_settings" path reports FAIL (stage <path>) while the other configs keep running; the fail-fast bookkeeping is shared with the nonzero-exit path via record_failure().
464 lines
20 KiB
Python
Executable File
464 lines
20 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
# Build and "make check" a set of configurations, each in its own out-of-tree
|
|
# (VPATH) build directory, on a pool of worker threads (default: one per
|
|
# CPU); each thread takes the next pending config as soon as it is free.
|
|
# The final summary reports how efficiently the pool used the machine
|
|
# (thread occupancy and CPU utilization).
|
|
#
|
|
# The configurations come from a JSON file ("-" for stdin): a list of
|
|
# objects, one per configuration. Recognized keys, all optional except
|
|
# "name" (unknown keys are an error, so typos do not pass silently):
|
|
#
|
|
# name unique identifier; the config builds in build-<name>/
|
|
# configure list of extra ./configure arguments
|
|
# cc compiler passed to configure as CC=, overriding --cc
|
|
# ("" leaves CC entirely to configure / the environment)
|
|
# cflags CFLAGS for make, overriding --cflags
|
|
# ldflags LDFLAGS for make, overriding --ldflags
|
|
# minutes expected duration, from the Minutes column of a previous
|
|
# run's summary (default 1.0). Schedule weight only - configs
|
|
# run longest-first and --shard balances shards by it; a stale
|
|
# value just packs the schedule a little worse.
|
|
# user_settings header staged as <builddir>/user_settings.h before
|
|
# configure (path relative to the source root); pair it with
|
|
# --enable-usersettings in "configure"
|
|
# check false skips the make-check phase entirely (default true)
|
|
# prepare list of argv lists run in the build dir before configure
|
|
# run list of argv lists run in the build dir after the build and
|
|
# checks, e.g. [["wolfcrypt/test/testwolfcrypt"]]
|
|
# comment ignored; JSON has no comment syntax, so notes go here
|
|
#
|
|
# For example:
|
|
#
|
|
# [
|
|
# {"name": "default"},
|
|
# {"name": "all-asan", "configure": ["--enable-all"],
|
|
# "cflags": "-fsanitize=address", "ldflags": "-fsanitize=address"}
|
|
# ]
|
|
#
|
|
# Driven by CI workflows, which keep their config lists next to the
|
|
# invocation (see .github/workflows/smoke-test.yml), but also runnable
|
|
# locally - copy the JSON block out of the workflow into a file:
|
|
#
|
|
# .github/scripts/parallel-make-check.py configs.json # all configs
|
|
# .github/scripts/parallel-make-check.py configs.json default all-asan
|
|
# .github/scripts/parallel-make-check.py --list configs.json
|
|
#
|
|
# Concurrent "make check" runs are safe because the test scripts re-exec
|
|
# themselves under "bwrap --unshare-net" when bubblewrap is installed (one
|
|
# network namespace each) and the remaining test outputs land in the build
|
|
# directory; see --private-dir for the exception.
|
|
#
|
|
# The first failing config aborts the others (pending configs are skipped,
|
|
# in-flight ones are killed) so CI fails fast; pass --no-fail-fast to run
|
|
# everything and report every failure.
|
|
|
|
import argparse
|
|
import json
|
|
import os
|
|
import shutil
|
|
import signal
|
|
import subprocess
|
|
import sys
|
|
import threading
|
|
import time
|
|
from concurrent.futures import ThreadPoolExecutor
|
|
from dataclasses import dataclass, field
|
|
from pathlib import Path
|
|
|
|
# cflags/ldflags are applied at make time only (never to ./configure) so
|
|
# autoconf feature detection is not poisoned by benign warnings in
|
|
# conftest probes. They are omitted entirely when empty so a plain config
|
|
# keeps the configure-chosen defaults.
|
|
@dataclass
class Config:
    """One build configuration, i.e. one object of the JSON config list."""

    # Unique identifier; the config builds in build-<name>/.
    name: str
    # Extra ./configure arguments.
    configure: list = field(default_factory=list)
    # Compiler handed to configure as CC=; "" adds no CC= argument.
    cc: str = ""
    # CFLAGS / LDFLAGS passed on the make command line; left off when empty.
    cflags: str = ""
    ldflags: str = ""
    # Expected duration; used only as the scheduling / sharding weight.
    minutes: float = 1.0
    # Header staged as <builddir>/user_settings.h before configure
    # (path relative to the source root); "" stages nothing.
    user_settings: str = ""
    # False skips the make-check phase.
    check: bool = True
    # argv lists run in the build dir before configure.
    prepare: list = field(default_factory=list)
    # argv lists run in the build dir after the build and checks.
    run: list = field(default_factory=list)
|
|
|
|
# Source root: the third ancestor directory of this script's resolved path.
SRCDIR = Path(__file__).resolve().parents[2]
# True only when running inside a GitHub Actions job.
ON_GITHUB = os.getenv("GITHUB_ACTIONS") == "true"
# Serializes the per-config result output of the worker threads.
print_lock = threading.Lock()

# Fail-fast bookkeeping.  The first failing config sets stop_event while
# holding fail_lock (so exactly one config is reported as the origin) and
# then kills the process groups the other workers have in flight.
# live_procs holds those in-flight Popen objects and is only touched
# under procs_lock.
stop_event = threading.Event()
fail_lock = threading.Lock()
live_procs = set()
procs_lock = threading.Lock()
|
|
|
|
|
|
def abort_others():
    """SIGTERM the process group of every currently registered subprocess.

    Each subprocess is started in its own session, so signalling its
    process group takes down the whole make/test tree underneath it.
    """
    # Snapshot under the lock, signal outside it.
    with procs_lock:
        victims = tuple(live_procs)
    for proc in victims:
        try:
            os.killpg(proc.pid, signal.SIGTERM)
        except (ProcessLookupError, PermissionError):
            # Group already gone (or not ours to signal): nothing to do.
            continue
|
|
|
|
|
|
def nproc():
    """Return the CPU count usable by this process, like nproc(1).

    Uses the scheduler affinity mask where the platform provides it and
    otherwise falls back to the number of online CPUs (at least 1).
    """
    if hasattr(os, "sched_getaffinity"):
        return len(os.sched_getaffinity(0))
    return os.cpu_count() or 1
|
|
|
|
|
|
def load_configs(opts, error):
    """Parse and validate the JSON config list named by ``opts.json``.

    Args:
        opts: parsed command-line options.  Reads ``json`` (a file path, or
            "-" for stdin) and the ``cc`` / ``cflags`` / ``ldflags`` defaults
            applied to entries that do not set their own.
        error: callable taking one message string, invoked for any invalid
            input.  It is expected not to return (main() passes
            ArgumentParser.error, which exits).

    Returns:
        The list of Config objects, in file order.
    """
    try:
        if opts.json == "-":
            entries = json.load(sys.stdin)
        else:
            entries = json.loads(Path(opts.json).read_text())
    except (OSError, ValueError) as e:
        error(f"{opts.json}: {e}")
    if not isinstance(entries, list):
        error(f"{opts.json}: expected a JSON list of config objects")

    known_keys = {"name", "configure", "cc", "cflags", "ldflags", "minutes",
                  "user_settings", "check", "prepare", "run", "comment"}
    configs = []
    seen_names = set()
    for entry in entries:
        if not isinstance(entry, dict):
            error(f"{opts.json}: config entries must be objects: {entry!r}")
        # Unknown keys are an error so that typos do not pass silently.
        unknown = set(entry) - known_keys
        if unknown:
            error(f"{opts.json}: unknown key(s) in {entry.get('name', entry)!r}: "
                  f"{' '.join(sorted(unknown))}")
        name = entry.get("name")
        # The name becomes part of the build directory name (build-<name>).
        if not isinstance(name, str) or not name or "/" in name:
            error(f"{opts.json}: every config needs a \"name\" usable as a "
                  f"directory suffix: {entry!r}")
        if name in seen_names:
            error(f"{opts.json}: duplicate config name {name!r}")
        seen_names.add(name)
        configure = entry.get("configure", [])
        if not (isinstance(configure, list)
                and all(isinstance(a, str) for a in configure)):
            error(f"{opts.json}: \"configure\" must be a list of argument "
                  f"strings in {name!r}")
        for key in ("cflags", "ldflags"):
            if not isinstance(entry.get(key, ""), str):
                error(f"{opts.json}: \"{key}\" must be a string in {name!r}")
        minutes = entry.get("minutes", 1.0)
        # bool is a subclass of int, so it has to be excluded explicitly.
        minutes_ok = (not isinstance(minutes, bool)
                      and isinstance(minutes, (int, float)))
        if minutes_ok:
            try:
                minutes = float(minutes)
            except OverflowError:  # integer too large for a float
                minutes_ok = False
        # The json module accepts NaN and Infinity; as a schedule weight
        # either would break the longest-first sort and shard balancing.
        # The chained comparison is False for NaN, negatives and infinity.
        if not minutes_ok or not 0 <= minutes < float("inf"):
            error(f"{opts.json}: \"minutes\" must be a finite non-negative "
                  f"number in {name!r}")
        user_settings = entry.get("user_settings", "")
        if not isinstance(user_settings, str):
            error(f"{opts.json}: \"user_settings\" must be a path string "
                  f"in {name!r}")
        check = entry.get("check", True)
        if not isinstance(check, bool):
            error(f"{opts.json}: \"check\" must be a boolean in {name!r}")
        # A per-config "cc" (even "") overrides --cc.
        cc = entry.get("cc", opts.cc or "")
        if not isinstance(cc, str):
            error(f"{opts.json}: \"cc\" must be a string in {name!r}")
        for key in ("prepare", "run"):
            cmds = entry.get(key, [])
            # Each command must be a non-empty list of strings.
            if not (isinstance(cmds, list)
                    and all(isinstance(cmd, list) and cmd
                            and all(isinstance(a, str) for a in cmd)
                            for cmd in cmds)):
                error(f"{opts.json}: \"{key}\" must be a list of argv lists "
                      f"in {name!r}")
        configs.append(Config(name, list(configure), cc,
                              entry.get("cflags", opts.cflags),
                              entry.get("ldflags", opts.ldflags),
                              minutes, user_settings, check,
                              list(entry.get("prepare", [])),
                              list(entry.get("run", []))))
    if not configs:
        error(f"{opts.json}: no configs")
    return configs
|
|
|
|
|
|
def privatize_dirs(bdir, dirs):
    """Swap build-tree symlinks into the source tree for private copies.

    Tests that write into these directories would otherwise write through
    the symlink into the shared source tree and race with the other
    parallel checks.  This runs after the build steps, so build rules that
    (re)create the symlinks have already run.

    Args:
        bdir: build directory (a Path).
        dirs: directory names, relative to both bdir and the source root.
    """
    for name in dirs:
        link = bdir / name
        if not link.is_symlink():
            # Only symlinks are replaced; anything else is left alone.
            continue
        link.unlink()
        shutil.copytree(SRCDIR / name, link, symlinks=True)
|
|
|
|
|
|
def dump(title, path):
    """Print the file at *path* to stdout under a *title* header.

    On GitHub Actions the output is wrapped in a ::group:: / ::endgroup::
    pair; elsewhere a plain "==== title ====" banner is printed.  An
    OSError while reading or writing is printed instead of raised.
    """
    if ON_GITHUB:
        banner = f"::group::{title}"
    else:
        banner = f"==== {title} ===="
    print(banner)
    try:
        contents = path.read_text(errors="replace")
        sys.stdout.write(contents)
    except OSError as err:
        print(err)
    if ON_GITHUB:
        print("::endgroup::")
    sys.stdout.flush()
|
|
|
|
|
|
def run_config(cfg, opts):
    """Build and check one configuration in its own build directory.

    The build directory build-<name>/ under SRCDIR is recreated from
    scratch and every step's output goes to build-<name>/make-check.log.
    Steps run in order: user_settings staging, "prepare" commands,
    configure, make, then (when cfg.check) the check prebuild, the
    private-dir copies and "make check", and finally the "run" commands.

    Args:
        cfg: the Config to build.
        opts: parsed command-line options; reads ``fail_fast``, ``jobs``
            and ``private_dir``.

    Returns:
        A ``(failed, minutes)`` tuple.  ``failed`` is None when every step
        passed, "aborted" when fail-fast stopped this config, otherwise the
        label of the failing step; ``minutes`` is the elapsed time.
    """
    if opts.fail_fast and stop_event.is_set():
        return "aborted", 0.0
    bdir = SRCDIR / f"build-{cfg.name}"
    if bdir.exists():
        shutil.rmtree(bdir)
    bdir.mkdir()
    configure = [str(SRCDIR / "configure")] + cfg.configure
    if cfg.cc:
        configure.append(f"CC={cfg.cc}")
    flags = [f"CFLAGS={cfg.cflags}"] if cfg.cflags else []
    flags += [f"LDFLAGS={cfg.ldflags}"] if cfg.ldflags else []
    make = ["make", f"-j{opts.jobs}"] + flags

    # Each step is (label, argv list) or (label, callable).
    steps = []
    if cfg.user_settings:
        # Staged before configure; --enable-usersettings builds pick it up
        # from the build dir via the default include path.
        steps.append((f"stage {cfg.user_settings}",
                      lambda: shutil.copy(SRCDIR / cfg.user_settings,
                                          bdir / "user_settings.h")))
    steps += [(" ".join(cmd), cmd) for cmd in cfg.prepare]
    steps += [("configure", configure), ("make", make)]
    if cfg.check:
        steps += [
            # Prebuild the check programs without running any tests so
            # "make check" below is pure test execution.
            ("make check TESTS=", make + ["check", "TESTS="]),
            ("private dirs", lambda: privatize_dirs(bdir, opts.private_dir)),
            ("make check", ["make"] + flags + ["check"]),
        ]
    steps += [(" ".join(cmd), cmd) for cmd in cfg.run]
    failed = None
    start = time.monotonic()
    log = bdir / "make-check.log"

    def record_failure(step):
        # Classify a failed step, doing the fail-fast bookkeeping: the
        # first failure wins and aborts everyone else; any failure after
        # the abort began is reported as aborted instead.
        if not opts.fail_fast:
            return step
        with fail_lock:
            label = "aborted" if stop_event.is_set() else step
            stop_event.set()
        if label != "aborted":
            abort_others()
        return label

    with open(log, "w") as logf:
        for step, cmd in steps:
            if opts.fail_fast and stop_event.is_set():
                failed = "aborted"
                break
            if callable(cmd):
                try:
                    cmd()
                except Exception as e:  # one config's bug, not the run's
                    print(f"+ {step}: {e!r}", file=logf, flush=True)
                    failed = record_failure(step)
                    break
                continue
            print(f"+ {' '.join(cmd)}", file=logf, flush=True)
            # stdin=DEVNULL so a test that reads stdin sees EOF (as in CI)
            # instead of blocking forever on an interactive/socket stdin.
            try:
                proc = subprocess.Popen(cmd, cwd=bdir, stdout=logf,
                                        stderr=subprocess.STDOUT,
                                        stdin=subprocess.DEVNULL,
                                        start_new_session=True)
            except (OSError, ValueError, subprocess.SubprocessError) as e:
                # A command that cannot even be started (missing or
                # non-executable program, invalid argv) is that step's
                # failure, exactly like a callable step that raised; it
                # must not escape the worker thread and crash the run
                # before the summary is printed.
                print(f"+ {step}: {e!r}", file=logf, flush=True)
                failed = record_failure(step)
                break
            with procs_lock:
                live_procs.add(proc)
            if opts.fail_fast and stop_event.is_set():
                # Close the race with abort_others(): if its sweep ran
                # between our stop_event check above and the registration
                # just now, this process escaped the sweep - kill it
                # ourselves (the wait() below then reaps it quickly).
                try:
                    os.killpg(proc.pid, signal.SIGTERM)
                except (ProcessLookupError, PermissionError):
                    proc.terminate()
            try:
                rc = proc.wait()
            finally:
                with procs_lock:
                    live_procs.discard(proc)
            if rc != 0:
                failed = record_failure(step)
                break

    minutes = (time.monotonic() - start) / 60
    with print_lock:
        if failed == "aborted":
            print(f"{cfg.name}: aborted (fail-fast) [{minutes:.1f} min]")
            sys.stdout.flush()
        elif not failed:
            # One line per passing config; the full logs would bloat the CI
            # log (they stay in build-<name>/make-check.log).
            print(f"{cfg.name}: pass [{minutes:.1f} min]")
            sys.stdout.flush()
        else:
            dump(f"{cfg.name}: FAIL ({failed}) [{minutes:.1f} min]", log)
            if failed == "configure":
                dump(f"{cfg.name}: config.log", bdir / "config.log")
            elif failed == "make check":
                dump(f"{cfg.name}: test-suite.log", bdir / "test-suite.log")
    return failed, minutes
|
|
|
|
|
|
def summarize(results, wall_min, cpu_min, nthreads):
    """Print the result table and pool-efficiency line for a finished run.

    The same text is appended to the file named by GITHUB_STEP_SUMMARY
    when that environment variable is set.

    Args:
        results: iterable of ``(cfg, failed, minutes)`` tuples.
        wall_min: wall-clock duration of the whole run, in minutes.
        cpu_min: CPU time consumed by the child processes, in minutes.
        nthreads: number of worker threads used.
    """
    def verdict(failed):
        # Table cell text for one config's outcome.
        if failed == "aborted":
            return ":heavy_minus_sign: aborted (fail-fast)"
        if failed:
            return f":x: FAIL ({failed})"
        return ":white_check_mark: pass"

    rows = [f"| {cfg.name} | {verdict(failed)} | {minutes:.1f} |"
            for cfg, failed, minutes in results]

    # Two views of how efficiently the pool used the machine.  Thread
    # occupancy: time the workers spent running configs out of the
    # thread-minutes available (a long config left for last idles the
    # other workers and drags it down).  CPU utilization: CPU time the
    # build and test children actually consumed out of the CPU-minutes
    # available (too-shallow make -j and serial test phases show up here).
    busy_min = sum(minutes for _, _, minutes in results)
    ncpu = nproc()
    thread_budget = wall_min * nthreads
    cpu_budget = wall_min * ncpu
    efficiency = (
        f"{len(results)} configs in {wall_min:.1f} min on {nthreads} "
        f"threads / {ncpu} CPUs: "
        f"thread occupancy {100 * busy_min / thread_budget:.0f}% "
        f"({busy_min:.1f} of {thread_budget:.1f} thread-min), "
        f"CPU utilization {100 * cpu_min / cpu_budget:.0f}% "
        f"({cpu_min:.1f} of {cpu_budget:.1f} CPU-min)"
    )

    table = "\n".join(
        ["| Config | Result | Minutes |", "|---|---|---|", *rows,
         "", efficiency])
    print(table)
    summary = os.environ.get("GITHUB_STEP_SUMMARY")
    if summary:
        with open(summary, "a") as f:
            print(f"### make check\n\n{table}", file=f)
|
|
|
|
|
|
def main():
    """Command-line entry point: parse options, run the configs, summarize.

    Returns:
        The process exit status: 0 when every selected config passed (or
        there was nothing to run / only --list was requested), 1 when any
        config failed or was aborted by fail-fast.
    """
    p = argparse.ArgumentParser(
        description="Build and make check every configuration from a JSON "
                    "file in its own out-of-tree build directory, in "
                    "parallel.")
    p.add_argument("json", metavar="CONFIGS.json",
                   help="JSON list of configs (see the script header for "
                        "the format), or - for stdin")
    p.add_argument("configs", nargs="*", metavar="NAME",
                   help="configs to run (default: all)")
    p.add_argument("--list", action="store_true", help="list configs")
    p.add_argument("--jobs", type=int, default=2,
                   help="make -j per config (default: 2)")
    p.add_argument("--threads", type=int, default=nproc(),
                   help="worker threads; each takes the next pending config "
                        "when it is free (default: nproc)")
    p.add_argument("--shard", metavar="K/N",
                   help="run only the K-th (1-based) of N shards; configs "
                        "are dealt to shards greedily by descending "
                        "\"minutes\" so the shards' totals come out even")
    p.add_argument("--fail-fast", action=argparse.BooleanOptionalAction,
                   default=True,
                   help="abort everything after the first failing config: "
                        "pending configs are skipped and in-flight ones "
                        "killed (--no-fail-fast runs everything and "
                        "reports every failure)")
    p.add_argument("--cc", default="ccache gcc" if shutil.which("ccache")
                   else None,
                   help="compiler passed to configure as CC= for configs "
                        "that do not set their own \"cc\"")
    p.add_argument("--cflags", default="",
                   help="CFLAGS for configs that do not set their own")
    p.add_argument("--ldflags", default="",
                   help="LDFLAGS for configs that do not set their own")
    p.add_argument("--private-dir", action="append", default=[],
                   metavar="DIR",
                   help="give each build dir a private copy of this "
                        "symlinked source directory before make check, for "
                        "tests that write into it (repeatable)")
    opts = p.parse_args()

    all_configs = load_configs(opts, p.error)
    selected = all_configs
    if opts.configs:
        by_name = {cfg.name: cfg for cfg in all_configs}
        unknown = [n for n in opts.configs if n not in by_name]
        if unknown:
            p.error(f"unknown config(s): {' '.join(unknown)}")
        # De-duplicate (keeping first-mention order): a NAME given twice
        # would otherwise put the same config on two workers, which would
        # delete and rebuild the same build-<name>/ directory concurrently.
        selected = [by_name[n] for n in dict.fromkeys(opts.configs)]

    # Longest first, so the heavyweights never straggle on an otherwise
    # idle machine.  Stable: configs without "minutes" keep list order.
    selected = sorted(selected, key=lambda cfg: -cfg.minutes)
    if opts.shard:
        try:
            k, n = map(int, opts.shard.split("/"))
        except ValueError:
            # Not exactly two integers: force the range check below to fail.
            k = n = 0
        if not 1 <= k <= n:
            p.error(f"--shard: expected K/N with 1 <= K <= N, "
                    f"got {opts.shard!r}")
        # Greedy multiway partition: longest first into the least-loaded
        # shard.  Deterministic, and with honest "minutes" within ~the
        # longest config of optimal.
        shards, loads = [[] for _ in range(n)], [0.0] * n
        for cfg in selected:
            i = loads.index(min(loads))
            shards[i].append(cfg)
            loads[i] += cfg.minutes
        selected = shards[k - 1]

    if opts.list:
        for cfg in selected:
            print(f"{cfg.name} [{cfg.minutes:g} min]: "
                  f"{' '.join(cfg.configure)}")
        return 0
    if not selected:
        print(f"shard {opts.shard}: no configs to run")
        return 0

    # Generate ./configure once, up front, if the tree does not have it.
    if not (SRCDIR / "configure").exists():
        subprocess.run(["./autogen.sh"], cwd=SRCDIR, check=True)

    nthreads = max(1, min(opts.threads, len(selected)))
    wall_start = time.monotonic()
    cpu_start = os.times()
    with ThreadPoolExecutor(max_workers=nthreads) as pool:
        results = [(cfg, failed, minutes) for cfg, (failed, minutes)
                   in zip(selected, pool.map(
                       lambda cfg: run_config(cfg, opts), selected))]
    wall_min = (time.monotonic() - wall_start) / 60
    cpu_end = os.times()
    # os.times() child counters cover the waited-for configure/make
    # subprocesses of every worker thread.
    cpu_min = (cpu_end.children_user - cpu_start.children_user
               + cpu_end.children_system - cpu_start.children_system) / 60
    summarize(results, wall_min, cpu_min, nthreads)

    failed = [cfg.name for cfg, failure, _ in results
              if failure and failure != "aborted"]
    aborted = sum(1 for _, failure, _ in results if failure == "aborted")
    if failed or aborted:
        msg = f"make check failed for: {' '.join(failed)}" if failed \
            else "aborted without a recorded failure"
        if aborted:
            msg += f" ({aborted} config(s) aborted by fail-fast)"
        print(f"::error::{msg}" if ON_GITHUB else msg)
        return 1
    return 0
|
|
|
|
|
|
# Script entry point: main()'s return value becomes the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|