mirror of
https://github.com/wolfSSL/wolfssl.git
synced 2026-07-05 22:00:49 +02:00
dd2f9d3ab8
The 10 GB, LRU-evicted, PR-scoped Actions cache was being thrashed - the
docker simulator buildx layers (~6 GiB), plus per-PR ccache and apt-archive
writes whose keys never hit - which kept evicting the shared ccache, while
the apt mirror timed out often enough to break PR CI. Move the heavy caches
to ghcr (free, separate pool) and make PR runs read-only against the Actions
cache.
apt dependencies from prebuilt ghcr .deb bundles
- ci-deps-image.yml resolves each package list under .github/ci-deps/ into
its .deb closure and publishes ghcr.io/<owner>/wolfssl-ci-debs:<tag> in
two tiers: <ver>-minimal (make-check family) and <ver>-full (interop
superset), for ubuntu-22.04 and 24.04.
- install-apt-deps gains a ghcr-debs-tag input: pull the bundle and install
offline (--no-download) so the apt mirror is never on the PR critical
path. Any failure (bundle missing/not public/incomplete) falls through to
the existing apt path, so it is always safe to set.
sim-test buildx layers to a shared ghcr registry cache
- the 7 docker simulator workflows switch from cache-to: type=gha to
ghcr.io/wolfssl/wolfssl-sim-cache:<scope>. cache-from reads on every run
(anonymous); cache-to writes only on the weekend cron and manual
workflow_dispatch. Per-distinct-image tags and de-duplicated writers keep
parallel matrix jobs from racing on one ref.
ccache: PRs read, the schedule writes
- ccache-setup gains read-only: PR runs restore the shared master-scoped
cache but never upload; schedule/push runs refresh it. Wired across
os-check (linux + macOS), pq-all, smoke-test and the 12 small make-check
workflows.
- parallel-make-check.py gains --build-only (compile every config, skip the
test phase) so weekday-morning seed crons warm the cache PR runs consume.
artifact retention capped at 7 days on the failure-log/result uploads that
previously defaulted to 90.
ONE-TIME SETUP: after their first publish, make the ghcr packages
wolfssl-ci-debs and wolfssl-sim-cache PUBLIC so anonymous pulls work from PR
(including fork) runs; until then everything falls back cleanly.
559 lines
25 KiB
Python
Executable File
559 lines
25 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
# Build and "make check" a set of configurations, each in its own out-of-tree
|
|
# (VPATH) build directory, on a pool of worker threads (default: one per
|
|
# CPU); each thread takes the next pending config as soon as it is free.
|
|
# The final summary reports how efficiently the pool used the machine
|
|
# (thread occupancy and CPU utilization).
|
|
#
|
|
# The configurations come from a JSON file ("-" for stdin): a list of
|
|
# objects, one per configuration. Recognized keys, all optional except
|
|
# "name" (unknown keys are an error, so typos do not pass silently):
|
|
#
|
|
# name unique identifier; the config builds in build-<name>/
|
|
# ("aux" and "test" are reserved: build-aux/, build-test/)
|
|
# configure list of extra ./configure arguments
|
|
# cc compiler passed to configure as CC=, overriding --cc
|
|
# ("" leaves CC entirely to configure / the environment)
|
|
# cflags CFLAGS for make, overriding --cflags
|
|
# ldflags LDFLAGS for make, overriding --ldflags
|
|
# minutes expected duration, from the Minutes column of a previous
|
|
# run's summary (default 1.0). Schedule weight only - configs
|
|
# run longest-first and --shard balances shards by it; a stale
|
|
# value just packs the schedule a little worse, but a run
|
|
# whose measured time lands more than +/-50% away from it
|
|
# draws a warning (never a failure) so it is easy to spot
|
|
# and update.
|
|
# user_settings header staged as <builddir>/user_settings.h before
|
|
# configure (path relative to the source root); pair it with
|
|
# --enable-usersettings in "configure"
|
|
# check false skips the make-check phase entirely (default true)
|
|
# prepare list of argv lists run in the build dir before configure
|
|
# run list of argv lists run in the build dir after the build and
|
|
# checks, e.g. [["wolfcrypt/test/testwolfcrypt"]]
|
|
# comment ignored; JSON has no comment syntax, so notes go here
|
|
#
|
|
# For example:
|
|
#
|
|
# [
|
|
# {"name": "default"},
|
|
# {"name": "all-asan", "configure": ["--enable-all"],
|
|
# "cflags": "-fsanitize=address", "ldflags": "-fsanitize=address"}
|
|
# ]
|
|
#
|
|
# Driven by CI workflows, which keep their config lists next to the
|
|
# invocation (see .github/workflows/smoke-test.yml), but also runnable
|
|
# locally - copy the JSON block out of the workflow into a file:
|
|
#
|
|
# .github/scripts/parallel-make-check.py configs.json # all configs
|
|
# .github/scripts/parallel-make-check.py configs.json default all-asan
|
|
# .github/scripts/parallel-make-check.py --list configs.json
|
|
#
|
|
# Concurrent "make check" runs are safe because the test scripts re-exec
|
|
# themselves under "bwrap --unshare-net" when bubblewrap is installed (one
|
|
# network namespace each) and the remaining test outputs land in the build
|
|
# directory; see --private-dir for the exception.
|
|
#
|
|
# The first failing config aborts the others (pending configs are skipped,
|
|
# in-flight ones get SIGTERM, then SIGKILL after a 10 s grace period) so CI
|
|
# fails fast; pass --no-fail-fast to run everything and report every
|
|
# failure.
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import json
|
|
import os
|
|
import shutil
|
|
import signal
|
|
import subprocess
|
|
import sys
|
|
import threading
|
|
import time
|
|
from collections.abc import Callable
|
|
from concurrent.futures import ThreadPoolExecutor
|
|
from dataclasses import dataclass, field
|
|
from pathlib import Path
|
|
from typing import NoReturn
|
|
|
|
# cflags/ldflags are applied at make time only (never to ./configure) so
|
|
# autoconf feature detection is not poisoned by benign warnings in
|
|
# conftest probes. They are omitted entirely when empty so a plain config
|
|
# keeps the configure-chosen defaults.
|
|
@dataclass
class Config:
    """One build configuration, as loaded from an entry of the JSON file."""

    # Unique identifier; the config is built in <source root>/build-<name>/.
    name: str
    # Extra arguments appended to the ./configure command line.
    configure: list[str] = field(default_factory=list)
    # Compiler handed to configure as CC=<cc>; empty adds no CC= argument.
    cc: str = ""
    # CFLAGS=/LDFLAGS= passed on the make command line; omitted when empty.
    cflags: str = ""
    ldflags: str = ""
    # Expected duration in minutes: the longest-first sort key and the
    # --shard balancing weight. Never a pass/fail bound.
    minutes: float = 1.0
    # Path (relative to the source root) copied to
    # <builddir>/user_settings.h before configure; empty stages nothing.
    user_settings: str = ""
    # False leaves the "make check" steps out of the build.
    check: bool = True
    # argv lists run in the build dir before configure.
    prepare: list[list[str]] = field(default_factory=list)
    # argv lists run in the build dir after the build and checks.
    run: list[list[str]] = field(default_factory=list)
    # Whether "minutes" was given in the JSON (vs the 1.0 default); only an
    # explicit estimate is checked for >50% drift against the real time.
    minutes_provided: bool = False
|
|
|
|
# Source-tree root: two directory levels above the directory that holds
# this script. Build directories (build-<name>/) are created under it.
SRCDIR = Path(__file__).resolve().parents[2]
# True only when the GITHUB_ACTIONS environment variable is exactly "true";
# selects ::group::/::warning::/::error:: workflow commands over plain text.
ON_GITHUB = os.environ.get("GITHUB_ACTIONS") == "true"
# Held by run_config() while it prints a config's result, so the output of
# concurrently finishing workers does not interleave.
print_lock = threading.Lock()

# Fail-fast state: the first failure sets stop_event (under fail_lock, so
# exactly one config is reported as the origin) and kills the other
# workers' in-flight process groups.
stop_event = threading.Event()
fail_lock = threading.Lock()
# Subprocesses currently being waited on by the workers; every read or
# update happens under procs_lock.
live_procs: set[subprocess.Popen] = set()
procs_lock = threading.Lock()
|
|
|
|
|
|
def kill_group(p: subprocess.Popen, sig: signal.Signals) -> None:
    """Send *sig* to the process group led by *p*, best effort.

    Every subprocess is started in its own session, so its pid doubles as
    the process-group id and signalling the group takes down the whole
    make/test tree under it. If the group cannot be signalled (already
    gone, or not permitted) the signal is sent to *p* alone; a process that
    has vanished by then is silently ignored.
    """
    try:
        os.killpg(p.pid, sig)
        return
    except (ProcessLookupError, PermissionError):
        pass
    # Group-wide delivery failed: fall back to the direct child only.
    try:
        p.send_signal(sig)
    except ProcessLookupError:
        pass
|
|
|
|
|
|
def abort_others() -> None:
    """Terminate every registered in-flight subprocess (fail-fast sweep).

    Takes a snapshot of live_procs under procs_lock, sends SIGTERM to each
    process group, then polls every 0.2 s. Whatever is still running once
    the 10 s grace period has passed gets SIGKILL, so fail-fast cannot hang
    behind a test that traps or ignores SIGTERM.
    """
    with procs_lock:
        victims = list(live_procs)
    for victim in victims:
        kill_group(victim, signal.SIGTERM)

    grace_ends = time.monotonic() + 10
    while True:
        if all(victim.poll() is not None for victim in victims):
            return  # everything exited on its own
        if time.monotonic() > grace_ends:
            break
        time.sleep(0.2)

    # Bounded escalation for the stragglers.
    for victim in victims:
        if victim.poll() is None:
            kill_group(victim, signal.SIGKILL)
|
|
|
|
|
|
def nproc() -> int:
    """Return the CPU count usable by this process, like nproc(1).

    Uses the scheduler affinity mask where the platform provides
    os.sched_getaffinity; otherwise falls back to os.cpu_count(), or 1
    when that is unknown.
    """
    try:
        usable = os.sched_getaffinity(0)
    except AttributeError:
        # Platform without sched_getaffinity.
        return os.cpu_count() or 1
    return len(usable)
|
|
|
|
|
|
def load_configs(opts: argparse.Namespace,
                 error: Callable[[str], NoReturn]) -> list[Config]:
    """Parse and validate the JSON config list named by ``opts.json``.

    Parameters:
        opts:  parsed command line; reads ``json`` (a path, or "-" for
               stdin) and the ``cc``/``cflags``/``ldflags`` fallbacks used
               for entries that do not set their own.
        error: called with a message on the first invalid input; it must
               not return (e.g. ``ArgumentParser.error``).

    Returns:
        One Config per JSON entry, in file order. ``minutes_provided``
        records whether the entry carried an explicit "minutes".

    Every entry must be an object with only the recognized keys and a
    unique, non-reserved "name"; an empty list is an error too.
    """
    try:
        if opts.json == "-":
            entries = json.load(sys.stdin)
        else:
            entries = json.loads(Path(opts.json).read_text())
    except (OSError, ValueError) as e:
        error(f"{opts.json}: {e}")
    if not isinstance(entries, list):
        error(f"{opts.json}: expected a JSON list of config objects")
    known = {"name", "configure", "cc", "cflags", "ldflags", "minutes",
             "user_settings", "check", "prepare", "run", "comment"}
    configs: list[Config] = []
    seen: set[str] = set()  # names so far, for the duplicate check
    for entry in entries:
        if not isinstance(entry, dict):
            error(f"{opts.json}: config entries must be objects: {entry!r}")
        unknown = set(entry) - known
        if unknown:
            error(f"{opts.json}: unknown key(s) in {entry.get('name', entry)!r}: "
                  f"{' '.join(sorted(unknown))}")
        name = entry.get("name")
        if not isinstance(name, str) or not name or "/" in name:
            error(f"{opts.json}: every config needs a \"name\" usable as a "
                  f"directory suffix: {entry!r}")
        # build-<name> dirs that are not ours to wipe: build-aux/ is
        # autotools' aux-script dir (autogen.sh), build-test/ a legacy
        # build dir (.gitignore).
        if name in ("aux", "test"):
            error(f"{opts.json}: reserved config name {name!r}: build-{name}/ "
                  f"belongs to other tooling")
        if name in seen:
            error(f"{opts.json}: duplicate config name {name!r}")
        seen.add(name)
        configure = entry.get("configure", [])
        if not (isinstance(configure, list)
                and all(isinstance(a, str) for a in configure)):
            error(f"{opts.json}: \"configure\" must be a list of argument "
                  f"strings in {name!r}")
        for key in ("cflags", "ldflags"):
            if not isinstance(entry.get(key, ""), str):
                error(f"{opts.json}: \"{key}\" must be a string in {name!r}")
        minutes = entry.get("minutes", 1.0)
        # Python's json accepts the NaN/Infinity literals. NaN compares
        # false against everything, so a plain "< 0" test would let it
        # through and it would then corrupt the longest-first sort and the
        # --shard balancing; the chained comparison rejects NaN, +/-inf
        # and negatives in one go. bool is excluded explicitly because it
        # is a subclass of int.
        if isinstance(minutes, bool) or not isinstance(minutes, (int, float)) \
                or not 0 <= minutes < float("inf"):
            error(f"{opts.json}: \"minutes\" must be a finite non-negative "
                  f"number in {name!r}")
        user_settings = entry.get("user_settings", "")
        if not isinstance(user_settings, str):
            error(f"{opts.json}: \"user_settings\" must be a path string "
                  f"in {name!r}")
        check = entry.get("check", True)
        if not isinstance(check, bool):
            error(f"{opts.json}: \"check\" must be a boolean in {name!r}")
        # An explicit "cc" (even "") overrides --cc.
        cc = entry.get("cc", opts.cc or "")
        if not isinstance(cc, str):
            error(f"{opts.json}: \"cc\" must be a string in {name!r}")
        for key in ("prepare", "run"):
            cmds = entry.get(key, [])
            if not (isinstance(cmds, list)
                    and all(isinstance(cmd, list) and cmd
                            and all(isinstance(a, str) for a in cmd)
                            for cmd in cmds)):
                error(f"{opts.json}: \"{key}\" must be a list of argv lists "
                      f"in {name!r}")
        configs.append(Config(name, list(configure), cc,
                              entry.get("cflags", opts.cflags),
                              entry.get("ldflags", opts.ldflags),
                              float(minutes), user_settings, check,
                              list(entry.get("prepare", [])),
                              list(entry.get("run", [])),
                              minutes_provided="minutes" in entry))
    if not configs:
        error(f"{opts.json}: no configs")
    return configs
|
|
|
|
|
|
def privatize_dirs(bdir: Path, dirs: list[str]) -> None:
    """Swap build-tree symlinks for private copies of the source dirs.

    For each name in *dirs* that is a symlink inside *bdir*, the link is
    removed and the same-named directory from the source tree is copied
    in its place (symlinks inside it are kept as symlinks). Tests that
    write into these directories would otherwise write through the link
    into the shared source tree and race with the other parallel checks.
    Entries that are not symlinks are left untouched.
    """
    for rel in dirs:
        target = bdir / rel
        if not target.is_symlink():
            continue
        target.unlink()
        shutil.copytree(SRCDIR / rel, target, symlinks=True)
|
|
|
|
|
|
def gh_escape(data: str) -> str:
    """Percent-encode *data* for use inside a GitHub workflow command.

    "%" becomes "%25", CR "%0D" and LF "%0A", so a stray character - e.g.
    from a config name or step out of the JSON - can't truncate the
    command or be parsed as a second one.
    """
    # Single pass over the input, so the "%" produced by an escape is
    # never escaped a second time.
    escapes = str.maketrans({"%": "%25", "\r": "%0D", "\n": "%0A"})
    return data.translate(escapes)
|
|
|
|
|
|
def dump(title: str, path: Path) -> None:
    """Print the file at *path* to stdout under a *title* header.

    On GitHub the content is wrapped in a collapsible ::group:: (its
    title escaped like any workflow-command data); elsewhere it gets a
    plain "==== title ====" header. An OSError while reading or writing
    is printed in place of the content instead of being raised.
    """
    if ON_GITHUB:
        header = f"::group::{gh_escape(title)}"
    else:
        header = f"==== {title} ===="
    print(header)
    try:
        # Undecodable bytes are replaced rather than aborting the dump.
        sys.stdout.write(path.read_text(errors="replace"))
    except OSError as e:
        print(e)
    if ON_GITHUB:
        print("::endgroup::")
    sys.stdout.flush()
|
|
|
|
|
|
def warn(msg: str) -> None:
    """Print *msg* as a warning. Informational only - never fails the run.

    On GitHub it is emitted as an escaped ::warning:: workflow command;
    locally it is just a "WARNING: ..." line.
    """
    if ON_GITHUB:
        line = f"::warning::{gh_escape(msg)}"
    else:
        line = f"WARNING: {msg}"
    print(line)
|
|
|
|
|
|
def stale_estimate(cfg: Config, minutes: float) -> bool:
    """Tell whether the measured *minutes* drifted >50% off the estimate.

    "minutes" in the config is only a scheduling estimate (longest-first
    order, --shard balance), never a pass/fail bound; this flags stale
    values so they are easy to find and update. Only an explicitly given
    estimate is judged: configs riding the 1.0 default placeholder always
    yield False.
    """
    if not cfg.minutes_provided:
        return False
    lowest, highest = 0.5 * cfg.minutes, 1.5 * cfg.minutes
    in_band = lowest <= minutes <= highest
    return not in_band
|
|
|
|
|
|
def run_config(cfg: Config,
               opts: argparse.Namespace) -> tuple[str | None, float]:
    """Build (and optionally check) one config in its own build directory.

    Wipes and recreates <source root>/build-<cfg.name>/, then runs the
    steps in order: stage user_settings.h, "prepare" commands, configure,
    make, the make-check steps (when cfg.check) and the "run" commands.
    All command output goes to build-<name>/make-check.log; the first
    failing step ends the config.

    Parameters:
        cfg:  the configuration to build.
        opts: parsed command line; reads ``fail_fast`` and ``private_dir``.

    Returns:
        (failed, minutes): *failed* is None on success, "aborted" when
        fail-fast stopped this config, otherwise the label of the failing
        step; *minutes* is the wall time spent on this config.
    """
    if opts.fail_fast and stop_event.is_set():
        return "aborted", 0.0
    bdir = SRCDIR / f"build-{cfg.name}"
    if bdir.exists():
        shutil.rmtree(bdir)
    bdir.mkdir()
    configure = [str(SRCDIR / "configure")] + cfg.configure
    if cfg.cc:
        configure.append(f"CC={cfg.cc}")
    flags = [f"CFLAGS={cfg.cflags}"] if cfg.cflags else []
    flags += [f"LDFLAGS={cfg.ldflags}"] if cfg.ldflags else []
    # No -j here: wolfSSL's configure enables make's jobserver by default
    # (AX_AM_JOBSERVER adds AM_MAKEFLAGS += -j<nproc+1>), and that explicit
    # -j on every automake sub-make overrides whatever the top-level make
    # was given, so a -j here would only schedule the outermost recursion
    # hop. Measured across this pool, the jobserver default also utilizes
    # the CPUs better than a capped -j (configs' serial phases - configure,
    # link - get backfilled by other configs' compile jobs).
    make = ["make"] + flags
    # Each step is (label, argv) for a subprocess or (label, callable) for
    # in-process work.
    steps: list[tuple[str, list[str] | Callable[[], object]]] = []
    if cfg.user_settings:
        # Staged before configure; --enable-usersettings builds pick it up
        # from the build dir via the default include path.
        steps.append((f"stage {cfg.user_settings}",
                      lambda: shutil.copy(SRCDIR / cfg.user_settings,
                                          bdir / "user_settings.h")))
    steps += [(" ".join(cmd), cmd) for cmd in cfg.prepare]
    steps += [("configure", configure), ("make", make)]
    if cfg.check:
        steps += [
            # Prebuild the check programs without running any tests so
            # "make check" below is pure test execution.
            ("make check TESTS=", make + ["check", "TESTS="]),
            ("private dirs", lambda: privatize_dirs(bdir, opts.private_dir)),
            ("make check", ["make"] + flags + ["check"]),
        ]
    steps += [(" ".join(cmd), cmd) for cmd in cfg.run]
    failed: str | None = None
    start = time.monotonic()
    log = bdir / "make-check.log"

    def record_failure(step: str) -> str:
        # Classify a failed step, doing the fail-fast bookkeeping: the
        # first failure wins and aborts everyone else; any failure after
        # the abort began is reported as aborted instead.
        if not opts.fail_fast:
            return step
        with fail_lock:
            label = "aborted" if stop_event.is_set() else step
            stop_event.set()
        if label != "aborted":
            abort_others()
        return label

    with open(log, "w") as logf:
        for step, cmd in steps:
            if opts.fail_fast and stop_event.is_set():
                failed = "aborted"
                break
            if callable(cmd):
                try:
                    cmd()
                except Exception as e:  # one config's bug, not the run's
                    print(f"+ {step}: {e!r}", file=logf, flush=True)
                    failed = record_failure(step)
                    break
                continue
            print(f"+ {' '.join(cmd)}", file=logf, flush=True)
            # stdin=DEVNULL so a test that reads stdin sees EOF (as in CI)
            # instead of blocking forever on an interactive/socket stdin.
            try:
                proc = subprocess.Popen(cmd, cwd=bdir, stdout=logf,
                                        stderr=subprocess.STDOUT,
                                        stdin=subprocess.DEVNULL,
                                        start_new_session=True)
            except OSError as e:
                # The command could not be started at all (e.g. a
                # "prepare"/"run" argv naming a missing program). Like a
                # failing callable step this is one config's failure: log
                # it and report the step instead of letting the exception
                # escape the worker and take the summary down with it.
                print(f"{step}: {e!r}", file=logf, flush=True)
                failed = record_failure(step)
                break
            with procs_lock:
                live_procs.add(proc)
            if opts.fail_fast and stop_event.is_set():
                # Close the race with abort_others(): if its sweep ran
                # between our stop_event check above and the registration
                # just now, this process escaped the sweep - kill it
                # ourselves (the wait() below then reaps it), escalating
                # like the sweep does if SIGTERM is ignored.
                kill_group(proc, signal.SIGTERM)
                try:
                    proc.wait(timeout=10)
                except subprocess.TimeoutExpired:
                    kill_group(proc, signal.SIGKILL)
            try:
                rc = proc.wait()
            finally:
                with procs_lock:
                    live_procs.discard(proc)
            if rc != 0:
                failed = record_failure(step)
                break
    minutes = (time.monotonic() - start) / 60
    with print_lock:
        if failed == "aborted":
            print(f"{cfg.name}: aborted (fail-fast) [{minutes:.1f} min]")
            sys.stdout.flush()
        elif not failed:
            # One line per passing config; the full logs would bloat the CI
            # log (they stay in build-<name>/make-check.log).
            print(f"{cfg.name}: pass [{minutes:.1f} min]")
            if stale_estimate(cfg, minutes):
                warn(f"{cfg.name}: ran {minutes:.1f} min but \"minutes\" "
                     f"says {cfg.minutes:g} (>50% off) - update it in the "
                     f"config JSON")
            sys.stdout.flush()
        else:
            dump(f"{cfg.name}: FAIL ({failed}) [{minutes:.1f} min]", log)
            # The step-specific logs hold the detail the main log lacks.
            if failed == "configure":
                dump(f"{cfg.name}: config.log", bdir / "config.log")
            elif failed == "make check":
                dump(f"{cfg.name}: test-suite.log", bdir / "test-suite.log")
    return failed, minutes
|
|
|
|
|
|
def summarize(results: list[tuple[Config, str | None, float]],
              wall_min: float, cpu_min: float, nthreads: int) -> None:
    """Print the result table and the pool-efficiency line.

    *results* holds one (config, failure, minutes) triple per config;
    *wall_min* is the elapsed time of the whole run, *cpu_min* the CPU
    time consumed by the child processes and *nthreads* the worker count.
    The table goes to stdout and, when GITHUB_STEP_SUMMARY is set, is
    appended to that file under a "### make check" heading.
    """
    rows = ["| Config | Result | Minutes |", "|---|---|---|"]
    for cfg, failed, minutes in results:
        if failed == "aborted":
            verdict = ":heavy_minus_sign: aborted (fail-fast)"
        elif failed:
            verdict = f":x: FAIL ({failed})"
        elif stale_estimate(cfg, minutes):
            # Non-fatal nudge mirroring the per-config warning, kept in
            # the summary next to the Minutes value to copy over.
            verdict = (":white_check_mark: pass"
                       f' :warning: "minutes" {cfg.minutes:g} is >50% off, '
                       f"update to ~{minutes:.1f}")
        else:
            verdict = ":white_check_mark: pass"
        rows.append(f"| {cfg.name} | {verdict} | {minutes:.1f} |")

    # Two views of how efficiently the pool used the machine: thread
    # occupancy is the time the workers spent running configs out of the
    # thread-minutes available (a long config left for last idles the other
    # workers and drags it down); CPU utilization is the CPU time the build
    # and test children actually consumed out of the CPU-minutes available
    # (serial configure/link/test phases show up here).
    busy_min = sum(spent for _, _, spent in results)
    ncpu = nproc()
    thread_budget = wall_min * nthreads
    cpu_budget = wall_min * ncpu
    occupancy = 100 * busy_min / thread_budget
    utilization = 100 * cpu_min / cpu_budget
    rows.append("")
    rows.append(
        f"{len(results)} configs in {wall_min:.1f} min on {nthreads} "
        f"threads / {ncpu} CPUs: "
        f"thread occupancy {occupancy:.0f}% "
        f"({busy_min:.1f} of {thread_budget:.1f} thread-min), "
        f"CPU utilization {utilization:.0f}% "
        f"({cpu_min:.1f} of {cpu_budget:.1f} CPU-min)")

    table = "\n".join(rows)
    print(table)
    summary = os.environ.get("GITHUB_STEP_SUMMARY")
    if summary:
        with open(summary, "a") as f:
            print(f"### make check\n\n{table}", file=f)
|
|
|
|
|
|
def main() -> int:
    """Command-line entry point: select, schedule and run the configs.

    Loads the configs, applies --build-only, the NAME selection and
    --shard, then either lists the selection (--list) or runs it on a
    thread pool and prints the summary. Runs ./autogen.sh first when the
    source tree has no configure script yet.

    Returns:
        0 when everything passed (also for --list and for an empty
        shard), 1 when any config failed or was aborted by fail-fast.
        Invalid arguments or configs are reported through the argument
        parser's error().
    """
    p = argparse.ArgumentParser(
        description="Build and make check every configuration from a JSON "
                    "file in its own out-of-tree build directory, in "
                    "parallel.")
    p.add_argument("json", metavar="CONFIGS.json",
                   help="JSON list of configs (see the script header for "
                        "the format), or - for stdin")
    p.add_argument("configs", nargs="*", metavar="NAME",
                   help="configs to run (default: all)")
    p.add_argument("--list", action="store_true", help="list configs")
    p.add_argument("--threads", type=int, default=nproc(),
                   help="worker threads; each takes the next pending config "
                        "when it is free (default: nproc)")
    p.add_argument("--shard", metavar="K/N",
                   help="run only the K-th (1-based) of N shards; configs "
                        "are dealt to shards greedily by descending "
                        "\"minutes\" so the shards' totals come out even")
    p.add_argument("--fail-fast", action=argparse.BooleanOptionalAction,
                   default=True,
                   help="abort everything after the first failing config: "
                        "pending configs are skipped and in-flight ones "
                        "killed (--no-fail-fast runs everything and "
                        "reports every failure)")
    p.add_argument("--cc", default="ccache gcc" if shutil.which("ccache")
                   else None,
                   help="compiler passed to configure as CC= for configs "
                        "that do not set their own \"cc\"")
    p.add_argument("--cflags", default="",
                   help="CFLAGS for configs that do not set their own")
    p.add_argument("--ldflags", default="",
                   help="LDFLAGS for configs that do not set their own")
    p.add_argument("--private-dir", action="append", default=[],
                   metavar="DIR",
                   help="give each build dir a private copy of this "
                        "symlinked source directory before make check, for "
                        "tests that write into it (repeatable)")
    p.add_argument("--build-only", action="store_true",
                   help="build every config but skip the make-check phase "
                        "and any post-build \"run\" commands: the compile "
                        "still populates ccache, which is the point when "
                        "seeding a shared cache on a schedule")
    opts = p.parse_args()

    all_configs = load_configs(opts, p.error)
    if opts.build_only:
        # Pure build: drop the check phase (and post-build "run" steps) for
        # every config. The compile alone fully populates ccache, so a
        # scheduled --build-only pass on the default branch warms the
        # shared cache that PR runs restore, without spending time on tests.
        for cfg in all_configs:
            cfg.check = False
            cfg.run = []
            # "minutes" estimates describe a full build-and-check run, so
            # a build-only time is expected to undershoot them. Mark the
            # estimate as not provided so it still weights the schedule
            # but no longer draws a ">50% off - update it" warning that
            # would send people to "fix" correct values.
            cfg.minutes_provided = False
    selected = all_configs
    if opts.configs:
        by_name = {cfg.name: cfg for cfg in all_configs}
        unknown = [n for n in opts.configs if n not in by_name]
        if unknown:
            p.error(f"unknown config(s): {' '.join(unknown)}")
        selected = [by_name[n] for n in opts.configs]

    # Longest first, so the heavyweights never straggle on an otherwise
    # idle machine. Stable: configs without "minutes" keep list order.
    selected = sorted(selected, key=lambda cfg: -cfg.minutes)
    if opts.shard:
        try:
            k, n = map(int, opts.shard.split("/"))
        except ValueError:
            # Not two integers: force the range check below to fail.
            k = n = 0
        if not 1 <= k <= n:
            p.error(f"--shard: expected K/N with 1 <= K <= N, "
                    f"got {opts.shard!r}")
        # Greedy multiway partition: longest first into the least-loaded
        # shard. Deterministic; if the "minutes" are accurate, the worst
        # shard ends up within about one config's minutes of optimal.
        shards, loads = [[] for _ in range(n)], [0.0] * n
        for cfg in selected:
            i = loads.index(min(loads))
            shards[i].append(cfg)
            loads[i] += cfg.minutes
        selected = shards[k - 1]

    if opts.list:
        for cfg in selected:
            print(f"{cfg.name} [{cfg.minutes:g} min]: "
                  f"{' '.join(cfg.configure)}")
        return 0
    if not selected:
        # Only reachable with --shard: more shards than configs.
        print(f"shard {opts.shard}: no configs to run")
        return 0

    if not (SRCDIR / "configure").exists():
        subprocess.run(["./autogen.sh"], cwd=SRCDIR, check=True)

    # At least one worker, and never more workers than configs.
    nthreads = max(1, min(opts.threads, len(selected)))
    wall_start = time.monotonic()
    cpu_start = os.times()

    def run_one(cfg: Config) -> tuple[Config, str | None, float]:
        failed, minutes = run_config(cfg, opts)
        return cfg, failed, minutes

    with ThreadPoolExecutor(max_workers=nthreads) as pool:
        results = list(pool.map(run_one, selected))
    wall_min = (time.monotonic() - wall_start) / 60
    cpu_end = os.times()
    # os.times() child counters cover the waited-for configure/make
    # subprocesses of every worker thread.
    cpu_min = (cpu_end.children_user - cpu_start.children_user
               + cpu_end.children_system - cpu_start.children_system) / 60
    summarize(results, wall_min, cpu_min, nthreads)
    failed = [cfg.name for cfg, failure, _ in results
              if failure and failure != "aborted"]
    aborted = sum(1 for _, failure, _ in results if failure == "aborted")
    if failed or aborted:
        msg = f"make check failed for: {' '.join(failed)}" if failed \
            else "aborted without a recorded failure"
        if aborted:
            msg += f" ({aborted} config(s) aborted by fail-fast)"
        print(f"::error::{gh_escape(msg)}" if ON_GITHUB else msg)
        return 1
    return 0
|
|
|
|
|
|
# Script entry point: main()'s return value becomes the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|