Compare commits

...

3 Commits

Author SHA1 Message Date
Robert Resch 80bf3d0f04 update lock file 2026-05-19 18:38:14 +00:00
Robert Resch 4261b7244e Merge branch 'dev' into edenhaus/dependencies-security-check 2026-05-19 18:33:04 +00:00
Robert Resch a6e30029aa Add basic security check to dependency workflow 2026-05-18 18:55:35 +00:00
2 changed files with 210 additions and 36 deletions
+15 -15
View File
@@ -1,4 +1,4 @@
# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"62eb6e3d38092bd041a0c1ddfdaef94cf4b9c694b2d2bcac6cbbecd6810230ca","compiler_version":"v0.74.4","strict":true,"agent_id":"copilot"}
# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"1520b9c030f2826a211027ca57be8e2af424502aa190380c6860d9f777425eac","compiler_version":"v0.74.4","strict":true,"agent_id":"copilot"}
# gh-aw-manifest: {"version":1,"secrets":["COPILOT_GITHUB_TOKEN","GH_AW_GITHUB_MCP_SERVER_TOKEN","GH_AW_GITHUB_TOKEN","GITHUB_TOKEN"],"actions":[{"repo":"actions/checkout","sha":"de0fac2e4500dabe0009e67214ff5f5447ce83dd","version":"v6.0.2"},{"repo":"actions/download-artifact","sha":"3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c","version":"v8.0.1"},{"repo":"actions/github-script","sha":"3a2844b7e9c422d3c10d287c895573f7108da1b3","version":"v9.0.0"},{"repo":"actions/setup-node","sha":"48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e","version":"v6.4.0"},{"repo":"actions/upload-artifact","sha":"043fb46d1a93c77aae656e7c1c64a875d1fc6a0a","version":"v7.0.1"},{"repo":"github/gh-aw-actions/setup","sha":"d3abfe96a194bce3a523ed2093ddedd5704cdf62","version":"v0.74.4"}],"containers":[{"image":"ghcr.io/github/gh-aw-firewall/agent:0.25.46"},{"image":"ghcr.io/github/gh-aw-firewall/api-proxy:0.25.46"},{"image":"ghcr.io/github/gh-aw-firewall/squid:0.25.46"},{"image":"ghcr.io/github/gh-aw-mcpg:v0.3.9","digest":"sha256:64828b42a4482f58fab16509d7f8f495a6d97c972a98a68aff20543531ac0388","pinned_image":"ghcr.io/github/gh-aw-mcpg:v0.3.9@sha256:64828b42a4482f58fab16509d7f8f495a6d97c972a98a68aff20543531ac0388"},{"image":"ghcr.io/github/github-mcp-server:v1.0.4"},{"image":"node:lts-alpine","digest":"sha256:d1b3b4da11eefd5941e7f0b9cf17783fc99d9c6fc34884a665f40a06dbdfc94f","pinned_image":"node:lts-alpine@sha256:d1b3b4da11eefd5941e7f0b9cf17783fc99d9c6fc34884a665f40a06dbdfc94f"}]}
# ___ _ _
# / _ \ | | (_)
@@ -22,7 +22,7 @@
#
# For more information: https://github.github.com/gh-aw/introduction/overview/
#
# Checks changed Python package requirements on PRs targeting the core repo (including PRs opened from forks) and verifies licenses match PyPI metadata, source repositories are publicly accessible, PyPI releases were uploaded via automated CI (Trusted Publisher attestation), the package's release pipeline uses OIDC or equivalent automated credentials (not static tokens), and the PR description contains the required links.
# Checks changed Python package requirements on PRs targeting the core repo (including PRs opened from forks) and verifies licenses match PyPI metadata, source repositories are publicly accessible, PyPI releases were uploaded via automated CI (Trusted Publisher attestation), the package's release pipeline uses OIDC or equivalent automated credentials (not static tokens), the package source does not show obvious signs of malicious behavior (Home Assistant secret/config access, download-and-execute, install-time arbitrary code, credential exfiltration, obfuscated dynamic execution), and the PR description contains the required links.
#
# Secrets used:
# - COPILOT_GITHUB_TOKEN
@@ -188,20 +188,20 @@ jobs:
run: |
bash "${RUNNER_TEMP}/gh-aw/actions/create_prompt_first.sh"
{
cat << 'GH_AW_PROMPT_2df1318dbe2d4011_EOF'
cat << 'GH_AW_PROMPT_3604055927054880_EOF'
<system>
GH_AW_PROMPT_2df1318dbe2d4011_EOF
GH_AW_PROMPT_3604055927054880_EOF
cat "${RUNNER_TEMP}/gh-aw/prompts/xpia.md"
cat "${RUNNER_TEMP}/gh-aw/prompts/temp_folder_prompt.md"
cat "${RUNNER_TEMP}/gh-aw/prompts/markdown.md"
cat "${RUNNER_TEMP}/gh-aw/prompts/safe_outputs_prompt.md"
cat << 'GH_AW_PROMPT_2df1318dbe2d4011_EOF'
cat << 'GH_AW_PROMPT_3604055927054880_EOF'
<safe-output-tools>
Tools: add_comment, missing_tool, missing_data, noop
</safe-output-tools>
GH_AW_PROMPT_2df1318dbe2d4011_EOF
GH_AW_PROMPT_3604055927054880_EOF
cat "${RUNNER_TEMP}/gh-aw/prompts/mcp_cli_tools_prompt.md"
cat << 'GH_AW_PROMPT_2df1318dbe2d4011_EOF'
cat << 'GH_AW_PROMPT_3604055927054880_EOF'
<github-context>
The following GitHub context information is available for this workflow:
{{#if github.actor}}
@@ -230,12 +230,12 @@ jobs:
{{/if}}
</github-context>
GH_AW_PROMPT_2df1318dbe2d4011_EOF
GH_AW_PROMPT_3604055927054880_EOF
cat "${RUNNER_TEMP}/gh-aw/prompts/github_mcp_tools_with_safeoutputs_prompt.md"
cat << 'GH_AW_PROMPT_2df1318dbe2d4011_EOF'
cat << 'GH_AW_PROMPT_3604055927054880_EOF'
</system>
{{#runtime-import .github/workflows/check-requirements.md}}
GH_AW_PROMPT_2df1318dbe2d4011_EOF
GH_AW_PROMPT_3604055927054880_EOF
} > "$GH_AW_PROMPT"
- name: Interpolate variables and render templates
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
@@ -439,9 +439,9 @@ jobs:
mkdir -p "${RUNNER_TEMP}/gh-aw/safeoutputs"
mkdir -p /tmp/gh-aw/safeoutputs
mkdir -p /tmp/gh-aw/mcp-logs/safeoutputs
cat > "${RUNNER_TEMP}/gh-aw/safeoutputs/config.json" << GH_AW_SAFE_OUTPUTS_CONFIG_c7878b8b9775118a_EOF
cat > "${RUNNER_TEMP}/gh-aw/safeoutputs/config.json" << GH_AW_SAFE_OUTPUTS_CONFIG_38b421c33811530a_EOF
{"add_comment":{"max":1,"target":"${GH_AW_INPUT_PULL_REQUEST_NUMBER}"},"create_report_incomplete_issue":{},"missing_data":{},"missing_tool":{},"noop":{"max":1,"report-as-issue":"true"},"report_incomplete":{}}
GH_AW_SAFE_OUTPUTS_CONFIG_c7878b8b9775118a_EOF
GH_AW_SAFE_OUTPUTS_CONFIG_38b421c33811530a_EOF
- name: Generate Safe Outputs Tools
env:
GH_AW_TOOLS_META_JSON: |
@@ -633,7 +633,7 @@ jobs:
mkdir -p /home/runner/.copilot
GH_AW_NODE=$(which node 2>/dev/null || command -v node 2>/dev/null || echo node)
cat << GH_AW_MCP_CONFIG_103328ae7b98b0c7_EOF | "$GH_AW_NODE" "${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.cjs"
cat << GH_AW_MCP_CONFIG_73bab7f9eb3c0972_EOF | "$GH_AW_NODE" "${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.cjs"
{
"mcpServers": {
"github": {
@@ -677,7 +677,7 @@ jobs:
"payloadDir": "${MCP_GATEWAY_PAYLOAD_DIR}"
}
}
GH_AW_MCP_CONFIG_103328ae7b98b0c7_EOF
GH_AW_MCP_CONFIG_73bab7f9eb3c0972_EOF
- name: Mount MCP servers as CLIs
id: mount-mcp-clis
continue-on-error: true
@@ -1161,7 +1161,7 @@ jobs:
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
env:
WORKFLOW_NAME: "Check requirements"
WORKFLOW_DESCRIPTION: "Checks changed Python package requirements on PRs targeting the core repo (including PRs opened from forks) and verifies licenses match PyPI metadata, source repositories are publicly accessible, PyPI releases were uploaded via automated CI (Trusted Publisher attestation), the package's release pipeline uses OIDC or equivalent automated credentials (not static tokens), and the PR description contains the required links."
WORKFLOW_DESCRIPTION: "Checks changed Python package requirements on PRs targeting the core repo (including PRs opened from forks) and verifies licenses match PyPI metadata, source repositories are publicly accessible, PyPI releases were uploaded via automated CI (Trusted Publisher attestation), the package's release pipeline uses OIDC or equivalent automated credentials (not static tokens), the package source does not show obvious signs of malicious behavior (Home Assistant secret/config access, download-and-execute, install-time arbitrary code, credential exfiltration, obfuscated dynamic execution), and the PR description contains the required links."
HAS_PATCH: ${{ needs.agent.outputs.has_patch }}
with:
script: |
+195 -21
View File
@@ -45,8 +45,11 @@ description: >
(including PRs opened from forks) and verifies licenses match PyPI metadata, source
repositories are publicly accessible, PyPI releases were uploaded via
automated CI (Trusted Publisher attestation), the package's release pipeline
uses OIDC or equivalent automated credentials (not static tokens), and the PR
description contains the required links.
uses OIDC or equivalent automated credentials (not static tokens), the
package source does not show obvious signs of malicious behavior (Home
Assistant secret/config access, download-and-execute, install-time
arbitrary code, credential exfiltration, obfuscated dynamic execution),
and the PR description contains the required links.
---
# Check requirements
@@ -307,7 +310,158 @@ Bitbucket, Codeberg, Gitea, Sourcehut):
3. If no CI configuration can be retrieved, mark ⚠️ — "Release pipeline could
not be inspected; hosting provider is not GitHub or GitLab."
## Step 7 — Post a Review Comment
## Step 7 — Security Sanity Check
This step is a **baseline check only** — a cheap first pass intended to catch
the most obvious supply-chain red flags. It is not a security review, not a
malware audit, and not a substitute for human judgement. A clean result here
means "nothing obvious stood out in a quick scan", not "this package is
safe". Reviewers and maintainers remain responsible for the real security
assessment.
For each new or bumped package, perform a lightweight scan of the package's
source for behavior patterns that have historically appeared in supply-chain
attacks. The goal is to surface suspicious code paths so a human reviewer can
decide.
Use the source repository URL recorded in Step 3. Skip this step and mark `—`
when:
- The repository is not publicly accessible (Step 5 failed), **or**
- The host is neither GitHub nor GitLab and no CI configuration files were
retrievable in Step 6.
### 7a — Fetch a representative slice of the source
- For **GitHub** repos:
1. Resolve the default branch via `GET /repos/{owner}/{repo}`.
2. List the tree with
`GET /repos/{owner}/{repo}/git/trees/{default_branch}?recursive=1`.
3. Identify the package's actual Python module directory (`{package_name}/`
or `src/{package_name}/`, normalising `-` to `_`).
- For **GitLab** repos use the equivalent REST API calls; if those are
unavailable, or for any other non-GitHub host, fall back to `web-fetch` of
raw file URLs.
- Fetch the **raw contents** of:
- `setup.py` if present — install-time code runs on every consumer machine.
- `pyproject.toml` — inspect `[build-system]` and any custom build backend.
- The package's `__init__.py`.
- Up to **8** additional Python files inside the package directory,
prioritising files referenced from `entry_points`, plus any file whose
name suggests bootstrap, loader, or self-update behavior
(`update*.py`, `loader*.py`, `bootstrap*.py`, `_native.py`,
`_post_install*.py`, etc.).
If the source tree is too large to inspect within the available API budget,
inspect at least `setup.py`, `pyproject.toml`, and the package's
`__init__.py`, then mark this step ⚠️ with a note that only a partial scan
was performed.
### 7b — Patterns to flag
Reason from principles, not a fixed checklist. For each fetched file, ask:
*would a well-behaved Python library that does what this package's PyPI
description claims to do need to do this?* If the answer is "no" or
"unclear", record a finding. The categories below describe the **shape** of
concerning behavior; the specific APIs, filenames, and storage keys mentioned
are illustrative examples — treat any equivalent construct (including ones
that did not exist when this workflow was written) the same way.
For every finding include the file path, line number, a snippet
(≤ 120 chars), a permalink of the form
`https://github.com/{owner}/{repo}/blob/{sha}/{path}#L{line}` (or the
GitLab equivalent), and one sentence explaining why the behavior is out of
scope for the package's stated purpose.
1. **Reaches outside the package's declared scope into Home Assistant
internals.**
A third-party library should interact with Home Assistant only through
the public, documented Python API it imports — never by touching the
filesystem of `config_dir` or by reading internal authentication /
session state. Flag any code that opens, reads, writes, or resolves paths
to artifacts it does not own (top-level YAML it did not create, anything
under `.storage/`, files owned by other integrations / domains), or that
reads tokens, refresh tokens, auth providers, or other internal session
state. Examples like `secrets.yaml`, `.storage/auth*`, `hass.auth`, or
`hass.config.path("secrets.yaml")` are illustrative — the principle is
*out-of-scope access*, not a static list of names.
2. **Network input flows into an execution sink (download-and-execute).**
Bytes obtained from a remote source must never reach an interpreter.
Flag any data-flow path where the response body of a network call
(any HTTP / WebSocket / raw-socket client, sync or async) ends up at
*any* execution sink: `exec`, `eval`, `compile`, `marshal.loads`,
`pickle.loads`, `types.FunctionType`, `importlib.util.spec_from_loader`,
`subprocess.*`, `os.system`, shell pipelines such as `curl … | sh`, or
a file that is subsequently imported or executed. The same applies to
package-manager invocations (`pip install`, `pip download`, …) whose
arguments are resolved from network responses at runtime. Future
language or stdlib features that achieve the same effect should be
treated identically.
3. **Build-time or install-time code is non-deterministic or non-local.**
`setup.py`, `setup.cfg` `cmdclass`, custom PEP 517 backends, and any
other build hook must be self-contained: they may only compile and copy
files that ship in the source distribution. Flag any build-stage code
that opens a network socket, shells out to external binaries (other than
the compiler toolchain needed to build the shipped sources), writes
outside the build / install tree, or pulls in a build backend whose
source is not on PyPI (e.g. referenced via Git URL or local path).
4. **Reads user secrets and combines them with an egress path.**
The concerning shape is *secret-source → outbound-channel*, not any
single API. Flag code that reads credential / authentication material
from the host (environment variables that look like tokens or API keys,
files under the user's home that store credentials, OS keychain APIs,
browser-profile directories, Home Assistant token stores) **and** in the
same code path sends that data to a destination the package does not
need to talk to. Reading secrets alone is not enough; sending data out
alone is not enough; the *combination* is the signal.
5. **Hides what it does from a reader.**
Source that a maintainer cannot reasonably review is itself a smell.
Flag any pattern where opaque data flows into an execution sink: large
encoded / compressed / hex strings (decoded via `base64`, `codecs`,
`zlib`, `lzma`, `bytes.fromhex`, or any future equivalent) passed to
`exec` / `eval` / `compile` / `__import__`; identifiers assembled at
runtime from non-literal pieces and then imported; or any other
construct whose evident purpose is to make the real behavior unreadable.
6. **Hard-coded network destination that does not match the package's
stated purpose.**
Flag outbound URLs or hosts that do not appear in the package's PyPI
`project_urls` and have no obvious connection to its function —
especially short-link / paste services, ephemeral tunnels, raw IP
addresses, or non-default ports against unknown hosts — and any
network call originating from module top-level / `__init__.py` (which
executes on import for every consumer).
If a behavior is clearly out of scope for the package's stated purpose but
does not fit any of the categories above, flag it under whichever category
fits best and explain in the finding. The list of categories is meant to
guide reasoning, not bound it.
### 7c — Outcome
Aggregate the findings per package. **The Security column uses a different
icon set from the other columns** to make clear that a "pass" here is a
baseline result, not a trust signal:
- ☑️ — baseline scan complete and nothing obvious stood out. This is **not**
a ✅ and must not be reported as one. It means "the cheap checks found
nothing"; it does not mean the package is safe. Always use ☑️ in this
column for the success case — never ✅.
- ⚠️ — flagged patterns with plausible legitimate uses (e.g. an integration
helper that legitimately reads `configuration.yaml`, or a self-update
feature documented upstream). List every match so the reviewer can decide.
- ❌ — patterns with no legitimate explanation for a Python dependency, for
example: install-time network execution, decode-and-exec of opaque blobs,
reads of `secrets.yaml` or `.storage/auth*`, or env-var / token
exfiltration to an external host.
Be precise. Include the file path, line number, snippet, and reasoning for
every finding. False positives are expected — when in doubt, prefer ⚠️ with
context over ❌. This step is informational and never blocks the workflow on
its own; a human reviewer decides whether to merge.
## Step 8 — Post a Review Comment
**Always** post a review comment using `add_comment`, regardless of whether
packages pass or fail. Use the following structure:
@@ -323,34 +477,43 @@ Begin every comment with the HTML marker `<!-- requirements-check -->` on its
own line (this is used by the workflow to find the previous comment and update
it on the next run).
### 7a — Overall summary line
### 8a — Overall summary line
Begin the comment with a single summary line, before anything else:
- If everything passed (a ☑️ in the Security column counts as passed for this line): `All requirements checks passed. ✅`
- If there are failures or warnings: `⚠️ Some checks require attention — see the details below.`
### 7b — Summary table
### 8b — Summary table
Render a compact table where every check column contains **only the status
icon** (✅, ⚠️, or ❌). No explanatory text belongs inside the table cells —
all detail goes in the per-package sections below.
icon**. No explanatory text belongs inside the table cells — all detail goes
in the per-package sections below.
Use `—` (em dash) when a check was skipped (e.g. Release Pipeline is skipped
when the repository is not publicly accessible).
Icon set:
- All columns except **Security** use ✅, ⚠️, or ❌.
- The **Security** column uses ☑️ (baseline scan passed, *not* a trust
signal — see Step 7c), ⚠️, or ❌. Never write ✅ in the Security column.
- Use `—` (em dash) in any column when a check was skipped (e.g. Release
Pipeline and Security are skipped when the repository is not publicly
accessible).
```
<!-- requirements-check -->
## Check requirements
| Package | Type | Old→New | License | Repo Public | CI Upload | Release Pipeline | PR Link | Diff Consistent |
|---------|------|---------|---------|-------------|-----------|------------------|---------|-----------------|
| PackageA | bump | 1.2.3→1.3.0 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| PackageB | new | —→4.5.6 | ❌ | ✅ | ❌ | ⚠️ | ❌ | ✅ |
| PackageC | bump | 2.0.0→2.1.0 | ✅ | ❌ | — | — | ⚠️ | ✅ |
> ☑️ in the **Security** column means a baseline scan found nothing
> obvious — it is **not** an endorsement. See Step 7 of
> `.github/workflows/check-requirements.md` for what the scan covers and,
> importantly, what it does not.
| Package | Type | Old→New | License | Repo Public | CI Upload | Release Pipeline | Security | PR Link | Diff Consistent |
|---------|------|---------|---------|-------------|-----------|------------------|----------|---------|-----------------|
| PackageA | bump | 1.2.3→1.3.0 | ✅ | ✅ | ✅ | ✅ | ☑️ | ✅ | ✅ |
| PackageB | new | —→4.5.6 | ❌ | ✅ | ❌ | ⚠️ | ❌ | ❌ | ✅ |
| PackageC | bump | 2.0.0→2.1.0 | ✅ | ❌ | — | — | — | ⚠️ | ✅ |
```
### 7c — Per-package detail sections
### 8c — Per-package detail sections
After the table, add one collapsible `<details>` block per package.
@@ -361,10 +524,11 @@ After the table, add one collapsible `<details>` block per package.
Each block must include the full detail for every check: the license found, the
repository URL, whether a provenance attestation was found, the release
pipeline findings, the PR link found (or missing), and whether the diff is
consistent. For failed or warned checks, explain exactly what the contributor
must fix, including the expected source repository URL, expected version range,
etc.
pipeline findings, the security-scan findings (with file paths, line numbers,
and snippets for any matches), the PR link found (or missing), and whether the
diff is consistent. For failed or warned checks, explain exactly what the
contributor must fix, including the expected source repository URL, expected
version range, etc.
Template (repeat for each package):
@@ -376,6 +540,7 @@ Template (repeat for each package):
- **Repository Public**: ✅ https://github.com/example/packageb is publicly accessible.
- **CI Upload**: ❌ No provenance attestation found for any distribution file. The release may have been uploaded manually.
- **Release Pipeline**: ⚠️ No publish workflow found in the repository; it is unclear how this package is released to PyPI.
- **Security**: ❌ `setup.py` performs a network download and executes the result at install time — see https://github.com/example/packageb/blob/abc123/setup.py#L42 (`exec(urlopen("https://...").read())`).
- **PR Link**: ❌ PR description must link to the source repository at https://github.com/example/packageb (a PyPI page link is not sufficient).
- **Diff Consistent**: ✅
@@ -392,6 +557,7 @@ Collapsed example (all checks passed):
- **Repository Public**: ✅ https://github.com/example/packagea
- **CI Upload**: ✅ Trusted Publisher attestation found (GitHub Actions).
- **Release Pipeline**: ✅ OIDC via `pypa/gh-action-pypi-publish`; triggered on `release: published`; `environment: release` gate.
- **Security**: ☑️ Baseline scan found nothing obvious in `setup.py`, `pyproject.toml`, `__init__.py`, and 6 additional inspected files. This is not a security review — see Step 7 of `.github/workflows/check-requirements.md` for what was and was not checked.
- **PR Link**: ✅ https://github.com/example/packagea/compare/v1.2.3...v1.3.0
- **Diff Consistent**: ✅
@@ -408,8 +574,16 @@ Collapsed example (all checks passed):
`pyproject.toml` without being tied to a specific integration, the PR
description link requirement still applies.
- When checking test-only packages (from `requirements_test.txt` or
`requirements_test_all.txt`), apply the same license, repository, and PR
description checks as for production dependencies.
`requirements_test_all.txt`), apply the same license, repository, security,
and PR description checks as for production dependencies. Malicious test
dependencies still execute on contributor and CI machines.
- The Security Sanity Check (Step 7) is a **baseline** check, not a full
security review. It is informational only — it surfaces findings for a
human reviewer but never blocks the workflow on its own. The Security
column intentionally uses ☑️ (and *never* ✅) for the success case so
that a passing scan does not read as an endorsement: it only means
nothing obvious stood out. A ❌ is a strong signal to a reviewer, not an
automatic rejection.
- A package that appears in both a production file and a test file should only
be reported once; use the production file entry as the canonical one.
- This workflow is invoked exclusively via `workflow_dispatch`. The stage-1