Skip to content

Commit 46f6157

Browse files
authored
Extract context patterns from webhook schemata (#745)
1 parent 855aa0b commit 46f6157

39 files changed

+5496
-386
lines changed

.github/workflows/codegen.yml

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
name: Code generation 🤖
2+
3+
on:
4+
workflow_dispatch:
5+
schedule:
6+
- cron: '0 12 * * 1'
7+
8+
permissions: {}
9+
10+
env:
11+
PR_ASSIGNEES: woodruffw
12+
13+
jobs:
14+
refresh-schemas:
15+
name: Refresh JSON schemas 📈
16+
runs-on: ubuntu-latest
17+
18+
permissions:
19+
contents: write # for creating branches
20+
pull-requests: write # for opening PRs
21+
22+
steps:
23+
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
24+
with:
25+
persist-credentials: false
26+
27+
- name: try to refresh schemas
28+
run: |
29+
make refresh-schemas
30+
31+
- name: create PR
32+
uses: peter-evans/create-pull-request@271a8d0340265f705b14b6d32b9829c1cb33d45e # v7.0.8
33+
with:
34+
commit-message: "[BOT] update JSON schemas from SchemaStore"
35+
branch: refresh-schemas
36+
branch-suffix: timestamp
37+
title: "[BOT] update JSON schemas from SchemaStore"
38+
body: |
39+
:robot: :warning: :robot:
40+
41+
This is an automated pull request, updating the embedded JSON
42+
schemas after a SchemaStore change was detected.
43+
44+
Please review manually before merging.
45+
assignees: ${{ env.PR_ASSIGNEES }}
46+
reviewers: ${{ env.PR_ASSIGNEES }}
47+
48+
refresh-context-capabilities:
49+
name: Refresh context capabilities *️⃣
50+
runs-on: ubuntu-latest
51+
52+
permissions:
53+
contents: write # for creating branches
54+
pull-requests: write # for opening PRs
55+
56+
steps:
57+
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
58+
with:
59+
persist-credentials: false
60+
61+
- uses: astral-sh/setup-uv@6b9c6063abd6010835644d4c2e1bef4cf5cd0fca # v6.0.1
62+
63+
- name: try to refresh context capabilities
64+
run: |
65+
make webhooks-to-contexts
66+
67+
- name: create PR
68+
uses: peter-evans/create-pull-request@271a8d0340265f705b14b6d32b9829c1cb33d45e # v7.0.8
69+
with:
70+
commit-message: "[BOT] update context capabilities"
71+
branch: refresh-context-capabilities
72+
branch-suffix: timestamp
73+
title: "[BOT] update context-capabilities from GitHub webhooks"
74+
body: |
75+
:robot: :warning: :robot:
76+
77+
This is an automated pull request, updating the
78+
context capabilities CSV after a change to GitHub's
79+
webhooks was detected.
80+
81+
Please review manually before merging.
82+
assignees: ${{ env.PR_ASSIGNEES }}
83+
reviewers: ${{ env.PR_ASSIGNEES }}

.github/workflows/refresh-schemas.yml

Lines changed: 0 additions & 40 deletions
This file was deleted.

Cargo.lock

Lines changed: 29 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,10 @@ clap = "4.5.38"
3030
clap-verbosity-flag = { version = "3.0.2", default-features = false }
3131
clap_complete = "4.5.50"
3232
clap_complete_nushell = "4.5.5"
33+
csv = "1.3.1"
3334
etcetera = "0.10.0"
3435
flate2 = "1.1.1"
36+
fst = "0.4.7"
3537
http-cache-reqwest = "0.15.1"
3638
human-panic = "2.0.1"
3739
ignore = "0.4.23"

Makefile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,10 @@ refresh-schemas:
3131
curl https://json.schemastore.org/github-workflow.json > crates/zizmor/src/data/github-workflow.json
3232
curl https://json.schemastore.org/github-action.json > crates/zizmor/src/data/github-action.json
3333

34+
.PHONY: webhooks-to-contexts
35+
webhooks-to-contexts: support/known-safe-contexts.txt
36+
uv run --script --only-group codegen ./support/webhooks-to-contexts.py
37+
3438
.PHONY: pinact
3539
pinact:
3640
pinact run --update --verify

crates/github-actions-expressions/src/context.rs

Lines changed: 105 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
//! Parsing and matching APIs for GitHub Actions expressions
22
//! contexts (e.g. `github.event.name`).
3+
34
use super::Expr;
45

56
/// Represents a context in a GitHub Actions expression.
@@ -47,6 +48,54 @@ impl<'src> Context<'src> {
4748
_ => None,
4849
}
4950
}
51+
52+
/// Returns the "pattern equivalent" of this context.
53+
///
54+
/// This is a string that can be used to efficiently match the context,
55+
/// such as is done in `zizmor`'s template-injection audit via a
56+
/// finite state transducer.
57+
///
58+
/// Returns None if the context doesn't have a sensible pattern
59+
/// equivalent, e.g. if it starts with a call.
60+
pub fn as_pattern(&self) -> Option<String> {
61+
fn push_part(part: &Expr<'_>, pattern: &mut String) {
62+
match part {
63+
Expr::Identifier(ident) => pattern.push_str(ident.0),
64+
Expr::Star => pattern.push('*'),
65+
Expr::Index(idx) => match idx.as_ref() {
66+
// foo['bar'] -> foo.bar
67+
Expr::String(idx) => pattern.push_str(idx),
68+
// any kind of numeric or computed index, e.g.:
69+
// foo[0], foo[1 + 2], foo[bar]
70+
_ => pattern.push('*'),
71+
},
72+
_ => unreachable!("unexpected part in context pattern"),
73+
}
74+
}
75+
76+
// TODO: Optimization ideas:
77+
// 1. Add a happy path for contexts that contain only
78+
// identifiers? Problem: case normalization.
79+
// 2. Use `regex-automata` to return a case insensitive
80+
// automation here?
81+
let mut pattern = String::with_capacity(self.raw.len());
82+
83+
let mut parts = self.parts.iter().peekable();
84+
85+
let head = parts.next()?;
86+
if matches!(head, Expr::Call { .. }) {
87+
return None;
88+
}
89+
90+
push_part(head, &mut pattern);
91+
for part in parts {
92+
pattern.push('.');
93+
push_part(part, &mut pattern);
94+
}
95+
96+
pattern.make_ascii_lowercase();
97+
Some(pattern)
98+
}
5099
}
51100

52101
impl PartialEq for Context<'_> {
@@ -120,33 +169,28 @@ impl<'src> ContextPattern<'src> {
120169
}
121170
}
122171

172+
fn compare_part(pattern: &str, part: &Expr<'src>) -> bool {
173+
if pattern == "*" {
174+
true
175+
} else {
176+
match part {
177+
Expr::Identifier(part) => pattern.eq_ignore_ascii_case(part.0),
178+
Expr::Index(part) => match part.as_ref() {
179+
Expr::String(part) => pattern.eq_ignore_ascii_case(part),
180+
_ => false,
181+
},
182+
_ => false,
183+
}
184+
}
185+
}
186+
123187
fn compare(&self, ctx: &Context<'src>) -> Option<Comparison> {
124188
let mut pattern_parts = self.0.split('.').peekable();
125189
let mut ctx_parts = ctx.parts.iter().peekable();
126190

127191
while let (Some(pattern), Some(part)) = (pattern_parts.peek(), ctx_parts.peek()) {
128-
// TODO: Refactor this; it's way too hard to read.
129-
match (*pattern, part) {
130-
// Calls can't be compared to patterns.
131-
(_, Expr::Call { .. }) => return None,
132-
// "*" matches any part.
133-
("*", _) => {}
134-
(_, Expr::Star) => return None,
135-
(pattern, Expr::Identifier(part)) if !pattern.eq_ignore_ascii_case(part.0) => {
136-
return None;
137-
}
138-
(pattern, Expr::Index(idx)) => {
139-
// Anything other than a string index is invalid
140-
// for part-wise comparison.
141-
let Expr::String(part) = idx.as_ref() else {
142-
return None;
143-
};
144-
145-
if !pattern.eq_ignore_ascii_case(part) {
146-
return None;
147-
}
148-
}
149-
_ => {}
192+
if !Self::compare_part(pattern, part) {
193+
return None;
150194
}
151195

152196
pattern_parts.next();
@@ -253,6 +297,45 @@ mod tests {
253297
}
254298
}
255299

300+
/// Checks `Context::as_pattern` against a table of
/// `(context source, expected pattern)` pairs.
#[test]
fn test_context_as_pattern() {
    let cases: &[(&str, Option<&str>)] = &[
        // Basic cases.
        ("foo", Some("foo")),
        ("foo.bar", Some("foo.bar")),
        ("foo.bar.baz", Some("foo.bar.baz")),
        ("foo.bar.baz_baz", Some("foo.bar.baz_baz")),
        ("foo.bar.baz-baz", Some("foo.bar.baz-baz")),
        ("foo.*", Some("foo.*")),
        ("foo.bar.*", Some("foo.bar.*")),
        ("foo.*.baz", Some("foo.*.baz")),
        ("foo.*.*", Some("foo.*.*")),
        // Case sensitivity.
        ("FOO", Some("foo")),
        ("FOO.BAR", Some("foo.bar")),
        ("FOO.BAR.BAZ", Some("foo.bar.baz")),
        ("FOO.BAR.BAZ_BAZ", Some("foo.bar.baz_baz")),
        ("FOO.BAR.BAZ-BAZ", Some("foo.bar.baz-baz")),
        ("FOO.*", Some("foo.*")),
        ("FOO.BAR.*", Some("foo.bar.*")),
        ("FOO.*.BAZ", Some("foo.*.baz")),
        ("FOO.*.*", Some("foo.*.*")),
        // Indexes.
        ("foo.bar.baz[0]", Some("foo.bar.baz.*")),
        ("foo.bar.baz['abc']", Some("foo.bar.baz.abc")),
        ("foo.bar.baz[0].qux", Some("foo.bar.baz.*.qux")),
        ("foo.bar.baz[0].qux[1]", Some("foo.bar.baz.*.qux.*")),
        ("foo[1][2][3]", Some("foo.*.*.*")),
        ("foo.bar[abc]", Some("foo.bar.*")),
        ("foo.bar[abc()]", Some("foo.bar.*")),
        // Invalid cases
        ("foo().bar", None),
    ];

    for &(input, want) in cases {
        // Every table entry is expected to parse as a context.
        let parsed = Context::try_from(input).unwrap();
        let got = parsed.as_pattern();
        assert_eq!(got.as_deref(), want);
    }
}
338+
256339
#[test]
257340
fn test_contextpattern_new() {
258341
for (case, expected) in &[

0 commit comments

Comments
 (0)