4 files changed, 368 insertions, 0 deletions
diff --git a/.github/scripts/compare_benchmarks.py b/.github/scripts/compare_benchmarks.py
new file mode 100644
index 00000000..5d4176f6
--- /dev/null
+++ b/.github/scripts/compare_benchmarks.py
@@ -0,0 +1,122 @@
+#!/usr/bin/env python3
+"""
+Compare JMH benchmark results (with heapMetric) against a stored baseline and
+print a markdown table suitable for a GitHub PR comment.
+
+Usage:
+    python compare_benchmarks.py <current_results.json> [<baseline_latest.json>]
+
+If the baseline file is absent or empty the table shows current values only.
+"""
+
+import json
+import sys
+from pathlib import Path
+
+
+def short_name(full: str) -> str:
+    """Drop the package prefix, keeping only the trailing ``ClassName.methodName``."""
+    tail = full.split(".")[-2:]
+    return ".".join(tail)
+
+
+def fmt_delta(current: float, baseline: float) -> str:
+    """Return the signed percent change from *baseline*, or "-" when baseline is 0."""
+    if baseline == 0:
+        return "-"
+    return f"{(current - baseline) / baseline * 100:+.1f}%"
+
+
+def fmt_float(value: float | None, decimals: int = 1) -> str:
+    """Format *value* with thousands separators and *decimals* places; ``None`` gives "N/A"."""
+    spec = f",.{decimals}f"
+    return "N/A" if value is None else format(value, spec)
+
+
+def load_results(path: Path) -> dict[str, dict]:
+    """Return benchmark entries keyed by name; ``{}`` if the file is missing or blank.
+
+    Accepts a raw JMH results array or a benchmark-history array of
+    ``{commit, timestamp, results}`` objects, whose last entry's results are used.
+    """
+    text = path.read_text(encoding="utf-8") if path.exists() else ""
+    # A zero-byte or whitespace-only file means "no data", not a JSON syntax error.
+    data = json.loads(text) if text.strip() else []
+    if not isinstance(data, list) or not data:
+        return {}
+    last = data[-1]
+    if isinstance(last, dict) and "results" in last:
+        data = last["results"] or []
+    return {e["benchmark"]: e for e in data if isinstance(e, dict) and "benchmark" in e}
+
+
+def main() -> None:
+    """Print a markdown comparison of current vs. baseline results to stdout.
+
+    Reads the results path (and optional baseline path) from ``sys.argv`` and
+    exits with status 1 when the required argument is missing.
+    """
+    argv = sys.argv
+    if len(argv) < 2:
+        print("Usage: compare_benchmarks.py <current.json> [baseline.json]", file=sys.stderr)
+        sys.exit(1)
+
+    current = load_results(Path(argv[1]))
+    baseline = load_results(Path(argv[2])) if len(argv) > 2 else {}
+
+    if not current:
+        print("## Benchmark Results\n\n_No benchmark results found._")
+        return
+
+    if baseline:
+        # update_history tags every stored entry with the commit it came from.
+        sha = next(iter(baseline.values()), {}).get("_meta_commit", "")
+        note = f"> Compared against master{f' @ `{sha[:7]}`' if sha else ''}\n"
+    else:
+        note = "> No baseline found — showing current values only.\n"
+
+    lines = [
+        "## Benchmark Results\n",
+        note,
+        # Header row, then the alignment row (numeric columns right-aligned).
+        "| Benchmark | Time (ms) | Δ Time"
+        " | Avg Heap (MB) | Δ Avg Heap"
+        " | Max Heap (MB) | Δ Max Heap |",
+        "|-----------|----------:|-------:"
+        "|--------------:|-----------:"
+        "|--------------:|-----------:|",
+    ]
+
+    def delta(now: float | None, ref: float | None) -> str:
+        # A delta needs both sides; anything missing renders as "-".
+        if now is None or ref is None:
+            return "-"
+        return fmt_delta(now, ref)
+
+    for name in sorted(current):
+        entry = current[name]
+        previous = baseline.get(name, {})
+        heap = entry.get("heapMetric", {})
+        old_heap = previous.get("heapMetric", {})
+
+        score = entry.get("primaryMetric", {}).get("score")
+        old_score = previous.get("primaryMetric", {}).get("score")
+        avg_mb, max_mb = heap.get("avgMb"), heap.get("maxMb")
+
+        cells = [
+            short_name(name),
+            fmt_float(score),
+            delta(score, old_score),
+            fmt_float(avg_mb),
+            delta(avg_mb, old_heap.get("avgMb")),
+            fmt_float(max_mb),
+            delta(max_mb, old_heap.get("maxMb")),
+        ]
+        lines.append("| " + " | ".join(cells) + " |")
+
+    lines.append("\n_Positive Δ = slower / more memory. No threshold gate — informational only._")
+    print("\n".join(lines))
+
+
+if __name__ == "__main__":  # run only when executed as a script, not on import
+    main()
+\ No newline at end of file
diff --git a/.github/scripts/update_history.py b/.github/scripts/update_history.py
new file mode 100644
index 00000000..6fae798c
--- /dev/null
+++ b/.github/scripts/update_history.py
@@ -0,0 +1,53 @@
+#!/usr/bin/env python3
+"""
+Append the current JMH results to benchmark-history.json in the
+benchmark-data branch working directory.
+
+Usage:
+    python update_history.py <results.json> <output_dir> <commit_sha> [<pr_number>]
+
+<output_dir> is the directory where benchmark-history.json will be
+written (the benchmark-data checkout root).
+"""
+
+import json
+import sys
+from datetime import datetime, timezone
+from pathlib import Path
+
+
+def main() -> None:
+    """Record one benchmark run in ``<output_dir>/benchmark-history.json``."""
+    args = sys.argv[1:]
+    if len(args) < 3:
+        print("Usage: update_history.py <results.json> <output_dir> <commit_sha> [<pr_number>]", file=sys.stderr)
+        sys.exit(1)
+
+    commit_sha = args[2]
+    pr_number = int(args[3]) if len(args) > 3 else None
+
+    # Stamp every entry with the commit so compare_benchmarks.py can show it.
+    results = json.loads(Path(args[0]).read_text())
+    for entry in results:
+        entry["_meta_commit"] = commit_sha
+
+    history_path = Path(args[1]) / "benchmark-history.json"
+    if history_path.exists():
+        history = json.loads(history_path.read_text())
+    else:
+        history = []
+
+    record = {
+        "commit": commit_sha,
+        "pr": pr_number,
+        "timestamp": datetime.now(timezone.utc).isoformat(),
+        "results": results,
+    }
+    history.append(record)
+    history_path.write_text(json.dumps(history, indent=2))
+
+    print(f"Stored results for commit {commit_sha[:7]} ({len(results)} benchmark(s)).")
+
+
+if __name__ == "__main__":  # run only when executed as a script, not on import
+    main()
+\ No newline at end of file
diff --git a/.github/workflows/benchmark-comment.yml b/.github/workflows/benchmark-comment.yml
new file mode 100644
index 00000000..c095162e
--- /dev/null
+++ b/.github/workflows/benchmark-comment.yml
@@ -0,0 +1,90 @@
+name: Benchmark Comment
+
+on:
+  workflow_run:
+    workflows: [Benchmark]
+    types: [completed]
+
+jobs:
+  comment-pr:
+    name: Post benchmark comment
+    # Only run when triggered by a pull_request event and the benchmark succeeded.
+    if: >
+      github.event.workflow_run.conclusion == 'success' &&
+      github.event.workflow_run.event == 'pull_request'
+    runs-on: ubuntu-latest
+
+    permissions:
+      contents: read
+      actions: read
+      pull-requests: write
+      issues: write
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Download benchmark results
+        uses: actions/download-artifact@v4
+        with:
+          run-id: ${{ github.event.workflow_run.id }}
+          name: benchmark-results
+          path: current-results
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Download PR number
+        uses: actions/download-artifact@v4
+        with:
+          run-id: ${{ github.event.workflow_run.id }}
+          name: pr-number
+          path: pr-number-artifact  # isolated dir: an untrusted artifact must not overwrite checked-out files
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Read PR number
+        id: pr
+        run: echo "number=$(tr -cd '0-9' < pr-number-artifact/pr-number.txt)" >> "$GITHUB_OUTPUT"
+
+      - name: Fetch baseline from benchmark-data branch
+        run: |
+          git fetch origin benchmark-data 2>/dev/null || true
+          git show origin/benchmark-data:benchmark-history.json > baseline.json 2>/dev/null \
+            || echo "[]" > baseline.json
+
+      - name: Generate comparison comment
+        run: |
+          python .github/scripts/compare_benchmarks.py \
+            current-results/results.json \
+            baseline.json \
+            > comment.md
+
+      - name: Post or update PR comment
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const fs = require('fs');
+            const marker = '<!-- opendc-benchmark-comment -->';
+            const fullBody = marker + '\n' + fs.readFileSync('comment.md', 'utf8');
+            const prNumber = Number(fs.readFileSync('pr-number-artifact/pr-number.txt', 'utf8').trim());
+            if (!Number.isInteger(prNumber) || prNumber <= 0) throw new Error('Invalid PR number in pr-number artifact');
+
+            const comments = await github.paginate(github.rest.issues.listComments, { // every page, not just the first
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: prNumber,
+            });
+
+            const existing = comments.find(c => c.body.includes(marker));
+            if (existing) {
+              await github.rest.issues.updateComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                comment_id: existing.id,
+                body: fullBody,
+              });
+            } else {
+              await github.rest.issues.createComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: prNumber,
+                body: fullBody,
+              });
+            }
diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
new file mode 100644
index 00000000..69c3164f
--- /dev/null
+++ b/.github/workflows/benchmark.yml
@@ -0,0 +1,103 @@
+name: Benchmark
+
+on:
+  pull_request:
+    branches: [master]
+  push:
+    branches: [master]
+  workflow_dispatch:
+
+# Cancel in-flight benchmark runs for the same PR/branch so only the latest commit is benchmarked.
+concurrency:
+  group: benchmark-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  benchmark:
+    name: Run CIBenchmark
+    runs-on: ubuntu-latest
+
+    permissions:
+      contents: read
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up JDK 21
+        uses: actions/setup-java@v4
+        with:
+          java-version: "21"
+          distribution: temurin
+
+      - name: Set up Gradle
+        uses: gradle/actions/setup-gradle@v4
+
+      - name: Run CIBenchmark
+        run: >
+          ./gradlew :opendc-experiments:opendc-experiments-base:jmh
+          -PjmhIncludes=.*CIBenchmark.*
+
+      - name: Upload benchmark results
+        uses: actions/upload-artifact@v4
+        with:
+          name: benchmark-results
+          path: opendc-experiments/opendc-experiments-base/build/results/jmh/results.json
+          retention-days: 7
+
+      # Store the PR number so the comment workflow (which runs with write permissions)
+      # knows which PR to comment on. Only needed for pull_request runs.
+      - name: Save PR number
+        if: github.event_name == 'pull_request'
+        run: echo "${{ github.event.pull_request.number }}" > pr-number.txt
+
+      - name: Upload PR number
+        if: github.event_name == 'pull_request'
+        uses: actions/upload-artifact@v4
+        with:
+          name: pr-number
+          path: pr-number.txt
+          retention-days: 1
+
+  # ── Master push: store results on the benchmark-data branch ──────────────────
+  store-results:
+    name: Store benchmark results
+    if: github.event_name == 'push'
+    needs: benchmark
+    runs-on: ubuntu-latest
+
+    permissions:
+      contents: write
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Download current results
+        uses: actions/download-artifact@v4
+        with:
+          name: benchmark-results
+          path: current-results
+
+      - name: Check out or create benchmark-data branch
+        run: |
+          git fetch origin benchmark-data 2>/dev/null || true
+          if git rev-parse --verify origin/benchmark-data 2>/dev/null; then
+            git worktree add benchmark-data-dir origin/benchmark-data
+          else
+            git worktree add --orphan -b benchmark-data benchmark-data-dir
+          fi
+
+      - name: Update history
+        run: |
+          python .github/scripts/update_history.py \
+            current-results/results.json \
+            benchmark-data-dir \
+            "${{ github.sha }}"
+
+      - name: Commit and push
+        run: |
+          git -C benchmark-data-dir config user.name  "github-actions[bot]"
+          git -C benchmark-data-dir config user.email "github-actions[bot]@users.noreply.github.com"
+          git -C benchmark-data-dir add benchmark-history.json
+          git -C benchmark-data-dir commit \
+            -m "chore: benchmark results for ${{ github.sha }}"
+          git -C benchmark-data-dir push origin HEAD:benchmark-data