# File size: 9,594 Bytes
#
# 9d02cc2

"""Run local checks that mirror the required GitHub PR gates.



This is intentionally conservative: it uses the same path classifier as
`.github/workflows/test.yml`, runs the local equivalents of required jobs, and
prints any CI-only coverage that cannot be reproduced on the current OS.

"""

from __future__ import annotations

import argparse
from dataclasses import dataclass
import os
from pathlib import Path
import shlex
import shutil
import subprocess
import sys
import time

# Directory one level above the folder containing this file. It is put at the
# front of sys.path (once) before the `scripts.ci_classifier` import below so
# that import can be resolved from that directory.
REPO_ROOT = Path(__file__).resolve().parents[1]
if str(REPO_ROOT) not in sys.path:
    sys.path.insert(0, str(REPO_ROOT))

from scripts.ci_classifier import classify_paths  # noqa: E402


# Script path plus command-line arguments for the "graph artifact validation"
# check; select_checks() prepends the Python interpreter to form the full argv.
# The --min-*/--expected-* values are pinned counts.
# NOTE(review): presumably these must stay in sync with the equivalent step in
# `.github/workflows/test.yml` -- confirm there before editing any number.
GRAPH_VALIDATE_ARGS = (
    "src/validate_graph_artifacts.py",
    "--graph-dir",
    "graph",
    "--deep",
    "--min-nodes",
    "100000",
    "--min-edges",
    "2000000",
    "--min-skills-sh-nodes",
    "89000",
    "--min-semantic-edges",
    "1000000",
    "--expected-nodes",
    "102928",
    "--expected-edges",
    "2913960",
    "--expected-semantic-edges",
    "1683193",
    "--expected-harness-nodes",
    "207",
    "--expected-skills-sh-nodes",
    "89471",
    "--expected-skills-sh-catalog-entries",
    "89465",
    "--expected-skills-sh-converted",
    "89465",
    "--expected-skill-pages",
    "91464",
    "--expected-agent-pages",
    "467",
    "--expected-mcp-pages",
    "10790",
    "--expected-harness-pages",
    "207",
    "--line-threshold",
    "180",
    "--max-stage-lines",
    "40",
)


@dataclass(frozen=True)
class Check:
    """One named command that the preflight run may execute."""

    # Label shown in the progress, pass and fail lines.
    name: str
    # Complete command line, executable first, handed to subprocess.run.
    argv: tuple[str, ...]
    # Extra environment variables layered over a copy of os.environ when the
    # check runs; None (the default) adds nothing.
    env: dict[str, str] | None = None


def _run_git(args: list[str], *, allow_failure: bool = False) -> list[str]:
    """Run ``git <args>`` and return its non-blank stdout lines, stripped.

    Args:
        args: Arguments placed after ``git`` on the command line.
        allow_failure: When true, a non-zero exit status yields an empty list
            instead of raising.

    Returns:
        The whitespace-stripped stdout lines, with empty lines dropped.

    Raises:
        RuntimeError: git exited non-zero and ``allow_failure`` is false. The
            message is git's stripped stderr, or its stripped stdout when
            stderr is blank.
    """
    result = subprocess.run(
        ["git", *args],
        capture_output=True,
        text=True,
        check=False,
    )
    if result.returncode == 0:
        stripped = (raw.strip() for raw in result.stdout.splitlines())
        return [line for line in stripped if line]
    if not allow_failure:
        raise RuntimeError(result.stderr.strip() or result.stdout.strip())
    return []


def changed_files(base_ref: str) -> list[str]:
    """Return the sorted, de-duplicated paths that differ from ``base_ref``.

    The result is the union of:

    * files changed between the merge base of ``base_ref`` and ``HEAD`` (or
      ``base_ref`` itself when no merge base can be determined) and ``HEAD``;
    * unstaged working-tree changes;
    * staged changes;
    * untracked files that are not ignored.

    Backslashes in paths are normalised to forward slashes.

    Every git call is best-effort and never raises. If the diff against the
    base cannot be computed (for example because ``base_ref`` does not exist
    locally), a warning is written to stderr: otherwise committed changes
    would silently vanish from the result and fewer checks would be selected
    than the change set actually requires.

    Args:
        base_ref: Git ref to compare the current branch against.

    Returns:
        Sorted list of changed paths.
    """
    merge_base = _run_git(["merge-base", base_ref, "HEAD"], allow_failure=True)
    base = merge_base[0] if merge_base else base_ref

    try:
        committed = _run_git(["diff", "--name-only", base, "HEAD"])
    except RuntimeError as exc:
        # Keep going with local changes only, but make the gap visible.
        reason = next(iter(str(exc).splitlines()), "git diff failed")
        print(
            f"[warn] could not diff {base!r} against HEAD ({reason}); "
            "committed changes are missing from check selection",
            file=sys.stderr,
        )
        committed = []

    paths = set(committed)
    paths.update(_run_git(["diff", "--name-only"], allow_failure=True))
    paths.update(_run_git(["diff", "--cached", "--name-only"], allow_failure=True))
    paths.update(
        _run_git(["ls-files", "--others", "--exclude-standard"], allow_failure=True)
    )
    return sorted(path.replace("\\", "/") for path in paths)


def select_checks(
    *,
    base_ref: str,
    files: list[str],
    profile: str,
    python: str,
) -> tuple[list[Check], list[str]]:
    """Choose the checks to run for a change set, plus informational notes.

    ``files`` is classified with ``classify_paths``; the resulting flags decide
    which groups of checks are included. The whitespace and repo-stats checks
    are always present. The source gates (lint, typing, tests, packaging) are
    included when ``profile`` is ``"full"`` or when the change set is neither
    docs-only nor graph-only.

    Args:
        base_ref: Base ref name; only used in the "changed files" note.
        files: Changed paths to classify.
        profile: ``"full"`` forces the source gates; any other value lets the
            classifier flags decide.
        python: Interpreter used as the executable of Python-based checks.

    Returns:
        ``(checks, notes)``: the checks in execution order, and the note lines
        to print before running them.
    """

    def pytest_check(
        name: str, *pytest_args: str, env: dict[str, str] | None = None
    ) -> Check:
        # Every pytest-based check shares the `<python> -m pytest -q` prefix.
        return Check(name, (python, "-m", "pytest", "-q", *pytest_args), env)

    flags = classify_paths(files)
    source_required = profile == "full" or not (
        flags["docs_only"] or flags["graph_only"]
    )

    selected: list[Check] = [
        Check("whitespace", ("git", "diff", "--check")),
        Check("repo stats", (python, "src/update_repo_stats.py", "--check")),
    ]

    if source_required:
        selected += [
            Check("ruff", (python, "-m", "ruff", "check", "src", "hooks", "scripts")),
            Check("mypy", (python, "-m", "mypy", "src")),
            Check("pip check", (python, "-m", "pip", "check")),
            pytest_check(
                "unit-linux equivalent",
                "-m",
                "not browser and not integration",
                "--cov=src",
                "--cov-report=term-missing",
                "--cov-fail-under=40",
            ),
            pytest_check(
                "A-Z canary",
                "--no-cov",
                "src/tests/test_alive_loop_e2e.py",
                "src/tests/test_fuzz_yaml_rendering.py",
            ),
            pytest_check(
                "contract compatibility local",
                "--no-cov",
                "src/tests/test_clean_host_contract.py",
                "src/tests/test_package_scaffold.py",
            ),
            Check(
                "clean host contract",
                (python, "scripts/clean_host_contract.py", "--fast"),
            ),
        ]

    if flags["docs_changed"]:
        selected.append(
            Check("docs strict build", (python, "-m", "mkdocs", "build", "--strict"))
        )

    if flags["graph_artifact_changed"]:
        selected.append(
            Check("graph artifact validation", (python, *GRAPH_VALIDATE_ARGS))
        )

    if source_required:
        if flags["similarity_changed"]:
            selected.append(
                pytest_check(
                    "similarity precision/recall",
                    "--no-cov",
                    "-m",
                    "integration",
                    "src/tests/test_similarity_precision_recall.py",
                    env={"CTX_REQUIRE_SIMILARITY_EVAL": "1"},
                )
            )

        if flags["browser_changed"]:
            selected.append(
                pytest_check(
                    "browser monitor security",
                    "--no-cov",
                    "-m",
                    "browser",
                    "src/tests/test_ctx_monitor_browser.py",
                )
            )

        # Packaging gate: wipe the scratch dist dir, build into it, then run
        # twine over whatever was produced (exit 2 if nothing was built).
        dist_dir = ".ci-preflight-dist"
        dist_glob = dist_dir + "/*"
        wipe_code = f"import shutil; shutil.rmtree({dist_dir!r}, ignore_errors=True)"
        twine_code = "".join(
            (
                "import glob, subprocess, sys; ",
                f"files=glob.glob({dist_glob!r}); ",
                "sys.exit(2 if not files else subprocess.call(",
                "[sys.executable, '-m', 'twine', 'check', *files]))",
            )
        )
        selected += [
            Check("clean preflight dist", (python, "-c", wipe_code)),
            Check("build wheel", (python, "-m", "build", "--outdir", dist_dir)),
            Check("twine check", (python, "-c", twine_code)),
        ]

    if files:
        headline = f"Changed files vs {base_ref}: {len(files)}"
    else:
        headline = "No changed files detected; running baseline cheap checks only."
    notes = [
        headline,
        "GitHub still runs Windows/macOS matrix jobs; local preflight covers the "
        "same contracts on this host.",
    ]

    return selected, notes


def run_checks(checks: list[Check], *, dry_run: bool) -> int:
    """Run ``checks`` in order, stopping at the first failure.

    Each check is announced as ``[i/total] name: command`` before it runs. The
    command is rendered with ``shlex.join`` so arguments containing spaces or
    shell metacharacters are shown quoted and the line can be copy-pasted.

    Args:
        checks: Checks to execute, in order. Each needs ``name``, ``argv`` and
            ``env`` attributes.
        dry_run: When true, only print the announcement lines; nothing is
            executed.

    Returns:
        ``0`` when every check passed (or on a dry run); otherwise the exit
        status of the first failing check. If a check's executable cannot be
        launched at all, the failure is reported on stderr and ``127`` is
        returned.
    """
    total = len(checks)
    for index, check in enumerate(checks, start=1):
        print(f"[{index}/{total}] {check.name}: {shlex.join(check.argv)}", flush=True)
        if dry_run:
            continue

        # Per-check variables are layered over the inherited environment.
        env = os.environ.copy()
        if check.env:
            env.update(check.env)

        start = time.monotonic()
        try:
            proc = subprocess.run(check.argv, check=False, env=env)
        except OSError as exc:
            # Missing or non-executable program (e.g. a bad --python value):
            # report it like any other failed check instead of a traceback.
            print(
                f"[fail] {check.name} could not be started: {exc}",
                file=sys.stderr,
            )
            return 127  # conventional "command not found" status
        elapsed = time.monotonic() - start

        if proc.returncode != 0:
            print(
                f"[fail] {check.name} exited {proc.returncode} after {elapsed:.1f}s",
                file=sys.stderr,
            )
            return proc.returncode
        print(f"[pass] {check.name} in {elapsed:.1f}s", flush=True)
    return 0


def main(argv: list[str] | None = None) -> int:
    """Command-line entry point: parse options, select checks and run them.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read
            ``sys.argv``.

    Returns:
        The status produced by ``run_checks``.

    Raises:
        SystemExit: if no ``git`` executable can be found on PATH (argparse
            may also exit on invalid arguments).
    """
    option_specs = (
        (
            "--base",
            {
                "default": "origin/main",
                "help": "base ref for changed-file detection",
            },
        ),
        (
            "--profile",
            {
                "choices": ("pr", "full"),
                "default": "pr",
                "help": (
                    "pr mirrors required PR checks; "
                    "full forces source gates for any change set"
                ),
            },
        ),
        (
            "--python",
            {
                "default": sys.executable,
                "help": "Python executable to run checks with",
            },
        ),
        (
            "--dry-run",
            {
                "action": "store_true",
                "help": "print selected checks without running them",
            },
        ),
    )
    parser = argparse.ArgumentParser(description=__doc__)
    for flag, spec in option_specs:
        parser.add_argument(flag, **spec)
    options = parser.parse_args(argv)

    # Changed-file detection shells out to git, so bail out early without it.
    git_path = shutil.which("git")
    if not git_path:
        raise SystemExit("git is required for ci_preflight")

    detected = changed_files(options.base)
    checks, notes = select_checks(
        base_ref=options.base,
        files=detected,
        profile=options.profile,
        python=options.python,
    )
    for note in notes:
        print(f"[note] {note}")
    return run_checks(checks, dry_run=options.dry_run)


if __name__ == "__main__":
    raise SystemExit(main())