ICSAC Claude Opus 4.7 (1M context) committed on
Commit
415cfd5
·
1 Parent(s): ea90e44

Retire gemini-cli panel entry + fail-closed blind-review compaction

Browse files

Two intertwined changes ahead of the gemini-cli free-tier sunset (2026-06-18):

1. Bare "gemini" tail-of-chain entries removed from all 4 panel slots in
config.example.py and the gemini-special-case branch deleted from
_run_panel_chain in review.py. Slots 1/2/3 now end on
or|google/gemma-4-31b-it:free; slot 4 ends on or|google/gemma-4-26b-a4b-it:free
to preserve model-family diversity at the tail. run_gemini_review and
the GEMINI_BIN config knob are kept only as dead code for external-fork
import compatibility and marked DEPRECATED.

2. review_compaction.compact_paper now FAILS CLOSED: on any failure
(model timeout, exception, non-zero exit, unparseable output, empty
input) it returns an EMPTY redacted text, never the original. review.py
detects "_failure" in the manifest, withholds the paper from the panel,
fires a pain signal to ntfy /pain, and returns a PAUSED_AI_FAILURE
aggregate so the worker routes the submission to paused_panel_failure
for curator attention. The previous fail-OPEN behavior would have sent
the un-stripped (author-identified) manuscript to the panel — the exact
integrity violation compaction exists to prevent.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

Files changed (4) hide show
  1. config.example.py +9 -4
  2. editorial_workflow.py +5 -3
  3. review.py +87 -22
  4. review_compaction.py +56 -45
config.example.py CHANGED
@@ -87,7 +87,7 @@ OPENROUTER_MODELS = [
87
  "hf|Qwen/Qwen3-235B-A22B-Instruct-2507:cerebras",
88
  "or|openai/gpt-oss-120b:free",
89
  "or|z-ai/glm-4.5-air:free",
90
- "gemini",
91
  ],
92
  # Slot 2: Groq gpt-oss-120b → Cerebras Qwen3-235B → OR Nvidia/Hermes.
93
  # nemotron-3-super-120b-a12b excluded (won't emit JSON reliably).
@@ -96,7 +96,7 @@ OPENROUTER_MODELS = [
96
  "hf|Qwen/Qwen3-235B-A22B-Instruct-2507:cerebras",
97
  "or|nvidia/nemotron-nano-12b-v2-vl:free",
98
  "or|nousresearch/hermes-3-llama-3.1-405b:free",
99
- "gemini",
100
  ],
101
  # Slot 3: Cerebras primary → Groq Llama → OR Google/cross-family.
102
  [
@@ -104,7 +104,7 @@ OPENROUTER_MODELS = [
104
  "hf|meta-llama/Llama-3.3-70B-Instruct:groq",
105
  "or|google/gemma-4-26b-a4b-it:free",
106
  "or|z-ai/glm-4.5-air:free",
107
- "gemini",
108
  ],
109
  # Slot 4: HF Groq primary, HF Cerebras fallback, OR tail. Reordered
110
  # 2026-04-27 after qwen3-next-80b-a3b-instruct:free failed all 4
@@ -122,7 +122,7 @@ OPENROUTER_MODELS = [
122
  "hf|Qwen/Qwen3-235B-A22B-Instruct-2507:cerebras",
123
  "or|minimax/minimax-m2.5:free",
124
  "or|google/gemma-4-31b-it:free",
125
- "gemini",
126
  ],
127
  ]
128
  OPENROUTER_MODELS_API_URL = "https://openrouter.ai/api/v1/models"
@@ -171,6 +171,11 @@ TEMPLATES_DIR = os.path.join(BASE_DIR, "templates")
171
  SITE_BASE_URL = "https://icsacinstitute.org"
172
 
173
  CLAUDE_CMD = "claude"
 
 
 
 
 
174
  GEMINI_CMD = "gemini"
175
 
176
  RUBRIC_DIMENSIONS = [
 
87
  "hf|Qwen/Qwen3-235B-A22B-Instruct-2507:cerebras",
88
  "or|openai/gpt-oss-120b:free",
89
  "or|z-ai/glm-4.5-air:free",
90
+ "or|google/gemma-4-31b-it:free",
91
  ],
92
  # Slot 2: Groq gpt-oss-120b → Cerebras Qwen3-235B → OR Nvidia/Hermes.
93
  # nemotron-3-super-120b-a12b excluded (won't emit JSON reliably).
 
96
  "hf|Qwen/Qwen3-235B-A22B-Instruct-2507:cerebras",
97
  "or|nvidia/nemotron-nano-12b-v2-vl:free",
98
  "or|nousresearch/hermes-3-llama-3.1-405b:free",
99
+ "or|google/gemma-4-31b-it:free",
100
  ],
101
  # Slot 3: Cerebras primary → Groq Llama → OR Google/cross-family.
102
  [
 
104
  "hf|meta-llama/Llama-3.3-70B-Instruct:groq",
105
  "or|google/gemma-4-26b-a4b-it:free",
106
  "or|z-ai/glm-4.5-air:free",
107
+ "or|google/gemma-4-31b-it:free",
108
  ],
109
  # Slot 4: HF Groq primary, HF Cerebras fallback, OR tail. Reordered
110
  # 2026-04-27 after qwen3-next-80b-a3b-instruct:free failed all 4
 
122
  "hf|Qwen/Qwen3-235B-A22B-Instruct-2507:cerebras",
123
  "or|minimax/minimax-m2.5:free",
124
  "or|google/gemma-4-31b-it:free",
125
+ "or|google/gemma-4-26b-a4b-it:free",
126
  ],
127
  ]
128
  OPENROUTER_MODELS_API_URL = "https://openrouter.ai/api/v1/models"
 
171
  SITE_BASE_URL = "https://icsacinstitute.org"
172
 
173
  CLAUDE_CMD = "claude"
174
+ # DEPRECATED (2026-05-22): unused. The gemini-cli free tier sunsets
175
+ # 2026-06-18. Blind-review compaction now uses CLAUDE_CMD, and the panel's
176
+ # Gemini-family voice is served via OpenRouter google/gemma :free models.
177
+ # No remaining code path invokes this binary. Kept only to avoid an
178
+ # AttributeError in any external fork that still references it.
179
  GEMINI_CMD = "gemini"
180
 
181
  RUBRIC_DIMENSIONS = [
editorial_workflow.py CHANGED
@@ -76,7 +76,8 @@ def check_model_availability(timeout: int = 15) -> dict:
76
  catalog (the only backend this catalog covers).
77
  "hf|<model>:<prov>" → HF Router (Groq/Cerebras) — not in OR's catalog,
78
  so it can't be disproven here; treated reachable.
79
- "gemini" → subscription gemini-cli; always reachable.
 
80
  The pre-2026-05-16 version compared raw prefixed strings against the
81
  unprefixed OR catalog, so every tagged entry mismatched and all slots
82
  read 'dead' — falsely skipping reviews on every tick.
@@ -106,8 +107,9 @@ def check_model_availability(timeout: int = 15) -> dict:
106
 
107
  def _entry_reachable(entry):
108
  # Mirror review._run_panel_chain's parsing: bare entries are OR.
109
- if entry == "gemini":
110
- return True
 
111
  kind, sep, model = entry.partition("|")
112
  if not sep:
113
  kind, model = "or", entry
 
76
  catalog (the only backend this catalog covers).
77
  "hf|<model>:<prov>" → HF Router (Groq/Cerebras) — not in OR's catalog,
78
  so it can't be disproven here; treated reachable.
79
+ (The "gemini" gemini-cli entry was retired 2026-05-22 ahead of the
80
+ gemini-cli sunset; no slot ships a bare "gemini" anymore.)
81
  The pre-2026-05-16 version compared raw prefixed strings against the
82
  unprefixed OR catalog, so every tagged entry mismatched and all slots
83
  read 'dead' — falsely skipping reviews on every tick.
 
107
 
108
  def _entry_reachable(entry):
109
  # Mirror review._run_panel_chain's parsing: bare entries are OR.
110
+ # (The "gemini" gemini-cli special case was retired 2026-05-22; a
111
+ # stray "gemini" entry now parses as an OR model id and reads as
112
+ # unreachable, matching the panel chain's post-retirement behavior.)
113
  kind, sep, model = entry.partition("|")
114
  if not sep:
115
  kind, model = "or", entry
review.py CHANGED
@@ -1,4 +1,10 @@
1
- """Multi-model reviewer panel engine using CLI-based AI tooling (claude -p, gemini)."""
 
 
 
 
 
 
2
 
3
  import json
4
  import os
@@ -302,7 +308,13 @@ def run_claude_review(prompt: str, capture_path: str = None) -> dict:
302
 
303
 
304
  def run_gemini_review(prompt: str, capture_path: str = None) -> dict:
305
- """Run review via gemini CLI (subscription-backed, no API spend).
 
 
 
 
 
 
306
 
307
  Used as the tail-of-chain fallback for panel slots whose external
308
  routes (HF Groq, HF Cerebras, OR free) all 413/429 on oversized
@@ -577,6 +589,11 @@ def _run_panel_chain(prompt: str, chain, capture_path: str = None) -> dict:
577
  return None
578
 
579
  for entry in chain:
 
 
 
 
 
580
  kind, sep, model = entry.partition("|")
581
  if not sep:
582
  kind, model = "or", entry # legacy bare entry β†’ OR
@@ -1378,6 +1395,32 @@ def _append_citation_verify_audit(record_id: str, citations: list[dict], error)
1378
  pass
1379
 
1380
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1381
  def review_paper(review_data: dict) -> tuple[str, dict]:
1382
  """Run full multi-model review with self-heal + multi-pass aggregation.
1383
 
@@ -1411,33 +1454,55 @@ def review_paper(review_data: dict) -> tuple[str, dict]:
1411
  log=lambda m: print(m, file=__import__("sys").stderr),
1412
  )
1413
  if compaction_manifest.get("_failure"):
 
 
 
 
 
 
 
1414
  print(
1415
- f" compaction: skipped ({compaction_manifest['_failure']}); "
1416
- f"panel will see un-stripped paper",
1417
- file=__import__("sys").stderr,
1418
- )
1419
- else:
1420
- pct = compaction_manifest.get("reduction_pct", 0)
1421
- print(
1422
- f" compaction: applied ({compaction_manifest.get('original_chars', 0)} -> "
1423
- f"{compaction_manifest.get('redacted_chars', 0)} chars, {pct}% reduction)",
1424
- file=__import__("sys").stderr,
1425
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1426
 
1427
  # Build the panel-facing review_data view: redacted text + blinded
1428
  # creators in the SUBMISSION metadata block. The original review_data
1429
  # is left untouched (worker still needs the real creators for audit
1430
- # and the apply_decision email path).
 
1431
  compacted_data = dict(review_data)
1432
- if compaction_manifest.get("_failure"):
1433
- # On failure, keep original text but still blind the creators
1434
- # metadata so the panel does not see author names in the prompt
1435
- # header even when compaction itself failed.
1436
- pass
1437
- else:
1438
- compacted_data["full_text"] = (
1439
- review_compaction.panel_notice() + redacted_text
1440
- )
1441
  compacted_data["creators"] = [
1442
  {"name": "[author identity withheld for blind review]"}
1443
  ]
 
1
+ """Multi-model reviewer panel engine (claude -p, OpenRouter, HF Router).
2
+
3
+ The gemini-cli tail-of-chain panelist was retired 2026-05-22 ahead of the
4
+ gemini-cli free-tier sunset (2026-06-18); the Gemini-family voice is now an
5
+ OpenRouter google/gemma :free entry. `run_gemini_review` below is retained
6
+ as dead code only for fork compatibility — no live code path calls it.
7
+ """
8
 
9
  import json
10
  import os
 
308
 
309
 
310
  def run_gemini_review(prompt: str, capture_path: str = None) -> dict:
311
+ """DEPRECATED / DEAD CODE (2026-05-22). No live caller — the gemini-cli
312
+ panelist was retired ahead of the 2026-06-18 free-tier sunset and
313
+ _run_panel_chain no longer dispatches to it. Retained only so external
314
+ forks importing this symbol don't break. Will fail at runtime once the
315
+ gemini binary is gone; do not re-wire it.
316
+
317
+ Run review via gemini CLI (subscription-backed, no API spend).
318
 
319
  Used as the tail-of-chain fallback for panel slots whose external
320
  routes (HF Groq, HF Cerebras, OR free) all 413/429 on oversized
 
589
  return None
590
 
591
  for entry in chain:
592
+ # Backend tag parsing. "hf|<model>:<prov>" → HF Router; everything
593
+ # else (including legacy untagged entries) → OpenRouter. The bare
594
+ # "gemini" gemini-cli tail-of-chain was retired 2026-05-22 ahead of
595
+ # the gemini-cli sunset; the panel's Gemini-family voice is now an
596
+ # OpenRouter google/gemma :free entry, dispatched like any "or|".
597
  kind, sep, model = entry.partition("|")
598
  if not sep:
599
  kind, model = "or", entry # legacy bare entry β†’ OR
 
1395
  pass
1396
 
1397
 
1398
+ def _fire_compaction_pain(review_data: dict, reason: str) -> None:
1399
+ """Fire a pain signal when blind-review compaction fails closed.
1400
+
1401
+ Direct ntfy /pain POST so a curator investigates the withheld paper.
1402
+ Best-effort, never raises β€” the fail-closed PAUSE has already protected
1403
+ author identity by the time this is called; the alert is observability.
1404
+ """
1405
+ url = getattr(config, "NTFY_PAIN_URL", "")
1406
+ if not url:
1407
+ return
1408
+ import urllib.request
1409
+ rec_id = review_data.get("record_id", "?")
1410
+ title = review_data.get("title", "Untitled")
1411
+ body = (
1412
+ f"Blind-review compaction FAILED for {rec_id} ({title}): {reason}. "
1413
+ f"Paper withheld from the panel (fail-closed) and submission PAUSED. "
1414
+ f"Curator must investigate before any review can proceed."
1415
+ )
1416
+ try:
1417
+ req = urllib.request.Request(url, data=body.encode())
1418
+ req.add_header("Title", "ICSAC compaction failure β€” paper withheld")
1419
+ urllib.request.urlopen(req, timeout=5)
1420
+ except Exception:
1421
+ pass
1422
+
1423
+
1424
  def review_paper(review_data: dict) -> tuple[str, dict]:
1425
  """Run full multi-model review with self-heal + multi-pass aggregation.
1426
 
 
1454
  log=lambda m: print(m, file=__import__("sys").stderr),
1455
  )
1456
  if compaction_manifest.get("_failure"):
1457
+ # FAIL CLOSED. Blind-review compaction failed, so the manuscript
1458
+ # still carries author identifiers. Letting it reach the panel
1459
+ # would leak author identity into a supposedly blind review — the
1460
+ # exact integrity violation compaction exists to prevent. Withhold
1461
+ # the paper, fire a pain signal for a curator, and return a PAUSED
1462
+ # aggregate (the worker routes this to paused_panel_failure).
1463
+ reason = compaction_manifest["_failure"]
1464
  print(
1465
+ f" compaction: FAILED ({reason}); paper WITHHELD from panel "
1466
+ f"(fail-closed β€” author identity protected)",
1467
+ file=sys.stderr,
 
 
 
 
 
 
 
1468
  )
1469
+ _fire_compaction_pain(review_data, reason)
1470
+ aggregate = {
1471
+ "recommendation": "PAUSED_AI_FAILURE",
1472
+ "models_used": [],
1473
+ "failed_models": [],
1474
+ "reason": (
1475
+ f"Blind-review compaction failed ({reason}); manuscript "
1476
+ f"withheld from the panel to prevent author-identity leakage"
1477
+ ),
1478
+ "disagreement": False,
1479
+ "dimension_scores": {},
1480
+ "pass_aggregates": [],
1481
+ "dimension_stdev": {},
1482
+ "passes": 0,
1483
+ "compaction_manifest": compaction_manifest,
1484
+ }
1485
+ markdown = generate_review_markdown(review_data, [], aggregate)
1486
+ path = save_review(review_data, markdown)
1487
+ print(f" PAUSED β€” compaction failed, review withheld: {path}")
1488
+ return markdown, aggregate
1489
+
1490
+ pct = compaction_manifest.get("reduction_pct", 0)
1491
+ print(
1492
+ f" compaction: applied ({compaction_manifest.get('original_chars', 0)} -> "
1493
+ f"{compaction_manifest.get('redacted_chars', 0)} chars, {pct}% reduction)",
1494
+ file=sys.stderr,
1495
+ )
1496
 
1497
  # Build the panel-facing review_data view: redacted text + blinded
1498
  # creators in the SUBMISSION metadata block. The original review_data
1499
  # is left untouched (worker still needs the real creators for audit
1500
+ # and the apply_decision email path). Reached only on compaction
1501
+ # success — the failure path above already returned.
1502
  compacted_data = dict(review_data)
1503
+ compacted_data["full_text"] = (
1504
+ review_compaction.panel_notice() + redacted_text
1505
+ )
 
 
 
 
 
 
1506
  compacted_data["creators"] = [
1507
  {"name": "[author identity withheld for blind review]"}
1508
  ]
review_compaction.py CHANGED
@@ -16,25 +16,31 @@ Two intentions, one mechanism:
16
  Trimming them lets larger manuscripts fit the panel's per-route
17
  context budgets.
18
 
19
- Implementation strategy (extract-not-echo, 2026-05-18 refactor):
 
20
 
21
- Gemini is asked to IDENTIFY spans to remove (short snippets, section
22
  start/end anchors) — never to echo the redacted manuscript back. Python
23
  performs the actual removal via string operations against the original
24
- text. This keeps gemini's output small (a few KB regardless of paper
25
  size) and avoids the output-truncation / content-filter trips that hit
26
  large-paper echo-style runs (see the DET-paper failure mode at 28K
27
- tokens: gemini's invalid-content retry exhaustion mid-echo).
28
 
29
- Tradeoff: if gemini returns a snippet that doesn't string-match in the
30
  original (whitespace drift, hyphenation, OCR artifacts), that category
31
  silently does not redact. Logged in the manifest as a `match_failures`
32
  list so curators can spot egregious cases.
33
 
34
- Returns (redacted_text, manifest_dict). On gemini failure or empty extract
35
- output, returns (original_text, {"_failure": "<reason>"}) — the worker
36
- treats that as a non-fatal warning and proceeds with the un-stripped
37
- paper. A failed compaction never blocks a real submission.
 
 
 
 
 
38
  """
39
 
40
  from __future__ import annotations
@@ -133,32 +139,32 @@ _EMPTY_MANIFEST = {
133
  }
134
 
135
 
136
- def _gemini_call(prompt_input: str, *, timeout_sec: int = 600) -> tuple[str, str, int]:
137
- """Invoke gemini-cli, returning (stdout, stderr, returncode).
138
 
139
- Sets GEMINI_CLI_TRUST_WORKSPACE so headless invocations don't trip the
140
- trusted-folders gate. Uses EXTRACT_PROMPT as -p; passes the manuscript
141
- on stdin.
 
 
142
  """
143
- env = {**os.environ, "GEMINI_CLI_TRUST_WORKSPACE": "true"}
144
  proc = subprocess.run(
145
- [config.GEMINI_CMD, "-p", EXTRACT_PROMPT],
146
- input=prompt_input,
147
  capture_output=True,
148
  text=True,
149
  timeout=timeout_sec,
150
- env=env,
151
  )
152
  return proc.stdout, proc.stderr, proc.returncode
153
 
154
 
155
  def _extract_json(raw: str) -> dict | None:
156
- """Pull a single JSON object out of gemini's stdout.
157
 
158
- gemini-cli prepends warning lines on first use (256-color, ripgrep
159
- missing, etc.). We tolerate those — find the first '{' and parse from
160
- there. If the output is wrapped in ```json ... ``` despite the prompt
161
- asking otherwise, strip those fences first.
162
  """
163
  text = raw.strip()
164
  if text.startswith("```"):
@@ -317,10 +323,12 @@ def _apply_removals(text: str, spans: dict) -> tuple[str, dict, list]:
317
  def compact_paper(paper_text: str, *, log=None) -> tuple[str, dict]:
318
  """Strip author/identifier metadata from a manuscript for blind review.
319
 
320
- Returns (redacted_text, manifest). On any failure path the original
321
- text is returned with a manifest carrying a "_failure" reason — the
322
- caller treats that as a non-fatal warning and proceeds with the
323
- un-stripped paper. Compaction MUST NEVER block a submission.
 
 
324
  """
325
  def _log(msg: str) -> None:
326
  if log:
@@ -331,37 +339,38 @@ def compact_paper(paper_text: str, *, log=None) -> tuple[str, dict]:
331
  if not paper_text or not paper_text.strip():
332
  manifest = dict(_EMPTY_MANIFEST)
333
  manifest["_failure"] = "empty input"
334
- return paper_text, manifest
335
 
336
  try:
337
- stdout, stderr, rc = _gemini_call(paper_text)
338
  except subprocess.TimeoutExpired:
339
- _log(" compaction: gemini timed out; proceeding with un-stripped paper")
340
  manifest = dict(_EMPTY_MANIFEST)
341
- manifest["_failure"] = "gemini timeout"
342
- return paper_text, manifest
343
  except Exception as exc:
344
- _log(f" compaction: gemini call raised {type(exc).__name__}: {exc}")
 
345
  manifest = dict(_EMPTY_MANIFEST)
346
- manifest["_failure"] = f"gemini exception: {type(exc).__name__}"
347
- return paper_text, manifest
348
 
349
  if rc != 0:
350
- # Gemini-cli failed but we may still have partial output. Surface
351
- # the rc so curators can investigate; treat as a failure.
352
  snippet = (stderr or "")[:240].replace("\n", " ")
353
- _log(f" compaction: gemini exited {rc}; stderr: {snippet}")
354
  manifest = dict(_EMPTY_MANIFEST)
355
- manifest["_failure"] = f"gemini exit {rc}"
356
- return paper_text, manifest
357
 
358
  spans = _extract_json(stdout)
359
  if spans is None:
360
- _log(f" compaction: gemini output not parseable as JSON "
361
- f"({len(stdout)} chars stdout)")
362
  manifest = dict(_EMPTY_MANIFEST)
363
- manifest["_failure"] = "gemini output unparseable"
364
- return paper_text, manifest
365
 
366
  redacted, manifest, match_failures = _apply_removals(paper_text, spans)
367
 
@@ -386,7 +395,9 @@ def render_manifest(manifest: dict) -> str:
386
  """Render a manifest dict as a plain-text bulleted list for the
387
  decision email's compaction disclosure block."""
388
  if manifest.get("_failure"):
389
- return f"(Compaction not applied: {manifest['_failure']}. The panel reviewed the un-stripped manuscript.)"
 
 
390
 
391
  lines = []
392
  if manifest.get("author_names"):
 
16
  Trimming them lets larger manuscripts fit the panel's per-route
17
  context budgets.
18
 
19
+ Implementation strategy (extract-not-echo, 2026-05-18 refactor; repointed
20
+ from gemini-cli to `claude -p` 2026-05-22 ahead of the gemini-cli sunset):
21
 
22
+ The model is asked to IDENTIFY spans to remove (short snippets, section
23
  start/end anchors) — never to echo the redacted manuscript back. Python
24
  performs the actual removal via string operations against the original
25
+ text. This keeps the model's output small (a few KB regardless of paper
26
  size) and avoids the output-truncation / content-filter trips that hit
27
  large-paper echo-style runs (see the DET-paper failure mode at 28K
28
+ tokens: invalid-content retry exhaustion mid-echo).
29
 
30
+ Tradeoff: if the model returns a snippet that doesn't string-match in the
31
  original (whitespace drift, hyphenation, OCR artifacts), that category
32
  silently does not redact. Logged in the manifest as a `match_failures`
33
  list so curators can spot egregious cases.
34
 
35
+ Returns (redacted_text, manifest_dict).
36
+
37
+ FAIL CLOSED (2026-05-22): on ANY compaction failure (model timeout,
38
+ exception, non-zero exit, unparseable output, empty input) this returns
39
+ ("", {"_failure": "<reason>"}) — an EMPTY redacted text, never the
40
+ original. Author identity must never reach the blind panel. The caller
41
+ (review.review_paper) detects the "_failure" key, withholds the paper from
42
+ the panel, fires a pain signal, and pauses the submission for a curator.
43
+ A failed compaction blocks the paper rather than leaking identity.
44
  """
45
 
46
  from __future__ import annotations
 
139
  }
140
 
141
 
142
def _claude_call(manuscript: str, *, timeout_sec: int = 600) -> tuple[str, str, int]:
    """Invoke `claude -p`, returning (stdout, stderr, returncode).

    `claude -p` reads its prompt from stdin, so EXTRACT_PROMPT and the
    manuscript are concatenated (prompt first, blank line, then manuscript)
    and fed via input=. EXTRACT_PROMPT is deliberately NOT passed as a
    positional arg. The stdout is expected to carry the JSON manifest that
    _extract_json parses.

    Args:
        manuscript: Full manuscript text to scan for removable spans.
        timeout_sec: Hard wall-clock limit for the subprocess, in seconds.

    Returns:
        (stdout, stderr, returncode) of the finished process. A non-zero
        return code is returned, not raised.

    Raises:
        subprocess.TimeoutExpired: the process exceeded ``timeout_sec``.
    """
    proc = subprocess.run(
        [config.CLAUDE_CMD, "-p"],
        input=EXTRACT_PROMPT + "\n\n" + manuscript,
        capture_output=True,
        text=True,
        # Pin the pipe codec. With text=True alone the encoding follows the
        # worker's locale, so a manuscript with non-ASCII author names could
        # fail to encode under a C/POSIX locale.
        encoding="utf-8",
        timeout=timeout_sec,
    )
    return proc.stdout, proc.stderr, proc.returncode
159
 
160
 
161
  def _extract_json(raw: str) -> dict | None:
162
+ """Pull a single JSON object out of the model's stdout.
163
 
164
+ The CLI may prepend incidental lines before the JSON. We tolerate
165
+ those — find the first '{' and parse from there. If the output is
166
+ wrapped in ```json ... ``` despite the prompt asking otherwise, strip
167
+ those fences first.
168
  """
169
  text = raw.strip()
170
  if text.startswith("```"):
 
323
  def compact_paper(paper_text: str, *, log=None) -> tuple[str, dict]:
324
  """Strip author/identifier metadata from a manuscript for blind review.
325
 
326
+ Returns (redacted_text, manifest). FAIL CLOSED: on any failure path the
327
+ redacted text is the EMPTY STRING (never the original) and the manifest
328
+ carries a "_failure" reason. The caller (review.review_paper) must treat
329
+ a "_failure" manifest as a hard stop — withhold the paper from the panel
330
+ and pause the submission — so author identity can never leak into a
331
+ "blind" review. Compaction failure BLOCKS the paper.
332
  """
333
  def _log(msg: str) -> None:
334
  if log:
 
339
  if not paper_text or not paper_text.strip():
340
  manifest = dict(_EMPTY_MANIFEST)
341
  manifest["_failure"] = "empty input"
342
+ return "", manifest
343
 
344
  try:
345
+ stdout, stderr, rc = _claude_call(paper_text)
346
  except subprocess.TimeoutExpired:
347
+ _log(" compaction: claude timed out; FAILING CLOSED (paper withheld from panel)")
348
  manifest = dict(_EMPTY_MANIFEST)
349
+ manifest["_failure"] = "claude timeout"
350
+ return "", manifest
351
  except Exception as exc:
352
+ _log(f" compaction: claude call raised {type(exc).__name__}: {exc}; "
353
+ f"FAILING CLOSED (paper withheld from panel)")
354
  manifest = dict(_EMPTY_MANIFEST)
355
+ manifest["_failure"] = f"claude exception: {type(exc).__name__}"
356
+ return "", manifest
357
 
358
  if rc != 0:
359
+ # claude -p failed. Surface the rc so curators can investigate;
360
+ # treat as a hard failure (fail closed).
361
  snippet = (stderr or "")[:240].replace("\n", " ")
362
+ _log(f" compaction: claude exited {rc}; stderr: {snippet}; FAILING CLOSED")
363
  manifest = dict(_EMPTY_MANIFEST)
364
+ manifest["_failure"] = f"claude exit {rc}"
365
+ return "", manifest
366
 
367
  spans = _extract_json(stdout)
368
  if spans is None:
369
+ _log(f" compaction: claude output not parseable as JSON "
370
+ f"({len(stdout)} chars stdout); FAILING CLOSED")
371
  manifest = dict(_EMPTY_MANIFEST)
372
+ manifest["_failure"] = "claude output unparseable"
373
+ return "", manifest
374
 
375
  redacted, manifest, match_failures = _apply_removals(paper_text, spans)
376
 
 
395
  """Render a manifest dict as a plain-text bulleted list for the
396
  decision email's compaction disclosure block."""
397
  if manifest.get("_failure"):
398
+ return (f"(Compaction failed: {manifest['_failure']}. The manuscript was "
399
+ f"WITHHELD from the panel to prevent author-identity leakage; "
400
+ f"the submission was paused for curator review.)")
401
 
402
  lines = []
403
  if manifest.get("author_names"):