#!/usr/bin/env python3
"""Merge edges from build/main.rrxiv.aux into build/main.cir.json.

The rrxiv-python parser only extracts claim-to-claim edges where the
\\dependson{}{} arguments are already in the canonical paper:label
form. This paper uses short-form labels (I.1, I.47, etc.) because
the proof DAG is dense and short labels keep the source readable.

This post-processor reads the sidecar, filters to claim-to-claim
edges only (drops post:*, def:*, cn:* targets), and prefixes them
with the canonical paper id.

Usage:
    scripts/merge-sidecar-edges.py
"""

from __future__ import annotations

import json
import re
from pathlib import Path

# Canonical paper UUID; every paper/claim id written to the CIR is rewritten
# to use this value as its prefix.
PAPER_ID = "01923f8e-0009-7c4d-9e1f-3a2b1c0d4e5f"

# Two levels above this file (the usage line suggests the script lives in
# scripts/, which would make this the repository root).
ROOT = Path(__file__).resolve().parent.parent
CIR_PATH = ROOT / "build" / "main.cir.json"    # rewritten in place
AUX_PATH = ROOT / "build" / "main.rrxiv.aux"   # sidecar holding the edge lines
META_PATH = ROOT / "rrxiv-meta.json"           # optional structured metadata

# Claim labels in book*.tex are uppercase Roman.Arabic — I.1, II.12, etc.
# (Not post:N, cn:N, def:I.N — those are postulates/common notions/defs.)
# The pattern also accepts one optional extra numeric component (I.1.2).
CLAIM_LABEL_RE = re.compile(r"^[IVXLC]+\.\d+(\.\d+)?$")

# One sidecar edge line: RRXIV:edge:<kind>:<source>|<target>
# Groups: 1 = edge kind, 2 = source label, 3 = target label.
EDGE_RE = re.compile(r"^RRXIV:edge:(depends_on|supports|contradicts|extends):([^|]+)\|(.+)$")


def main() -> int:
    """Canonicalise the CIR file and merge sidecar claim-to-claim edges into it.

    Steps, in order:

    1. Load ``CIR_PATH`` and set the paper-level ``id`` to ``PAPER_ID``.
    2. If ``META_PATH`` exists, overlay ``authors``, ``license``, ``topics``,
       ``based_on`` and ``version`` from it.
    3. Rewrite every claim's ``paper_id``, ``id`` and existing edge targets
       so their prefix is ``PAPER_ID``.
    4. Append each claim-to-claim edge found in ``AUX_PATH`` to the source
       claim, skipping duplicates.
    5. Write the result back to ``CIR_PATH`` and print a one-line summary.

    Returns:
        ``0`` on success, used as the process exit status.

    Raises:
        SystemExit: if ``CIR_PATH`` or ``AUX_PATH`` is not an existing file.
    """
    edge_kinds = ("depends_on", "supports", "contradicts", "extends")
    prop_marker = ":prop:"

    for required in (CIR_PATH, AUX_PATH):
        if not required.is_file():
            raise SystemExit(f"missing {required}")

    # JSON is UTF-8 by specification; never rely on the locale encoding.
    cir = json.loads(CIR_PATH.read_text(encoding="utf-8"))

    # Rewrite the canonical paper-level fields. The parser sets paper_id /
    # claim.id prefixes to the rrxiv-meta slug ("rrxiv-paper-euclid-elements")
    # which is fine for build artefacts but the deployed instance keys
    # everything off the canonical UUID. Patch both top-level + each
    # claim so re-ingest finds them by paper_id.
    cir["id"] = PAPER_ID
    cir.setdefault("id_slug", "rrxiv:2605.00009")

    # Overlay structured authors + based_on + license + topics from
    # rrxiv-meta.json. The parser captures the LaTeX \author{} arg
    # verbatim — for Euclid that includes a \\\and\small annotation
    # we want kept in the rendered PDF but cleaned out of the CIR.
    # rrxiv-meta.json carries the canonical structured author list
    # (one entry per author, with orcid + is_agent + agent_handle),
    # so use it as the source of truth here.
    if META_PATH.is_file():
        meta = json.loads(META_PATH.read_text(encoding="utf-8"))
        if isinstance(meta.get("authors"), list) and meta["authors"]:
            cir["authors"] = meta["authors"]
        for key in ("license", "topics", "based_on"):
            if meta.get(key) is not None:
                cir[key] = meta[key]
        # `version` from meta is authoritative too (e.g. "v2" after a
        # revision); fall back to whatever the parser set otherwise.
        if meta.get("version"):
            cir["version"] = meta["version"]

    # Single pass over the claims: canonicalise ids and edge targets, and
    # index each proposition by its short label (the text after ":prop:")
    # so sidecar edges can be attached below.
    claims_by_short: dict[str, dict] = {}
    for claim in cir.get("claims") or []:
        claim["paper_id"] = PAPER_ID
        # `id` may be either parser-shape ("rrxiv-paper-euclid-elements:prop:I.1")
        # or already canonical. Normalise to canonical.
        _, found, short = claim["id"].rpartition(prop_marker)
        if found:
            claim["id"] = f"{PAPER_ID}{prop_marker}{short}"
            claims_by_short[short] = claim
        # Same rewriting for any inter-claim edges already on the claim.
        # A missing or null list is normalised to an empty one so the merge
        # step can always append to it.
        for kind in edge_kinds:
            claim[kind] = [
                f"{PAPER_ID}{prop_marker}{target.rpartition(prop_marker)[2]}"
                if prop_marker in target
                else target
                for target in claim.get(kind) or []
            ]

    merged = 0
    skipped = 0
    # The .aux file is LaTeX output and may contain bytes that are not valid
    # UTF-8; only the ASCII RRXIV:edge lines matter, so decode leniently
    # instead of aborting on an unrelated line.
    aux_text = AUX_PATH.read_text(encoding="utf-8", errors="replace")
    for line in aux_text.splitlines():
        match = EDGE_RE.match(line)
        if match is None:
            continue
        kind = match.group(1)
        src = match.group(2).strip()
        tgt = match.group(3).strip()
        # Only claim → claim edges.
        if not (CLAIM_LABEL_RE.match(src) and CLAIM_LABEL_RE.match(tgt)):
            skipped += 1
            continue
        claim = claims_by_short.get(src)
        if claim is None:
            # Source label is claim-shaped but no such claim is in the CIR.
            skipped += 1
            continue
        full_target = f"{PAPER_ID}{prop_marker}{tgt}"
        # Edges already present are neither re-added nor counted.
        if full_target not in claim[kind]:
            claim[kind].append(full_target)
            merged += 1

    CIR_PATH.write_text(json.dumps(cir, indent=2) + "\n", encoding="utf-8")
    print(f"merged {merged} claim-to-claim edges; skipped {skipped} non-claim edges")
    return 0


if __name__ == "__main__":
    # Run only when executed as a script; main()'s return value becomes
    # the process exit status.
    raise SystemExit(main())