Static-analysis tool for the pre-1.0 API freeze: walks src/**/*.cpp with
tree-sitter-cpp, locates every PyMethodDef and PyGetSetDef array initializer,
and classifies each entry's docstring slot as MACRO / RAW_STRING / NULL /
MISSING. The MACRO classification (MCRF_METHOD or MCRF_PROPERTY from
McRFPy_Doc.h) is the project's compliance target; raw string literals and
NULL docs predate the macro system and need migration before the 1.0 freeze.
Features:
- Per-file table (file:line, array, entry, classification) plus summary
footer with totals, per-kind breakdown, % MACRO compliance, and a
ranked list of the worst-offender files.
- --strict exits nonzero when any non-MACRO entries are present (CI use).
- --quiet suppresses per-file tables and prints only the summary.
- --paths PATH [PATH ...] limits the scan to specific files / directories.
- Sentinel terminator entries ({NULL}, {0}) are filtered out.
- Concatenated string literals and parenthesized expressions are handled.
Dependencies live in a project-local .venv-audit (added to .gitignore) so
no system Python pollution. tools/generate_all_docs.sh now invokes the
auditor at the end (informational, no --strict) so doc regeneration
surfaces compliance drift alongside the generated docs themselves.
Initial baseline on the current tree: 755 entries scanned, 340 MACRO
compliant (45.0%). Top offenders: UIEntity.cpp (51), 3d/PyVoxelGrid.cpp
(36), 3d/Viewport3D.cpp (31), UIGridPyProperties.cpp (30),
3d/Entity3D.cpp (28).
Refs pre-1.0 documentation freeze.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
463 lines
16 KiB
Python
Executable file
463 lines
16 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
"""
|
|
audit_pymethoddef.py - Static-analysis tool for McRogueFace Python bindings.
|
|
|
|
Walks src/**/*.cpp, parses each file with tree-sitter-cpp, and locates every
|
|
`PyMethodDef <name>[] = {...}` and `PyGetSetDef <name>[] = {...}` declaration.
|
|
For each entry inside those array initializers, classifies the docstring slot:
|
|
|
|
MACRO - uses MCRF_METHOD(...) or MCRF_PROPERTY(...)
|
|
RAW_STRING - inline C string literal (or concatenated string literals)
|
|
NULL - explicit NULL literal
|
|
MISSING - entry too short to have a doc field (probably malformed)
|
|
|
|
The `MACRO` classification is the project's compliance target. RAW_STRING and
|
|
NULL entries should be migrated to the macro system before the 1.0 API freeze.
|
|
|
|
Sentinel terminator entries (e.g. `{NULL}`, `{0}`) are skipped.
|
|
|
|
Usage:
|
|
python3 tools/audit_pymethoddef.py [--strict] [--quiet]
|
|
[--paths PATH [PATH ...]]
|
|
|
|
Flags:
|
|
--strict Exit nonzero if any non-MACRO entries are found (CI mode).
|
|
--quiet Suppress per-file output, print only the summary.
|
|
--paths Restrict scan to the given files/directories. Defaults to src/.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import os
|
|
import sys
|
|
from collections import Counter, defaultdict
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
from typing import Iterable, List, Optional, Tuple
|
|
|
|
try:
|
|
import tree_sitter_cpp
|
|
from tree_sitter import Language, Parser
|
|
except ImportError as e:
|
|
sys.stderr.write(
|
|
"ERROR: tree-sitter / tree-sitter-cpp not installed.\n"
|
|
"Activate the audit venv first:\n"
|
|
" source .venv-audit/bin/activate\n"
|
|
f"(import error: {e})\n"
|
|
)
|
|
sys.exit(2)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Tree-sitter setup
|
|
# ---------------------------------------------------------------------------
|
|
|
|
_LANG = Language(tree_sitter_cpp.language())
|
|
_PARSER = Parser(_LANG)
|
|
|
|
# Indices of the docstring field in the struct initializer.
|
|
# PyMethodDef: {ml_name, ml_meth, ml_flags, ml_doc} -> idx 3
|
|
# PyGetSetDef: {name, get, set, doc, closure} -> idx 3
|
|
DOC_FIELD_INDEX = 3
|
|
EXPECTED_MIN_FIELDS = {
|
|
"PyMethodDef": 4, # need at least 4 to have ml_doc
|
|
"PyGetSetDef": 4, # need at least 4 to have doc (closure can be omitted)
|
|
}
|
|
|
|
# Punctuation/structural child node types we ignore when walking entry fields.
|
|
_PUNCT_TYPES = {"{", "}", ",", "(", ")", "[", "]", ";"}
|
|
|
|
# Macros that mark a docstring slot as compliant.
|
|
_COMPLIANT_MACROS = {"MCRF_METHOD", "MCRF_PROPERTY"}
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Data records
|
|
# ---------------------------------------------------------------------------
|
|
|
|
@dataclass
|
|
class EntryRecord:
|
|
file_path: Path
|
|
line: int # 1-based
|
|
array_kind: str # "PyMethodDef" or "PyGetSetDef"
|
|
array_name: str # e.g. "PyAnimation::methods"
|
|
entry_name: str # ml_name / name string, or "<unknown>"
|
|
classification: str # MACRO / RAW_STRING / NULL / MISSING
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Tree-sitter helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _node_text(src: bytes, node) -> str:
|
|
return src[node.start_byte:node.end_byte].decode("utf-8", errors="replace")
|
|
|
|
|
|
def _meaningful_children(node) -> List:
|
|
"""Return children of an initializer_list, skipping punctuation tokens."""
|
|
return [c for c in node.children if c.type not in _PUNCT_TYPES]
|
|
|
|
|
|
def _classify_doc_field(src: bytes, doc_node) -> str:
|
|
"""Map a docstring AST node to a classification string."""
|
|
t = doc_node.type
|
|
if t == "null":
|
|
return "NULL"
|
|
if t in ("string_literal", "concatenated_string", "raw_string_literal"):
|
|
return "RAW_STRING"
|
|
if t == "call_expression":
|
|
# The first child of call_expression is the callee identifier.
|
|
callee = doc_node.child_by_field_name("function")
|
|
if callee is None and doc_node.children:
|
|
callee = doc_node.children[0]
|
|
if callee is not None:
|
|
name = _node_text(src, callee).strip()
|
|
if name in _COMPLIANT_MACROS:
|
|
return "MACRO"
|
|
return "RAW_STRING" # call expression to something non-MACRO
|
|
if t == "identifier":
|
|
# Bare identifier in the doc slot - could be a #define alias. Treat as
|
|
# raw (non-compliant) so the user investigates it.
|
|
text = _node_text(src, doc_node).strip()
|
|
if text == "NULL":
|
|
return "NULL"
|
|
return "RAW_STRING"
|
|
# Anything else (parenthesized_expression, etc.) - inspect text fallback.
|
|
text = _node_text(src, doc_node).strip()
|
|
stripped = text.lstrip("(").lstrip()
|
|
for macro in _COMPLIANT_MACROS:
|
|
if stripped.startswith(macro + "("):
|
|
return "MACRO"
|
|
if stripped == "NULL":
|
|
return "NULL"
|
|
return "RAW_STRING"
|
|
|
|
|
|
def _entry_name(src: bytes, entry_node) -> str:
|
|
"""Return the first string literal in an entry initializer (the ml_name)."""
|
|
for c in _meaningful_children(entry_node):
|
|
if c.type == "string_literal":
|
|
# string_literal contains string_content children
|
|
for sub in c.children:
|
|
if sub.type == "string_content":
|
|
return _node_text(src, sub)
|
|
return _node_text(src, c).strip('"')
|
|
if c.type == "concatenated_string":
|
|
for lit in c.children:
|
|
if lit.type == "string_literal":
|
|
for sub in lit.children:
|
|
if sub.type == "string_content":
|
|
return _node_text(src, sub)
|
|
return _node_text(src, c)
|
|
# Stop at the first non-string field - the name should come first.
|
|
break
|
|
return "<unknown>"
|
|
|
|
|
|
def _is_sentinel(src: bytes, entry_node) -> bool:
|
|
"""True if the entry looks like a sentinel terminator (e.g. {NULL} / {0})."""
|
|
fields = _meaningful_children(entry_node)
|
|
if not fields:
|
|
return True
|
|
if len(fields) == 1:
|
|
only = fields[0]
|
|
if only.type == "null":
|
|
return True
|
|
if only.type == "number_literal" and _node_text(src, only).strip() == "0":
|
|
return True
|
|
# Some codebases write {NULL, NULL, NULL, NULL}. Treat all-NULL/0 as sentinel.
|
|
for f in fields:
|
|
if f.type == "null":
|
|
continue
|
|
if f.type == "number_literal" and _node_text(src, f).strip() == "0":
|
|
continue
|
|
return False
|
|
return True
|
|
|
|
|
|
def _array_name_from_init_declarator(src: bytes, init_decl) -> Optional[str]:
|
|
"""Extract the array name from an init_declarator containing array_declarator."""
|
|
arr = None
|
|
for c in init_decl.children:
|
|
if c.type == "array_declarator":
|
|
arr = c
|
|
break
|
|
if arr is None:
|
|
return None
|
|
# The first child is the declarator (identifier or qualified_identifier).
|
|
for c in arr.children:
|
|
if c.type in ("identifier", "qualified_identifier", "field_identifier"):
|
|
return _node_text(src, c)
|
|
return None
|
|
|
|
|
|
def _outer_initializer_list(init_decl):
|
|
"""Get the top-level initializer_list child of an init_declarator, if any."""
|
|
for c in init_decl.children:
|
|
if c.type == "initializer_list":
|
|
return c
|
|
return None
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Per-file scan
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _walk_declarations(node, out: list) -> None:
|
|
"""Collect all `declaration` nodes under `node` (recursive)."""
|
|
if node.type == "declaration":
|
|
out.append(node)
|
|
for c in node.children:
|
|
_walk_declarations(c, out)
|
|
|
|
|
|
def scan_file(path: Path) -> List[EntryRecord]:
|
|
try:
|
|
src = path.read_bytes()
|
|
except OSError as e:
|
|
sys.stderr.write(f"WARNING: cannot read {path}: {e}\n")
|
|
return []
|
|
|
|
tree = _PARSER.parse(src)
|
|
decls: list = []
|
|
_walk_declarations(tree.root_node, decls)
|
|
|
|
records: List[EntryRecord] = []
|
|
for decl in decls:
|
|
# Find the type_identifier child to determine if this is one of ours.
|
|
type_kind = None
|
|
for c in decl.children:
|
|
if c.type == "type_identifier":
|
|
txt = _node_text(src, c).strip()
|
|
if txt in EXPECTED_MIN_FIELDS:
|
|
type_kind = txt
|
|
break
|
|
if type_kind is None:
|
|
continue
|
|
|
|
# Each declaration may have multiple init_declarators (rare for arrays
|
|
# but cheap to handle).
|
|
for c in decl.children:
|
|
if c.type != "init_declarator":
|
|
continue
|
|
outer_init = _outer_initializer_list(c)
|
|
if outer_init is None:
|
|
continue # forward decl or extern - no initializer
|
|
array_name = _array_name_from_init_declarator(src, c) or "<anon>"
|
|
|
|
# Each direct child initializer_list is an entry.
|
|
for entry in outer_init.children:
|
|
if entry.type != "initializer_list":
|
|
continue
|
|
if _is_sentinel(src, entry):
|
|
continue
|
|
|
|
fields = _meaningful_children(entry)
|
|
line = entry.start_point[0] + 1 # tree-sitter is 0-based
|
|
name = _entry_name(src, entry)
|
|
|
|
if len(fields) <= DOC_FIELD_INDEX:
|
|
records.append(EntryRecord(
|
|
file_path=path,
|
|
line=line,
|
|
array_kind=type_kind,
|
|
array_name=array_name,
|
|
entry_name=name,
|
|
classification="MISSING",
|
|
))
|
|
continue
|
|
|
|
doc_node = fields[DOC_FIELD_INDEX]
|
|
classification = _classify_doc_field(src, doc_node)
|
|
records.append(EntryRecord(
|
|
file_path=path,
|
|
line=line,
|
|
array_kind=type_kind,
|
|
array_name=array_name,
|
|
entry_name=name,
|
|
classification=classification,
|
|
))
|
|
return records
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Path resolution
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _iter_cpp_files(roots: Iterable[Path]) -> Iterable[Path]:
|
|
for root in roots:
|
|
if root.is_file():
|
|
if root.suffix == ".cpp":
|
|
yield root
|
|
continue
|
|
if not root.exists():
|
|
sys.stderr.write(f"WARNING: path does not exist: {root}\n")
|
|
continue
|
|
for dirpath, _dirnames, filenames in os.walk(root):
|
|
for fn in filenames:
|
|
if fn.endswith(".cpp"):
|
|
yield Path(dirpath) / fn
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Reporting
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _print_file_table(records: List[EntryRecord], project_root: Path) -> None:
|
|
by_file: dict[Path, List[EntryRecord]] = defaultdict(list)
|
|
for r in records:
|
|
by_file[r.file_path].append(r)
|
|
|
|
for path in sorted(by_file):
|
|
try:
|
|
rel = path.relative_to(project_root)
|
|
except ValueError:
|
|
rel = path
|
|
entries = sorted(by_file[path], key=lambda r: r.line)
|
|
|
|
# Compute column widths for this file.
|
|
loc_w = max(len(f"{rel}:{e.line}") for e in entries)
|
|
arr_w = max(len(e.array_name) for e in entries)
|
|
ent_w = max(len(e.entry_name) for e in entries)
|
|
loc_w = max(loc_w, len("file:line"))
|
|
arr_w = max(arr_w, len("array"))
|
|
ent_w = max(ent_w, len("entry"))
|
|
|
|
header = (
|
|
f"{'file:line':<{loc_w}} "
|
|
f"{'array':<{arr_w}} "
|
|
f"{'entry':<{ent_w}} "
|
|
f"classification"
|
|
)
|
|
print(header)
|
|
print("-" * len(header))
|
|
for e in entries:
|
|
loc = f"{rel}:{e.line}"
|
|
print(
|
|
f"{loc:<{loc_w}} "
|
|
f"{e.array_name:<{arr_w}} "
|
|
f"{e.entry_name:<{ent_w}} "
|
|
f"{e.classification}"
|
|
)
|
|
print()
|
|
|
|
|
|
def _print_summary(records: List[EntryRecord]) -> None:
|
|
total = len(records)
|
|
counts = Counter(r.classification for r in records)
|
|
macro = counts.get("MACRO", 0)
|
|
raw = counts.get("RAW_STRING", 0)
|
|
null = counts.get("NULL", 0)
|
|
missing = counts.get("MISSING", 0)
|
|
pct = (macro / total * 100.0) if total else 0.0
|
|
|
|
print("=" * 60)
|
|
print("PyMethodDef / PyGetSetDef Documentation Audit Summary")
|
|
print("=" * 60)
|
|
print(f"Total entries scanned : {total}")
|
|
print(f" MACRO compliant : {macro}")
|
|
print(f" RAW_STRING : {raw}")
|
|
print(f" NULL : {null}")
|
|
print(f" MISSING : {missing}")
|
|
print(f"MACRO compliance : {pct:.1f}%")
|
|
|
|
# Per-kind breakdown.
|
|
by_kind = defaultdict(Counter)
|
|
for r in records:
|
|
by_kind[r.array_kind][r.classification] += 1
|
|
if by_kind:
|
|
print()
|
|
print("Breakdown by kind:")
|
|
for kind in sorted(by_kind):
|
|
kc = by_kind[kind]
|
|
kt = sum(kc.values())
|
|
kp = (kc.get("MACRO", 0) / kt * 100.0) if kt else 0.0
|
|
print(
|
|
f" {kind:<13} total={kt:<4} "
|
|
f"MACRO={kc.get('MACRO', 0):<4} "
|
|
f"RAW={kc.get('RAW_STRING', 0):<4} "
|
|
f"NULL={kc.get('NULL', 0):<4} "
|
|
f"MISSING={kc.get('MISSING', 0):<4} "
|
|
f"({kp:.1f}% compliant)"
|
|
)
|
|
|
|
# Top offenders.
|
|
offenders: dict[Path, int] = defaultdict(int)
|
|
for r in records:
|
|
if r.classification != "MACRO":
|
|
offenders[r.file_path] += 1
|
|
if offenders:
|
|
print()
|
|
print("Top non-compliant files:")
|
|
ranked = sorted(offenders.items(), key=lambda kv: kv[1], reverse=True)
|
|
for path, count in ranked[:10]:
|
|
try:
|
|
rel = path.relative_to(Path.cwd())
|
|
except ValueError:
|
|
rel = path
|
|
print(f" {count:>4} {rel}")
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Entry point
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _default_roots() -> List[Path]:
|
|
cwd = Path.cwd()
|
|
src = cwd / "src"
|
|
return [src] if src.exists() else [cwd]
|
|
|
|
|
|
def main(argv: Optional[List[str]] = None) -> int:
|
|
parser = argparse.ArgumentParser(
|
|
description=(
|
|
"Audit PyMethodDef / PyGetSetDef entries in McRogueFace C++ "
|
|
"sources for MCRF_METHOD / MCRF_PROPERTY documentation macro use."
|
|
),
|
|
)
|
|
parser.add_argument(
|
|
"--strict", action="store_true",
|
|
help="Exit nonzero if any non-MACRO entries are found (CI mode)."
|
|
)
|
|
parser.add_argument(
|
|
"--quiet", action="store_true",
|
|
help="Print only the summary, omit per-file tables."
|
|
)
|
|
parser.add_argument(
|
|
"--paths", nargs="+", type=Path, default=None,
|
|
help="Files or directories to scan (default: ./src)."
|
|
)
|
|
args = parser.parse_args(argv)
|
|
|
|
roots = args.paths if args.paths else _default_roots()
|
|
project_root = Path.cwd()
|
|
|
|
files = sorted(set(_iter_cpp_files(roots)))
|
|
if not files:
|
|
sys.stderr.write("WARNING: no .cpp files found.\n")
|
|
return 0
|
|
|
|
all_records: List[EntryRecord] = []
|
|
for f in files:
|
|
all_records.extend(scan_file(f))
|
|
|
|
if not args.quiet:
|
|
if all_records:
|
|
_print_file_table(all_records, project_root)
|
|
else:
|
|
print("(no PyMethodDef / PyGetSetDef arrays found)")
|
|
print()
|
|
|
|
_print_summary(all_records)
|
|
|
|
if args.strict:
|
|
non_macro = sum(
|
|
1 for r in all_records if r.classification != "MACRO"
|
|
)
|
|
if non_macro:
|
|
return 1
|
|
return 0
|
|
|
|
|
|
if __name__ == "__main__":
|
|
sys.exit(main())
|