Add tools/audit_pymethoddef.py - MCRF_METHOD/MCRF_PROPERTY compliance auditor
Static-analysis tool for the pre-1.0 API freeze: walks src/**/*.cpp with
tree-sitter-cpp, locates every PyMethodDef and PyGetSetDef array initializer,
and classifies each entry's docstring slot as MACRO / RAW_STRING / NULL /
MISSING. The MACRO classification (MCRF_METHOD or MCRF_PROPERTY from
McRFPy_Doc.h) is the project's compliance target; raw string literals and
NULL docs predate the macro system and need migration before the 1.0 freeze.
Features:
- Per-file table (file:line, array, entry, classification) plus summary
footer with totals, per-kind breakdown, % MACRO compliance, and a
ranked list of the worst-offender files.
- --strict exits nonzero when any non-MACRO entries are present (CI use).
- --quiet suppresses per-file tables and prints only the summary.
- --paths PATH [PATH ...] limits the scan to specific files / directories.
- Sentinel terminator entries ({NULL}, {0}) are filtered out.
- Concatenated string literals and parenthesized expressions are handled.
Dependencies live in a project-local .venv-audit (added to .gitignore), so
the system Python is not polluted. tools/generate_all_docs.sh now invokes the
auditor at the end (informational, no --strict) so doc regeneration
surfaces compliance drift alongside the generated docs themselves.
Initial baseline on the current tree: 755 entries scanned, 340 MACRO
compliant (45.0%). Top offenders: UIEntity.cpp (51), 3d/PyVoxelGrid.cpp
(36), 3d/Viewport3D.cpp (31), UIGridPyProperties.cpp (30),
3d/Entity3D.cpp (28).
Refs pre-1.0 documentation freeze.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
3030ac488b
commit
626d5ae708
3 changed files with 487 additions and 0 deletions
3
.gitignore
vendored
3
.gitignore
vendored
|
|
@ -33,6 +33,9 @@ __lib_windows/
|
|||
build-windows/
|
||||
build_windows/
|
||||
_oldscripts/
|
||||
|
||||
# Audit tooling virtualenv (tools/audit_pymethoddef.py)
|
||||
.venv-audit/
|
||||
assets/
|
||||
cellular_automata_fire/
|
||||
deps/
|
||||
|
|
|
|||
463
tools/audit_pymethoddef.py
Executable file
463
tools/audit_pymethoddef.py
Executable file
|
|
@ -0,0 +1,463 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
audit_pymethoddef.py - Static-analysis tool for McRogueFace Python bindings.
|
||||
|
||||
Walks src/**/*.cpp, parses each file with tree-sitter-cpp, and locates every
|
||||
`PyMethodDef <name>[] = {...}` and `PyGetSetDef <name>[] = {...}` declaration.
|
||||
For each entry inside those array initializers, classifies the docstring slot:
|
||||
|
||||
MACRO - uses MCRF_METHOD(...) or MCRF_PROPERTY(...)
|
||||
RAW_STRING - inline C string literal (or concatenated string literals)
|
||||
NULL - explicit NULL literal
|
||||
MISSING - entry too short to have a doc field (probably malformed)
|
||||
|
||||
The `MACRO` classification is the project's compliance target. RAW_STRING and
|
||||
NULL entries should be migrated to the macro system before the 1.0 API freeze.
|
||||
|
||||
Sentinel terminator entries (e.g. `{NULL}`, `{0}`) are skipped.
|
||||
|
||||
Usage:
|
||||
python3 tools/audit_pymethoddef.py [--strict] [--quiet]
|
||||
[--paths PATH [PATH ...]]
|
||||
|
||||
Flags:
|
||||
--strict Exit nonzero if any non-MACRO entries are found (CI mode).
|
||||
--quiet Suppress per-file output, print only the summary.
|
||||
--paths Restrict scan to the given files/directories. Defaults to src/.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
from collections import Counter, defaultdict
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Iterable, List, Optional, Tuple
|
||||
|
||||
try:
|
||||
import tree_sitter_cpp
|
||||
from tree_sitter import Language, Parser
|
||||
except ImportError as e:
|
||||
sys.stderr.write(
|
||||
"ERROR: tree-sitter / tree-sitter-cpp not installed.\n"
|
||||
"Activate the audit venv first:\n"
|
||||
" source .venv-audit/bin/activate\n"
|
||||
f"(import error: {e})\n"
|
||||
)
|
||||
sys.exit(2)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tree-sitter setup
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# The C++ grammar and a parser bound to it are created once, at import time;
# scan_file() reuses this single parser for every file it reads.
_LANG = Language(tree_sitter_cpp.language())
_PARSER = Parser(_LANG)

# Indices of the docstring field in the struct initializer.
#   PyMethodDef: {ml_name, ml_meth, ml_flags, ml_doc}   -> idx 3
#   PyGetSetDef: {name, get, set, doc, closure}         -> idx 3
DOC_FIELD_INDEX = 3
# Array element types this tool audits. The keys are what scan_file() matches
# a declaration's type against.
# NOTE(review): the numeric values are not read anywhere in this file as far
# as visible - "too short" is decided by comparing against DOC_FIELD_INDEX.
EXPECTED_MIN_FIELDS = {
    "PyMethodDef": 4,  # need at least 4 to have ml_doc
    "PyGetSetDef": 4,  # need at least 4 to have doc (closure can be omitted)
}

# Punctuation/structural child node types we ignore when walking entry fields.
_PUNCT_TYPES = {"{", "}", ",", "(", ")", "[", "]", ";"}

# Macros that mark a docstring slot as compliant.
_COMPLIANT_MACROS = {"MCRF_METHOD", "MCRF_PROPERTY"}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Data records
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@dataclass
class EntryRecord:
    """One audited entry of a PyMethodDef / PyGetSetDef array initializer."""

    file_path: Path      # source file the entry was found in
    line: int            # 1-based
    array_kind: str      # "PyMethodDef" or "PyGetSetDef"
    array_name: str      # e.g. "PyAnimation::methods"
    entry_name: str      # ml_name / name string, or "<unknown>"
    classification: str  # MACRO / RAW_STRING / NULL / MISSING
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tree-sitter helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _node_text(src: bytes, node) -> str:
    """Decode the bytes of ``src`` covered by ``node`` as UTF-8 text.

    The span is taken from the node's ``start_byte`` / ``end_byte`` offsets.
    Bytes that are not valid UTF-8 are replaced with U+FFFD instead of
    raising.
    """
    span = src[node.start_byte:node.end_byte]
    return span.decode("utf-8", errors="replace")
|
||||
|
||||
|
||||
def _meaningful_children(node) -> List:
    """Return the field-bearing children of an initializer_list node.

    Punctuation tokens (braces, commas, ...) are dropped, and so are
    ``comment`` nodes. tree-sitter attaches comments as ordinary children of
    the node they appear inside, so an entry written as
    ``{"name", fn, METH_NOARGS, /* doc */ NULL}`` would otherwise have every
    field after the comment shifted by one position, and the wrong node would
    be read as the docstring slot.
    """
    return [
        child
        for child in node.children
        if child.type not in _PUNCT_TYPES and child.type != "comment"
    ]
|
||||
|
||||
|
||||
def _classify_doc_field(src: bytes, doc_node) -> str:
    """Map a docstring-slot AST node to a classification string.

    Args:
        src: Raw bytes of the file ``doc_node`` was parsed from.
        doc_node: The node occupying the docstring slot of an entry.

    Returns:
        ``"MACRO"`` when the slot is a call to one of ``_COMPLIANT_MACROS``;
        ``"NULL"`` when it is a null pointer constant (``NULL``, ``nullptr``
        or a literal ``0``, optionally wrapped in parentheses);
        ``"RAW_STRING"`` for everything else (string literals, calls to other
        functions/macros, bare identifiers, casts, ...).
    """
    node_type = doc_node.type
    if node_type == "null":
        return "NULL"
    if node_type in ("string_literal", "concatenated_string", "raw_string_literal"):
        return "RAW_STRING"
    if node_type == "call_expression":
        # Prefer the grammar's "function" field; fall back to the first child.
        callee = doc_node.child_by_field_name("function")
        if callee is None and doc_node.children:
            callee = doc_node.children[0]
        if callee is not None and _node_text(src, callee).strip() in _COMPLIANT_MACROS:
            return "MACRO"
        return "RAW_STRING"  # call expression to something non-MACRO

    # Every other node type is classified from its source text.
    text = _node_text(src, doc_node).strip()

    # Null pointer constants: accept the spellings _is_sentinel() also treats
    # as "empty", and see through redundant parentheses such as `(NULL)`.
    if text.strip("() \t\r\n") in ("NULL", "nullptr", "0"):
        return "NULL"

    if node_type == "identifier":
        # Bare identifier in the doc slot - could be a #define alias. Treat as
        # raw (non-compliant) so the user investigates it.
        return "RAW_STRING"

    # Parenthesized (or otherwise wrapped) macro call: drop any mix of leading
    # parentheses/whitespace, then allow whitespace before the argument list.
    head = text.lstrip("( \t\r\n")
    for macro in _COMPLIANT_MACROS:
        if head.startswith(macro) and head[len(macro):].lstrip().startswith("("):
            return "MACRO"
    return "RAW_STRING"
|
||||
|
||||
|
||||
def _entry_name(src: bytes, entry_node) -> str:
    """Return the name string (ml_name / name) of an entry initializer.

    Only the first field of the entry is considered, because the name comes
    first in both PyMethodDef and PyGetSetDef. The field may be a single
    string literal or a concatenation of adjacent literals; in both cases the
    complete text between the quotes is returned, including escape sequences
    and every piece of a concatenation, not just the first fragment.

    Returns ``"<unknown>"`` when the entry has no fields or its first field is
    not a string.
    """
    fields = _meaningful_children(entry_node)
    if not fields:
        return "<unknown>"

    first = fields[0]
    if first.type == "string_literal":
        literals = [first]
    elif first.type == "concatenated_string":
        literals = [c for c in first.children if c.type == "string_literal"]
    else:
        # The name should come first; anything else means we cannot name it.
        return "<unknown>"

    pieces = []
    for literal in literals:
        for part in literal.children:
            # Skips the quote tokens; keeps text and escapes in source order.
            if part.type in ("string_content", "escape_sequence"):
                pieces.append(_node_text(src, part))
    if pieces:
        return "".join(pieces)

    # No content children (e.g. an empty literal): fall back to the raw text.
    raw = _node_text(src, first)
    return raw.strip('"') if first.type == "string_literal" else raw
|
||||
|
||||
|
||||
def _is_sentinel(src: bytes, entry_node) -> bool:
    """Tell whether an entry is an array terminator such as {NULL} or {0}.

    An entry counts as a sentinel when it has no fields at all, or when every
    one of its fields is either a ``null`` node or the number literal ``0``
    (so ``{NULL, NULL, NULL, NULL}`` qualifies as well).
    """

    def _is_empty_value(field) -> bool:
        if field.type == "null":
            return True
        return field.type == "number_literal" and _node_text(src, field).strip() == "0"

    # all() over an empty field list is True, matching the "{}" case.
    return all(_is_empty_value(field) for field in _meaningful_children(entry_node))
|
||||
|
||||
|
||||
def _array_name_from_init_declarator(src: bytes, init_decl) -> Optional[str]:
    """Return the declared array's name for an init_declarator node.

    Looks for the first ``array_declarator`` child, then returns the source
    text of that declarator's first identifier-like child (plain, qualified
    or field identifier). Returns None if either lookup fails.
    """
    array_decl = next(
        (child for child in init_decl.children if child.type == "array_declarator"),
        None,
    )
    if array_decl is None:
        return None

    name_types = ("identifier", "qualified_identifier", "field_identifier")
    name_node = next(
        (child for child in array_decl.children if child.type in name_types),
        None,
    )
    return None if name_node is None else _node_text(src, name_node)
|
||||
|
||||
|
||||
def _outer_initializer_list(init_decl):
    """Return the first direct ``initializer_list`` child of ``init_decl``.

    Returns None when the declarator has no such child.
    """
    return next(
        (child for child in init_decl.children if child.type == "initializer_list"),
        None,
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Per-file scan
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _walk_declarations(node, out: list) -> None:
    """Append every ``declaration`` node at or below ``node`` to ``out``.

    Nodes are appended in pre-order (a node before its descendants, siblings
    left to right), so ``out`` ends up in source order. The traversal uses an
    explicit stack instead of recursion so that a very deeply nested syntax
    tree cannot exhaust Python's recursion limit.

    Args:
        node: Root of the subtree to search; it is inspected too.
        out: List that matching nodes are appended to (mutated in place).
    """
    pending = [node]
    while pending:
        current = pending.pop()
        if current.type == "declaration":
            out.append(current)
        # Push children right-to-left so the leftmost child is visited next.
        pending.extend(reversed(current.children))
|
||||
|
||||
|
||||
def scan_file(path: Path) -> List[EntryRecord]:
    """Parse one C++ file and return a record per audited array entry.

    Every ``declaration`` whose type is one of the names in
    ``EXPECTED_MIN_FIELDS`` is examined. For each of its initialized
    declarators, every nested initializer list that is not a sentinel yields
    one EntryRecord. An entry with too few fields to reach the docstring slot
    is recorded as ``MISSING``; otherwise the slot is classified by
    ``_classify_doc_field``.

    A file that cannot be read produces a warning on stderr and an empty
    result.
    """
    try:
        src = path.read_bytes()
    except OSError as e:
        sys.stderr.write(f"WARNING: cannot read {path}: {e}\n")
        return []

    tree = _PARSER.parse(src)
    declarations: list = []
    _walk_declarations(tree.root_node, declarations)

    found: List[EntryRecord] = []
    for declaration in declarations:
        # The declaration is ours if one of its type_identifier children names
        # an audited struct type.
        type_names = (
            _node_text(src, child).strip()
            for child in declaration.children
            if child.type == "type_identifier"
        )
        kind = next((t for t in type_names if t in EXPECTED_MIN_FIELDS), None)
        if kind is None:
            continue

        # A declaration may carry several init_declarators (rare for arrays).
        for declarator in declaration.children:
            if declarator.type != "init_declarator":
                continue
            initializer = _outer_initializer_list(declarator)
            if initializer is None:
                continue  # forward decl or extern - no initializer
            owner = _array_name_from_init_declarator(src, declarator) or "<anon>"

            # Each direct child initializer_list is one array entry.
            for entry in initializer.children:
                if entry.type != "initializer_list" or _is_sentinel(src, entry):
                    continue

                slots = _meaningful_children(entry)
                if len(slots) > DOC_FIELD_INDEX:
                    verdict = _classify_doc_field(src, slots[DOC_FIELD_INDEX])
                else:
                    verdict = "MISSING"

                found.append(EntryRecord(
                    file_path=path,
                    line=entry.start_point[0] + 1,  # tree-sitter is 0-based
                    array_kind=kind,
                    array_name=owner,
                    entry_name=_entry_name(src, entry),
                    classification=verdict,
                ))
    return found
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Path resolution
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _iter_cpp_files(roots: Iterable[Path]) -> Iterable[Path]:
    """Yield the ``.cpp`` files named by, or found beneath, each root.

    A root that is a regular file is yielded only if its suffix is ``.cpp``.
    A root that does not exist triggers a warning on stderr. Any other root
    is walked recursively with ``os.walk`` and every file name ending in
    ``.cpp`` is yielded.
    """
    for root in roots:
        if root.is_file():
            if root.suffix == ".cpp":
                yield root
        elif root.exists():
            for dirpath, _subdirs, names in os.walk(root):
                base = Path(dirpath)
                yield from (base / name for name in names if name.endswith(".cpp"))
        else:
            sys.stderr.write(f"WARNING: path does not exist: {root}\n")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Reporting
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _print_file_table(records: List[EntryRecord], project_root: Path) -> None:
    """Print one aligned table per source file, followed by a blank line.

    Files are printed in sorted path order and shown relative to
    ``project_root`` when possible. Rows within a file are ordered by line
    number. Column widths are computed per file from its own rows and the
    column headings.
    """
    grouped: dict[Path, List[EntryRecord]] = defaultdict(list)
    for record in records:
        grouped[record.file_path].append(record)

    for file_path in sorted(grouped):
        try:
            rel = file_path.relative_to(project_root)
        except ValueError:
            # Not under project_root (or not comparable): show it as given.
            rel = file_path
        rows = sorted(grouped[file_path], key=lambda rec: rec.line)

        # Each column is at least as wide as its heading.
        loc_w = max(len("file:line"), *(len(f"{rel}:{e.line}") for e in rows))
        arr_w = max(len("array"), *(len(e.array_name) for e in rows))
        ent_w = max(len("entry"), *(len(e.entry_name) for e in rows))

        header = (
            f"{'file:line':<{loc_w}} "
            f"{'array':<{arr_w}} "
            f"{'entry':<{ent_w}} "
            f"classification"
        )
        print(header)
        print("-" * len(header))
        for e in rows:
            loc = f"{rel}:{e.line}"
            print(
                f"{loc:<{loc_w}} "
                f"{e.array_name:<{arr_w}} "
                f"{e.entry_name:<{ent_w}} "
                f"{e.classification}"
            )
        print()
|
||||
|
||||
|
||||
def _print_summary(records: List[EntryRecord]) -> None:
    """Print the audit summary footer.

    The footer has three parts: overall totals with the MACRO compliance
    percentage, a per-array-kind breakdown, and up to ten files ranked by
    their number of non-MACRO entries (shown relative to the current working
    directory when possible). The last two parts are omitted when they would
    be empty.
    """
    total = len(records)
    tally = Counter(rec.classification for rec in records)
    macro = tally["MACRO"]
    raw = tally["RAW_STRING"]
    null = tally["NULL"]
    missing = tally["MISSING"]
    pct = (macro / total * 100.0) if total else 0.0

    print("=" * 60)
    print("PyMethodDef / PyGetSetDef Documentation Audit Summary")
    print("=" * 60)
    print(f"Total entries scanned : {total}")
    print(f" MACRO compliant : {macro}")
    print(f" RAW_STRING : {raw}")
    print(f" NULL : {null}")
    print(f" MISSING : {missing}")
    print(f"MACRO compliance : {pct:.1f}%")

    # Per-kind breakdown.
    per_kind = defaultdict(Counter)
    for rec in records:
        per_kind[rec.array_kind][rec.classification] += 1
    if per_kind:
        print()
        print("Breakdown by kind:")
        for kind in sorted(per_kind):
            kc = per_kind[kind]
            kt = sum(kc.values())
            kp = (kc.get("MACRO", 0) / kt * 100.0) if kt else 0.0
            print(
                f" {kind:<13} total={kt:<4} "
                f"MACRO={kc.get('MACRO', 0):<4} "
                f"RAW={kc.get('RAW_STRING', 0):<4} "
                f"NULL={kc.get('NULL', 0):<4} "
                f"MISSING={kc.get('MISSING', 0):<4} "
                f"({kp:.1f}% compliant)"
            )

    # Top offenders: files ranked by how many non-MACRO entries they hold.
    offenders = Counter(
        rec.file_path for rec in records if rec.classification != "MACRO"
    )
    if offenders:
        print()
        print("Top non-compliant files:")
        here = Path.cwd()
        # The sort is stable, so ties keep first-seen order.
        ranked = sorted(offenders.items(), key=lambda kv: kv[1], reverse=True)
        for path, count in ranked[:10]:
            try:
                rel = path.relative_to(here)
            except ValueError:
                rel = path
            print(f" {count:>4} {rel}")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Entry point
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _default_roots() -> List[Path]:
    """Return the default scan roots.

    This is ``<cwd>/src`` when that path exists, otherwise the current
    working directory itself.
    """
    here = Path.cwd()
    candidate = here / "src"
    if candidate.exists():
        return [candidate]
    return [here]
|
||||
|
||||
|
||||
def main(argv: Optional[List[str]] = None) -> int:
    """Run the audit from the command line and return the exit status.

    Args:
        argv: Argument list to parse; None lets argparse read ``sys.argv``.

    Returns:
        1 when ``--strict`` is given and at least one scanned entry is not
        classified as MACRO; 0 otherwise, including when no .cpp files are
        found.
    """
    parser = argparse.ArgumentParser(
        description=(
            "Audit PyMethodDef / PyGetSetDef entries in McRogueFace C++ "
            "sources for MCRF_METHOD / MCRF_PROPERTY documentation macro use."
        ),
    )
    parser.add_argument(
        "--strict", action="store_true",
        help="Exit nonzero if any non-MACRO entries are found (CI mode)."
    )
    parser.add_argument(
        "--quiet", action="store_true",
        help="Print only the summary, omit per-file tables."
    )
    parser.add_argument(
        "--paths", nargs="+", type=Path, default=None,
        help="Files or directories to scan (default: ./src)."
    )
    options = parser.parse_args(argv)

    scan_roots = options.paths or _default_roots()
    project_root = Path.cwd()

    # De-duplicate and sort so the output order is stable.
    cpp_files = sorted(set(_iter_cpp_files(scan_roots)))
    if not cpp_files:
        sys.stderr.write("WARNING: no .cpp files found.\n")
        return 0

    all_records: List[EntryRecord] = [
        record for cpp_file in cpp_files for record in scan_file(cpp_file)
    ]

    if not options.quiet:
        if all_records:
            _print_file_table(all_records, project_root)
        else:
            print("(no PyMethodDef / PyGetSetDef arrays found)")
            print()

    _print_summary(all_records)

    if options.strict and any(r.classification != "MACRO" for r in all_records):
        return 1
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
|
|
@ -26,3 +26,24 @@ echo " HTML: docs/api_reference_dynamic.html"
|
|||
echo " Markdown: docs/API_REFERENCE_DYNAMIC.md"
|
||||
echo " Man page: docs/mcrfpy.3"
|
||||
echo " Stubs: stubs/mcrfpy.pyi"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Static-analysis audit: report MCRF_METHOD / MCRF_PROPERTY macro compliance
|
||||
# across every PyMethodDef / PyGetSetDef array in src/. This is informational
|
||||
# only (no --strict) so it cannot break the doc build, but the summary makes
|
||||
# pre-1.0 documentation drift visible alongside doc generation.
|
||||
#
|
||||
# Requires the .venv-audit virtual environment with tree-sitter +
|
||||
# tree-sitter-cpp installed. The audit is skipped (with a setup hint) if absent so
|
||||
# contributors without the venv aren't blocked.
|
||||
# ---------------------------------------------------------------------------
|
||||
# Run the auditor with the venv's interpreter when both the interpreter and
# the script are present.
if [ -x "./.venv-audit/bin/python3" ] && [ -f "./tools/audit_pymethoddef.py" ]; then
    echo ""
    echo "=== PyMethodDef / PyGetSetDef Macro Compliance Audit ==="
    # '|| true' discards a nonzero exit status from the auditor.
    ./.venv-audit/bin/python3 ./tools/audit_pymethoddef.py --quiet || true
# Script present but no venv interpreter: print how to create the venv.
elif [ -f "./tools/audit_pymethoddef.py" ]; then
    echo ""
    echo "(skipping audit_pymethoddef.py: .venv-audit not found - run"
    echo " 'python3 -m venv .venv-audit && .venv-audit/bin/pip install"
    echo " tree-sitter tree-sitter-cpp' to enable)"
fi
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue