diff --git a/.gitignore b/.gitignore
index 39c46e1..edae67d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -33,6 +33,9 @@ __lib_windows/
 build-windows/
 build_windows/
 _oldscripts/
+
+# Audit tooling virtualenv (tools/audit_pymethoddef.py)
+.venv-audit/
 assets/
 cellular_automata_fire/
 deps/
diff --git a/tools/audit_pymethoddef.py b/tools/audit_pymethoddef.py
new file mode 100755
index 0000000..f3f86ce
--- /dev/null
+++ b/tools/audit_pymethoddef.py
@@ -0,0 +1,510 @@
+#!/usr/bin/env python3
+"""
+audit_pymethoddef.py - Static-analysis tool for McRogueFace Python bindings.
+
+Walks src/**/*.cpp, parses each file with tree-sitter-cpp, and locates every
+`PyMethodDef name[] = {...}` and `PyGetSetDef name[] = {...}` declaration.
+For each entry inside those array initializers, classifies the docstring slot:
+
+  MACRO      - uses MCRF_METHOD(...) or MCRF_PROPERTY(...)
+  RAW_STRING - inline C string literal (or concatenated string literals), or
+               any other expression that is neither a macro call nor null
+  NULL       - null pointer constant (NULL, nullptr or a literal 0)
+  MISSING    - entry too short to have a doc field (probably malformed)
+
+The `MACRO` classification is the project's compliance target. RAW_STRING and
+NULL entries should be migrated to the macro system before the 1.0 API freeze.
+
+Sentinel terminator entries (e.g. `{NULL}`, `{0}`) are skipped.
+
+Usage:
+    python3 tools/audit_pymethoddef.py [--strict] [--quiet]
+                                       [--paths PATH [PATH ...]]
+
+Flags:
+    --strict    Exit nonzero if any non-MACRO entries are found (CI mode).
+    --quiet     Suppress per-file output, print only the summary.
+    --paths     Restrict scan to the given files/directories. Defaults to src/.
+"""
+from __future__ import annotations
+
+import argparse
+import os
+import sys
+from collections import Counter, defaultdict
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Iterable, List, Optional, Tuple
+
+try:
+    import tree_sitter_cpp
+    from tree_sitter import Language, Parser
+except ImportError as e:
+    sys.stderr.write(
+        "ERROR: tree-sitter / tree-sitter-cpp not installed.\n"
+        "Activate the audit venv first:\n"
+        " source .venv-audit/bin/activate\n"
+        f"(import error: {e})\n"
+    )
+    sys.exit(2)
+
+
+# ---------------------------------------------------------------------------
+# Tree-sitter setup
+# ---------------------------------------------------------------------------
+
+_LANG = Language(tree_sitter_cpp.language())
+_PARSER = Parser(_LANG)
+
+# Indices of the docstring field in the struct initializer.
+#   PyMethodDef: {ml_name, ml_meth, ml_flags, ml_doc} -> idx 3
+#   PyGetSetDef: {name, get, set, doc, closure}       -> idx 3
+DOC_FIELD_INDEX = 3
+EXPECTED_MIN_FIELDS = {
+    "PyMethodDef": 4,  # need at least 4 to have ml_doc
+    "PyGetSetDef": 4,  # need at least 4 to have doc (closure can be omitted)
+}
+
+# Punctuation/structural child node types we ignore when walking entry fields.
+_PUNCT_TYPES = {"{", "}", ",", "(", ")", "[", "]", ";"}
+
+# Child node types that are not struct fields: punctuation plus comments.
+# A comment node inside an entry must not be counted as a field, otherwise
+# every later field (including the doc slot) would shift by one.
+_NON_FIELD_TYPES = _PUNCT_TYPES | {"comment"}
+
+# Macros that mark a docstring slot as compliant.
+_COMPLIANT_MACROS = {"MCRF_METHOD", "MCRF_PROPERTY"}
+
+# Spellings of a null pointer constant that may appear in the doc slot.
+_NULL_SPELLINGS = {"NULL", "nullptr", "0"}
+
+
+# ---------------------------------------------------------------------------
+# Data records
+# ---------------------------------------------------------------------------
+
+@dataclass
+class EntryRecord:
+    """One classified entry of a PyMethodDef / PyGetSetDef array."""
+
+    file_path: Path
+    line: int            # 1-based
+    array_kind: str      # "PyMethodDef" or "PyGetSetDef"
+    array_name: str      # e.g. "PyAnimation::methods"
+    entry_name: str      # ml_name / name string, or ""
+    classification: str  # MACRO / RAW_STRING / NULL / MISSING
+
+
+# ---------------------------------------------------------------------------
+# Tree-sitter helpers
+# ---------------------------------------------------------------------------
+
+def _node_text(src: bytes, node) -> str:
+    """Return the slice of `src` covered by `node`, decoded as UTF-8.
+
+    Undecodable bytes are replaced instead of raising.
+    """
+    return src[node.start_byte:node.end_byte].decode("utf-8", errors="replace")
+
+
+def _meaningful_children(node) -> List:
+    """Return children of an initializer_list, skipping non-field tokens.
+
+    Punctuation and `comment` nodes are dropped so the returned list lines up
+    with the struct's field positions.
+    """
+    return [c for c in node.children if c.type not in _NON_FIELD_TYPES]
+
+
+def _classify_doc_field(src: bytes, doc_node) -> str:
+    """Map a docstring AST node to a classification string.
+
+    Returns "MACRO" for a call to one of _COMPLIANT_MACROS, "NULL" for a null
+    pointer constant (see _NULL_SPELLINGS), and "RAW_STRING" for everything
+    else, including string literals, other calls and bare identifiers.
+    """
+    t = doc_node.type
+    text = _node_text(src, doc_node).strip()
+    if t == "null":
+        return "NULL"
+    if t in ("string_literal", "concatenated_string", "raw_string_literal"):
+        return "RAW_STRING"
+    if t == "call_expression":
+        # Prefer the grammar's "function" field; fall back to the first child.
+        callee = doc_node.child_by_field_name("function")
+        if callee is None and doc_node.children:
+            callee = doc_node.children[0]
+        if callee is not None:
+            if _node_text(src, callee).strip() in _COMPLIANT_MACROS:
+                return "MACRO"
+        return "RAW_STRING"  # call expression to something non-MACRO
+    if t in ("identifier", "number_literal"):
+        # A literal 0 is a null pointer constant, consistent with how
+        # _is_sentinel treats it.  Any other bare identifier could be a
+        # #define alias; treat it as raw (non-compliant) so the user
+        # investigates it.
+        return "NULL" if text in _NULL_SPELLINGS else "RAW_STRING"
+    # Anything else (parenthesized_expression, etc.) - inspect text fallback.
+    stripped = text.lstrip("(").lstrip()
+    if stripped.startswith(tuple(m + "(" for m in _COMPLIANT_MACROS)):
+        return "MACRO"
+    # Strip wrapping parentheses on both sides so "(NULL)" is recognised.
+    if text.strip("() \t\r\n") in _NULL_SPELLINGS:
+        return "NULL"
+    return "RAW_STRING"
+
+
+def _entry_name(src: bytes, entry_node) -> str:
+    """Return the first string literal in an entry initializer (the ml_name).
+
+    Only the first field is inspected; if it is not a string the result is "".
+    For a concatenated string, the content of its first literal is returned.
+    """
+    fields = _meaningful_children(entry_node)
+    if not fields:
+        return ""
+    first = fields[0]
+    if first.type == "string_literal":
+        literals = [first]
+    elif first.type == "concatenated_string":
+        literals = [c for c in first.children if c.type == "string_literal"]
+    else:
+        # The name should come first; anything else means no usable name.
+        return ""
+    for lit in literals:
+        # string_literal contains string_content children
+        for sub in lit.children:
+            if sub.type == "string_content":
+                return _node_text(src, sub)
+    if first.type == "string_literal":
+        return _node_text(src, first).strip('"')
+    return _node_text(src, first)
+
+
+def _is_sentinel(src: bytes, entry_node) -> bool:
+    """True if the entry looks like a sentinel terminator (e.g. {NULL} / {0}).
+
+    An entry counts as a sentinel when every field is a null node or the
+    literal 0, e.g. `{}`, `{NULL}`, `{0}` or `{NULL, NULL, NULL, NULL}`.
+    """
+    return all(
+        f.type == "null"
+        or (f.type == "number_literal" and _node_text(src, f).strip() == "0")
+        for f in _meaningful_children(entry_node)
+    )
+
+
+def _array_name_from_init_declarator(src: bytes, init_decl) -> Optional[str]:
+    """Extract the array name from an init_declarator's array_declarator.
+
+    Returns None when there is no array_declarator child or it has no
+    identifier-like child.
+    """
+    arr = next(
+        (c for c in init_decl.children if c.type == "array_declarator"), None
+    )
+    if arr is None:
+        return None
+    # The declarator (identifier or qualified_identifier) names the array.
+    for c in arr.children:
+        if c.type in ("identifier", "qualified_identifier", "field_identifier"):
+            return _node_text(src, c)
+    return None
+
+
+def _outer_initializer_list(init_decl):
+    """Get the top-level initializer_list child of an init_declarator, if any."""
+    for c in init_decl.children:
+        if c.type == "initializer_list":
+            return c
+    return None
+
+
+# ---------------------------------------------------------------------------
+# Per-file scan
+# ---------------------------------------------------------------------------
+
+def _walk_declarations(node, out: list) -> None:
+    """Collect all `declaration` nodes under `node` into `out` (pre-order).
+
+    Uses an explicit stack rather than recursion so that very deeply nested
+    syntax trees cannot hit Python's recursion limit.
+    """
+    stack = [node]
+    while stack:
+        current = stack.pop()
+        if current.type == "declaration":
+            out.append(current)
+        # Reversed so children are popped, and thus visited, left to right.
+        stack.extend(reversed(current.children))
+
+
+def _declared_kind(src: bytes, decl) -> Optional[str]:
+    """Return "PyMethodDef"/"PyGetSetDef" if `decl` declares one, else None."""
+    for c in decl.children:
+        if c.type == "type_identifier":
+            txt = _node_text(src, c).strip()
+            if txt in EXPECTED_MIN_FIELDS:
+                return txt
+    return None
+
+
+def scan_file(path: Path) -> List[EntryRecord]:
+    """Parse one C++ file and classify every PyMethodDef/PyGetSetDef entry.
+
+    Returns one EntryRecord per non-sentinel entry.  An unreadable file is
+    reported on stderr and yields an empty list.
+    """
+    try:
+        src = path.read_bytes()
+    except OSError as e:
+        sys.stderr.write(f"WARNING: cannot read {path}: {e}\n")
+        return []
+
+    tree = _PARSER.parse(src)
+    decls: list = []
+    _walk_declarations(tree.root_node, decls)
+
+    records: List[EntryRecord] = []
+    for decl in decls:
+        type_kind = _declared_kind(src, decl)
+        if type_kind is None:
+            continue
+
+        # Each declaration may have multiple init_declarators (rare for arrays
+        # but cheap to handle).
+        for c in decl.children:
+            if c.type != "init_declarator":
+                continue
+            outer_init = _outer_initializer_list(c)
+            if outer_init is None:
+                continue  # forward decl or extern - no initializer
+            array_name = _array_name_from_init_declarator(src, c) or ""
+
+            # Each direct child initializer_list is an entry.
+            for entry in outer_init.children:
+                if entry.type != "initializer_list":
+                    continue
+                if _is_sentinel(src, entry):
+                    continue
+
+                fields = _meaningful_children(entry)
+                if len(fields) <= DOC_FIELD_INDEX:
+                    classification = "MISSING"
+                else:
+                    classification = _classify_doc_field(
+                        src, fields[DOC_FIELD_INDEX]
+                    )
+                records.append(EntryRecord(
+                    file_path=path,
+                    line=entry.start_point[0] + 1,  # tree-sitter is 0-based
+                    array_kind=type_kind,
+                    array_name=array_name,
+                    entry_name=_entry_name(src, entry),
+                    classification=classification,
+                ))
+    return records
+
+
+# ---------------------------------------------------------------------------
+# Path resolution
+# ---------------------------------------------------------------------------
+
+def _iter_cpp_files(roots: Iterable[Path]) -> Iterable[Path]:
+    """Yield every .cpp file named by, or found beneath, the given roots.
+
+    Roots that do not exist, and explicitly named files that are not .cpp,
+    are reported on stderr and skipped.
+    """
+    for root in roots:
+        if root.is_file():
+            if root.suffix == ".cpp":
+                yield root
+            else:
+                sys.stderr.write(f"WARNING: skipping non-.cpp file: {root}\n")
+            continue
+        if not root.exists():
+            sys.stderr.write(f"WARNING: path does not exist: {root}\n")
+            continue
+        for dirpath, _dirnames, filenames in os.walk(root):
+            for fn in filenames:
+                if fn.endswith(".cpp"):
+                    yield Path(dirpath) / fn
+
+
+# ---------------------------------------------------------------------------
+# Reporting
+# ---------------------------------------------------------------------------
+
+def _relative_to(path: Path, root: Path) -> Path:
+    """Return `path` relative to `root`, or `path` unchanged if not under it."""
+    try:
+        return path.relative_to(root)
+    except ValueError:
+        return path
+
+
+def _print_file_table(records: List[EntryRecord], project_root: Path) -> None:
+    """Print one aligned table per file, rows ordered by line number.
+
+    File paths are shown relative to `project_root` where possible.
+    """
+    by_file: dict[Path, List[EntryRecord]] = defaultdict(list)
+    for r in records:
+        by_file[r.file_path].append(r)
+
+    for path in sorted(by_file):
+        rel = _relative_to(path, project_root)
+        entries = sorted(by_file[path], key=lambda r: r.line)
+
+        # Compute column widths for this file.
+        loc_w = max(len(f"{rel}:{e.line}") for e in entries)
+        arr_w = max(len(e.array_name) for e in entries)
+        ent_w = max(len(e.entry_name) for e in entries)
+        loc_w = max(loc_w, len("file:line"))
+        arr_w = max(arr_w, len("array"))
+        ent_w = max(ent_w, len("entry"))
+
+        header = (
+            f"{'file:line':<{loc_w}} "
+            f"{'array':<{arr_w}} "
+            f"{'entry':<{ent_w}} "
+            f"classification"
+        )
+        print(header)
+        print("-" * len(header))
+        for e in entries:
+            loc = f"{rel}:{e.line}"
+            print(
+                f"{loc:<{loc_w}} "
+                f"{e.array_name:<{arr_w}} "
+                f"{e.entry_name:<{ent_w}} "
+                f"{e.classification}"
+            )
+        print()
+
+
+def _print_summary(
+    records: List[EntryRecord], project_root: Optional[Path] = None
+) -> None:
+    """Print overall totals, a per-kind breakdown and the top offenders.
+
+    Offender paths are shown relative to `project_root` (default: the current
+    working directory), matching _print_file_table.
+    """
+    root = project_root if project_root is not None else Path.cwd()
+    total = len(records)
+    counts = Counter(r.classification for r in records)
+    macro = counts.get("MACRO", 0)
+    raw = counts.get("RAW_STRING", 0)
+    null = counts.get("NULL", 0)
+    missing = counts.get("MISSING", 0)
+    pct = (macro / total * 100.0) if total else 0.0
+
+    print("=" * 60)
+    print("PyMethodDef / PyGetSetDef Documentation Audit Summary")
+    print("=" * 60)
+    print(f"Total entries scanned : {total}")
+    print(f" MACRO compliant : {macro}")
+    print(f" RAW_STRING : {raw}")
+    print(f" NULL : {null}")
+    print(f" MISSING : {missing}")
+    print(f"MACRO compliance : {pct:.1f}%")
+
+    # Per-kind breakdown.
+    by_kind = defaultdict(Counter)
+    for r in records:
+        by_kind[r.array_kind][r.classification] += 1
+    if by_kind:
+        print()
+        print("Breakdown by kind:")
+        for kind in sorted(by_kind):
+            kc = by_kind[kind]
+            kt = sum(kc.values())
+            kp = (kc.get("MACRO", 0) / kt * 100.0) if kt else 0.0
+            print(
+                f" {kind:<13} total={kt:<4} "
+                f"MACRO={kc.get('MACRO', 0):<4} "
+                f"RAW={kc.get('RAW_STRING', 0):<4} "
+                f"NULL={kc.get('NULL', 0):<4} "
+                f"MISSING={kc.get('MISSING', 0):<4} "
+                f"({kp:.1f}% compliant)"
+            )
+
+    # Top offenders.
+    offenders = Counter(
+        r.file_path for r in records if r.classification != "MACRO"
+    )
+    if offenders:
+        print()
+        print("Top non-compliant files:")
+        for path, count in offenders.most_common(10):
+            print(f" {count:>4} {_relative_to(path, root)}")
+
+
+# ---------------------------------------------------------------------------
+# Entry point
+# ---------------------------------------------------------------------------
+
+def _default_roots() -> List[Path]:
+    """Return [cwd/src] if that path exists, otherwise [cwd]."""
+    cwd = Path.cwd()
+    src = cwd / "src"
+    return [src] if src.exists() else [cwd]
+
+
+def main(argv: Optional[List[str]] = None) -> int:
+    """Run the audit and return the process exit status.
+
+    Returns 1 only when --strict is given and at least one non-MACRO entry
+    was found; otherwise 0 (including when no .cpp files are found).
+    """
+    parser = argparse.ArgumentParser(
+        description=(
+            "Audit PyMethodDef / PyGetSetDef entries in McRogueFace C++ "
+            "sources for MCRF_METHOD / MCRF_PROPERTY documentation macro use."
+        ),
+    )
+    parser.add_argument(
+        "--strict", action="store_true",
+        help="Exit nonzero if any non-MACRO entries are found (CI mode)."
+    )
+    parser.add_argument(
+        "--quiet", action="store_true",
+        help="Print only the summary, omit per-file tables."
+    )
+    parser.add_argument(
+        "--paths", nargs="+", type=Path, default=None,
+        help="Files or directories to scan (default: ./src)."
+    )
+    args = parser.parse_args(argv)
+
+    roots = args.paths if args.paths else _default_roots()
+    project_root = Path.cwd()
+
+    files = sorted(set(_iter_cpp_files(roots)))
+    if not files:
+        sys.stderr.write("WARNING: no .cpp files found.\n")
+        return 0
+
+    all_records: List[EntryRecord] = []
+    for f in files:
+        all_records.extend(scan_file(f))
+
+    if not args.quiet:
+        if all_records:
+            _print_file_table(all_records, project_root)
+        else:
+            print("(no PyMethodDef / PyGetSetDef arrays found)")
+            print()
+
+    _print_summary(all_records, project_root)
+
+    if args.strict and any(r.classification != "MACRO" for r in all_records):
+        return 1
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/tools/generate_all_docs.sh b/tools/generate_all_docs.sh
index 7c0234d..024c3d3 100755
--- a/tools/generate_all_docs.sh
+++ b/tools/generate_all_docs.sh
@@ -26,3 +26,24 @@ echo " HTML: docs/api_reference_dynamic.html"
 echo " Markdown: docs/API_REFERENCE_DYNAMIC.md"
 echo " Man page: docs/mcrfpy.3"
 echo " Stubs: stubs/mcrfpy.pyi"
+
+# ---------------------------------------------------------------------------
+# Static-analysis audit: report MCRF_METHOD / MCRF_PROPERTY macro compliance
+# across every PyMethodDef / PyGetSetDef array in src/. This is informational
+# only (no --strict) so it cannot break the doc build, but the summary makes
+# pre-1.0 documentation drift visible alongside doc generation.
+#
+# Requires the .venv-audit virtual environment with tree-sitter +
+# tree-sitter-cpp installed. If the venv is absent the audit is skipped with
+# a short hint (never an error) so contributors without it aren't blocked.
+# ---------------------------------------------------------------------------
+if [ -x "./.venv-audit/bin/python3" ] && [ -f "./tools/audit_pymethoddef.py" ]; then
+    echo ""
+    echo "=== PyMethodDef / PyGetSetDef Macro Compliance Audit ==="
+    ./.venv-audit/bin/python3 ./tools/audit_pymethoddef.py --quiet || true
+elif [ -f "./tools/audit_pymethoddef.py" ]; then
+    echo ""
+    echo "(skipping audit_pymethoddef.py: .venv-audit not found - run"
+    echo " 'python3 -m venv .venv-audit && .venv-audit/bin/pip install"
+    echo " tree-sitter tree-sitter-cpp' to enable)"
+fi