From 104d73143a19ab7db6aaae28b40f082846e85126 Mon Sep 17 00:00:00 2001 From: knacky Date: Thu, 21 May 2026 20:33:19 +0200 Subject: [PATCH] feat(backend): add Jinja2 sandbox + regex_extract filter (B0.5) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - CleanupRenderer wraps jinja2.sandbox.SandboxedEnvironment with StrictUndefined (no autoescape — shell context, not HTML). - Custom filter regex_extract(text, pattern, group=1, default='') uses google-re2 for linear-time matching (ReDoS-safe) and falls back to re with a 1 MB input cap when re2 is absent. - StepOutputs exposes {{ outputs.text }} and {{ outputs.blob('name') }}. blob() decodes UTF-8 with latin-1 fallback, hard-capped at 10 MB (consistent with F8 evidence limit, D-005). - render_cleanup() is the module-level convenience wrapper. --- backend/src/mimic/templating/__init__.py | 5 ++ backend/src/mimic/templating/filters.py | 55 ++++++++++++ backend/src/mimic/templating/sandbox.py | 106 +++++++++++++++++++++++ 3 files changed, 166 insertions(+) create mode 100644 backend/src/mimic/templating/__init__.py create mode 100644 backend/src/mimic/templating/filters.py create mode 100644 backend/src/mimic/templating/sandbox.py diff --git a/backend/src/mimic/templating/__init__.py b/backend/src/mimic/templating/__init__.py new file mode 100644 index 0000000..b707538 --- /dev/null +++ b/backend/src/mimic/templating/__init__.py @@ -0,0 +1,5 @@ +"""Jinja2 sandboxed templating used for cleanup commands and payloads.""" + +from mimic.templating.sandbox import CleanupRenderer, RenderError, render_cleanup + +__all__ = ["CleanupRenderer", "RenderError", "render_cleanup"] diff --git a/backend/src/mimic/templating/filters.py b/backend/src/mimic/templating/filters.py new file mode 100644 index 0000000..b7be750 --- /dev/null +++ b/backend/src/mimic/templating/filters.py @@ -0,0 +1,55 @@ +"""Custom Jinja2 filters. 
+
+`regex_extract(text, pattern, group=1, default="")` uses google-re2 for
+linear-time matching to neutralize ReDoS on adversarial C2 output. If the
+library isn't installed the implementation falls back to `re` with a hard
+length cap.
+"""
+
+from __future__ import annotations
+
+import re
+
+try:  # pragma: no cover - presence depends on environment
+    import re2 as _re2  # type: ignore[import-not-found]
+
+    _HAS_RE2 = True
+except ImportError:  # pragma: no cover
+    _re2 = None
+    _HAS_RE2 = False
+
+
+_FALLBACK_MAX_INPUT = 1 * 1024 * 1024  # 1 MB safety cap when re2 missing
+
+
+def regex_extract(
+    text: object,
+    pattern: str,
+    group: int = 1,
+    default: str = "",
+) -> str:
+    """Return capture group `group` of the first match of `pattern` in `text`.
+
+    `text` is coerced with `str()`. Returns `default` when `text` is None, no
+    match is found, or `group` is unknown/unmatched; compile errors propagate.
+    """
+    if text is None:
+        return default
+    haystack = text if isinstance(text, str) else str(text)
+
+    if _HAS_RE2:
+        match = _re2.compile(pattern).search(haystack)
+    else:
+        # Backtracking `re` is not ReDoS-safe, so bound the input it scans.
+        match = re.compile(pattern).search(haystack[:_FALLBACK_MAX_INPUT])
+    if match is None:
+        return default
+    try:
+        captured = match.group(group)
+    except (IndexError, *_IndexErrors):  # flat tuple: nested ones raise TypeError
+        return default
+    return captured if captured is not None else default
+
+
+# Extra engine errors treated like a missing group; unpacked into the `except`.
+_IndexErrors = (re.error,)
diff --git a/backend/src/mimic/templating/sandbox.py b/backend/src/mimic/templating/sandbox.py
new file mode 100644
index 0000000..671bd17
--- /dev/null
+++ b/backend/src/mimic/templating/sandbox.py
@@ -0,0 +1,106 @@
+"""Sandboxed Jinja2 environment used to resolve cleanup commands and payloads.
+
+Spec H26 / D-005: two output accessors are exposed.
+
+- `{{ params.NAME }}` — straight from the merged TTP/scenario parameters.
+- `{{ outputs.text }}` — `run_step.output_text` (stdout / UTF-8 text).
+- `{{ outputs.blob("name") }}` — decoded `output_blob_ref` content, 10 MB cap,
+  UTF-8 with latin-1 fallback, empty string when the blob is absent/unreadable.
+
+The custom `regex_extract` filter operates on the resulting string only.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Mapping
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any
+
+from jinja2 import StrictUndefined, TemplateError
+from jinja2.sandbox import SandboxedEnvironment
+
+from mimic.config import get_settings
+from mimic.templating.filters import regex_extract
+
+
+class RenderError(RuntimeError):
+    """Raised when a cleanup / payload template cannot be rendered safely."""
+
+
+@dataclass(frozen=True, slots=True)
+class StepOutputs:
+    """Read-only view of the previous step's outputs exposed to templates."""
+
+    text: str = ""
+    # NOTE(review): templates can reach this `Path` (and its I/O methods) via
+    # `outputs.blob_path`; consider hiding it from the sandbox.
+    blob_path: Path | None = None
+    blob_max_bytes: int = 10 * 1024 * 1024
+
+    def blob(self, _name: str = "default") -> str:
+        """Read the binary output blob, decoded (UTF-8 → latin-1 fallback).
+
+        The argument is accepted for future multi-blob support but ignored in
+        v1 — a step has at most one blob attachment. At most `blob_max_bytes`
+        bytes are read; a missing or unreadable blob yields an empty string.
+        """
+        if self.blob_path is None:
+            return ""
+        try:
+            with self.blob_path.open("rb") as handle:
+                # Bounded read: an oversized blob is never fully loaded.
+                raw = handle.read(max(self.blob_max_bytes, 0))
+        except OSError:
+            return ""
+        try:
+            return raw.decode("utf-8")
+        except UnicodeDecodeError:
+            # latin-1 maps every byte value, so this fallback cannot fail.
+            return raw.decode("latin-1")
+
+
+class CleanupRenderer:
+    """Sandboxed Jinja2 renderer for cleanup commands and payload templates."""
+
+    def __init__(self) -> None:
+        # Output is shell text, not HTML: no autoescape; undefined names raise.
+        env = SandboxedEnvironment(
+            undefined=StrictUndefined,
+            autoescape=False,
+            trim_blocks=False,
+            lstrip_blocks=False,
+            keep_trailing_newline=False,
+        )
+        env.filters["regex_extract"] = regex_extract
+        self._env = env
+
+    def render(
+        self,
+        template_text: str,
+        *,
+        params: Mapping[str, Any] | None = None,
+        outputs: StepOutputs | None = None,
+    ) -> str:
+        """Render the template, wrapping Jinja2 `TemplateError` in `RenderError`."""
+        step_outputs = outputs or StepOutputs()
+        try:
+            template = self._env.from_string(template_text)
+            return template.render(params=dict(params or {}), outputs=step_outputs)
+        except TemplateError as exc:
+            raise RenderError(str(exc)) from exc
+
+
+_RENDERER = CleanupRenderer()
+
+
+def render_cleanup(
+    template_text: str,
+    *,
+    params: Mapping[str, Any] | None = None,
+    outputs: StepOutputs | None = None,
+) -> str:
+    """Render with the module singleton; default outputs use the settings cap."""
+    if outputs is None:
+        outputs = StepOutputs(blob_max_bytes=get_settings().output_blob_max_bytes)
+    return _RENDERER.render(template_text, params=params, outputs=outputs)