Code-review BLOCKER B1. Reaffirms D-011: a stdlib `re` fallback defeats the OPSEC-safe-regex guarantee because hostile C2 output can trigger catastrophic backtracking. The `[:1MB]` slice cap does not mitigate that — evaluating a malicious pattern over 1 MB of attacker-controlled text is still enough to freeze a worker.

- `mimic.templating.filters` now imports `re2` unconditionally and raises `RuntimeError` at module load if the binding is absent. No `re` import, no `_HAS_RE2` branch, no `_FALLBACK_MAX_INPUT`.
- `pyproject.toml` already pinned `google-re2 >= 1.1, < 2.0`; this commit hardens the import path to actually enforce it.
- New test `test_re2_is_required` asserts the binding is wired in.
127 lines
4.2 KiB
Python
127 lines
4.2 KiB
Python
"""Jinja2 sandbox + regex_extract tests."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import gzip
|
|
|
|
import pytest
|
|
from jinja2 import TemplateError
|
|
|
|
from mimic.templating.filters import regex_extract
|
|
from mimic.templating.sandbox import (
|
|
CleanupRenderer,
|
|
RenderError,
|
|
StepOutputs,
|
|
render_cleanup,
|
|
)
|
|
|
|
|
|
class TestRegexExtract:
    """Checks for the ``regex_extract`` template filter."""

    def test_re2_is_required(self) -> None:
        """D-011 / B1: google-re2 is the only allowed regex engine.

        Verifies that the filters module exposes the binding as ``_re2``
        and that it really is the ``re2`` module. A missing binding is not
        exercised here; the import-time RuntimeError check covers that.
        """
        from mimic.templating import filters as filters_module  # noqa: PLC0415

        assert filters_module._re2 is not None
        assert filters_module._re2.__name__ == "re2"

    def test_returns_capture_group(self) -> None:
        """A pattern with one group yields that group's text."""
        extracted = regex_extract("hello world", r"hello (\w+)")
        assert extracted == "world"

    def test_no_match_raises(self) -> None:
        """A pattern that does not match raises TemplateError."""
        expectation = pytest.raises(TemplateError, match="no match")
        with expectation:
            regex_extract("hello", r"foo(\d+)")

    def test_none_input_raises(self) -> None:
        """Passing ``None`` as the input raises TemplateError."""
        expectation = pytest.raises(TemplateError, match="None")
        with expectation:
            regex_extract(None, r"x")

    def test_no_groups_falls_back_to_full_match(self) -> None:
        """Without capture groups the whole match is returned."""
        extracted = regex_extract("abc123", r"\w+\d+")
        assert extracted == "abc123"

    def test_named_group(self) -> None:
        """``name=`` selects the named capture group."""
        extracted = regex_extract("pid=4242", r"pid=(?P<n>\d+)", name="n")
        assert extracted == "4242"

    def test_missing_named_group_raises(self) -> None:
        """Requesting a group name the pattern lacks raises TemplateError."""
        expectation = pytest.raises(TemplateError)
        with expectation:
            regex_extract("pid=4242", r"pid=(\d+)", name="absent")
|
|
|
|
|
|
class TestCleanupRenderer:
    """Rendering checks for ``CleanupRenderer`` and ``render_cleanup``."""

    def setup_method(self) -> None:
        """Give every test its own renderer instance."""
        self.renderer = CleanupRenderer()

    def test_render_params(self) -> None:
        """Values under ``params`` are substituted into the template."""
        template = "echo {{ params.target }}"
        rendered = self.renderer.render(template, params={"target": "WIN-01"})
        assert rendered == "echo WIN-01"

    def test_render_outputs_text(self) -> None:
        """``outputs.text`` is exposed to the template."""
        template = 'echo "{{ outputs.text }}"'
        rendered = self.renderer.render(
            template,
            outputs=StepOutputs(text="captured"),
        )
        assert rendered == 'echo "captured"'

    def test_regex_extract_filter(self) -> None:
        """The ``regex_extract`` filter is usable from a template."""
        template = r"{{ outputs.text | regex_extract('pid=(\\d+)') }}"
        rendered = self.renderer.render(
            template,
            outputs=StepOutputs(text="status: pid=4242 user=svc"),
        )
        assert rendered == "4242"

    def test_regex_extract_no_match_propagates_as_render_error(self) -> None:
        """A filter miss surfaces as RenderError mentioning "no match"."""
        template = r"{{ outputs.text | regex_extract('pid=(\\d+)') }}"
        with pytest.raises(RenderError, match="no match"):
            self.renderer.render(
                template,
                outputs=StepOutputs(text="nothing"),
            )

    def test_strict_undefined_raises(self) -> None:
        """Referencing a key absent from ``params`` raises RenderError."""
        template = "{{ params.does_not_exist }}"
        with pytest.raises(RenderError):
            self.renderer.render(template, params={})

    def test_sandbox_forbids_attribute_access(self) -> None:
        """A dunder-traversal template is rejected with RenderError."""
        template = "{{ ().__class__.__bases__[0].__subclasses__() }}"
        with pytest.raises(RenderError):
            self.renderer.render(template, params={})

    def test_module_singleton_round_trip(self) -> None:
        """The module-level ``render_cleanup`` helper substitutes params."""
        rendered = render_cleanup("hello {{ params.x }}", params={"x": "there"})
        assert rendered == "hello there"
|
|
|
|
|
|
class TestStepOutputsBlob:
    """Checks for ``StepOutputs.blob()`` against gzip-backed blob files."""

    def test_blob_returns_empty_when_no_path(self) -> None:
        """With no ``blob_path`` given, ``blob()`` is the empty string."""
        assert StepOutputs(text="x").blob() == ""

    def test_blob_reads_gzipped_file(self, tmp_path) -> None:
        """A gzipped payload is returned decompressed as text."""
        archive = tmp_path / "blob.gz"
        with gzip.open(archive, "wb") as handle:
            handle.write(b"hello")
        outputs = StepOutputs(blob_path=archive)
        assert outputs.blob() == "hello"

    def test_blob_caps_size_after_decompression(self, tmp_path) -> None:
        """``blob_max_bytes`` limits the decompressed result."""
        archive = tmp_path / "blob.gz"
        with gzip.open(archive, "wb") as handle:
            handle.write(b"A" * 1024)
        outputs = StepOutputs(blob_path=archive, blob_max_bytes=10)
        assert outputs.blob() == "A" * 10

    def test_blob_missing_file_returns_empty(self, tmp_path) -> None:
        """A path that does not exist yields the empty string."""
        missing = tmp_path / "absent.gz"
        assert StepOutputs(blob_path=missing).blob() == ""

    def test_blob_non_gzip_returns_empty(self, tmp_path) -> None:
        """A file that is not valid gzip yields the empty string."""
        bogus = tmp_path / "blob.gz"
        bogus.write_bytes(b"not actually gzip")
        assert StepOutputs(blob_path=bogus).blob() == ""
|