Files
mimic-big/backend/tests/unit/test_templating.py
knacky 162b6988f8 fix(backend): align regex_extract + outputs.blob() with D-011/D-012
D-011 — `regex_extract(text, pattern, *, group=1, name=None)`:
- engine google-re2 (linear-time, ReDoS-safe), `re` fallback with 1 MB cap.
- first match only.
- no match → raises Jinja2 `TemplateError` (no silent default — cleanup
  templates must fail loud when source string drifts).
- default capture is group 1 with fallback to group(0) when the pattern has
  no groups; named groups via `name="<name>"`.

D-012 — `outputs.blob()`:
- reads the gzip-compressed CAS file from `MIMIC_BLOB_ROOT`.
- 10 MB cap is applied **after** decompression.
- decode UTF-8 with latin-1 fallback; never raises (missing / corrupt /
  non-gzip blobs return empty string, logged at WARNING).

Unit tests rewritten to cover both the new fail-loud regex contract and
the gzip read path. 49 unit tests pass; ruff clean.
2026-05-21 20:44:48 +02:00

118 lines
3.8 KiB
Python

"""Jinja2 sandbox + regex_extract tests."""
from __future__ import annotations
import gzip
import pytest
from jinja2 import TemplateError
from mimic.templating.filters import regex_extract
from mimic.templating.sandbox import (
CleanupRenderer,
RenderError,
StepOutputs,
render_cleanup,
)
class TestRegexExtract:
    """Direct calls to the ``regex_extract`` filter function."""

    def test_returns_capture_group(self) -> None:
        """Expect the text of the single capture group to be returned."""
        extracted = regex_extract("hello world", r"hello (\w+)")
        assert extracted == "world"

    def test_no_match_raises(self) -> None:
        """Expect ``TemplateError`` mentioning "no match" when nothing matches."""
        expected_failure = pytest.raises(TemplateError, match="no match")
        with expected_failure:
            regex_extract("hello", r"foo(\d+)")

    def test_none_input_raises(self) -> None:
        """Expect ``TemplateError`` mentioning "None" when the text is ``None``."""
        expected_failure = pytest.raises(TemplateError, match="None")
        with expected_failure:
            regex_extract(None, r"x")

    def test_no_groups_falls_back_to_full_match(self) -> None:
        """Expect the whole match back when the pattern defines no groups."""
        extracted = regex_extract("abc123", r"\w+\d+")
        assert extracted == "abc123"

    def test_named_group(self) -> None:
        """Expect ``name=`` to select the named group's text."""
        extracted = regex_extract("pid=4242", r"pid=(?P<n>\d+)", name="n")
        assert extracted == "4242"

    def test_missing_named_group_raises(self) -> None:
        """Expect ``TemplateError`` when ``name=`` refers to an absent group."""
        expected_failure = pytest.raises(TemplateError)
        with expected_failure:
            regex_extract("pid=4242", r"pid=(\d+)", name="absent")
class TestCleanupRenderer:
    """Rendering via ``CleanupRenderer.render`` and ``render_cleanup``."""

    def setup_method(self) -> None:
        """Create the ``CleanupRenderer`` instance the tests render with."""
        self.renderer = CleanupRenderer()

    def test_render_params(self) -> None:
        """Expect ``params`` values to be substituted into the template."""
        template = "echo {{ params.target }}"
        rendered = self.renderer.render(template, params={"target": "WIN-01"})
        assert rendered == "echo WIN-01"

    def test_render_outputs_text(self) -> None:
        """Expect ``outputs.text`` to be substituted into the template."""
        template = 'echo "{{ outputs.text }}"'
        rendered = self.renderer.render(
            template,
            outputs=StepOutputs(text="captured"),
        )
        assert rendered == 'echo "captured"'

    def test_regex_extract_filter(self) -> None:
        """Expect ``regex_extract`` to be usable as a filter in a template."""
        template = r"{{ outputs.text | regex_extract('pid=(\\d+)') }}"
        rendered = self.renderer.render(
            template,
            outputs=StepOutputs(text="status: pid=4242 user=svc"),
        )
        assert rendered == "4242"

    def test_regex_extract_no_match_propagates_as_render_error(self) -> None:
        """Expect a filter miss to surface as ``RenderError`` with "no match"."""
        template = r"{{ outputs.text | regex_extract('pid=(\\d+)') }}"
        with pytest.raises(RenderError, match="no match"):
            self.renderer.render(
                template,
                outputs=StepOutputs(text="nothing"),
            )

    def test_strict_undefined_raises(self) -> None:
        """Expect ``RenderError`` when the template reads a missing param."""
        template = "{{ params.does_not_exist }}"
        with pytest.raises(RenderError):
            self.renderer.render(template, params={})

    def test_sandbox_forbids_attribute_access(self) -> None:
        """Expect ``RenderError`` for a template that walks dunder attributes."""
        template = "{{ ().__class__.__bases__[0].__subclasses__() }}"
        with pytest.raises(RenderError):
            self.renderer.render(
                template,
                params={},
            )

    def test_module_singleton_round_trip(self) -> None:
        """Expect the module-level ``render_cleanup`` function to render params."""
        rendered = render_cleanup("hello {{ params.x }}", params={"x": "there"})
        assert rendered == "hello there"
class TestStepOutputsBlob:
    """Behaviour of ``StepOutputs.blob()`` for present, absent and bad files."""

    def test_blob_returns_empty_when_no_path(self) -> None:
        """Expect an empty string when no ``blob_path`` was given."""
        step_outputs = StepOutputs(text="x")
        assert step_outputs.blob() == ""

    def test_blob_reads_gzipped_file(self, tmp_path) -> None:
        """Expect the decompressed content of a gzip file to be returned."""
        archive = tmp_path / "blob.gz"
        with gzip.open(archive, "wb") as gz_file:
            gz_file.write(b"hello")

        step_outputs = StepOutputs(blob_path=archive)
        assert step_outputs.blob() == "hello"

    def test_blob_caps_size_after_decompression(self, tmp_path) -> None:
        """Expect only ``blob_max_bytes`` characters of the decompressed data."""
        archive = tmp_path / "blob.gz"
        with gzip.open(archive, "wb") as gz_file:
            # 1024 bytes of payload, far above the 10-byte cap used below.
            gz_file.write(b"A" * 1024)

        step_outputs = StepOutputs(blob_path=archive, blob_max_bytes=10)
        assert step_outputs.blob() == "A" * 10

    def test_blob_missing_file_returns_empty(self, tmp_path) -> None:
        """Expect an empty string when the blob file does not exist."""
        missing = tmp_path / "absent.gz"
        step_outputs = StepOutputs(blob_path=missing)
        assert step_outputs.blob() == ""

    def test_blob_non_gzip_returns_empty(self, tmp_path) -> None:
        """Expect an empty string when the file is not gzip data."""
        bogus = tmp_path / "blob.gz"
        bogus.write_bytes(b"not actually gzip")

        step_outputs = StepOutputs(blob_path=bogus)
        assert step_outputs.blob() == ""