feat(backend): sprint 3 — multi-technique simulations + MITRE matrix

- Simulation model: replace mitre_technique_id/name scalars with techniques JSON column [{id, name}]
- Alembic migration 0003: add techniques, backfill from scalars, drop old columns (reversible)
- MITRE service: add get_tactics(), lookup_name(), get_matrix() with canonical tactic order and sub-technique nesting
- serializer: enrich techniques with tactics from service at serialize time (graceful empty tactics if bundle outdated)
- simulation_workflow: PATCH now accepts technique_ids list, validates against bundle, deduplicates preserving order, auto-transitions on non-empty list
- simulations API: add GET /api/mitre/matrix endpoint (503 if bundle absent)
- test_mitre.py: updated _reset_mitre fixture, added T1059.006 sub-technique, 14 new tests for get_tactics/lookup_name/get_matrix/matrix endpoint
- test_simulations_techniques.py: 20 new tests covering AC-13.1 to AC-13.5 (create, PATCH, dedup, auto-transition, SOC blocked, migration backfill logic)

Total: 161 tests passing. ruff clean. mypy: no new errors.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-27 03:56:02 +02:00
parent e1d9738f23
commit b5ea2929de
8 changed files with 737 additions and 30 deletions
--- a/backend/app/services/mitre.py
+++ b/backend/app/services/mitre.py
@@ -8,11 +8,30 @@ from typing import Any

 logger = logging.getLogger(__name__)

-# Absolute path to the committed bundle.
 _BUNDLE_PATH = Path(__file__).parent.parent.parent / "data" / "mitre" / "enterprise-attack.json"

+# Canonical Enterprise tactic order (12 tactics).
+_TACTIC_ORDER = [
+    "initial-access",
+    "execution",
+    "persistence",
+    "privilege-escalation",
+    "defense-evasion",
+    "credential-access",
+    "discovery",
+    "lateral-movement",
+    "collection",
+    "command-and-control",
+    "exfiltration",
+    "impact",
+]
+
 mitre_loaded: bool = False
 _index: list[dict[str, Any]] = []
+_tactics_by_technique: dict[str, list[str]] = {}  # technique id -> tactic phase names
+_name_by_id: dict[str, str] = {}  # technique id -> technique name
+# Tactic → techniques → sub-techniques tree; rebuilt by load_bundle() on each successful load.
+_matrix: list[dict[str, Any]] = []


 def _extract_tactics(obj: dict[str, Any]) -> list[str]:
@@ -20,7 +39,7 @@ def _extract_tactics(obj: dict[str, Any]) -> list[str]:
    return [
        p["phase_name"]
        for p in phases
-        if isinstance(p, dict) and "phase_name" in p
+        if isinstance(p, dict) and "phase_name" in p and p.get("kill_chain_name") == "mitre-attack"
    ]


@@ -31,9 +50,65 @@ def _get_external_id(obj: dict[str, Any]) -> str | None:
    return None


+def _is_subtechnique(tech_id: str) -> bool:
+    return "." in tech_id
+
+
+def _parent_id(sub_id: str) -> str:
+    return sub_id.split(".")[0]
+
+
+def _build_matrix(entries: list[dict[str, Any]]) -> list[dict[str, Any]]:
+    """Build the tactic → techniques → subtechniques tree."""
+    # Group top-level techniques by tactic.
+    tactic_techs: dict[str, list[dict[str, Any]]] = {t: [] for t in _TACTIC_ORDER}
+
+    for entry in entries:
+        if _is_subtechnique(entry["id"]):
+            continue
+        for tactic in entry["tactics"]:
+            if tactic in tactic_techs:
+                tactic_techs[tactic].append(entry)
+
+    # Attach sub-techniques to their parents.
+    parent_subs: dict[str, list[dict[str, Any]]] = {}
+    for entry in entries:
+        if not _is_subtechnique(entry["id"]):
+            continue
+        pid = _parent_id(entry["id"])
+        parent_subs.setdefault(pid, []).append({"id": entry["id"], "name": entry["name"]})
+
+    # Sort subs alphabetically by name.
+    for subs in parent_subs.values():
+        subs.sort(key=lambda x: x["name"])
+
+    matrix: list[dict[str, Any]] = []
+    for tactic_id in _TACTIC_ORDER:
+        techs = tactic_techs.get(tactic_id, [])
+        # Sort techniques alphabetically.
+        techs_sorted = sorted(techs, key=lambda x: x["name"])
+        tactic_name = tactic_id.replace("-", " ").title()
+        matrix.append(
+            {
+                "tactic_id": tactic_id,
+                "tactic_name": tactic_name,
+                "techniques": [
+                    {
+                        "id": t["id"],
+                        "name": t["name"],
+                        "subtechniques": parent_subs.get(t["id"], []),
+                    }
+                    for t in techs_sorted
+                ],
+            }
+        )
+
+    return matrix
+
+
 def load_bundle(path: Path | None = None) -> None:
    """Load the MITRE bundle into memory. Called once at app boot."""
-    global mitre_loaded, _index
+    global mitre_loaded, _index, _tactics_by_technique, _name_by_id, _matrix
    bundle_path = path or _BUNDLE_PATH

    try:
@@ -49,6 +124,9 @@ def load_bundle(path: Path | None = None) -> None:
        return

    entries: list[dict[str, Any]] = []
+    tactics_map: dict[str, list[str]] = {}
+    name_map: dict[str, str] = {}
+
    for obj in data.get("objects") or []:
        if not isinstance(obj, dict):
            continue
@@ -59,19 +137,35 @@ def load_bundle(path: Path | None = None) -> None:
        ext_id = _get_external_id(obj)
        if not ext_id:
            continue
-        entries.append(
-            {
-                "id": ext_id,
-                "name": obj.get("name", ""),
-                "tactics": _extract_tactics(obj),
-            }
-        )
+        tactics = _extract_tactics(obj)
+        name = obj.get("name", "")
+        entries.append({"id": ext_id, "name": name, "tactics": tactics})
+        tactics_map[ext_id] = tactics
+        name_map[ext_id] = name

    _index = entries
+    _tactics_by_technique = tactics_map
+    _name_by_id = name_map
+    _matrix = _build_matrix(entries)
    mitre_loaded = True
    logger.info("MITRE bundle loaded: %d techniques", len(_index))


+def get_tactics(technique_id: str) -> list[str]:
+    """Return tactic list for a technique id; empty list if unknown."""
+    return _tactics_by_technique.get(technique_id, [])
+
+
+def lookup_name(technique_id: str) -> str | None:
+    """Return the name for a technique id, or None if not in the bundle."""
+    return _name_by_id.get(technique_id)
+
+
+def get_matrix() -> list[dict[str, Any]]:
+    """Return the full tactic → techniques → subtechniques tree.
+
+    The module-level list is returned as-is (no copy); it is empty until a
+    bundle has been loaded.
+    """
+    return _matrix
+
+
 def search(query: str, limit: int = 20) -> list[dict[str, Any]]:
    """Return up to `limit` techniques matching `query`.

--- a/backend/app/services/simulation_workflow.py
+++ b/backend/app/services/simulation_workflow.py
@@ -10,11 +10,10 @@ from backend.app.extensions import db
 from backend.app.models import User
 from backend.app.models.simulation import Simulation, SimulationStatus

+# Fields only admin/redteam may write (excluding technique_ids which is handled separately).
 REDTEAM_FIELDS = frozenset(
    {
        "name",
-        "mitre_technique_id",
-        "mitre_technique_name",
        "description",
        "commands",
        "prerequisites",
@@ -25,8 +24,6 @@ REDTEAM_FIELDS = frozenset(

 SOC_FIELDS = frozenset({"log_source", "logs", "soc_comment", "incident_number"})

-# Transitions allowed via POST /transition endpoint (manual only).
-# auto pending→in_progress is handled in apply_patch, not here.
 _ALLOWED_TRANSITIONS: dict[str, dict[str, set[str]]] = {
    "review_required": {
        "from": {"pending", "in_progress"},
@@ -48,6 +45,27 @@ def _is_non_empty(value: Any) -> bool:
    return not (isinstance(value, list) and len(value) == 0)


+def _resolve_technique_ids(
+    technique_ids: list[str],
+) -> tuple[list[dict[str, str]] | None, tuple[Any, int] | None]:
+    """Validate and resolve technique IDs to [{id, name}] snapshots.
+
+    Returns (resolved_list, None) on success or (None, error_tuple) on failure.
+    Deduplicates while preserving order.
+    """
+    from backend.app.services import mitre as mitre_svc
+
+    # Dedup, preserve order.
+    seen: dict[str, None] = dict.fromkeys(technique_ids)
+    resolved: list[dict[str, str]] = []
+    for tid in seen:
+        name = mitre_svc.lookup_name(tid)
+        if name is None:
+            return None, (jsonify({"error": f"unknown technique id: {tid}"}), 400)
+        resolved.append({"id": tid, "name": name})
+    return resolved, None
+
+
 def apply_patch(
    simulation: Simulation, payload: dict[str, Any], user: User
 ) -> tuple[Any, int] | None:
@@ -59,15 +77,14 @@ def apply_patch(
    role = user.role.value

    if role == "soc":
-        # SOC can only patch when status allows it.
        if simulation.status not in (
            SimulationStatus.REVIEW_REQUIRED,
            SimulationStatus.DONE,
        ):
            return jsonify({"error": "simulation not ready for SOC review"}), 403

-        # SOC must not send redteam fields.
-        redteam_keys_in_payload = REDTEAM_FIELDS & payload.keys()
+        # SOC must not send redteam fields or technique_ids.
+        redteam_keys_in_payload = (REDTEAM_FIELDS | {"technique_ids"}) & payload.keys()
        if redteam_keys_in_payload:
            return jsonify({"error": "soc cannot edit redteam fields"}), 403

@@ -76,10 +93,10 @@ def apply_patch(
                setattr(simulation, field, payload[field])

    else:
-        # admin / redteam: apply all fields present.
+        # admin / redteam path.
        redteam_keys_present = REDTEAM_FIELDS & payload.keys()

-        # Validate executed_at before any writes so a bad value causes no partial mutation.
+        # Validate executed_at upfront before any writes.
        executed_at_value: datetime | None = None
        if "executed_at" in redteam_keys_present:
            val = payload["executed_at"]
@@ -91,21 +108,39 @@ def apply_patch(
                except ValueError:
                    return jsonify({"error": "invalid executed_at"}), 400

+        # Validate and resolve technique_ids upfront.
+        resolved_techniques: list[dict[str, str]] | None = None
+        if "technique_ids" in payload:
+            raw_ids = payload["technique_ids"]
+            if not isinstance(raw_ids, list):
+                return jsonify({"error": "technique_ids must be a list"}), 400
+            resolved_techniques, err = _resolve_technique_ids(raw_ids)
+            if err is not None:
+                return err
+
+        # Apply scalar redteam fields.
        for field in redteam_keys_present:
            if field == "executed_at":
                simulation.executed_at = executed_at_value
            else:
                setattr(simulation, field, payload[field])

+        # Apply resolved techniques.
+        if resolved_techniques is not None:
+            simulation.techniques = resolved_techniques
+
+        # Apply SOC fields (admin/redteam may also write them).
        for field in SOC_FIELDS:
            if field in payload:
                setattr(simulation, field, payload[field])

-        # Auto-transition pending → in_progress: at least one redteam field with
-        # a non-empty value in the *incoming payload*.
-        if simulation.status == SimulationStatus.PENDING and any(
-            _is_non_empty(payload[k]) for k in redteam_keys_present
-        ):
+        # Auto-transition pending → in_progress.
+        # Triggers when any redteam scalar has a non-empty value, OR technique_ids is non-empty.
+        auto_trigger = any(_is_non_empty(payload[k]) for k in redteam_keys_present)
+        if not auto_trigger and "technique_ids" in payload:
+            auto_trigger = len(payload["technique_ids"]) > 0
+
+        if simulation.status == SimulationStatus.PENDING and auto_trigger:
            simulation.status = SimulationStatus.IN_PROGRESS

    simulation.updated_at = datetime.now(UTC)