Files
mimic/backend/app/services/export.py
Knacky e41679b331 fix(export): render PDF in A4 landscape for 7-column readability
Add @page { size: A4 landscape } to _CSS, reduce font-size to 11px,
and set table-layout: fixed + word-break: break-word so 7 columns
fit without overflow. Unit test asserts the landscape rule is present
in the rendered HTML.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-09 18:13:46 +02:00

278 lines
9.4 KiB
Python

"""Engagement export renderers — Markdown, CSV, PDF."""
from __future__ import annotations
import csv
import io
import re
import unicodedata
from datetime import date
from html import escape as _html_escape
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from backend.app.models.engagement import Engagement
from backend.app.models.simulation import Simulation
def _export_filename(engagement: Engagement, ext: str) -> str:
    """Build the download filename for an engagement export.

    The result has the shape ``engagement-<id>-<slug>-<YYYYMMDD>.<ext>``.
    The slug is the engagement name reduced to lowercase ASCII letters and
    digits separated by single hyphens, capped at 60 characters; it falls
    back to ``unnamed`` when nothing usable remains.

    Args:
        engagement: Object exposing ``id`` and ``name`` (``name`` may be empty
            or ``None``).
        ext: File extension without the leading dot.

    Returns:
        The filename, stamped with today's local date.
    """
    raw_name = engagement.name or ""
    # NFKD splits accented letters into base letter + combining mark; the
    # ASCII encode with "ignore" then drops the marks ("é" -> "e").
    ascii_name = (
        unicodedata.normalize("NFKD", raw_name).encode("ascii", "ignore").decode()
    )
    slug = re.sub(r"[^a-z0-9]+", "-", ascii_name.lower()).strip("-")
    # Strip again after truncating: the 60-char cut can land right after a
    # separator, which would otherwise leave "...-" and a "--" in the filename.
    slug = slug[:60].rstrip("-") or "unnamed"
    stamp = date.today().strftime("%Y%m%d")
    return f"engagement-{engagement.id}-{slug}-{stamp}.{ext}"
def _creator(obj: object) -> str:
    """Return the username of whoever created *obj*, or ``""`` if unknown.

    Reads ``obj.created_by.username``; a missing or ``None`` ``created_by``,
    or a missing/empty ``username``, all yield the empty string.
    """
    author = getattr(obj, "created_by", None)
    username = getattr(author, "username", "") if author is not None else ""
    return username or ""
# ---------------------------------------------------------------------------
# CSV formula-injection defense (defined early — used by _format_execution_csv)
# ---------------------------------------------------------------------------
# Leading characters that make a spreadsheet treat a cell as a formula.
# Tab and carriage return are listed too: Excel auto-trims leading whitespace,
# so a tab/CR prefix still reaches the formula parser in some sheet versions.
_CSV_FORMULA_TRIGGERS = (
    "=",
    "+",
    "-",
    "@",
    "\t",
    "\r",
)


def _csv_safe(value: object) -> object:
    """Neutralise spreadsheet formula injection for a single CSV cell.

    Excel, LibreOffice and Google Sheets evaluate cells that begin with one of
    ``_CSV_FORMULA_TRIGGERS``. This CSV is handed to the SOC and opened in a
    spreadsheet application, so a simulation field crafted by an authenticated
    red-team user could otherwise execute on the analyst's machine. A leading
    apostrophe makes the spreadsheet treat the cell as plain text.

    Returns:
        The string prefixed with ``'`` when it starts with a trigger
        character; any other value (including ``""`` and non-strings) is
        returned unchanged.
    """
    # startswith() accepts the tuple directly and is False for the empty string.
    if isinstance(value, str) and value.startswith(_CSV_FORMULA_TRIGGERS):
        return f"'{value}"
    return value
# ---------------------------------------------------------------------------
# Execution cell helpers
# ---------------------------------------------------------------------------
def _format_execution_text(sim: Simulation) -> str:
    """Join the three execution fields into one newline-separated string.

    Used for Markdown and PDF output, so no CSV sanitisation is applied.
    The result always has exactly two separators: ISO timestamp, commands,
    execution result, each replaced by ``""`` when unset.
    """
    timestamp = sim.executed_at.isoformat() if sim.executed_at else ""
    commands = sim.commands or ""
    result = sim.execution_result or ""
    return "\n".join((timestamp, commands, result))
def _format_execution_csv(sim: Simulation) -> str:
    """Join the three execution fields for a CSV cell, defusing formulas.

    Same layout as the Markdown/PDF variant (timestamp, commands, result,
    newline-separated), except that the two user-controlled components are
    each passed through ``_csv_safe`` before joining. ``_csv_safe`` inspects
    only the first character of the string it is given, so each component is
    checked at its own start rather than only at the start of the whole cell.
    """
    timestamp = sim.executed_at.isoformat() if sim.executed_at else ""
    commands = str(_csv_safe(sim.commands or ""))
    result = str(_csv_safe(sim.execution_result or ""))
    return "\n".join((timestamp, commands, result))
# ---------------------------------------------------------------------------
# Markdown
# ---------------------------------------------------------------------------
# Column titles of the Markdown simulations table, in cell order.
_MD_HEADERS = [
    "Scénario", "Test", "Source de log", "Commentaires SOC",
    "Exécution", "Logs remontés au SIEM", "Cyber incident",
]
def render_engagement_markdown(
    engagement: Engagement, simulations: list[Simulation]
) -> str:
    """Render an engagement and its simulations as a Markdown document.

    The document starts with the engagement name as a level-1 heading, the
    optional description and a block of metadata lines. When *simulations* is
    non-empty, a horizontal rule and a GFM table (one row per simulation,
    columns per ``_MD_HEADERS``) follow.

    Args:
        engagement: Engagement to describe.
        simulations: Simulations to list; an empty list omits the table.

    Returns:
        The Markdown text, ending with a newline.
    """

    def _iso_or_na(value: date | None) -> str:
        """ISO-format a date/datetime, or ``N/A`` when unset."""
        return value.isoformat() if value else "N/A"

    def _cell(value: str | None) -> str:
        """Make *value* safe to embed in a single GFM table cell."""
        # Normalise CRLF / lone CR first: a bare carriage return is a line
        # ending in CommonMark and would otherwise split the table row.
        text = (value or "").replace("\r\n", "\n").replace("\r", "\n")
        # Escape HTML (including quotes) to prevent stored XSS in Markdown
        # renderers that interpret inline HTML, then escape the pipe (GFM
        # column separator), then fold newlines into <br/> — our own safe
        # markup, inserted only after escaping.
        text = _html_escape(text)
        text = text.replace("|", "\\|")
        return text.replace("\n", "<br/>")

    # NOTE(review): the engagement name and description are emitted verbatim
    # (not passed through _cell) — confirm that is intended.
    lines: list[str] = [f"# {engagement.name}", ""]
    if engagement.description:
        lines += [engagement.description, ""]
    lines += [
        f"**Status**: {engagement.status.value}",
        f"**Start date**: {_iso_or_na(engagement.start_date)}",
        f"**End date**: {_iso_or_na(engagement.end_date)}",
        f"**Created by**: {_creator(engagement)}",
        f"**Created at**: {_iso_or_na(engagement.created_at)}",
        "",
    ]

    if not simulations:
        return "\n".join(lines)

    lines += [
        "---",
        "",
        "## Simulations",
        "",
        "| " + " | ".join(_MD_HEADERS) + " |",
        "| " + " | ".join("---" for _ in _MD_HEADERS) + " |",
    ]
    for sim in simulations:
        cells = [
            _cell(sim.name),
            _cell(sim.description),
            _cell(sim.log_source),
            _cell(sim.soc_comment),
            _cell(_format_execution_text(sim)),
            _cell(sim.logs),
            _cell(sim.incident_number),
        ]
        lines.append("| " + " | ".join(cells) + " |")
    lines.append("")
    return "\n".join(lines)
# ---------------------------------------------------------------------------
# CSV
# ---------------------------------------------------------------------------
# Header row of the CSV export; order matches the cells written per simulation.
_CSV_HEADERS = [
    "Scénario", "Test", "Source de log", "Commentaires SOC",
    "Exécution", "Logs remontés au SIEM", "Cyber incident",
]
def render_engagement_csv(
    _engagement: Engagement, simulations: list[Simulation]
) -> str:
    """Render the simulations of an engagement as CSV text.

    Writes the ``_CSV_HEADERS`` row followed by one row per simulation using
    the default ``csv.writer`` dialect. Every cell goes through ``_csv_safe``
    so that spreadsheet applications do not evaluate it as a formula.

    Args:
        _engagement: Unused; kept so all renderers share one signature.
        simulations: Simulations to export, one CSV row each.

    Returns:
        The complete CSV document as a string.
    """
    out = io.StringIO()
    sheet = csv.writer(out)
    sheet.writerow(_CSV_HEADERS)
    sheet.writerows(
        [
            _csv_safe(sim.name or ""),
            _csv_safe(sim.description or ""),
            _csv_safe(sim.log_source or ""),
            _csv_safe(sim.soc_comment or ""),
            # Components are already defused inside _format_execution_csv;
            # the outer call is a defensive second pass over the joined cell.
            _csv_safe(_format_execution_csv(sim)),
            _csv_safe(sim.logs or ""),
            _csv_safe(sim.incident_number or ""),
        ]
        for sim in simulations
    )
    return out.getvalue()
# ---------------------------------------------------------------------------
# HTML (internal, used by PDF renderer)
# ---------------------------------------------------------------------------
# Stylesheet inlined into the <style> element of the HTML that feeds the PDF
# renderer. The page is A4 landscape; the table uses a fixed layout and cells
# break long words so that the seven simulation columns fit without overflow.
# Cells also use pre-wrap, so newlines inside cell text are kept as line breaks.
_CSS = """
@page { size: A4 landscape; margin: 20mm; }
body { font-family: sans-serif; font-size: 11px; color: #1a1a1a; margin: 0; }
h1 { font-size: 20px; border-bottom: 2px solid #333; padding-bottom: 6px; }
h2 { font-size: 15px; margin-top: 32px; color: #333; }
table { border-collapse: collapse; width: 100%; margin-bottom: 12px; table-layout: fixed; }
th, td { border: 1px solid #ccc; padding: 3px 6px; text-align: left; vertical-align: top; white-space: pre-wrap; word-break: break-word; }
th { background: #e0e0e0; }
.meta { color: #555; margin-bottom: 16px; }
"""
# Column titles of the HTML/PDF simulations table, in cell order.
_HTML_HEADERS = [
    "Scénario", "Test", "Source de log", "Commentaires SOC",
    "Exécution", "Logs remontés au SIEM", "Cyber incident",
]
def _render_engagement_html(
    engagement: Engagement, simulations: list[Simulation]
) -> str:
    """Render an engagement as a standalone HTML document for the PDF export.

    The page contains the engagement name as ``<h1>``, a metadata block
    (description, status, dates, creator, creation time) and, when
    *simulations* is non-empty, a table with one row per simulation and the
    columns of ``_HTML_HEADERS``. Every dynamic value is HTML-escaped.

    Args:
        engagement: Engagement to describe.
        simulations: Simulations to list; an empty list omits the table.

    Returns:
        The HTML document as a single string with ``_CSS`` inlined.
    """
    h = _html_escape

    def _iso_or_na(value: date | None) -> str:
        """ISO-format a date/datetime, or ``N/A`` when unset."""
        return value.isoformat() if value else "N/A"

    parts: list[str] = [
        "<!DOCTYPE html><html><head><meta charset='utf-8'>",
        f"<style>{_CSS}</style></head><body>",
        # `or ""` keeps a missing name from crashing html.escape(None).
        f"<h1>{h(engagement.name or '')}</h1>",
        "<div class='meta'>",
    ]
    if engagement.description:
        parts.append(f"<p>{h(engagement.description)}</p>")

    start = _iso_or_na(engagement.start_date)
    end = _iso_or_na(engagement.end_date)
    created = _iso_or_na(engagement.created_at)
    parts += [
        f"<p><strong>Status:</strong> {h(engagement.status.value)}</p>",
        # Separate the two dates; emitted back to back they read as one
        # token ("2026-01-012026-02-01" or "N/AN/A").
        f"<p><strong>Dates:</strong> {h(start)} – {h(end)}</p>",
        f"<p><strong>Created by:</strong> {h(_creator(engagement))}</p>",
        f"<p><strong>Created at:</strong> {h(created)}</p>",
        "</div>",
    ]

    if simulations:
        header_cells = "".join(f"<th>{h(col)}</th>" for col in _HTML_HEADERS)
        parts.append("<h2>Simulations</h2>")
        parts.append(f"<table><thead><tr>{header_cells}</tr></thead><tbody>")
        for sim in simulations:
            # Escape first, then insert our own <br/> between the three
            # execution components.
            execution_html = h(_format_execution_text(sim)).replace("\n", "<br/>")
            cells = [
                h(sim.name or ""),
                h(sim.description or ""),
                h(sim.log_source or ""),
                h(sim.soc_comment or ""),
                execution_html,
                h(sim.logs or ""),
                h(sim.incident_number or ""),
            ]
            parts.append("<tr>" + "".join(f"<td>{c}</td>" for c in cells) + "</tr>")
        parts.append("</tbody></table>")

    parts.append("</body></html>")
    return "".join(parts)
# ---------------------------------------------------------------------------
# PDF
# ---------------------------------------------------------------------------
def render_engagement_pdf(
    engagement: Engagement, simulations: list[Simulation]
) -> bytes:
    """Render an engagement and its simulations as a PDF document.

    Builds the HTML page with ``_render_engagement_html`` and converts it with
    WeasyPrint.

    Args:
        engagement: Engagement to describe.
        simulations: Simulations to list in the table.

    Returns:
        The value returned by WeasyPrint's ``write_pdf()`` (the PDF bytes).
    """
    # Imported inside the function, so WeasyPrint is only loaded when a PDF
    # is actually requested.
    from weasyprint import HTML

    document = HTML(string=_render_engagement_html(engagement, simulations))
    return document.write_pdf()