392 lines
13 KiB
Python
392 lines
13 KiB
Python
|
|
"""Blue-side evidence storage service (M7).
|
||
|
|
|
||
|
|
Files live under `${EVIDENCE_DIR}/<mission_id>/<test_id>/<sha256><ext>`.
|
||
|
|
The path is content-addressed: re-uploading byte-identical content into the
|
||
|
|
same test reuses the existing file on disk and inserts a fresh row (so we
|
||
|
|
keep history of who uploaded what without duplicating bytes).
|
||
|
|
|
||
|
|
The upload pipeline streams to a tmpfile inside the same per-test directory
|
||
|
|
(`atomic move` semantics on POSIX), computing the SHA256 chunk-by-chunk and
|
||
|
|
aborting when the byte count crosses `MAX_BYTES`. We refuse files whose
|
||
|
|
extension is not in the whitelist; MIME is also validated but with a more
|
||
|
|
permissive fallback (browsers and `file(1)` disagree on `.evtx`).
|
||
|
|
|
||
|
|
Soft delete only flips `deleted_at`. The bytes are kept on disk so a future
|
||
|
|
admin `/admin/purge` (M12) can remove them physically. Until then, the path
|
||
|
|
is still queryable but the API hides it from non-admins.
|
||
|
|
"""
|
||
|
|
|
||
|
|
from __future__ import annotations
|
||
|
|
|
||
|
|
import hashlib
|
||
|
|
import logging
|
||
|
|
import os
|
||
|
|
import re
|
||
|
|
import tempfile
|
||
|
|
import uuid
|
||
|
|
from datetime import datetime, timezone
|
||
|
|
from pathlib import Path
|
||
|
|
from typing import BinaryIO
|
||
|
|
|
||
|
|
from app.core.config import settings
|
||
|
|
from app.db.session import session_scope
|
||
|
|
from app.models.auth import User
|
||
|
|
from app.models.evidence import EvidenceFile
|
||
|
|
from app.models.mission import MissionScenario, MissionTest
|
||
|
|
from app.services.mission_tests import (
|
||
|
|
EvidenceView,
|
||
|
|
_ensure_mission_visible,
|
||
|
|
_load_test,
|
||
|
|
_to_evidence_view,
|
||
|
|
_touch,
|
||
|
|
)
|
||
|
|
|
||
|
|
log = logging.getLogger("metamorph.evidence")
|
||
|
|
|
||
|
|
|
||
|
|
# --------------------------------------------------------------------------- #
|
||
|
|
# Validation rules
|
||
|
|
# --------------------------------------------------------------------------- #
|
||
|
|
|
||
|
|
|
||
|
|
# Hard cap on a single upload: 25 MB per spec §M7 (26_214_400 bytes).
MAX_BYTES: int = 25 * (1 << 20)
|
||
|
|
|
||
|
|
# Whitelist of filename extensions accepted at the upload boundary. Entries
# are lowercase and carry the leading dot; the extension sniffed from the
# original filename is lowercased before it is compared against this set.
ALLOWED_EXTS: frozenset[str] = frozenset(
    (
        # images / documents
        ".png",
        ".jpg",
        ".jpeg",
        ".pdf",
        # text-like artefacts
        ".txt",
        ".log",
        ".json",
        ".csv",
        # binary artefacts
        ".evtx",
        ".zip",
    )
)
|
||
|
|
|
||
|
|
# Accept a permissive MIME set so common browser/OS combos clear validation.
# `.evtx` is canonically `application/octet-stream`; some Windows clients
# label csv uploads with an Excel type (`application/vnd.ms-excel`, or the
# legacy `application/x-msexcel`), so both are listed here. We trust the
# extension first and use the MIME as a secondary signal.
ALLOWED_MIMES: frozenset[str] = frozenset(
    {
        "image/png",
        "image/jpeg",
        "image/jpg",
        "application/pdf",
        "text/plain",
        "text/csv",
        "application/csv",
        # Excel-flavoured labels that Windows clients attach to .csv files.
        "application/vnd.ms-excel",
        "application/x-msexcel",
        "application/json",
        "application/octet-stream",
        "application/zip",
        "application/x-zip-compressed",
    }
)
|
||
|
|
|
||
|
|
|
||
|
|
# --------------------------------------------------------------------------- #
|
||
|
|
# Exceptions
|
||
|
|
# --------------------------------------------------------------------------- #
|
||
|
|
|
||
|
|
|
||
|
|
class EvidenceNotFound(Exception):
    """Raised when an evidence row is absent, soft-deleted, or hidden from the viewer.

    The cases are deliberately indistinguishable to the caller.
    """
|
||
|
|
|
||
|
|
|
||
|
|
class EvidenceValidationError(Exception):
    """Extension/MIME/size invalid at the upload boundary.

    Attributes:
        code: Short machine-readable reason chosen by the raiser (this module
            raises e.g. ``"missing_extension"`` and ``"too_large"``).
        message: Human-readable explanation; identical to ``str(exc)``.
    """

    def __init__(self, code: str, message: str) -> None:
        # Only `message` is forwarded, so `str(exc)` and `exc.args` stay
        # exactly `message` / `(message,)`.
        super().__init__(message)
        self.code = code
        self.message = message

    def __reduce__(self):
        # The default BaseException reduction rebuilds the object from
        # `self.args` (just the message), which raises TypeError against this
        # two-argument constructor. Spell out both arguments so copy/pickle
        # round-trips work, and carry the instance dict along as state.
        return (type(self), (self.code, self.message), self.__dict__)
|
||
|
|
|
||
|
|
|
||
|
|
class EvidenceStorageError(Exception):
    """Raised for disk I/O failures while storing an upload.

    Any bytes already written are cleaned up on a best-effort basis only.
    """
|
||
|
|
|
||
|
|
|
||
|
|
# --------------------------------------------------------------------------- #
|
||
|
|
# Helpers
|
||
|
|
# --------------------------------------------------------------------------- #
|
||
|
|
|
||
|
|
|
||
|
|
def _evidence_dir() -> Path:
    """Return `settings.EVIDENCE_DIR` as an absolute, resolved `Path`."""
    configured = Path(settings.EVIDENCE_DIR)
    return configured.resolve()
|
||
|
|
|
||
|
|
|
||
|
|
def _test_dir(mission_id: uuid.UUID, test_id: uuid.UUID) -> Path:
    """Build the per-test storage directory path (nothing is created on disk).

    Returns:
        `<evidence root>/<mission_id>/<test_id>`.

    Raises:
        EvidenceStorageError: the resolved evidence root is `/` or equals its
            own anchor (i.e. it is a filesystem root).
    """
    base = _evidence_dir()
    # Guard against a misconfigured EVIDENCE_DIR=/ — without this check the
    # per-mission directories would be created directly under the root.
    forbidden_roots = (Path("/"), Path(base.anchor))
    if any(base == candidate for candidate in forbidden_roots):
        raise EvidenceStorageError("EVIDENCE_DIR cannot be a filesystem root")
    return base.joinpath(str(mission_id), str(test_id))
|
||
|
|
|
||
|
|
|
||
|
|
def _sniff_ext(filename: str) -> str:
    """Return the lowercased extension including the leading dot, or '' if none.

    Only the last path component is inspected; both `/` and `\\` are treated
    as separators so a dot inside a directory name is never mistaken for an
    extension. A name that merely ends with a dot (``"report."``) has no
    extension and yields ''.
    """
    # Drop any directory part, POSIX- or Windows-style.
    name = filename.rsplit("/", 1)[-1].rsplit("\\", 1)[-1]
    _, dot, tail = name.rpartition(".")
    # `dot` is empty when the name has no dot at all; `tail` is empty when
    # the dot is the final character. Neither case is a real extension.
    if not dot or not tail:
        return ""
    return "." + tail.lower()
|
||
|
|
|
||
|
|
|
||
|
|
def _validate_meta(filename: str, mime: str) -> str:
    """Validate the upload's filename extension and MIME type.

    Returns:
        The accepted extension (lowercase, with the leading dot).

    Raises:
        EvidenceValidationError: with code `missing_extension`,
            `unsupported_extension` or `unsupported_mime`.
    """
    suffix = _sniff_ext(filename)
    if suffix == "":
        raise EvidenceValidationError(
            "missing_extension", "filename must have an extension"
        )
    if suffix not in ALLOWED_EXTS:
        raise EvidenceValidationError(
            "unsupported_extension", f"extension {suffix!r} is not allowed"
        )

    # An empty/None MIME falls back to the generic binary type; parameters
    # such as "; charset=utf-8" are discarded before the whitelist lookup.
    raw_mime = mime if mime else "application/octet-stream"
    normalised_mime = raw_mime.lower().partition(";")[0].strip()
    if normalised_mime not in ALLOWED_MIMES:
        raise EvidenceValidationError(
            "unsupported_mime", f"mime {normalised_mime!r} is not allowed"
        )
    return suffix
|
||
|
|
|
||
|
|
|
||
|
|
def _stream_to_tmpfile(
    src: BinaryIO, target_dir: Path
) -> tuple[Path, str, int]:
    """Stream the upload into a tmpfile under `target_dir`, capping size.

    `target_dir` is created if missing. The data is read in 64 KiB chunks;
    each chunk is hashed and written as it arrives, so the whole upload is
    never held in memory.

    Returns:
        (tmp_path, sha256_hex, total_bytes).

    Raises:
        EvidenceValidationError: code `too_large`, raised as soon as the
            cumulative count goes above `MAX_BYTES` (the offending chunk is
            not written).
        OSError: directory creation, tmpfile creation, or a write failed.

    The tmpfile is *always* removed on error — including interruption via
    `BaseException` subclasses such as `KeyboardInterrupt` or task
    cancellation, which a plain `except Exception` would let slip through.
    """
    target_dir.mkdir(parents=True, exist_ok=True)
    fd, tmp_name = tempfile.mkstemp(prefix=".upload-", dir=str(target_dir))
    tmp_path = Path(tmp_name)
    hasher = hashlib.sha256()
    total = 0
    try:
        with os.fdopen(fd, "wb") as fh:
            while chunk := src.read(64 * 1024):
                total += len(chunk)
                if total > MAX_BYTES:
                    raise EvidenceValidationError(
                        "too_large",
                        f"file exceeds the {MAX_BYTES} byte limit",
                    )
                hasher.update(chunk)
                fh.write(chunk)
        # Reached only after the `with` has closed (and flushed) the file.
        return tmp_path, hasher.hexdigest(), total
    except BaseException:
        # Best-effort cleanup; never mask the original error with an unlink
        # failure.
        try:
            tmp_path.unlink(missing_ok=True)
        except OSError:
            pass
        raise
|
||
|
|
|
||
|
|
|
||
|
|
# --------------------------------------------------------------------------- #
|
||
|
|
# Public API
|
||
|
|
# --------------------------------------------------------------------------- #
|
||
|
|
|
||
|
|
|
||
|
|
def add_evidence(
    mission_id: uuid.UUID,
    test_id: uuid.UUID,
    *,
    file_stream: BinaryIO,
    original_filename: str,
    mime: str,
    viewer_id: uuid.UUID,
    viewer_is_admin: bool,
) -> EvidenceView:
    """Persist the upload and return a view of the new evidence row.

    Pre-conditions:
      - The caller already verified that the viewer holds `mission.write_blue_fields`.
      - Mission + test visibility is enforced here (404, not 403).

    Disk layout:
      ${EVIDENCE_DIR}/<mission_id>/<test_id>/<sha256><ext>

    Steps: validate filename/MIME, check visibility, stream the bytes to a
    tmpfile next to the destination, move it to its content-addressed name
    (or reuse an identical file already there), then insert the row.

    Raises:
        EvidenceValidationError: bad extension/MIME, or the stream exceeded
            `MAX_BYTES`.
        EvidenceStorageError: any `OSError` while staging or moving the file
            (both the streaming step and the final rename), a malformed
            digest, or a filesystem-root `EVIDENCE_DIR`.
    """
    ext = _validate_meta(original_filename, mime)
    target_dir = _test_dir(mission_id, test_id)

    # Visibility/existence check BEFORE we touch disk.
    with session_scope() as s:
        _ensure_mission_visible(s, mission_id, viewer_id, viewer_is_admin)
        _load_test(s, mission_id, test_id)  # raises MissionTestNotFound on miss

    # Disk failures while staging (mkdir/mkstemp/write, e.g. a full volume)
    # are reported the same way as a failed rename below, so callers only
    # need to handle EvidenceStorageError. The helper has already removed its
    # tmpfile by the time the error reaches us.
    try:
        tmp_path, sha256, size_bytes = _stream_to_tmpfile(file_stream, target_dir)
    except OSError as e:
        log.warning(
            "metamorph.evidence.storage_failed",
            extra={"mission_id": str(mission_id), "test_id": str(test_id), "error": str(e)},
        )
        raise EvidenceStorageError(str(e)) from e

    # Defence in depth — the hash comes from hashlib but if any caller ever
    # passes pre-computed bytes we want to fail loudly rather than write to a
    # path like `..something.evtx`.
    if not re.fullmatch(r"[0-9a-f]{64}", sha256):
        tmp_path.unlink(missing_ok=True)
        raise EvidenceStorageError("computed sha256 is malformed")

    final_path = target_dir / f"{sha256}{ext}"
    try:
        if final_path.exists():
            # Same bytes already on disk — drop the tmp and reuse the canonical path.
            tmp_path.unlink(missing_ok=True)
        else:
            # `os.replace` is the atomic rename primitive on POSIX (and the
            # documented atomic rename on Windows when src/dst live on the
            # same volume). We stage the tmpfile in `target_dir` so it
            # always shares a filesystem with the destination.
            os.replace(str(tmp_path), str(final_path))
    except OSError as e:
        try:
            tmp_path.unlink(missing_ok=True)
        except OSError:
            pass
        log.warning(
            "metamorph.evidence.storage_failed",
            extra={"mission_id": str(mission_id), "test_id": str(test_id), "error": str(e)},
        )
        raise EvidenceStorageError(str(e)) from e

    with session_scope() as s:
        # Re-load + double-check visibility (defence in depth: the membership
        # set could have changed between the pre-check and now).
        _ensure_mission_visible(s, mission_id, viewer_id, viewer_is_admin)
        test = _load_test(s, mission_id, test_id)
        ev = EvidenceFile(
            mission_test_id=test.id,
            sha256=sha256,
            # Same normalisation as the validation step: lowercase, no parameters.
            mime=(mime or "application/octet-stream").lower().split(";", 1)[0].strip(),
            size_bytes=size_bytes,
            storage_path=str(final_path),
            original_filename=original_filename[:255],
            uploaded_by_user_id=viewer_id,
            uploaded_at=datetime.now(tz=timezone.utc),
        )
        s.add(ev)
        _touch(test, viewer_id)
        # Flush + refresh so the row's generated fields (e.g. `ev.id`) are
        # populated before they are logged and turned into a view.
        s.flush()
        s.refresh(ev)
        uploader = s.get(User, viewer_id)
        log.info(
            "metamorph.evidence.added",
            extra={
                "evidence_id": str(ev.id),
                "mission_id": str(mission_id),
                "test_id": str(test_id),
                "sha256": sha256,
                "size_bytes": size_bytes,
                "mime": ev.mime,
            },
        )
        return _to_evidence_view(ev, uploader)
|
||
|
|
|
||
|
|
|
||
|
|
def _resolve_evidence_chain(
    s, evidence_id: uuid.UUID
) -> tuple[EvidenceFile, MissionTest, MissionScenario] | None:
    """Follow evidence → test → scenario through session `s`.

    Returns:
        `(evidence, test, scenario)` when all three rows exist and none has
        `deleted_at` set; otherwise `None`.
    """

    def _is_live(row) -> bool:
        # A usable link is a row that was found and has not been soft-deleted.
        return row is not None and row.deleted_at is None

    evidence = s.get(EvidenceFile, evidence_id)
    if not _is_live(evidence):
        return None

    owning_test = s.get(MissionTest, evidence.mission_test_id)
    if not _is_live(owning_test):
        return None

    owning_scenario = s.get(MissionScenario, owning_test.scenario_id)
    if not _is_live(owning_scenario):
        return None

    return evidence, owning_test, owning_scenario
|
||
|
|
|
||
|
|
|
||
|
|
def get_evidence(
    evidence_id: uuid.UUID,
    *,
    viewer_id: uuid.UUID,
    viewer_is_admin: bool,
) -> EvidenceView:
    """Fetch one evidence record as a view, honouring mission membership.

    Raises:
        EvidenceNotFound: the evidence chain cannot be resolved, or the
            mission visibility check raised (reported as "not found" rather
            than "forbidden").
    """
    with session_scope() as s:
        resolved = _resolve_evidence_chain(s, evidence_id)
        if resolved is None:
            raise EvidenceNotFound()
        record, _test, scenario = resolved

        # NOTE(review): every exception from the visibility check — not just
        # a membership failure — is turned into EvidenceNotFound here.
        try:
            _ensure_mission_visible(s, scenario.mission_id, viewer_id, viewer_is_admin)
        except Exception as e:
            raise EvidenceNotFound() from e

        if record.uploaded_by_user_id:
            uploader = s.get(User, record.uploaded_by_user_id)
        else:
            uploader = None
        return _to_evidence_view(record, uploader)
|
||
|
|
|
||
|
|
|
||
|
|
def get_evidence_for_download(
    evidence_id: uuid.UUID,
    *,
    viewer_id: uuid.UUID,
    viewer_is_admin: bool,
) -> tuple[EvidenceView, Path]:
    """Return the evidence view together with the path of its bytes on disk.

    Raises:
        EvidenceNotFound: the evidence chain cannot be resolved, the mission
            visibility check raised, or the stored path no longer exists
            (the last case is also logged as a warning).
    """
    with session_scope() as s:
        resolved = _resolve_evidence_chain(s, evidence_id)
        if resolved is None:
            raise EvidenceNotFound()
        record, _test, scenario = resolved

        # NOTE(review): every exception from the visibility check — not just
        # a membership failure — is turned into EvidenceNotFound here.
        try:
            _ensure_mission_visible(s, scenario.mission_id, viewer_id, viewer_is_admin)
        except Exception as e:
            raise EvidenceNotFound() from e

        if record.uploaded_by_user_id:
            uploader = s.get(User, record.uploaded_by_user_id)
        else:
            uploader = None
        view = _to_evidence_view(record, uploader)

        on_disk = Path(record.storage_path)
        if on_disk.exists():
            return view, on_disk

        # The row is live but its bytes are gone from disk.
        log.warning(
            "metamorph.evidence.bytes_missing",
            extra={"evidence_id": str(evidence_id), "path": str(on_disk)},
        )
        raise EvidenceNotFound()
|
||
|
|
|
||
|
|
|
||
|
|
def soft_delete_evidence(
    evidence_id: uuid.UUID,
    *,
    viewer_id: uuid.UUID,
    viewer_is_admin: bool,
) -> None:
    """Soft-delete an evidence row by stamping `deleted_at` with the current UTC time.

    Nothing on disk is touched here; the stored bytes stay until the admin
    purge (M12).

    Raises:
        EvidenceNotFound: the evidence chain cannot be resolved, or the
            mission visibility check raised.
    """
    with session_scope() as s:
        resolved = _resolve_evidence_chain(s, evidence_id)
        if resolved is None:
            raise EvidenceNotFound()
        record, owning_test, scenario = resolved

        # NOTE(review): every exception from the visibility check — not just
        # a membership failure — is turned into EvidenceNotFound here.
        try:
            _ensure_mission_visible(s, scenario.mission_id, viewer_id, viewer_is_admin)
        except Exception as e:
            raise EvidenceNotFound() from e

        record.deleted_at = datetime.now(tz=timezone.utc)
        _touch(owning_test, viewer_id)
        s.flush()

        log_fields = {
            "evidence_id": str(evidence_id),
            "mission_id": str(scenario.mission_id),
        }
        log.info("metamorph.evidence.soft_deleted", extra=log_fields)
|
||
|
|
|
||
|
|
|
||
|
|
# Public surface of this module, in the order: validation constants,
# exception types, service entry points.
__all__ = [
    "MAX_BYTES", "ALLOWED_EXTS", "ALLOWED_MIMES",
    "EvidenceNotFound", "EvidenceValidationError", "EvidenceStorageError",
    "add_evidence", "get_evidence", "get_evidence_for_download",
    "soft_delete_evidence",
]
|