Milestone 3

2026-05-11 06:05:27 +02:00
commit 4c25e198fc
125 changed files with 13489 additions and 0 deletions
--- a/backend/app/core/__init__.py
+++ b/backend/app/core/__init__.py
--- a/backend/app/core/auth_decorators.py
+++ b/backend/app/core/auth_decorators.py
@@ -0,0 +1,139 @@
+"""Flask decorators for authentication + authorization.
+
+Usage:
+    @bp.get("/whatever")
+    @require_auth                        # populates g.current_user
+    def whatever():
+        return jsonify(...)
+
+    @bp.post("/admin/users")
+    @require_auth
+    @require_perm("user.create")         # checks the user's effective perms
+    def create_user():
+        ...
+
+`g.current_user` is a small `AuthenticatedUser` snapshot — no live ORM session.
+"""
+
+from __future__ import annotations
+
+import logging
+import uuid
+from dataclasses import dataclass, field
+from functools import wraps
+from typing import Callable
+
+import jwt
+from flask import abort, g, request
+from sqlalchemy import select
+
+from app.core.jwt_tokens import decode_token
+from app.db.session import session_scope
+from app.models.auth import Permission, User
+from app.services.bootstrap import ADMIN_GROUP_NAME
+
+# Module logger; `require_perm` reports denied permission checks through it.
+log = logging.getLogger("metamorph.auth")
+
+
+@dataclass(frozen=True)
+class AuthenticatedUser:
+    """Immutable snapshot of the authenticated user, stored on `g.current_user`.
+
+    Holds plain values only (no ORM objects), as populated by
+    `_load_authenticated_user`.
+    """
+
+    id: uuid.UUID
+    email: str
+    locale: str
+    display_name: str | None
+    # True when one of the user's non-deleted groups is named ADMIN_GROUP_NAME.
+    is_admin: bool
+    # Permission codes gathered from the user's non-deleted groups.
+    permissions: frozenset[str] = field(default_factory=frozenset)
+    # Names of the user's non-deleted groups.
+    group_names: frozenset[str] = field(default_factory=frozenset)
+
+
+def _load_authenticated_user(user_id: uuid.UUID) -> AuthenticatedUser | None:
+    """Build an `AuthenticatedUser` snapshot for `user_id`.
+
+    Returns None when the user does not exist, is soft-deleted
+    (`deleted_at` set) or is not active. Soft-deleted groups contribute
+    neither their name nor their permissions.
+    """
+    with session_scope() as db:
+        account = db.get(User, user_id)
+        if account is None:
+            return None
+        if account.deleted_at is not None or not account.is_active:
+            return None
+
+        names: set[str] = set()
+        codes: set[str] = set()
+        for grp in account.groups:
+            if grp.deleted_at is None:
+                names.add(grp.name)
+                codes.update(perm.code for perm in grp.permissions)
+
+        # Copy everything into plain values while the session is still open.
+        return AuthenticatedUser(
+            id=account.id,
+            email=account.email,
+            locale=account.locale,
+            display_name=account.display_name,
+            is_admin=ADMIN_GROUP_NAME in names,
+            permissions=frozenset(codes),
+            group_names=frozenset(names),
+        )
+
+
+def _extract_bearer() -> str | None:
+    """Return the token from an `Authorization: Bearer <token>` header.
+
+    The scheme is matched case-insensitively. Returns None when the header
+    is absent, uses another scheme, or carries an empty token.
+    """
+    header = request.headers.get("Authorization", "")
+    if header.lower().startswith("bearer "):
+        credential = header[len("bearer "):].strip()
+        if credential:
+            return credential
+    return None
+
+
+def _authenticate_bearer() -> AuthenticatedUser:
+    """Resolve the request's bearer token to a user snapshot, or abort with 401."""
+    bearer = _extract_bearer()
+    if bearer is None:
+        abort(401, description="missing bearer token")
+
+    try:
+        claims = decode_token(bearer, expected_type="access")
+    except jwt.ExpiredSignatureError:
+        # Must come first: it is a subclass of PyJWTError.
+        abort(401, description="access token expired")
+    except jwt.PyJWTError:
+        abort(401, description="invalid access token")
+
+    try:
+        subject = uuid.UUID(claims.sub)
+    except ValueError:
+        abort(401, description="malformed subject")
+
+    current = _load_authenticated_user(subject)
+    if current is None:
+        abort(401, description="user no longer active")
+    return current
+
+
+def require_auth(fn: Callable):
+    """Decorator: authenticate the request and populate `g.current_user`.
+
+    Aborts with 401 when the bearer token is missing, expired, invalid,
+    carries a subject that is not a UUID, or refers to a user that can no
+    longer be loaded.
+    """
+
+    @wraps(fn)
+    def wrapper(*args, **kwargs):
+        g.current_user = _authenticate_bearer()
+        return fn(*args, **kwargs)
+
+    return wrapper
+
+
+def require_perm(*codes: str):
+    """Decorator factory: require at least one of the given permission codes.
+
+    Users whose snapshot has `is_admin` set bypass the check. Aborts with
+    401 when `g.current_user` is not set and with 403 (after logging the
+    denial) when none of `codes` is held.
+    """
+
+    def decorator(fn: Callable):
+        @wraps(fn)
+        def wrapper(*args, **kwargs):
+            current: AuthenticatedUser | None = getattr(g, "current_user", None)
+            if current is None:
+                abort(401, description="not authenticated")
+
+            allowed = current.is_admin or not current.permissions.isdisjoint(codes)
+            if not allowed:
+                denial = {
+                    "user_id": str(current.id),
+                    "required": list(codes),
+                    "had": sorted(current.permissions),
+                }
+                log.info("metamorph.auth.permission_denied", extra=denial)
+                abort(403, description="insufficient permissions")
+            return fn(*args, **kwargs)
+
+        return wrapper
+
+    return decorator
+
+
+def fetch_all_permissions() -> list[str]:
+    """Return every `Permission.code`, ordered by code (debugging / admin UI helper)."""
+    query = select(Permission.code).order_by(Permission.code)
+    with session_scope() as db:
+        codes = db.scalars(query).all()
+        return list(codes)
--- a/backend/app/core/config.py
+++ b/backend/app/core/config.py
@@ -0,0 +1,76 @@
+"""Runtime configuration loaded from environment variables."""
+
+from __future__ import annotations
+
+from typing import Literal
+
+from pydantic import Field, model_validator
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+# Sentinel values that .env.example ships with. If the runtime is configured
+# in a non-dev environment with one of these still in place, we refuse to boot.
+_DEV_JWT_SECRET = "change-me-to-a-long-random-string"
+_DEV_DB_PASSWORD = "change-me-strong"
+
+
+class Settings(BaseSettings):
+    """Runtime settings read from the environment and an optional `.env` file.
+
+    Variable names are case-sensitive and unknown variables are ignored.
+    Outside the `dev` and `test` environments, construction fails when
+    `JWT_SECRET` or `POSTGRES_PASSWORD` is missing or still a placeholder.
+    """
+
+    model_config = SettingsConfigDict(
+        env_file=".env",
+        env_file_encoding="utf-8",
+        case_sensitive=True,
+        extra="ignore",
+    )
+
+    # === Runtime mode ===
+    # Set to "dev" to allow the default placeholder secrets. Anything else
+    # (e.g. "prod", "staging") forces strong values.
+    APP_ENV: Literal["dev", "prod", "staging", "test"] = "prod"
+
+    # === Postgres ===
+    POSTGRES_DB: str = "metamorph"
+    POSTGRES_USER: str = "metamorph"
+    POSTGRES_PASSWORD: str = ""
+    POSTGRES_HOST: str = "db"
+    POSTGRES_PORT: int = 5432
+
+    # === API ===
+    JWT_SECRET: str = Field(default="", min_length=0)
+    LOG_LEVEL: str = "INFO"
+    FRONT_ORIGIN: str = "http://localhost:8080"
+    EVIDENCE_DIR: str = "/data/evidence"
+
+    @property
+    def cors_origins(self) -> list[str]:
+        """Return `FRONT_ORIGIN` split on commas, stripped, with empty entries dropped."""
+        return [o.strip() for o in self.FRONT_ORIGIN.split(",") if o.strip()]
+
+    @property
+    def database_url(self) -> str:
+        """SQLAlchemy URL using the psycopg3 driver.
+
+        The user name and password are percent-encoded so that reserved
+        characters in a credential (`@`, `:`, `/`, `%`, `#`, ...) cannot be
+        mistaken for URL delimiters. Credentials made only of unreserved
+        characters produce exactly the same URL as before.
+        """
+        # Local import: keeps this fix self-contained within the class.
+        from urllib.parse import quote
+
+        # safe="" also encodes "/", which `quote` leaves untouched by default.
+        user = quote(self.POSTGRES_USER, safe="")
+        password = quote(self.POSTGRES_PASSWORD, safe="")
+        return (
+            f"postgresql+psycopg://{user}:{password}"
+            f"@{self.POSTGRES_HOST}:{self.POSTGRES_PORT}/{self.POSTGRES_DB}"
+        )
+
+    @model_validator(mode="after")
+    def _enforce_secret_strength(self) -> "Settings":
+        """Refuse to boot in prod/staging if secrets are missing or default.
+
+        `dev` and `test` are explicitly exempted so workstations and the
+        ephemeral test container don't need real secrets.
+
+        Raises:
+            ValueError: `JWT_SECRET` is empty, equal to the shipped
+                placeholder or shorter than 32 characters, or
+                `POSTGRES_PASSWORD` is empty or equal to its placeholder.
+        """
+        if self.APP_ENV in ("dev", "test"):
+            return self
+        if not self.JWT_SECRET or self.JWT_SECRET == _DEV_JWT_SECRET or len(self.JWT_SECRET) < 32:
+            raise ValueError(
+                "JWT_SECRET is missing, default, or shorter than 32 chars. "
+                "Set APP_ENV=dev to bypass for local development."
+            )
+        if not self.POSTGRES_PASSWORD or self.POSTGRES_PASSWORD == _DEV_DB_PASSWORD:
+            raise ValueError(
+                "POSTGRES_PASSWORD is missing or default. "
+                "Set APP_ENV=dev to bypass for local development."
+            )
+        return self
+
+
+# Module-level singleton built at import time, so a failure in
+# `Settings._enforce_secret_strength` surfaces as soon as this module is imported.
+settings = Settings()
--- a/backend/app/core/install_token.py
+++ b/backend/app/core/install_token.py
@@ -0,0 +1,147 @@
+"""First-admin install token.
+
+When the `users` table is empty at boot, we mint a one-shot opaque token,
+store its SHA-256 under the `hash` field of `settings(key='install_token')`,
+and log the raw token to stdout. The operator copies it from the logs and
+posts it to `/api/v1/setup` with the desired admin credentials.
+
+Idempotency: as long as the token row exists, subsequent boots keep the
+stored record and mint nothing new. Only the hash is persisted, so the raw
+token cannot be re-emitted; a replacement token is minted only when
+explicitly requested via `flask metamorph print-install-token`.
+"""
+
+from __future__ import annotations
+
+import logging
+from datetime import datetime, timedelta, timezone
+
+from sqlalchemy import select
+
+from app.core.security import generate_opaque_token, hash_opaque_token
+from app.db.session import session_scope
+from app.models.auth import User
+from app.models.setting import Setting
+
+# Key of the `Setting` row that holds the install-token payload.
+INSTALL_TOKEN_KEY = "install_token"
+# Logger used by `log_install_token_banner`.
+log = logging.getLogger("metamorph.bootstrap")
+
+# Setting JSONB shape: {"hash": "<sha256>", "issued_at": ISO, "expires_at": ISO|null, "consumed_at": ISO|null}
+
+
+def _users_exist() -> bool:
+    """Return True when the `User` table holds at least one row."""
+    probe = select(User.id).limit(1)
+    with session_scope() as db:
+        first_row = db.execute(probe).first()
+        return first_row is not None
+
+
+def _read_setting() -> Setting | None:
+    """Fetch the install-token `Setting` row, or None when it does not exist."""
+    # NOTE(review): the row is handed back after the session scope has ended
+    # and callers then read `.value` — confirm `session_scope` keeps loaded
+    # attributes readable at that point.
+    with session_scope() as db:
+        row = db.get(Setting, INSTALL_TOKEN_KEY)
+    return row
+
+
+def _write_setting(payload: dict) -> None:
+    """Store `payload` as the install-token setting, creating the row if needed."""
+    with session_scope() as db:
+        row = db.get(Setting, INSTALL_TOKEN_KEY)
+        if row is not None:
+            row.value = payload
+            return
+        fresh = Setting(
+            key=INSTALL_TOKEN_KEY,
+            value=payload,
+            description="One-shot bootstrap token for the first admin (M2).",
+        )
+        db.add(fresh)
+
+
+def ensure_install_token(*, force: bool = False) -> str | None:
+    """Mint a token if no users exist and no token record is on file.
+
+    Args:
+        force: Skip both guards and always mint a fresh token, overwriting
+            any stored record.
+
+    Returns:
+        The raw token when one was minted (the caller is responsible for
+        logging it), or None when bootstrap is not applicable: users already
+        exist, or a token record (pending or consumed) is already stored.
+    """
+    if not force:
+        if _users_exist():
+            return None
+        # Any stored record blocks minting: a consumed token must not be
+        # issued again, and a pending token's raw value is no longer known
+        # (only its hash is stored). Callers pass force=True to replace it.
+        if _read_setting() is not None:
+            return None
+    # Single minting path shared with `_force_mint`, so the payload shape
+    # is defined in one place.
+    return _force_mint()
+
+
+def regenerate_install_token() -> str:
+    """CLI helper: always mint and persist a fresh token (overwrites any pending one).
+
+    Returns:
+        The raw token; only its hash is stored, so this is the only chance
+        to show it to the operator.
+    """
+    # Mint directly: the forced `ensure_install_token` path always returned a
+    # token, so the former `or _force_mint()` fallback could never run.
+    return _force_mint()
+
+
+def _force_mint() -> str:
+    """Generate a new token, persist its hash unconditionally, and return the raw token."""
+    raw = generate_opaque_token()
+    record = {
+        "hash": hash_opaque_token(raw),
+        "issued_at": datetime.now(tz=timezone.utc).isoformat(),
+        "expires_at": None,
+        "consumed_at": None,
+    }
+    _write_setting(record)
+    return raw
+
+
+def verify_install_token(token: str) -> bool:
+    """Check `token` against the stored hash using a constant-time comparison.
+
+    Returns False when no record exists, the record is empty, the token has
+    already been consumed, or no hash is stored.
+    """
+    import hmac
+
+    setting = _read_setting()
+    record = setting.value if setting is not None else None
+    if not record:
+        return False
+    if record.get("consumed_at"):
+        return False
+    stored_hash = record.get("hash")
+    if not stored_hash:
+        return False
+    candidate_hash = hash_opaque_token(token)
+    return hmac.compare_digest(candidate_hash, stored_hash)
+
+
+def mark_install_token_consumed() -> None:
+    """Stamp the stored record with the current UTC time as `consumed_at`.
+
+    Does nothing when no install-token record exists.
+    """
+    setting = _read_setting()
+    if setting is not None:
+        # Work on a copy so the value read from the row is not mutated in place.
+        updated = dict(setting.value or {})
+        updated.update(consumed_at=datetime.now(tz=timezone.utc).isoformat())
+        _write_setting(updated)
+
+
+def log_install_token_banner(raw_token: str) -> None:
+    """Announce a freshly minted install token in the logs and on stdout.
+
+    Emits one structured warning record carrying the token in `extra`, then
+    prints a plain three-line banner so the token stays grep-friendly even
+    if the JSON consumer hides `extra` fields.
+    """
+    rule = "=" * 72
+    instructions = (
+        "BOOTSTRAP — copy the token below and POST it to /api/v1/setup "
+        "with your desired admin email + password. Save it: it is logged once."
+    )
+    fields = {
+        "banner": rule,
+        "message_template": instructions,
+        "install_token": raw_token,
+    }
+    log.warning("metamorph.install_token.minted", extra=fields)
+
+    for line in (rule, f"INSTALL TOKEN: {raw_token}", rule):
+        print(line, flush=True)  # noqa: T201
--- a/backend/app/core/jwt_tokens.py
+++ b/backend/app/core/jwt_tokens.py
@@ -0,0 +1,97 @@
+"""JWT encoding / decoding.
+
+Two token types:
+- `access`  — short-lived (1 h), in `Authorization: Bearer ...` headers, kept
+              client-side **in memory** only (cf. spec §M2).
+- `refresh` — long-lived (30 d), in an HTTPOnly Secure SameSite=Strict cookie
+              scoped to `/api/v1/auth/`. Rotated on every successful refresh,
+              old `jti` revoked.
+
+We sign HS256 with `settings.JWT_SECRET`. The `jti` claim links each token to
+its DB row in `refresh_tokens` for revocation; access tokens are stateless.
+"""
+
+from __future__ import annotations
+
+import secrets
+import uuid
+from dataclasses import dataclass
+from datetime import datetime, timedelta, timezone
+from typing import Literal
+
+import jwt
+
+from app.core.config import settings
+
+# Lifetimes added to `iat` to compute `exp` in `encode_token`.
+ACCESS_TOKEN_TTL = timedelta(hours=1)
+REFRESH_TOKEN_TTL = timedelta(days=30)
+# Signing algorithm and `iss` claim; `decode_token` accepts only these.
+ALGORITHM = "HS256"
+ISSUER = "metamorph"
+
+
+# Allowed values of the `type` claim.
+TokenType = Literal["access", "refresh"]
+
+
+@dataclass(frozen=True)
+class TokenClaims:
+    """Immutable view of the claims this module reads from and writes to a JWT."""
+
+    sub: str  # user id (UUID as string)
+    type: TokenType  # "access" or "refresh"
+    jti: str  # token identifier; generated by `generate_jti` when not supplied
+    iat: datetime  # issued-at, timezone-aware UTC
+    exp: datetime  # expiry, timezone-aware UTC
+
+
+def _now() -> datetime:
+    """Return the current time as a timezone-aware UTC datetime."""
+    return datetime.now(timezone.utc)
+
+
+def generate_jti() -> str:
+    """Return a random URL-safe token identifier built from 16 random bytes (22 chars)."""
+    entropy_bytes = 16
+    return secrets.token_urlsafe(entropy_bytes)
+
+
+def encode_token(
+    user_id: uuid.UUID | str,
+    token_type: TokenType,
+    *,
+    jti: str | None = None,
+) -> tuple[str, TokenClaims]:
+    """Return `(jwt_string, claims)`. `jti` is generated if not provided.
+
+    Args:
+        user_id: Subject of the token; stored as `str(user_id)`.
+        token_type: "access" uses ACCESS_TOKEN_TTL, anything else REFRESH_TOKEN_TTL.
+        jti: Token identifier to embed; a falsy value triggers generation.
+
+    Returns:
+        The signed JWT and the claims it carries. `iat` and `exp` have
+        whole-second resolution, so the returned claims match what
+        `decode_token` yields for the same token.
+    """
+    # The payload stores integer timestamps; truncate up front so the
+    # returned claims do not carry microseconds the token itself lacks.
+    now = _now().replace(microsecond=0)
+    ttl = ACCESS_TOKEN_TTL if token_type == "access" else REFRESH_TOKEN_TTL
+    claims = TokenClaims(
+        sub=str(user_id),
+        type=token_type,
+        jti=jti or generate_jti(),
+        iat=now,
+        exp=now + ttl,
+    )
+    payload = {
+        "iss": ISSUER,
+        "sub": claims.sub,
+        "type": claims.type,
+        "jti": claims.jti,
+        "iat": int(claims.iat.timestamp()),
+        "exp": int(claims.exp.timestamp()),
+    }
+    return jwt.encode(payload, settings.JWT_SECRET, algorithm=ALGORITHM), claims
+
+
+def decode_token(token: str, *, expected_type: TokenType) -> TokenClaims:
+    """Decode and validate a JWT. Raises `jwt.PyJWTError` on any failure.
+
+    Args:
+        token: Encoded JWT string.
+        expected_type: Value the `type` claim must have.
+
+    Returns:
+        The validated claims, with `iat` and `exp` as timezone-aware UTC datetimes.
+
+    Raises:
+        jwt.PyJWTError: The signature, issuer or expiry check fails, a
+            required claim is missing, the token type differs from
+            `expected_type`, `sub`/`jti` are not strings, or `iat`/`exp`
+            cannot be represented as datetimes.
+    """
+    payload = jwt.decode(
+        token,
+        settings.JWT_SECRET,
+        algorithms=[ALGORITHM],
+        issuer=ISSUER,
+        options={"require": ["sub", "type", "jti", "iat", "exp"]},
+    )
+    if payload["type"] != expected_type:
+        raise jwt.InvalidTokenError(f"expected {expected_type} token, got {payload['type']}")
+
+    sub, jti = payload["sub"], payload["jti"]
+    # Callers treat these as strings (e.g. `uuid.UUID(claims.sub)`); reject
+    # other JSON types here so they fail as an invalid token.
+    if not isinstance(sub, str) or not isinstance(jti, str):
+        raise jwt.InvalidTokenError("sub and jti claims must be strings")
+
+    try:
+        issued_at = datetime.fromtimestamp(payload["iat"], tz=timezone.utc)
+        expires_at = datetime.fromtimestamp(payload["exp"], tz=timezone.utc)
+    except (OverflowError, OSError, ValueError, TypeError) as exc:
+        # Absurd timestamps would otherwise escape as non-JWT exceptions.
+        raise jwt.InvalidTokenError("iat/exp claim out of range") from exc
+
+    return TokenClaims(
+        sub=sub,
+        type=payload["type"],
+        jti=jti,
+        iat=issued_at,
+        exp=expires_at,
+    )
--- a/backend/app/core/logging.py
+++ b/backend/app/core/logging.py
@@ -0,0 +1,34 @@
+"""JSON structured logging on stdout."""
+
+from __future__ import annotations
+
+import logging
+import sys
+
+from pythonjsonlogger import jsonlogger
+
+
+def configure_logging(level: str = "INFO") -> None:
+    """Install a single JSON stdout handler on the root logger.
+
+    Each record is formatted from `asctime`, `levelname`, `name` and
+    `message`; the first three are renamed to `ts`, `level` and `logger`.
+    The root level is set to `level` (case-insensitive). Unless that level
+    is DEBUG, the `werkzeug` logger is raised to WARNING.
+    """
+    level_name = level.upper()
+    root = logging.getLogger()
+    root.setLevel(level_name)
+
+    # Start from a clean slate (uvicorn/gunicorn add their own handlers).
+    for stale in root.handlers[:]:
+        root.removeHandler(stale)
+
+    json_handler = logging.StreamHandler(sys.stdout)
+    json_handler.setFormatter(
+        jsonlogger.JsonFormatter(
+            fmt="%(asctime)s %(levelname)s %(name)s %(message)s",
+            rename_fields={"asctime": "ts", "levelname": "level", "name": "logger"},
+            json_ensure_ascii=False,
+        )
+    )
+    root.addHandler(json_handler)
+
+    # Keep noisy third parties quiet unless explicitly debugging.
+    if level_name != "DEBUG":
+        logging.getLogger("werkzeug").setLevel(logging.WARNING)
--- a/backend/app/core/rate_limit.py
+++ b/backend/app/core/rate_limit.py
@@ -0,0 +1,29 @@
+"""Shared flask-limiter instance.
+
+Anchored on remote address. In-memory backend for v1: each gunicorn worker
+process keeps its own counters, so limits can drift across the pool (that's
+acceptable at this scale). M14 will switch to Redis if it becomes a real concern.
+
+The limiter is enforced in `APP_ENV in ("prod", "staging")` — dev and test
+deployments share an in-memory backend that's noisy across hot-reloads and
+would gate the Playwright e2e suite at 10 req/min/IP. The spec NF-security
+requirement is explicitly a *production* one (cf. tasks/spec.md §6
+NF-security); a staging deployment is exposed to humans so the same limits
+apply there.
+"""
+
+from __future__ import annotations
+
+from flask_limiter import Limiter
+from flask_limiter.util import get_remote_address
+
+from app.core.config import settings
+
+# Shared limiter instance. `enabled` is evaluated once, at import time, from
+# `settings.APP_ENV`; no application-wide default limit is configured here.
+limiter = Limiter(
+    key_func=get_remote_address,  # one bucket per client address
+    default_limits=[],
+    storage_uri="memory://",  # in-process storage
+    headers_enabled=True,
+    strategy="fixed-window",
+    enabled=settings.APP_ENV in ("prod", "staging"),
+)
--- a/backend/app/core/security.py
+++ b/backend/app/core/security.py
@@ -0,0 +1,62 @@
+"""Password hashing and constant-time secret hashing."""
+
+from __future__ import annotations
+
+import hashlib
+import hmac
+import secrets
+
+from argon2 import PasswordHasher
+from argon2.exceptions import VerifyMismatchError
+
+# Argon2id with moderate cost. `time_cost=2`, `memory_cost=64MiB`, `parallelism=2`
+# is well above OWASP minimums while staying snappy on a Debian small VM.
+# Shared hasher used by hash_password, verify_password and needs_rehash.
+_hasher = PasswordHasher(
+    time_cost=2,
+    memory_cost=64 * 1024,  # the 64 MiB quoted above, expressed in KiB
+    parallelism=2,
+    hash_len=32,
+    salt_len=16,
+)
+
+
+def hash_password(plaintext: str) -> str:
+    """Hash `plaintext` with the module's Argon2 hasher and return the encoded hash."""
+    encoded = _hasher.hash(plaintext)
+    return encoded
+
+
+def verify_password(stored_hash: str, plaintext: str) -> bool:
+    """Check `plaintext` against `stored_hash`.
+
+    Never raises: a mismatch, a corrupted hash or unsupported parameters
+    all yield False.
+    """
+    try:
+        outcome = _hasher.verify(stored_hash, plaintext)
+    except VerifyMismatchError:
+        outcome = False
+    except Exception:  # corrupted hash or unsupported parameters
+        outcome = False
+    return outcome
+
+
+def needs_rehash(stored_hash: str) -> bool:
+    """Tell whether `stored_hash` should be recomputed with the current parameters.
+
+    A hash the hasher cannot inspect is reported as needing a rehash.
+    """
+    try:
+        outdated = _hasher.check_needs_rehash(stored_hash)
+    except Exception:
+        outdated = True
+    return outdated
+
+
+# === Opaque-token helpers (refresh tokens, invitation tokens) ===
+#
+# We never store the raw token in DB — only its SHA-256. Comparison uses
+# `hmac.compare_digest` to dodge timing attacks. Tokens are URL-safe base64.
+
+TOKEN_BYTES = 48  # 384 bits of entropy → 64 chars b64url
+
+
+def generate_opaque_token() -> str:
+    """Return a random URL-safe token built from TOKEN_BYTES random bytes."""
+    raw_token = secrets.token_urlsafe(TOKEN_BYTES)
+    return raw_token
+
+
+def hash_opaque_token(token: str) -> str:
+    """Return the hex-encoded SHA-256 digest of `token` encoded as UTF-8."""
+    digest = hashlib.sha256()
+    digest.update(token.encode("utf-8"))
+    return digest.hexdigest()
+
+
+def verify_opaque_token(token: str, stored_hash: str) -> bool:
+    """Compare the hash of `token` with `stored_hash` via `hmac.compare_digest`."""
+    candidate = hash_opaque_token(token)
+    return hmac.compare_digest(candidate, stored_hash)