feat: download - download safeguard

This commit is contained in:
kodi
2026-03-14 13:24:17 +01:00
parent 7e7c2f3958
commit ea337338e3
4 changed files with 248 additions and 36 deletions
+144 -26
View File
@@ -1,10 +1,13 @@
from __future__ import annotations
import os
from io import BytesIO
import time
import zipfile
from dataclasses import dataclass
from datetime import datetime, timezone
from io import BytesIO
from pathlib import Path
from typing import Callable
from backend.app.api.errors import AppError
from backend.app.api.schemas import DeleteResponse, FileInfoResponse, MkdirResponse, RenameResponse, SaveResponse, UploadResponse, ViewResponse
@@ -54,11 +57,37 @@ PDF_CONTENT_TYPES = {
}
@dataclass(frozen=True)
class ZipDownloadPreflightLimits:
    """Immutable thresholds that a zip download must satisfy before it is built.

    Instances are frozen, so one object can safely be shared as a default.
    """

    # Upper bound on the number of counted entries in one request.
    max_items: int = 1000
    # Upper bound on the summed size of all input files: 2 GiB.
    max_total_input_bytes: int = 2 * 1024**3
    # Upper bound on the size of any single input file: 500 MiB.
    max_individual_file_bytes: int = 500 * 1024**2
    # Time budget for the whole preflight scan, in seconds.
    scan_timeout_seconds: float = 10.0
@dataclass
class ZipDownloadPreflightState:
    """Mutable running totals accumulated during one zip download preflight scan."""

    # Number of entries counted so far.
    item_count: int = 0
    # Sum of the sizes, in bytes, of the files recorded so far.
    total_input_bytes: int = 0
# Default preflight limits, built from the dataclass defaults. FileOpsService uses this
# instance when no zip_download_preflight_limits argument is supplied.
ZIP_DOWNLOAD_PREFLIGHT_LIMITS = ZipDownloadPreflightLimits()
class FileOpsService:
def __init__(self, path_guard: PathGuard, filesystem: FilesystemAdapter, history_repository: HistoryRepository | None = None):
def __init__(
    self,
    path_guard: PathGuard,
    filesystem: FilesystemAdapter,
    history_repository: HistoryRepository | None = None,
    zip_download_preflight_limits: ZipDownloadPreflightLimits = ZIP_DOWNLOAD_PREFLIGHT_LIMITS,
    monotonic: Callable[[], float] | None = None,
):
    """Store the service's collaborators and zip preflight configuration.

    Args:
        path_guard: Stored as-is for later use by the service.
        filesystem: Stored as-is for later use by the service.
        history_repository: Optional; stored as-is (may be None).
        zip_download_preflight_limits: Thresholds applied by the zip download
            preflight scan; defaults to the module-level limits.
        monotonic: Clock used to time the preflight scan. When omitted,
            ``time.monotonic`` is used; passing a fake clock makes the
            timeout deterministic in tests.
    """
    clock = monotonic or time.monotonic
    self._monotonic = clock
    self._zip_download_preflight_limits = zip_download_preflight_limits
    self._history_repository = history_repository
    self._filesystem = filesystem
    self._path_guard = path_guard
def mkdir(self, parent_path: str, name: str) -> MkdirResponse:
try:
@@ -673,7 +702,6 @@ class FileOpsService:
def _prepare_zip_download(self, resolved_targets: list) -> dict:
archive_names: set[str] = set()
for resolved_target in resolved_targets:
self._validate_download_target(resolved_target)
archive_name = resolved_target.absolute.name
if archive_name in archive_names:
raise AppError(
@@ -682,6 +710,7 @@ class FileOpsService:
status_code=400,
)
archive_names.add(archive_name)
self._run_zip_download_preflight(resolved_targets)
if len(resolved_targets) == 1 and resolved_targets[0].absolute.is_dir():
download_name = f"{resolved_targets[0].absolute.name}.zip"
@@ -705,37 +734,126 @@ class FileOpsService:
"content_type": "application/zip",
}
def _validate_download_target(self, resolved_target) -> None:
def _run_zip_download_preflight(self, resolved_targets: list) -> None:
    """Check every requested target against the preflight limits before zipping.

    One clock reading and one running-totals object are shared across all
    targets, so the item, byte and time budgets apply to the request as a whole.

    Args:
        resolved_targets: Objects exposing ``absolute`` (a path) and
            ``relative`` (the path reported in error details).

    Raises:
        AppError: Raised by the helpers when a target is invalid or a limit
            is exceeded.
    """
    scan_started = self._monotonic()
    totals = ZipDownloadPreflightState()
    for target in resolved_targets:
        self._ensure_zip_download_preflight_within_timeout(scan_started)
        self._validate_zip_download_root_target(target)
        if not target.absolute.is_file():
            # Directory root: count the directory itself, then everything below it.
            self._increment_zip_download_item_count(
                state=totals,
                entry_relative=target.relative,
            )
            self._scan_zip_download_directory(
                state=totals,
                resolved_target=target,
                started_at=scan_started,
            )
        else:
            self._record_zip_download_file(
                state=totals,
                entry_path=target.absolute,
                entry_relative=target.relative,
            )
# Validates one requested download root: the lexically resolved path is tested with
# is_symlink(), and the resolved absolute path is tested with is_file()/is_dir().
# NOTE(review): this span looks like a rendered diff whose +/- markers and indentation
# were stripped, so pre-change and post-change statements appear interleaved below.
# Presumably the `raise AppError(code="type_conflict", ...)` fragments and the nested
# os.walk symlink scan are the removed lines, and the
# `self._raise_zip_download_preflight_error(...)` calls are their replacements.
# TODO confirm against the committed file before editing this method.
def _validate_zip_download_root_target(self, resolved_target) -> None:
_, _, lexical_source, _ = self._path_guard.resolve_lexical_path(resolved_target.relative)
if lexical_source.is_symlink():
raise AppError(
code="type_conflict",
message="Source must not be a symlink",
status_code=409,
self._raise_zip_download_preflight_error(
reason="symlink_detected",
details={"path": resolved_target.relative},
)
if resolved_target.absolute.is_file():
if resolved_target.absolute.is_file() or resolved_target.absolute.is_dir():
return
if resolved_target.absolute.is_dir():
for root, dirnames, filenames in os.walk(resolved_target.absolute, followlinks=False):
root_path = Path(root)
for name in [*dirnames, *filenames]:
entry = root_path / name
if entry.is_symlink():
raise AppError(
code="type_conflict",
message="Source directory must not contain symlinks",
status_code=409,
details={"path": resolved_target.relative},
)
return
# NOTE(review): the two fragments below appear to be the old and new forms of the
# final "neither file nor directory" rejection — confirm.
raise AppError(
code="type_conflict",
message="Unsupported path type for download",
status_code=409,
self._raise_zip_download_preflight_error(
reason="unsupported_path_type",
details={"path": resolved_target.relative},
)
def _scan_zip_download_directory(self, state: ZipDownloadPreflightState, resolved_target, started_at: float) -> None:
for root, dirnames, filenames in os.walk(resolved_target.absolute, followlinks=False):
root_path = Path(root)
dirnames.sort()
filenames.sort()
for name in [*dirnames, *filenames]:
self._ensure_zip_download_preflight_within_timeout(started_at)
entry_path = root_path / name
relative_suffix = entry_path.relative_to(resolved_target.absolute).as_posix()
entry_relative = self._join_relative(resolved_target.relative, relative_suffix)
if entry_path.is_symlink():
self._raise_zip_download_preflight_error(
reason="symlink_detected",
details={"path": entry_relative},
)
if entry_path.is_dir():
self._increment_zip_download_item_count(state=state, entry_relative=entry_relative)
continue
self._record_zip_download_file(
state=state,
entry_path=entry_path,
entry_relative=entry_relative,
)
def _record_zip_download_file(
self,
*,
state: ZipDownloadPreflightState,
entry_path: Path,
entry_relative: str,
) -> None:
self._increment_zip_download_item_count(state=state, entry_relative=entry_relative)
file_size = int(entry_path.stat().st_size)
if file_size > self._zip_download_preflight_limits.max_individual_file_bytes:
self._raise_zip_download_preflight_error(
reason="max_individual_file_size_exceeded",
details={
"path": entry_relative,
"limit_bytes": str(self._zip_download_preflight_limits.max_individual_file_bytes),
"actual_bytes": str(file_size),
},
)
state.total_input_bytes += file_size
if state.total_input_bytes > self._zip_download_preflight_limits.max_total_input_bytes:
self._raise_zip_download_preflight_error(
reason="max_total_input_bytes_exceeded",
details={
"limit_bytes": str(self._zip_download_preflight_limits.max_total_input_bytes),
"actual_bytes": str(state.total_input_bytes),
},
)
def _increment_zip_download_item_count(self, *, state: ZipDownloadPreflightState, entry_relative: str) -> None:
state.item_count += 1
if state.item_count > self._zip_download_preflight_limits.max_items:
self._raise_zip_download_preflight_error(
reason="max_items_exceeded",
details={
"path": entry_relative,
"limit": str(self._zip_download_preflight_limits.max_items),
"actual": str(state.item_count),
},
)
def _ensure_zip_download_preflight_within_timeout(self, started_at: float) -> None:
elapsed = self._monotonic() - started_at
if elapsed > self._zip_download_preflight_limits.scan_timeout_seconds:
self._raise_zip_download_preflight_error(
reason="preflight_timeout",
details={
"timeout_seconds": str(self._zip_download_preflight_limits.scan_timeout_seconds),
},
)
@staticmethod
def _raise_zip_download_preflight_error(reason: str, details: dict[str, str]) -> None:
    """Raise the single error shape used for every preflight failure.

    Args:
        reason: Machine-readable failure reason, placed under the ``reason`` key.
        details: Extra string fields merged into the error details after
            ``reason``.

    Raises:
        AppError: Always, with code ``download_preflight_failed`` and status 409.
    """
    payload = {"reason": reason}
    payload.update(details)
    raise AppError(
        code="download_preflight_failed",
        message="Zip download preflight failed",
        status_code=409,
        details=payload,
    )
def _write_download_target_to_zip(self, archive: zipfile.ZipFile, resolved_target) -> None:
root_name = resolved_target.absolute.name
if resolved_target.absolute.is_file():