diff --git a/webui/backend/app/services/__pycache__/file_ops_service.cpython-313.pyc b/webui/backend/app/services/__pycache__/file_ops_service.cpython-313.pyc index 9f23335..9aa3599 100644 Binary files a/webui/backend/app/services/__pycache__/file_ops_service.cpython-313.pyc and b/webui/backend/app/services/__pycache__/file_ops_service.cpython-313.pyc differ diff --git a/webui/backend/app/services/file_ops_service.py b/webui/backend/app/services/file_ops_service.py index ef44c03..0b399d8 100644 --- a/webui/backend/app/services/file_ops_service.py +++ b/webui/backend/app/services/file_ops_service.py @@ -1,10 +1,13 @@ from __future__ import annotations import os -from io import BytesIO +import time import zipfile +from dataclasses import dataclass from datetime import datetime, timezone +from io import BytesIO from pathlib import Path +from typing import Callable from backend.app.api.errors import AppError from backend.app.api.schemas import DeleteResponse, FileInfoResponse, MkdirResponse, RenameResponse, SaveResponse, UploadResponse, ViewResponse @@ -54,11 +57,37 @@ PDF_CONTENT_TYPES = { } +@dataclass(frozen=True) +class ZipDownloadPreflightLimits: + max_items: int = 1000 + max_total_input_bytes: int = 2 * 1024 * 1024 * 1024 + max_individual_file_bytes: int = 500 * 1024 * 1024 + scan_timeout_seconds: float = 10.0 + + +@dataclass +class ZipDownloadPreflightState: + item_count: int = 0 + total_input_bytes: int = 0 + + +ZIP_DOWNLOAD_PREFLIGHT_LIMITS = ZipDownloadPreflightLimits() + + class FileOpsService: - def __init__(self, path_guard: PathGuard, filesystem: FilesystemAdapter, history_repository: HistoryRepository | None = None): + def __init__( + self, + path_guard: PathGuard, + filesystem: FilesystemAdapter, + history_repository: HistoryRepository | None = None, + zip_download_preflight_limits: ZipDownloadPreflightLimits = ZIP_DOWNLOAD_PREFLIGHT_LIMITS, + monotonic: Callable[[], float] | None = None, + ): self._path_guard = path_guard self._filesystem = filesystem self._history_repository = history_repository + self._zip_download_preflight_limits = zip_download_preflight_limits + self._monotonic = monotonic or time.monotonic def mkdir(self, parent_path: str, name: str) -> MkdirResponse: try: @@ -673,7 +702,6 @@ class FileOpsService: def _prepare_zip_download(self, resolved_targets: list) -> dict: archive_names: set[str] = set() for resolved_target in resolved_targets: - self._validate_download_target(resolved_target) archive_name = resolved_target.absolute.name if archive_name in archive_names: raise AppError( @@ -682,6 +710,7 @@ class FileOpsService: status_code=400, ) archive_names.add(archive_name) + self._run_zip_download_preflight(resolved_targets) if len(resolved_targets) == 1 and resolved_targets[0].absolute.is_dir(): download_name = f"{resolved_targets[0].absolute.name}.zip" @@ -705,37 +734,126 @@ class FileOpsService: "content_type": "application/zip", } - def _validate_download_target(self, resolved_target) -> None: + def _run_zip_download_preflight(self, resolved_targets: list) -> None: + started_at = self._monotonic() + state = ZipDownloadPreflightState() + for resolved_target in resolved_targets: + self._ensure_zip_download_preflight_within_timeout(started_at) + self._validate_zip_download_root_target(resolved_target) + if resolved_target.absolute.is_file(): + self._record_zip_download_file( + state=state, + entry_path=resolved_target.absolute, + entry_relative=resolved_target.relative, + ) + continue + self._increment_zip_download_item_count( + state=state, + entry_relative=resolved_target.relative, + ) + self._scan_zip_download_directory( + state=state, + resolved_target=resolved_target, + started_at=started_at, + ) + + def _validate_zip_download_root_target(self, resolved_target) -> None: _, _, lexical_source, _ = self._path_guard.resolve_lexical_path(resolved_target.relative) if lexical_source.is_symlink(): - raise AppError( - code="type_conflict", - message="Source must not be a symlink", - status_code=409, + self._raise_zip_download_preflight_error( + reason="symlink_detected", details={"path": resolved_target.relative}, ) - if resolved_target.absolute.is_file(): + if resolved_target.absolute.is_file() or resolved_target.absolute.is_dir(): return - if resolved_target.absolute.is_dir(): - for root, dirnames, filenames in os.walk(resolved_target.absolute, followlinks=False): - root_path = Path(root) - for name in [*dirnames, *filenames]: - entry = root_path / name - if entry.is_symlink(): - raise AppError( - code="type_conflict", - message="Source directory must not contain symlinks", - status_code=409, - details={"path": resolved_target.relative}, - ) - return - raise AppError( - code="type_conflict", - message="Unsupported path type for download", - status_code=409, + self._raise_zip_download_preflight_error( + reason="unsupported_path_type", details={"path": resolved_target.relative}, ) + def _scan_zip_download_directory(self, state: ZipDownloadPreflightState, resolved_target, started_at: float) -> None: + for root, dirnames, filenames in os.walk(resolved_target.absolute, followlinks=False): + root_path = Path(root) + dirnames.sort() + filenames.sort() + for name in [*dirnames, *filenames]: + self._ensure_zip_download_preflight_within_timeout(started_at) + entry_path = root_path / name + relative_suffix = entry_path.relative_to(resolved_target.absolute).as_posix() + entry_relative = self._join_relative(resolved_target.relative, relative_suffix) + if entry_path.is_symlink(): + self._raise_zip_download_preflight_error( + reason="symlink_detected", + details={"path": entry_relative}, + ) + if entry_path.is_dir(): + self._increment_zip_download_item_count(state=state, entry_relative=entry_relative) + continue + self._record_zip_download_file( + state=state, + entry_path=entry_path, + entry_relative=entry_relative, + ) + + def _record_zip_download_file( + self, + *, + state: ZipDownloadPreflightState, + entry_path: Path, + entry_relative: str, + ) -> None: + self._increment_zip_download_item_count(state=state, entry_relative=entry_relative) + file_size = int(entry_path.stat().st_size) + if file_size > self._zip_download_preflight_limits.max_individual_file_bytes: + self._raise_zip_download_preflight_error( + reason="max_individual_file_size_exceeded", + details={ + "path": entry_relative, + "limit_bytes": str(self._zip_download_preflight_limits.max_individual_file_bytes), + "actual_bytes": str(file_size), + }, + ) + state.total_input_bytes += file_size + if state.total_input_bytes > self._zip_download_preflight_limits.max_total_input_bytes: + self._raise_zip_download_preflight_error( + reason="max_total_input_bytes_exceeded", + details={ + "limit_bytes": str(self._zip_download_preflight_limits.max_total_input_bytes), + "actual_bytes": str(state.total_input_bytes), + }, + ) + + def _increment_zip_download_item_count(self, *, state: ZipDownloadPreflightState, entry_relative: str) -> None: + state.item_count += 1 + if state.item_count > self._zip_download_preflight_limits.max_items: + self._raise_zip_download_preflight_error( + reason="max_items_exceeded", + details={ + "path": entry_relative, + "limit": str(self._zip_download_preflight_limits.max_items), + "actual": str(state.item_count), + }, + ) + + def _ensure_zip_download_preflight_within_timeout(self, started_at: float) -> None: + elapsed = self._monotonic() - started_at + if elapsed > self._zip_download_preflight_limits.scan_timeout_seconds: + self._raise_zip_download_preflight_error( + reason="preflight_timeout", + details={ + "timeout_seconds": str(self._zip_download_preflight_limits.scan_timeout_seconds), + }, + ) + + @staticmethod + def _raise_zip_download_preflight_error(reason: str, details: dict[str, str]) -> None: + raise AppError( + code="download_preflight_failed", + message="Zip download preflight failed", + status_code=409, + details={"reason": reason, **details}, + ) + def _write_download_target_to_zip(self, archive: zipfile.ZipFile, resolved_target) -> None: root_name = resolved_target.absolute.name if resolved_target.absolute.is_file(): diff --git a/webui/backend/tests/golden/__pycache__/test_api_download_golden.cpython-313.pyc b/webui/backend/tests/golden/__pycache__/test_api_download_golden.cpython-313.pyc index 82594ab..d6c6b74 100644 Binary files a/webui/backend/tests/golden/__pycache__/test_api_download_golden.cpython-313.pyc and b/webui/backend/tests/golden/__pycache__/test_api_download_golden.cpython-313.pyc differ diff --git a/webui/backend/tests/golden/test_api_download_golden.py b/webui/backend/tests/golden/test_api_download_golden.py index 6e749ea..8a0159f 100644 --- a/webui/backend/tests/golden/test_api_download_golden.py +++ b/webui/backend/tests/golden/test_api_download_golden.py @@ -16,7 +16,7 @@ from backend.app.dependencies import get_file_ops_service from backend.app.fs.filesystem_adapter import FilesystemAdapter from backend.app.main import app from backend.app.security.path_guard import PathGuard -from backend.app.services.file_ops_service import FileOpsService +from backend.app.services.file_ops_service import FileOpsService, ZipDownloadPreflightLimits class DownloadApiGoldenTest(unittest.TestCase): @@ -24,13 +24,9 @@ class DownloadApiGoldenTest(unittest.TestCase): self.temp_dir = tempfile.TemporaryDirectory() self.root = Path(self.temp_dir.name) / "root" self.root.mkdir(parents=True, exist_ok=True) - path_guard = PathGuard({"storage1": str(self.root), "storage2": str(self.root)}) - service = FileOpsService(path_guard=path_guard, filesystem=FilesystemAdapter()) - - async def _override_file_ops_service() -> FileOpsService: - return service - - app.dependency_overrides[get_file_ops_service] = _override_file_ops_service + self.path_guard = PathGuard({"storage1": str(self.root), "storage2": str(self.root)}) + self.filesystem = FilesystemAdapter() + self._override_service() def tearDown(self) -> None: app.dependency_overrides.clear() @@ -44,6 +40,24 @@ class DownloadApiGoldenTest(unittest.TestCase): return asyncio.run(_run()) + def _override_service( + self, + *, + limits: ZipDownloadPreflightLimits | None = None, + monotonic=None, + ) -> None: + service = FileOpsService( + path_guard=self.path_guard, + filesystem=self.filesystem, + zip_download_preflight_limits=limits or ZipDownloadPreflightLimits(), + monotonic=monotonic, + ) + + async def _override_file_ops_service() -> FileOpsService: + return service + + app.dependency_overrides[get_file_ops_service] = _override_file_ops_service + def test_download_success_for_allowed_file(self) -> None: src = self.root / "report.txt" src.write_text("hello download", encoding="utf-8") @@ -55,7 +69,7 @@ class DownloadApiGoldenTest(unittest.TestCase): self.assertIn('attachment; filename="report.txt"', response.headers.get("content-disposition", "")) self.assertEqual(response.headers.get("content-type"), "text/plain; charset=utf-8") - def test_download_directory_type_conflict(self) -> None: + def test_download_single_directory_as_zip(self) -> None: (self.root / "docs").mkdir() (self.root / "docs" / "a.txt").write_text("a", encoding="utf-8") @@ -121,6 +135,64 @@ class DownloadApiGoldenTest(unittest.TestCase): self.assertIn("photos/", archive.namelist()) self.assertIn("photos/nested/img.txt", archive.namelist()) + def test_download_zip_rejected_when_max_items_exceeded(self) -> None: + (self.root / "docs").mkdir() + (self.root / "docs" / "a.txt").write_text("A", encoding="utf-8") + (self.root / "docs" / "b.txt").write_text("B", encoding="utf-8") + (self.root / "docs" / "c.txt").write_text("C", encoding="utf-8") + self._override_service( + limits=ZipDownloadPreflightLimits( + max_items=3, + max_total_input_bytes=1024, + max_individual_file_bytes=1024, + scan_timeout_seconds=10.0, + ) + ) + + response = self._get("/api/files/download?path=storage1/docs") + + self.assertEqual(response.status_code, 409) + self.assertEqual(response.json()["error"]["code"], "download_preflight_failed") + self.assertEqual(response.json()["error"]["message"], "Zip download preflight failed") + self.assertEqual(response.json()["error"]["details"]["reason"], "max_items_exceeded") + + def test_download_zip_rejected_when_max_total_input_bytes_exceeded(self) -> None: + (self.root / "a.txt").write_text("AAAA", encoding="utf-8") + (self.root / "b.txt").write_text("BBBB", encoding="utf-8") + self._override_service( + limits=ZipDownloadPreflightLimits( + max_items=10, + max_total_input_bytes=7, + max_individual_file_bytes=1024, + scan_timeout_seconds=10.0, + ) + ) + + response = self._get("/api/files/download?path=storage1/a.txt&path=storage1/b.txt") + + self.assertEqual(response.status_code, 409) + self.assertEqual(response.json()["error"]["code"], "download_preflight_failed") + self.assertEqual(response.json()["error"]["details"]["reason"], "max_total_input_bytes_exceeded") + + def test_download_zip_rejected_when_individual_file_too_large(self) -> None: + (self.root / "docs").mkdir() + (self.root / "docs" / "large.bin").write_bytes(b"123456") + self._override_service( + limits=ZipDownloadPreflightLimits( + max_items=10, + max_total_input_bytes=1024, + max_individual_file_bytes=5, + scan_timeout_seconds=10.0, + ) + ) + + response = self._get("/api/files/download?path=storage1/docs") + + self.assertEqual(response.status_code, 409) + self.assertEqual(response.json()["error"]["code"], "download_preflight_failed") + self.assertEqual(response.json()["error"]["details"]["reason"], "max_individual_file_size_exceeded") + self.assertEqual(response.json()["error"]["details"]["path"], "storage1/docs/large.bin") + def test_download_directory_with_symlink_rejected(self) -> None: target = self.root / "real.txt" target.write_text("x", encoding="utf-8") @@ -130,7 +202,29 @@ class DownloadApiGoldenTest(unittest.TestCase): response = self._get("/api/files/download?path=storage1/docs") self.assertEqual(response.status_code, 409) - self.assertEqual(response.json()["error"]["code"], "type_conflict") + self.assertEqual(response.json()["error"]["code"], "download_preflight_failed") + self.assertEqual(response.json()["error"]["details"]["reason"], "symlink_detected") + self.assertEqual(response.json()["error"]["details"]["path"], "storage1/docs/link.txt") + + def test_download_zip_preflight_timeout_rejected_cleanly(self) -> None: + (self.root / "a.txt").write_text("A", encoding="utf-8") + (self.root / "b.txt").write_text("B", encoding="utf-8") + ticks = iter([0.0, 11.0, 11.0, 11.0]) + self._override_service( + limits=ZipDownloadPreflightLimits( + max_items=10, + max_total_input_bytes=1024, + max_individual_file_bytes=1024, + scan_timeout_seconds=10.0, + ), + monotonic=lambda: next(ticks), + ) + + response = self._get("/api/files/download?path=storage1/a.txt&path=storage1/b.txt") + + self.assertEqual(response.status_code, 409) + self.assertEqual(response.json()["error"]["code"], "download_preflight_failed") + self.assertEqual(response.json()["error"]["details"]["reason"], "preflight_timeout") def test_download_path_not_found(self) -> None: response = self._get("/api/files/download?path=storage1/missing.txt")