feat: download - download safeguard

This commit is contained in:
kodi
2026-03-14 13:24:17 +01:00
parent 7e7c2f3958
commit ea337338e3
4 changed files with 248 additions and 36 deletions
+139 -21
View File
@@ -1,10 +1,13 @@
from __future__ import annotations from __future__ import annotations
import os import os
from io import BytesIO import time
import zipfile import zipfile
from dataclasses import dataclass
from datetime import datetime, timezone from datetime import datetime, timezone
from io import BytesIO
from pathlib import Path from pathlib import Path
from typing import Callable
from backend.app.api.errors import AppError from backend.app.api.errors import AppError
from backend.app.api.schemas import DeleteResponse, FileInfoResponse, MkdirResponse, RenameResponse, SaveResponse, UploadResponse, ViewResponse from backend.app.api.schemas import DeleteResponse, FileInfoResponse, MkdirResponse, RenameResponse, SaveResponse, UploadResponse, ViewResponse
@@ -54,11 +57,37 @@ PDF_CONTENT_TYPES = {
} }
@dataclass(frozen=True)
class ZipDownloadPreflightLimits:
max_items: int = 1000
max_total_input_bytes: int = 2 * 1024 * 1024 * 1024
max_individual_file_bytes: int = 500 * 1024 * 1024
scan_timeout_seconds: float = 10.0
@dataclass
class ZipDownloadPreflightState:
item_count: int = 0
total_input_bytes: int = 0
ZIP_DOWNLOAD_PREFLIGHT_LIMITS = ZipDownloadPreflightLimits()
class FileOpsService: class FileOpsService:
def __init__(self, path_guard: PathGuard, filesystem: FilesystemAdapter, history_repository: HistoryRepository | None = None): def __init__(
self,
path_guard: PathGuard,
filesystem: FilesystemAdapter,
history_repository: HistoryRepository | None = None,
zip_download_preflight_limits: ZipDownloadPreflightLimits = ZIP_DOWNLOAD_PREFLIGHT_LIMITS,
monotonic: Callable[[], float] | None = None,
):
self._path_guard = path_guard self._path_guard = path_guard
self._filesystem = filesystem self._filesystem = filesystem
self._history_repository = history_repository self._history_repository = history_repository
self._zip_download_preflight_limits = zip_download_preflight_limits
self._monotonic = monotonic or time.monotonic
def mkdir(self, parent_path: str, name: str) -> MkdirResponse: def mkdir(self, parent_path: str, name: str) -> MkdirResponse:
try: try:
@@ -673,7 +702,6 @@ class FileOpsService:
def _prepare_zip_download(self, resolved_targets: list) -> dict: def _prepare_zip_download(self, resolved_targets: list) -> dict:
archive_names: set[str] = set() archive_names: set[str] = set()
for resolved_target in resolved_targets: for resolved_target in resolved_targets:
self._validate_download_target(resolved_target)
archive_name = resolved_target.absolute.name archive_name = resolved_target.absolute.name
if archive_name in archive_names: if archive_name in archive_names:
raise AppError( raise AppError(
@@ -682,6 +710,7 @@ class FileOpsService:
status_code=400, status_code=400,
) )
archive_names.add(archive_name) archive_names.add(archive_name)
self._run_zip_download_preflight(resolved_targets)
if len(resolved_targets) == 1 and resolved_targets[0].absolute.is_dir(): if len(resolved_targets) == 1 and resolved_targets[0].absolute.is_dir():
download_name = f"{resolved_targets[0].absolute.name}.zip" download_name = f"{resolved_targets[0].absolute.name}.zip"
@@ -705,35 +734,124 @@ class FileOpsService:
"content_type": "application/zip", "content_type": "application/zip",
} }
def _validate_download_target(self, resolved_target) -> None: def _run_zip_download_preflight(self, resolved_targets: list) -> None:
started_at = self._monotonic()
state = ZipDownloadPreflightState()
for resolved_target in resolved_targets:
self._ensure_zip_download_preflight_within_timeout(started_at)
self._validate_zip_download_root_target(resolved_target)
if resolved_target.absolute.is_file():
self._record_zip_download_file(
state=state,
entry_path=resolved_target.absolute,
entry_relative=resolved_target.relative,
)
continue
self._increment_zip_download_item_count(
state=state,
entry_relative=resolved_target.relative,
)
self._scan_zip_download_directory(
state=state,
resolved_target=resolved_target,
started_at=started_at,
)
def _validate_zip_download_root_target(self, resolved_target) -> None:
_, _, lexical_source, _ = self._path_guard.resolve_lexical_path(resolved_target.relative) _, _, lexical_source, _ = self._path_guard.resolve_lexical_path(resolved_target.relative)
if lexical_source.is_symlink(): if lexical_source.is_symlink():
raise AppError( self._raise_zip_download_preflight_error(
code="type_conflict", reason="symlink_detected",
message="Source must not be a symlink",
status_code=409,
details={"path": resolved_target.relative}, details={"path": resolved_target.relative},
) )
if resolved_target.absolute.is_file(): if resolved_target.absolute.is_file() or resolved_target.absolute.is_dir():
return return
if resolved_target.absolute.is_dir(): self._raise_zip_download_preflight_error(
reason="unsupported_path_type",
details={"path": resolved_target.relative},
)
def _scan_zip_download_directory(self, state: ZipDownloadPreflightState, resolved_target, started_at: float) -> None:
for root, dirnames, filenames in os.walk(resolved_target.absolute, followlinks=False): for root, dirnames, filenames in os.walk(resolved_target.absolute, followlinks=False):
root_path = Path(root) root_path = Path(root)
dirnames.sort()
filenames.sort()
for name in [*dirnames, *filenames]: for name in [*dirnames, *filenames]:
entry = root_path / name self._ensure_zip_download_preflight_within_timeout(started_at)
if entry.is_symlink(): entry_path = root_path / name
raise AppError( relative_suffix = entry_path.relative_to(resolved_target.absolute).as_posix()
code="type_conflict", entry_relative = self._join_relative(resolved_target.relative, relative_suffix)
message="Source directory must not contain symlinks", if entry_path.is_symlink():
status_code=409, self._raise_zip_download_preflight_error(
details={"path": resolved_target.relative}, reason="symlink_detected",
details={"path": entry_relative},
) )
return if entry_path.is_dir():
self._increment_zip_download_item_count(state=state, entry_relative=entry_relative)
continue
self._record_zip_download_file(
state=state,
entry_path=entry_path,
entry_relative=entry_relative,
)
def _record_zip_download_file(
self,
*,
state: ZipDownloadPreflightState,
entry_path: Path,
entry_relative: str,
) -> None:
self._increment_zip_download_item_count(state=state, entry_relative=entry_relative)
file_size = int(entry_path.stat().st_size)
if file_size > self._zip_download_preflight_limits.max_individual_file_bytes:
self._raise_zip_download_preflight_error(
reason="max_individual_file_size_exceeded",
details={
"path": entry_relative,
"limit_bytes": str(self._zip_download_preflight_limits.max_individual_file_bytes),
"actual_bytes": str(file_size),
},
)
state.total_input_bytes += file_size
if state.total_input_bytes > self._zip_download_preflight_limits.max_total_input_bytes:
self._raise_zip_download_preflight_error(
reason="max_total_input_bytes_exceeded",
details={
"limit_bytes": str(self._zip_download_preflight_limits.max_total_input_bytes),
"actual_bytes": str(state.total_input_bytes),
},
)
def _increment_zip_download_item_count(self, *, state: ZipDownloadPreflightState, entry_relative: str) -> None:
state.item_count += 1
if state.item_count > self._zip_download_preflight_limits.max_items:
self._raise_zip_download_preflight_error(
reason="max_items_exceeded",
details={
"path": entry_relative,
"limit": str(self._zip_download_preflight_limits.max_items),
"actual": str(state.item_count),
},
)
def _ensure_zip_download_preflight_within_timeout(self, started_at: float) -> None:
elapsed = self._monotonic() - started_at
if elapsed > self._zip_download_preflight_limits.scan_timeout_seconds:
self._raise_zip_download_preflight_error(
reason="preflight_timeout",
details={
"timeout_seconds": str(self._zip_download_preflight_limits.scan_timeout_seconds),
},
)
@staticmethod
def _raise_zip_download_preflight_error(reason: str, details: dict[str, str]) -> None:
raise AppError( raise AppError(
code="type_conflict", code="download_preflight_failed",
message="Unsupported path type for download", message="Zip download preflight failed",
status_code=409, status_code=409,
details={"path": resolved_target.relative}, details={"reason": reason, **details},
) )
def _write_download_target_to_zip(self, archive: zipfile.ZipFile, resolved_target) -> None: def _write_download_target_to_zip(self, archive: zipfile.ZipFile, resolved_target) -> None:
@@ -16,7 +16,7 @@ from backend.app.dependencies import get_file_ops_service
from backend.app.fs.filesystem_adapter import FilesystemAdapter from backend.app.fs.filesystem_adapter import FilesystemAdapter
from backend.app.main import app from backend.app.main import app
from backend.app.security.path_guard import PathGuard from backend.app.security.path_guard import PathGuard
from backend.app.services.file_ops_service import FileOpsService from backend.app.services.file_ops_service import FileOpsService, ZipDownloadPreflightLimits
class DownloadApiGoldenTest(unittest.TestCase): class DownloadApiGoldenTest(unittest.TestCase):
@@ -24,13 +24,9 @@ class DownloadApiGoldenTest(unittest.TestCase):
self.temp_dir = tempfile.TemporaryDirectory() self.temp_dir = tempfile.TemporaryDirectory()
self.root = Path(self.temp_dir.name) / "root" self.root = Path(self.temp_dir.name) / "root"
self.root.mkdir(parents=True, exist_ok=True) self.root.mkdir(parents=True, exist_ok=True)
path_guard = PathGuard({"storage1": str(self.root), "storage2": str(self.root)}) self.path_guard = PathGuard({"storage1": str(self.root), "storage2": str(self.root)})
service = FileOpsService(path_guard=path_guard, filesystem=FilesystemAdapter()) self.filesystem = FilesystemAdapter()
self._override_service()
async def _override_file_ops_service() -> FileOpsService:
return service
app.dependency_overrides[get_file_ops_service] = _override_file_ops_service
def tearDown(self) -> None: def tearDown(self) -> None:
app.dependency_overrides.clear() app.dependency_overrides.clear()
@@ -44,6 +40,24 @@ class DownloadApiGoldenTest(unittest.TestCase):
return asyncio.run(_run()) return asyncio.run(_run())
def _override_service(
self,
*,
limits: ZipDownloadPreflightLimits | None = None,
monotonic=None,
) -> None:
service = FileOpsService(
path_guard=self.path_guard,
filesystem=self.filesystem,
zip_download_preflight_limits=limits or ZipDownloadPreflightLimits(),
monotonic=monotonic,
)
async def _override_file_ops_service() -> FileOpsService:
return service
app.dependency_overrides[get_file_ops_service] = _override_file_ops_service
def test_download_success_for_allowed_file(self) -> None: def test_download_success_for_allowed_file(self) -> None:
src = self.root / "report.txt" src = self.root / "report.txt"
src.write_text("hello download", encoding="utf-8") src.write_text("hello download", encoding="utf-8")
@@ -55,7 +69,7 @@ class DownloadApiGoldenTest(unittest.TestCase):
self.assertIn('attachment; filename="report.txt"', response.headers.get("content-disposition", "")) self.assertIn('attachment; filename="report.txt"', response.headers.get("content-disposition", ""))
self.assertEqual(response.headers.get("content-type"), "text/plain; charset=utf-8") self.assertEqual(response.headers.get("content-type"), "text/plain; charset=utf-8")
def test_download_directory_type_conflict(self) -> None: def test_download_single_directory_as_zip(self) -> None:
(self.root / "docs").mkdir() (self.root / "docs").mkdir()
(self.root / "docs" / "a.txt").write_text("a", encoding="utf-8") (self.root / "docs" / "a.txt").write_text("a", encoding="utf-8")
@@ -121,6 +135,64 @@ class DownloadApiGoldenTest(unittest.TestCase):
self.assertIn("photos/", archive.namelist()) self.assertIn("photos/", archive.namelist())
self.assertIn("photos/nested/img.txt", archive.namelist()) self.assertIn("photos/nested/img.txt", archive.namelist())
def test_download_zip_rejected_when_max_items_exceeded(self) -> None:
(self.root / "docs").mkdir()
(self.root / "docs" / "a.txt").write_text("A", encoding="utf-8")
(self.root / "docs" / "b.txt").write_text("B", encoding="utf-8")
(self.root / "docs" / "c.txt").write_text("C", encoding="utf-8")
self._override_service(
limits=ZipDownloadPreflightLimits(
max_items=3,
max_total_input_bytes=1024,
max_individual_file_bytes=1024,
scan_timeout_seconds=10.0,
)
)
response = self._get("/api/files/download?path=storage1/docs")
self.assertEqual(response.status_code, 409)
self.assertEqual(response.json()["error"]["code"], "download_preflight_failed")
self.assertEqual(response.json()["error"]["message"], "Zip download preflight failed")
self.assertEqual(response.json()["error"]["details"]["reason"], "max_items_exceeded")
def test_download_zip_rejected_when_max_total_input_bytes_exceeded(self) -> None:
(self.root / "a.txt").write_text("AAAA", encoding="utf-8")
(self.root / "b.txt").write_text("BBBB", encoding="utf-8")
self._override_service(
limits=ZipDownloadPreflightLimits(
max_items=10,
max_total_input_bytes=7,
max_individual_file_bytes=1024,
scan_timeout_seconds=10.0,
)
)
response = self._get("/api/files/download?path=storage1/a.txt&path=storage1/b.txt")
self.assertEqual(response.status_code, 409)
self.assertEqual(response.json()["error"]["code"], "download_preflight_failed")
self.assertEqual(response.json()["error"]["details"]["reason"], "max_total_input_bytes_exceeded")
def test_download_zip_rejected_when_individual_file_too_large(self) -> None:
(self.root / "docs").mkdir()
(self.root / "docs" / "large.bin").write_bytes(b"123456")
self._override_service(
limits=ZipDownloadPreflightLimits(
max_items=10,
max_total_input_bytes=1024,
max_individual_file_bytes=5,
scan_timeout_seconds=10.0,
)
)
response = self._get("/api/files/download?path=storage1/docs")
self.assertEqual(response.status_code, 409)
self.assertEqual(response.json()["error"]["code"], "download_preflight_failed")
self.assertEqual(response.json()["error"]["details"]["reason"], "max_individual_file_size_exceeded")
self.assertEqual(response.json()["error"]["details"]["path"], "storage1/docs/large.bin")
def test_download_directory_with_symlink_rejected(self) -> None: def test_download_directory_with_symlink_rejected(self) -> None:
target = self.root / "real.txt" target = self.root / "real.txt"
target.write_text("x", encoding="utf-8") target.write_text("x", encoding="utf-8")
@@ -130,7 +202,29 @@ class DownloadApiGoldenTest(unittest.TestCase):
response = self._get("/api/files/download?path=storage1/docs") response = self._get("/api/files/download?path=storage1/docs")
self.assertEqual(response.status_code, 409) self.assertEqual(response.status_code, 409)
self.assertEqual(response.json()["error"]["code"], "type_conflict") self.assertEqual(response.json()["error"]["code"], "download_preflight_failed")
self.assertEqual(response.json()["error"]["details"]["reason"], "symlink_detected")
self.assertEqual(response.json()["error"]["details"]["path"], "storage1/docs/link.txt")
def test_download_zip_preflight_timeout_rejected_cleanly(self) -> None:
(self.root / "a.txt").write_text("A", encoding="utf-8")
(self.root / "b.txt").write_text("B", encoding="utf-8")
ticks = iter([0.0, 11.0, 11.0, 11.0])
self._override_service(
limits=ZipDownloadPreflightLimits(
max_items=10,
max_total_input_bytes=1024,
max_individual_file_bytes=1024,
scan_timeout_seconds=10.0,
),
monotonic=lambda: next(ticks),
)
response = self._get("/api/files/download?path=storage1/a.txt&path=storage1/b.txt")
self.assertEqual(response.status_code, 409)
self.assertEqual(response.json()["error"]["code"], "download_preflight_failed")
self.assertEqual(response.json()["error"]["details"]["reason"], "preflight_timeout")
def test_download_path_not_found(self) -> None: def test_download_path_not_found(self) -> None:
response = self._get("/api/files/download?path=storage1/missing.txt") response = self._get("/api/files/download?path=storage1/missing.txt")