feat: download - download safeguard
This commit is contained in:
Binary file not shown.
@@ -1,10 +1,13 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import os
|
import os
|
||||||
from io import BytesIO
|
import time
|
||||||
import zipfile
|
import zipfile
|
||||||
|
from dataclasses import dataclass
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
|
from io import BytesIO
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from typing import Callable
|
||||||
|
|
||||||
from backend.app.api.errors import AppError
|
from backend.app.api.errors import AppError
|
||||||
from backend.app.api.schemas import DeleteResponse, FileInfoResponse, MkdirResponse, RenameResponse, SaveResponse, UploadResponse, ViewResponse
|
from backend.app.api.schemas import DeleteResponse, FileInfoResponse, MkdirResponse, RenameResponse, SaveResponse, UploadResponse, ViewResponse
|
||||||
@@ -54,11 +57,37 @@ PDF_CONTENT_TYPES = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
|
||||||
|
class ZipDownloadPreflightLimits:
|
||||||
|
max_items: int = 1000
|
||||||
|
max_total_input_bytes: int = 2 * 1024 * 1024 * 1024
|
||||||
|
max_individual_file_bytes: int = 500 * 1024 * 1024
|
||||||
|
scan_timeout_seconds: float = 10.0
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class ZipDownloadPreflightState:
|
||||||
|
item_count: int = 0
|
||||||
|
total_input_bytes: int = 0
|
||||||
|
|
||||||
|
|
||||||
|
ZIP_DOWNLOAD_PREFLIGHT_LIMITS = ZipDownloadPreflightLimits()
|
||||||
|
|
||||||
|
|
||||||
class FileOpsService:
|
class FileOpsService:
|
||||||
def __init__(self, path_guard: PathGuard, filesystem: FilesystemAdapter, history_repository: HistoryRepository | None = None):
|
def __init__(
|
||||||
|
self,
|
||||||
|
path_guard: PathGuard,
|
||||||
|
filesystem: FilesystemAdapter,
|
||||||
|
history_repository: HistoryRepository | None = None,
|
||||||
|
zip_download_preflight_limits: ZipDownloadPreflightLimits = ZIP_DOWNLOAD_PREFLIGHT_LIMITS,
|
||||||
|
monotonic: Callable[[], float] | None = None,
|
||||||
|
):
|
||||||
self._path_guard = path_guard
|
self._path_guard = path_guard
|
||||||
self._filesystem = filesystem
|
self._filesystem = filesystem
|
||||||
self._history_repository = history_repository
|
self._history_repository = history_repository
|
||||||
|
self._zip_download_preflight_limits = zip_download_preflight_limits
|
||||||
|
self._monotonic = monotonic or time.monotonic
|
||||||
|
|
||||||
def mkdir(self, parent_path: str, name: str) -> MkdirResponse:
|
def mkdir(self, parent_path: str, name: str) -> MkdirResponse:
|
||||||
try:
|
try:
|
||||||
@@ -673,7 +702,6 @@ class FileOpsService:
|
|||||||
def _prepare_zip_download(self, resolved_targets: list) -> dict:
|
def _prepare_zip_download(self, resolved_targets: list) -> dict:
|
||||||
archive_names: set[str] = set()
|
archive_names: set[str] = set()
|
||||||
for resolved_target in resolved_targets:
|
for resolved_target in resolved_targets:
|
||||||
self._validate_download_target(resolved_target)
|
|
||||||
archive_name = resolved_target.absolute.name
|
archive_name = resolved_target.absolute.name
|
||||||
if archive_name in archive_names:
|
if archive_name in archive_names:
|
||||||
raise AppError(
|
raise AppError(
|
||||||
@@ -682,6 +710,7 @@ class FileOpsService:
|
|||||||
status_code=400,
|
status_code=400,
|
||||||
)
|
)
|
||||||
archive_names.add(archive_name)
|
archive_names.add(archive_name)
|
||||||
|
self._run_zip_download_preflight(resolved_targets)
|
||||||
|
|
||||||
if len(resolved_targets) == 1 and resolved_targets[0].absolute.is_dir():
|
if len(resolved_targets) == 1 and resolved_targets[0].absolute.is_dir():
|
||||||
download_name = f"{resolved_targets[0].absolute.name}.zip"
|
download_name = f"{resolved_targets[0].absolute.name}.zip"
|
||||||
@@ -705,37 +734,126 @@ class FileOpsService:
|
|||||||
"content_type": "application/zip",
|
"content_type": "application/zip",
|
||||||
}
|
}
|
||||||
|
|
||||||
def _validate_download_target(self, resolved_target) -> None:
|
def _run_zip_download_preflight(self, resolved_targets: list) -> None:
|
||||||
|
started_at = self._monotonic()
|
||||||
|
state = ZipDownloadPreflightState()
|
||||||
|
for resolved_target in resolved_targets:
|
||||||
|
self._ensure_zip_download_preflight_within_timeout(started_at)
|
||||||
|
self._validate_zip_download_root_target(resolved_target)
|
||||||
|
if resolved_target.absolute.is_file():
|
||||||
|
self._record_zip_download_file(
|
||||||
|
state=state,
|
||||||
|
entry_path=resolved_target.absolute,
|
||||||
|
entry_relative=resolved_target.relative,
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
self._increment_zip_download_item_count(
|
||||||
|
state=state,
|
||||||
|
entry_relative=resolved_target.relative,
|
||||||
|
)
|
||||||
|
self._scan_zip_download_directory(
|
||||||
|
state=state,
|
||||||
|
resolved_target=resolved_target,
|
||||||
|
started_at=started_at,
|
||||||
|
)
|
||||||
|
|
||||||
|
def _validate_zip_download_root_target(self, resolved_target) -> None:
|
||||||
_, _, lexical_source, _ = self._path_guard.resolve_lexical_path(resolved_target.relative)
|
_, _, lexical_source, _ = self._path_guard.resolve_lexical_path(resolved_target.relative)
|
||||||
if lexical_source.is_symlink():
|
if lexical_source.is_symlink():
|
||||||
raise AppError(
|
self._raise_zip_download_preflight_error(
|
||||||
code="type_conflict",
|
reason="symlink_detected",
|
||||||
message="Source must not be a symlink",
|
|
||||||
status_code=409,
|
|
||||||
details={"path": resolved_target.relative},
|
details={"path": resolved_target.relative},
|
||||||
)
|
)
|
||||||
if resolved_target.absolute.is_file():
|
if resolved_target.absolute.is_file() or resolved_target.absolute.is_dir():
|
||||||
return
|
return
|
||||||
if resolved_target.absolute.is_dir():
|
self._raise_zip_download_preflight_error(
|
||||||
for root, dirnames, filenames in os.walk(resolved_target.absolute, followlinks=False):
|
reason="unsupported_path_type",
|
||||||
root_path = Path(root)
|
|
||||||
for name in [*dirnames, *filenames]:
|
|
||||||
entry = root_path / name
|
|
||||||
if entry.is_symlink():
|
|
||||||
raise AppError(
|
|
||||||
code="type_conflict",
|
|
||||||
message="Source directory must not contain symlinks",
|
|
||||||
status_code=409,
|
|
||||||
details={"path": resolved_target.relative},
|
|
||||||
)
|
|
||||||
return
|
|
||||||
raise AppError(
|
|
||||||
code="type_conflict",
|
|
||||||
message="Unsupported path type for download",
|
|
||||||
status_code=409,
|
|
||||||
details={"path": resolved_target.relative},
|
details={"path": resolved_target.relative},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def _scan_zip_download_directory(self, state: ZipDownloadPreflightState, resolved_target, started_at: float) -> None:
|
||||||
|
for root, dirnames, filenames in os.walk(resolved_target.absolute, followlinks=False):
|
||||||
|
root_path = Path(root)
|
||||||
|
dirnames.sort()
|
||||||
|
filenames.sort()
|
||||||
|
for name in [*dirnames, *filenames]:
|
||||||
|
self._ensure_zip_download_preflight_within_timeout(started_at)
|
||||||
|
entry_path = root_path / name
|
||||||
|
relative_suffix = entry_path.relative_to(resolved_target.absolute).as_posix()
|
||||||
|
entry_relative = self._join_relative(resolved_target.relative, relative_suffix)
|
||||||
|
if entry_path.is_symlink():
|
||||||
|
self._raise_zip_download_preflight_error(
|
||||||
|
reason="symlink_detected",
|
||||||
|
details={"path": entry_relative},
|
||||||
|
)
|
||||||
|
if entry_path.is_dir():
|
||||||
|
self._increment_zip_download_item_count(state=state, entry_relative=entry_relative)
|
||||||
|
continue
|
||||||
|
self._record_zip_download_file(
|
||||||
|
state=state,
|
||||||
|
entry_path=entry_path,
|
||||||
|
entry_relative=entry_relative,
|
||||||
|
)
|
||||||
|
|
||||||
|
def _record_zip_download_file(
|
||||||
|
self,
|
||||||
|
*,
|
||||||
|
state: ZipDownloadPreflightState,
|
||||||
|
entry_path: Path,
|
||||||
|
entry_relative: str,
|
||||||
|
) -> None:
|
||||||
|
self._increment_zip_download_item_count(state=state, entry_relative=entry_relative)
|
||||||
|
file_size = int(entry_path.stat().st_size)
|
||||||
|
if file_size > self._zip_download_preflight_limits.max_individual_file_bytes:
|
||||||
|
self._raise_zip_download_preflight_error(
|
||||||
|
reason="max_individual_file_size_exceeded",
|
||||||
|
details={
|
||||||
|
"path": entry_relative,
|
||||||
|
"limit_bytes": str(self._zip_download_preflight_limits.max_individual_file_bytes),
|
||||||
|
"actual_bytes": str(file_size),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
state.total_input_bytes += file_size
|
||||||
|
if state.total_input_bytes > self._zip_download_preflight_limits.max_total_input_bytes:
|
||||||
|
self._raise_zip_download_preflight_error(
|
||||||
|
reason="max_total_input_bytes_exceeded",
|
||||||
|
details={
|
||||||
|
"limit_bytes": str(self._zip_download_preflight_limits.max_total_input_bytes),
|
||||||
|
"actual_bytes": str(state.total_input_bytes),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
def _increment_zip_download_item_count(self, *, state: ZipDownloadPreflightState, entry_relative: str) -> None:
|
||||||
|
state.item_count += 1
|
||||||
|
if state.item_count > self._zip_download_preflight_limits.max_items:
|
||||||
|
self._raise_zip_download_preflight_error(
|
||||||
|
reason="max_items_exceeded",
|
||||||
|
details={
|
||||||
|
"path": entry_relative,
|
||||||
|
"limit": str(self._zip_download_preflight_limits.max_items),
|
||||||
|
"actual": str(state.item_count),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
def _ensure_zip_download_preflight_within_timeout(self, started_at: float) -> None:
|
||||||
|
elapsed = self._monotonic() - started_at
|
||||||
|
if elapsed > self._zip_download_preflight_limits.scan_timeout_seconds:
|
||||||
|
self._raise_zip_download_preflight_error(
|
||||||
|
reason="preflight_timeout",
|
||||||
|
details={
|
||||||
|
"timeout_seconds": str(self._zip_download_preflight_limits.scan_timeout_seconds),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _raise_zip_download_preflight_error(reason: str, details: dict[str, str]) -> None:
|
||||||
|
raise AppError(
|
||||||
|
code="download_preflight_failed",
|
||||||
|
message="Zip download preflight failed",
|
||||||
|
status_code=409,
|
||||||
|
details={"reason": reason, **details},
|
||||||
|
)
|
||||||
|
|
||||||
def _write_download_target_to_zip(self, archive: zipfile.ZipFile, resolved_target) -> None:
|
def _write_download_target_to_zip(self, archive: zipfile.ZipFile, resolved_target) -> None:
|
||||||
root_name = resolved_target.absolute.name
|
root_name = resolved_target.absolute.name
|
||||||
if resolved_target.absolute.is_file():
|
if resolved_target.absolute.is_file():
|
||||||
|
|||||||
Binary file not shown.
@@ -16,7 +16,7 @@ from backend.app.dependencies import get_file_ops_service
|
|||||||
from backend.app.fs.filesystem_adapter import FilesystemAdapter
|
from backend.app.fs.filesystem_adapter import FilesystemAdapter
|
||||||
from backend.app.main import app
|
from backend.app.main import app
|
||||||
from backend.app.security.path_guard import PathGuard
|
from backend.app.security.path_guard import PathGuard
|
||||||
from backend.app.services.file_ops_service import FileOpsService
|
from backend.app.services.file_ops_service import FileOpsService, ZipDownloadPreflightLimits
|
||||||
|
|
||||||
|
|
||||||
class DownloadApiGoldenTest(unittest.TestCase):
|
class DownloadApiGoldenTest(unittest.TestCase):
|
||||||
@@ -24,13 +24,9 @@ class DownloadApiGoldenTest(unittest.TestCase):
|
|||||||
self.temp_dir = tempfile.TemporaryDirectory()
|
self.temp_dir = tempfile.TemporaryDirectory()
|
||||||
self.root = Path(self.temp_dir.name) / "root"
|
self.root = Path(self.temp_dir.name) / "root"
|
||||||
self.root.mkdir(parents=True, exist_ok=True)
|
self.root.mkdir(parents=True, exist_ok=True)
|
||||||
path_guard = PathGuard({"storage1": str(self.root), "storage2": str(self.root)})
|
self.path_guard = PathGuard({"storage1": str(self.root), "storage2": str(self.root)})
|
||||||
service = FileOpsService(path_guard=path_guard, filesystem=FilesystemAdapter())
|
self.filesystem = FilesystemAdapter()
|
||||||
|
self._override_service()
|
||||||
async def _override_file_ops_service() -> FileOpsService:
|
|
||||||
return service
|
|
||||||
|
|
||||||
app.dependency_overrides[get_file_ops_service] = _override_file_ops_service
|
|
||||||
|
|
||||||
def tearDown(self) -> None:
|
def tearDown(self) -> None:
|
||||||
app.dependency_overrides.clear()
|
app.dependency_overrides.clear()
|
||||||
@@ -44,6 +40,24 @@ class DownloadApiGoldenTest(unittest.TestCase):
|
|||||||
|
|
||||||
return asyncio.run(_run())
|
return asyncio.run(_run())
|
||||||
|
|
||||||
|
def _override_service(
|
||||||
|
self,
|
||||||
|
*,
|
||||||
|
limits: ZipDownloadPreflightLimits | None = None,
|
||||||
|
monotonic=None,
|
||||||
|
) -> None:
|
||||||
|
service = FileOpsService(
|
||||||
|
path_guard=self.path_guard,
|
||||||
|
filesystem=self.filesystem,
|
||||||
|
zip_download_preflight_limits=limits or ZipDownloadPreflightLimits(),
|
||||||
|
monotonic=monotonic,
|
||||||
|
)
|
||||||
|
|
||||||
|
async def _override_file_ops_service() -> FileOpsService:
|
||||||
|
return service
|
||||||
|
|
||||||
|
app.dependency_overrides[get_file_ops_service] = _override_file_ops_service
|
||||||
|
|
||||||
def test_download_success_for_allowed_file(self) -> None:
|
def test_download_success_for_allowed_file(self) -> None:
|
||||||
src = self.root / "report.txt"
|
src = self.root / "report.txt"
|
||||||
src.write_text("hello download", encoding="utf-8")
|
src.write_text("hello download", encoding="utf-8")
|
||||||
@@ -55,7 +69,7 @@ class DownloadApiGoldenTest(unittest.TestCase):
|
|||||||
self.assertIn('attachment; filename="report.txt"', response.headers.get("content-disposition", ""))
|
self.assertIn('attachment; filename="report.txt"', response.headers.get("content-disposition", ""))
|
||||||
self.assertEqual(response.headers.get("content-type"), "text/plain; charset=utf-8")
|
self.assertEqual(response.headers.get("content-type"), "text/plain; charset=utf-8")
|
||||||
|
|
||||||
def test_download_directory_type_conflict(self) -> None:
|
def test_download_single_directory_as_zip(self) -> None:
|
||||||
(self.root / "docs").mkdir()
|
(self.root / "docs").mkdir()
|
||||||
(self.root / "docs" / "a.txt").write_text("a", encoding="utf-8")
|
(self.root / "docs" / "a.txt").write_text("a", encoding="utf-8")
|
||||||
|
|
||||||
@@ -121,6 +135,64 @@ class DownloadApiGoldenTest(unittest.TestCase):
|
|||||||
self.assertIn("photos/", archive.namelist())
|
self.assertIn("photos/", archive.namelist())
|
||||||
self.assertIn("photos/nested/img.txt", archive.namelist())
|
self.assertIn("photos/nested/img.txt", archive.namelist())
|
||||||
|
|
||||||
|
def test_download_zip_rejected_when_max_items_exceeded(self) -> None:
|
||||||
|
(self.root / "docs").mkdir()
|
||||||
|
(self.root / "docs" / "a.txt").write_text("A", encoding="utf-8")
|
||||||
|
(self.root / "docs" / "b.txt").write_text("B", encoding="utf-8")
|
||||||
|
(self.root / "docs" / "c.txt").write_text("C", encoding="utf-8")
|
||||||
|
self._override_service(
|
||||||
|
limits=ZipDownloadPreflightLimits(
|
||||||
|
max_items=3,
|
||||||
|
max_total_input_bytes=1024,
|
||||||
|
max_individual_file_bytes=1024,
|
||||||
|
scan_timeout_seconds=10.0,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
response = self._get("/api/files/download?path=storage1/docs")
|
||||||
|
|
||||||
|
self.assertEqual(response.status_code, 409)
|
||||||
|
self.assertEqual(response.json()["error"]["code"], "download_preflight_failed")
|
||||||
|
self.assertEqual(response.json()["error"]["message"], "Zip download preflight failed")
|
||||||
|
self.assertEqual(response.json()["error"]["details"]["reason"], "max_items_exceeded")
|
||||||
|
|
||||||
|
def test_download_zip_rejected_when_max_total_input_bytes_exceeded(self) -> None:
|
||||||
|
(self.root / "a.txt").write_text("AAAA", encoding="utf-8")
|
||||||
|
(self.root / "b.txt").write_text("BBBB", encoding="utf-8")
|
||||||
|
self._override_service(
|
||||||
|
limits=ZipDownloadPreflightLimits(
|
||||||
|
max_items=10,
|
||||||
|
max_total_input_bytes=7,
|
||||||
|
max_individual_file_bytes=1024,
|
||||||
|
scan_timeout_seconds=10.0,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
response = self._get("/api/files/download?path=storage1/a.txt&path=storage1/b.txt")
|
||||||
|
|
||||||
|
self.assertEqual(response.status_code, 409)
|
||||||
|
self.assertEqual(response.json()["error"]["code"], "download_preflight_failed")
|
||||||
|
self.assertEqual(response.json()["error"]["details"]["reason"], "max_total_input_bytes_exceeded")
|
||||||
|
|
||||||
|
def test_download_zip_rejected_when_individual_file_too_large(self) -> None:
|
||||||
|
(self.root / "docs").mkdir()
|
||||||
|
(self.root / "docs" / "large.bin").write_bytes(b"123456")
|
||||||
|
self._override_service(
|
||||||
|
limits=ZipDownloadPreflightLimits(
|
||||||
|
max_items=10,
|
||||||
|
max_total_input_bytes=1024,
|
||||||
|
max_individual_file_bytes=5,
|
||||||
|
scan_timeout_seconds=10.0,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
response = self._get("/api/files/download?path=storage1/docs")
|
||||||
|
|
||||||
|
self.assertEqual(response.status_code, 409)
|
||||||
|
self.assertEqual(response.json()["error"]["code"], "download_preflight_failed")
|
||||||
|
self.assertEqual(response.json()["error"]["details"]["reason"], "max_individual_file_size_exceeded")
|
||||||
|
self.assertEqual(response.json()["error"]["details"]["path"], "storage1/docs/large.bin")
|
||||||
|
|
||||||
def test_download_directory_with_symlink_rejected(self) -> None:
|
def test_download_directory_with_symlink_rejected(self) -> None:
|
||||||
target = self.root / "real.txt"
|
target = self.root / "real.txt"
|
||||||
target.write_text("x", encoding="utf-8")
|
target.write_text("x", encoding="utf-8")
|
||||||
@@ -130,7 +202,29 @@ class DownloadApiGoldenTest(unittest.TestCase):
|
|||||||
response = self._get("/api/files/download?path=storage1/docs")
|
response = self._get("/api/files/download?path=storage1/docs")
|
||||||
|
|
||||||
self.assertEqual(response.status_code, 409)
|
self.assertEqual(response.status_code, 409)
|
||||||
self.assertEqual(response.json()["error"]["code"], "type_conflict")
|
self.assertEqual(response.json()["error"]["code"], "download_preflight_failed")
|
||||||
|
self.assertEqual(response.json()["error"]["details"]["reason"], "symlink_detected")
|
||||||
|
self.assertEqual(response.json()["error"]["details"]["path"], "storage1/docs/link.txt")
|
||||||
|
|
||||||
|
def test_download_zip_preflight_timeout_rejected_cleanly(self) -> None:
|
||||||
|
(self.root / "a.txt").write_text("A", encoding="utf-8")
|
||||||
|
(self.root / "b.txt").write_text("B", encoding="utf-8")
|
||||||
|
ticks = iter([0.0, 11.0, 11.0, 11.0])
|
||||||
|
self._override_service(
|
||||||
|
limits=ZipDownloadPreflightLimits(
|
||||||
|
max_items=10,
|
||||||
|
max_total_input_bytes=1024,
|
||||||
|
max_individual_file_bytes=1024,
|
||||||
|
scan_timeout_seconds=10.0,
|
||||||
|
),
|
||||||
|
monotonic=lambda: next(ticks),
|
||||||
|
)
|
||||||
|
|
||||||
|
response = self._get("/api/files/download?path=storage1/a.txt&path=storage1/b.txt")
|
||||||
|
|
||||||
|
self.assertEqual(response.status_code, 409)
|
||||||
|
self.assertEqual(response.json()["error"]["code"], "download_preflight_failed")
|
||||||
|
self.assertEqual(response.json()["error"]["details"]["reason"], "preflight_timeout")
|
||||||
|
|
||||||
def test_download_path_not_found(self) -> None:
|
def test_download_path_not_found(self) -> None:
|
||||||
response = self._get("/api/files/download?path=storage1/missing.txt")
|
response = self._get("/api/files/download?path=storage1/missing.txt")
|
||||||
|
|||||||
Reference in New Issue
Block a user