feat: download - download safeguard
This commit is contained in:
Binary file not shown.
@@ -1,10 +1,13 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from io import BytesIO
|
||||
import time
|
||||
import zipfile
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timezone
|
||||
from io import BytesIO
|
||||
from pathlib import Path
|
||||
from typing import Callable
|
||||
|
||||
from backend.app.api.errors import AppError
|
||||
from backend.app.api.schemas import DeleteResponse, FileInfoResponse, MkdirResponse, RenameResponse, SaveResponse, UploadResponse, ViewResponse
|
||||
@@ -54,11 +57,37 @@ PDF_CONTENT_TYPES = {
|
||||
}
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ZipDownloadPreflightLimits:
    """Immutable ceilings enforced while pre-scanning a zip download request."""

    # Upper bound on the number of entries (files and directories) counted.
    max_items: int = 1000
    # Upper bound on the summed size of all files, in bytes (2 GiB).
    max_total_input_bytes: int = 2 * 1024 ** 3
    # Upper bound on the size of any single file, in bytes (500 MiB).
    max_individual_file_bytes: int = 500 * 1024 ** 2
    # Maximum time the pre-scan may run, in seconds.
    scan_timeout_seconds: float = 10.0
|
||||
|
||||
|
||||
@dataclass
class ZipDownloadPreflightState:
    """Mutable running totals accumulated during one zip download pre-scan."""

    # Number of entries (files and directories) counted so far.
    item_count: int = 0
    # Sum of the sizes of all files recorded so far, in bytes.
    total_input_bytes: int = 0
|
||||
|
||||
|
||||
# Shared default limits instance; FileOpsService uses it when the caller does
# not pass its own zip_download_preflight_limits.
ZIP_DOWNLOAD_PREFLIGHT_LIMITS = ZipDownloadPreflightLimits()
|
||||
|
||||
|
||||
class FileOpsService:
|
||||
def __init__(self, path_guard: PathGuard, filesystem: FilesystemAdapter, history_repository: HistoryRepository | None = None):
|
||||
def __init__(
    self,
    path_guard: PathGuard,
    filesystem: FilesystemAdapter,
    history_repository: HistoryRepository | None = None,
    zip_download_preflight_limits: ZipDownloadPreflightLimits = ZIP_DOWNLOAD_PREFLIGHT_LIMITS,
    monotonic: Callable[[], float] | None = None,
):
    """Store the collaborators and the zip pre-scan configuration.

    Args:
        path_guard: Path resolver used by the service.
        filesystem: Filesystem adapter used by the service.
        history_repository: Optional history store.
        zip_download_preflight_limits: Ceilings applied by the zip
            download pre-scan; defaults to the module-wide limits.
        monotonic: Clock used to measure pre-scan duration; falls back
            to ``time.monotonic`` when not supplied (injectable for tests).
    """
    # Pre-scan configuration.
    self._monotonic = monotonic if monotonic else time.monotonic
    self._zip_download_preflight_limits = zip_download_preflight_limits
    # Collaborators.
    self._history_repository = history_repository
    self._filesystem = filesystem
    self._path_guard = path_guard
|
||||
|
||||
def mkdir(self, parent_path: str, name: str) -> MkdirResponse:
|
||||
try:
|
||||
@@ -673,7 +702,6 @@ class FileOpsService:
|
||||
def _prepare_zip_download(self, resolved_targets: list) -> dict:
|
||||
archive_names: set[str] = set()
|
||||
for resolved_target in resolved_targets:
|
||||
self._validate_download_target(resolved_target)
|
||||
archive_name = resolved_target.absolute.name
|
||||
if archive_name in archive_names:
|
||||
raise AppError(
|
||||
@@ -682,6 +710,7 @@ class FileOpsService:
|
||||
status_code=400,
|
||||
)
|
||||
archive_names.add(archive_name)
|
||||
self._run_zip_download_preflight(resolved_targets)
|
||||
|
||||
if len(resolved_targets) == 1 and resolved_targets[0].absolute.is_dir():
|
||||
download_name = f"{resolved_targets[0].absolute.name}.zip"
|
||||
@@ -705,37 +734,126 @@ class FileOpsService:
|
||||
"content_type": "application/zip",
|
||||
}
|
||||
|
||||
def _validate_download_target(self, resolved_target) -> None:
|
||||
def _run_zip_download_preflight(self, resolved_targets: list) -> None:
    """Pre-scan every requested target before any archive is built.

    Each root target is validated and counted. A file root contributes its
    size; a directory root is counted as one item and then walked. The
    time budget is measured from a single start time shared by all targets.

    Args:
        resolved_targets: Objects exposing ``absolute`` (a path) and
            ``relative`` (the path reported in error details).

    Raises:
        AppError: Via the preflight error helper when a limit is exceeded,
            the time budget runs out, or a target is not acceptable.
    """
    scan_started = self._monotonic()
    totals = ZipDownloadPreflightState()
    for target in resolved_targets:
        self._ensure_zip_download_preflight_within_timeout(scan_started)
        self._validate_zip_download_root_target(target)
        if not target.absolute.is_file():
            # Directory root: count the directory itself, then its contents.
            self._increment_zip_download_item_count(
                state=totals,
                entry_relative=target.relative,
            )
            self._scan_zip_download_directory(
                state=totals,
                resolved_target=target,
                started_at=scan_started,
            )
        else:
            self._record_zip_download_file(
                state=totals,
                entry_path=target.absolute,
                entry_relative=target.relative,
            )
|
||||
|
||||
def _validate_zip_download_root_target(self, resolved_target) -> None:
|
||||
_, _, lexical_source, _ = self._path_guard.resolve_lexical_path(resolved_target.relative)
|
||||
if lexical_source.is_symlink():
|
||||
raise AppError(
|
||||
code="type_conflict",
|
||||
message="Source must not be a symlink",
|
||||
status_code=409,
|
||||
self._raise_zip_download_preflight_error(
|
||||
reason="symlink_detected",
|
||||
details={"path": resolved_target.relative},
|
||||
)
|
||||
if resolved_target.absolute.is_file():
|
||||
if resolved_target.absolute.is_file() or resolved_target.absolute.is_dir():
|
||||
return
|
||||
if resolved_target.absolute.is_dir():
|
||||
for root, dirnames, filenames in os.walk(resolved_target.absolute, followlinks=False):
|
||||
root_path = Path(root)
|
||||
for name in [*dirnames, *filenames]:
|
||||
entry = root_path / name
|
||||
if entry.is_symlink():
|
||||
raise AppError(
|
||||
code="type_conflict",
|
||||
message="Source directory must not contain symlinks",
|
||||
status_code=409,
|
||||
details={"path": resolved_target.relative},
|
||||
)
|
||||
return
|
||||
raise AppError(
|
||||
code="type_conflict",
|
||||
message="Unsupported path type for download",
|
||||
status_code=409,
|
||||
self._raise_zip_download_preflight_error(
|
||||
reason="unsupported_path_type",
|
||||
details={"path": resolved_target.relative},
|
||||
)
|
||||
|
||||
def _scan_zip_download_directory(self, state: ZipDownloadPreflightState, resolved_target, started_at: float) -> None:
|
||||
for root, dirnames, filenames in os.walk(resolved_target.absolute, followlinks=False):
|
||||
root_path = Path(root)
|
||||
dirnames.sort()
|
||||
filenames.sort()
|
||||
for name in [*dirnames, *filenames]:
|
||||
self._ensure_zip_download_preflight_within_timeout(started_at)
|
||||
entry_path = root_path / name
|
||||
relative_suffix = entry_path.relative_to(resolved_target.absolute).as_posix()
|
||||
entry_relative = self._join_relative(resolved_target.relative, relative_suffix)
|
||||
if entry_path.is_symlink():
|
||||
self._raise_zip_download_preflight_error(
|
||||
reason="symlink_detected",
|
||||
details={"path": entry_relative},
|
||||
)
|
||||
if entry_path.is_dir():
|
||||
self._increment_zip_download_item_count(state=state, entry_relative=entry_relative)
|
||||
continue
|
||||
self._record_zip_download_file(
|
||||
state=state,
|
||||
entry_path=entry_path,
|
||||
entry_relative=entry_relative,
|
||||
)
|
||||
|
||||
def _record_zip_download_file(
|
||||
self,
|
||||
*,
|
||||
state: ZipDownloadPreflightState,
|
||||
entry_path: Path,
|
||||
entry_relative: str,
|
||||
) -> None:
|
||||
self._increment_zip_download_item_count(state=state, entry_relative=entry_relative)
|
||||
file_size = int(entry_path.stat().st_size)
|
||||
if file_size > self._zip_download_preflight_limits.max_individual_file_bytes:
|
||||
self._raise_zip_download_preflight_error(
|
||||
reason="max_individual_file_size_exceeded",
|
||||
details={
|
||||
"path": entry_relative,
|
||||
"limit_bytes": str(self._zip_download_preflight_limits.max_individual_file_bytes),
|
||||
"actual_bytes": str(file_size),
|
||||
},
|
||||
)
|
||||
state.total_input_bytes += file_size
|
||||
if state.total_input_bytes > self._zip_download_preflight_limits.max_total_input_bytes:
|
||||
self._raise_zip_download_preflight_error(
|
||||
reason="max_total_input_bytes_exceeded",
|
||||
details={
|
||||
"limit_bytes": str(self._zip_download_preflight_limits.max_total_input_bytes),
|
||||
"actual_bytes": str(state.total_input_bytes),
|
||||
},
|
||||
)
|
||||
|
||||
def _increment_zip_download_item_count(self, *, state: ZipDownloadPreflightState, entry_relative: str) -> None:
|
||||
state.item_count += 1
|
||||
if state.item_count > self._zip_download_preflight_limits.max_items:
|
||||
self._raise_zip_download_preflight_error(
|
||||
reason="max_items_exceeded",
|
||||
details={
|
||||
"path": entry_relative,
|
||||
"limit": str(self._zip_download_preflight_limits.max_items),
|
||||
"actual": str(state.item_count),
|
||||
},
|
||||
)
|
||||
|
||||
def _ensure_zip_download_preflight_within_timeout(self, started_at: float) -> None:
|
||||
elapsed = self._monotonic() - started_at
|
||||
if elapsed > self._zip_download_preflight_limits.scan_timeout_seconds:
|
||||
self._raise_zip_download_preflight_error(
|
||||
reason="preflight_timeout",
|
||||
details={
|
||||
"timeout_seconds": str(self._zip_download_preflight_limits.scan_timeout_seconds),
|
||||
},
|
||||
)
|
||||
|
||||
@staticmethod
def _raise_zip_download_preflight_error(reason: str, details: dict[str, str]) -> None:
    """Raise the error used for every zip pre-scan failure.

    This helper always raises and never returns normally.

    Args:
        reason: Short identifier of the failed check; stored under the
            ``reason`` key of the error details.
        details: Extra key/value context merged into the error details. A
            ``reason`` key in this mapping overrides the argument.

    Raises:
        AppError: Always, with code ``download_preflight_failed`` and
            status code 409.
    """
    payload = {"reason": reason}
    payload.update(details)
    raise AppError(
        code="download_preflight_failed",
        message="Zip download preflight failed",
        status_code=409,
        details=payload,
    )
|
||||
|
||||
def _write_download_target_to_zip(self, archive: zipfile.ZipFile, resolved_target) -> None:
|
||||
root_name = resolved_target.absolute.name
|
||||
if resolved_target.absolute.is_file():
|
||||
|
||||
Reference in New Issue
Block a user