feat: B2 uit voor veilige archive-downloads

This commit is contained in:
kodi
2026-03-14 14:24:52 +01:00
parent 592b10acc2
commit d463b3977d
24 changed files with 754 additions and 195 deletions
@@ -0,0 +1,266 @@
from __future__ import annotations

import os
import threading
import uuid
import zipfile
from datetime import datetime, timedelta, timezone
from pathlib import Path
from urllib.parse import quote

from backend.app.api.errors import AppError
from backend.app.api.schemas import TaskCreateResponse
from backend.app.db.history_repository import HistoryRepository
from backend.app.db.task_repository import TaskRepository
from backend.app.security.path_guard import PathGuard
from backend.app.services.file_ops_service import FileOpsService
from backend.app.tasks_runner import TaskRunner
ARCHIVE_DOWNLOAD_TTL_SECONDS = 30 * 60  # default artifact lifetime: 30 minutes


class ArchiveDownloadTaskService:
    """Prepare zip archives for downloads as background tasks and serve them.

    A prepare task writes ``<task_id>.partial.zip`` inside ``artifact_root``,
    renames it to ``<task_id>.zip`` once complete and registers it as an
    artifact with an expiry timestamp. Expired, missing or unreferenced files
    are removed by :meth:`sweep_artifacts`, which runs on construction and at
    the start of every public operation.
    """

    def __init__(
        self,
        path_guard: PathGuard,
        repository: TaskRepository,
        runner: TaskRunner,
        history_repository: HistoryRepository | None,
        file_ops_service: FileOpsService,
        artifact_root: Path,
        artifact_ttl_seconds: int = ARCHIVE_DOWNLOAD_TTL_SECONDS,
    ):
        """Store the collaborators, create ``artifact_root`` and sweep it once.

        Args:
            path_guard: Resolves user-supplied paths to existing targets.
            repository: Task and artifact persistence.
            runner: Receives the archive job via ``enqueue_archive_prepare``.
            history_repository: Optional history log; ``None`` disables it.
            file_ops_service: Provides the zip/download helpers used here.
            artifact_root: Directory that holds the prepared archives.
            artifact_ttl_seconds: Lifetime of a prepared archive in seconds.
        """
        self._path_guard = path_guard
        self._repository = repository
        self._runner = runner
        self._history_repository = history_repository
        self._file_ops_service = file_ops_service
        self._artifact_root = artifact_root
        self._artifact_ttl_seconds = artifact_ttl_seconds
        # Guards _active_task_ids and serialises sweeps against task start-up,
        # so a sweep never removes the files of a task that is still running.
        self._sweep_lock = threading.Lock()
        self._active_task_ids: set[str] = set()
        self._artifact_root.mkdir(parents=True, exist_ok=True)
        self.sweep_artifacts()

    def create_archive_prepare_task(self, paths: list[str]) -> TaskCreateResponse:
        """Create a ``download`` task and enqueue the archive build.

        Args:
            paths: Paths to pack; each is resolved through the path guard.

        Returns:
            The id and status of the newly created task.

        Raises:
            AppError: ``invalid_request`` (400) when ``paths`` is empty or the
                selection resolves to the ``single_file`` download mode.
        """
        if not paths:
            raise AppError(
                code="invalid_request",
                message="At least one path is required",
                status_code=400,
            )
        self.sweep_artifacts()
        resolved_targets = [self._path_guard.resolve_existing_path(path) for path in paths]
        mode = self._file_ops_service._download_mode_from_resolved_targets(resolved_targets)
        if mode == "single_file":
            raise AppError(
                code="invalid_request",
                message="Single file downloads must use direct download",
                status_code=400,
            )
        # Built once; used both for the summary and by the background job.
        target_paths = [target.relative for target in resolved_targets]
        summary = self._file_ops_service._summarize_download_targets(target_paths)
        archive_name = self._file_ops_service._download_name_for_targets(resolved_targets)
        task_id = str(uuid.uuid4())
        task = self._repository.create_task(
            operation="download",
            source=summary,
            destination=archive_name,
            task_id=task_id,
            status="requested",
        )
        self._record_history(
            entry_id=task_id,
            operation="download",
            status="requested",
            source=mode,
            destination=archive_name,
            path=summary,
        )
        self._runner.enqueue_archive_prepare(
            lambda: self._run_archive_prepare_task(
                task_id=task_id,
                target_paths=target_paths,
                archive_name=archive_name,
                history_mode=mode,
                history_path=summary,
            )
        )
        return TaskCreateResponse(task_id=task["id"], status=task["status"])

    def prepare_ready_archive_download(self, task_id: str) -> dict:
        """Return the streaming payload for a prepared archive.

        Returns:
            A dict with ``content`` (result of the filesystem ``stream_file``
            call), ``headers`` (``Content-Disposition`` and ``Content-Length``)
            and ``content_type``.

        Raises:
            AppError: ``task_not_found`` (404), ``invalid_request`` (400) for
                a non-download task, ``download_not_ready`` (409),
                ``archive_not_found`` (404) or ``archive_expired`` (410).
        """
        self.sweep_artifacts()
        task = self._repository.get_task(task_id)
        if not task:
            raise AppError(
                code="task_not_found",
                message="Task was not found",
                status_code=404,
                details={"task_id": task_id},
            )
        if task["operation"] != "download":
            raise AppError(
                code="invalid_request",
                message="Task is not an archive download",
                status_code=400,
                details={"task_id": task_id},
            )
        if task["status"] != "ready":
            raise AppError(
                code="download_not_ready",
                message="Archive download is not ready",
                status_code=409,
                details={"task_id": task_id, "status": task["status"]},
            )
        artifact = self._repository.get_artifact(task_id)
        if not artifact:
            raise AppError(
                code="archive_not_found",
                message="Prepared archive was not found",
                status_code=404,
                details={"task_id": task_id},
            )
        if self._is_expired(artifact["expires_at"]):
            self._delete_artifact_record_and_file(task_id, artifact["file_path"])
            raise AppError(
                code="archive_expired",
                message="Prepared archive expired",
                status_code=410,
                details={"task_id": task_id},
            )
        artifact_path = Path(artifact["file_path"])
        # A single stat() replaces exists()+stat(): the file may disappear
        # between the two calls (e.g. removed by a sweep elsewhere).
        try:
            archive_size = artifact_path.stat().st_size
        except (FileNotFoundError, NotADirectoryError):
            archive_size = None
        if archive_size is None:
            self._repository.delete_artifact(task_id)
            raise AppError(
                code="archive_not_found",
                message="Prepared archive was not found",
                status_code=404,
                details={"task_id": task_id},
            )
        return {
            "content": self._file_ops_service._filesystem.stream_file(artifact_path),
            "headers": {
                "Content-Disposition": self._content_disposition(artifact["file_name"]),
                "Content-Length": str(archive_size),
            },
            "content_type": "application/zip",
        }

    def sweep_artifacts(self) -> None:
        """Drop expired or missing artifacts and delete unreferenced files.

        Files that belong to a prepare task currently running in this process
        are left alone; without that, a sweep could unlink the partial zip
        while it is being written, or the final zip just before its artifact
        record is stored.
        """
        self._artifact_root.mkdir(parents=True, exist_ok=True)
        # The lock is held for the whole sweep: a task either registered
        # before we started (its files are protected below) or cannot create
        # files until we are done.
        with self._sweep_lock:
            keep: set[Path] = set()
            for active_id in self._active_task_ids:
                keep.update(self._artifact_paths(active_id))
            for artifact in self._repository.list_artifacts():
                artifact_path = Path(artifact["file_path"])
                keep.add(artifact_path)
                if self._is_expired(artifact["expires_at"]) or not artifact_path.exists():
                    self._delete_artifact_record_and_file(artifact["task_id"], artifact["file_path"])
            for candidate in self._artifact_root.iterdir():
                if candidate.is_file() and candidate not in keep:
                    candidate.unlink(missing_ok=True)

    def _run_archive_prepare_task(
        self,
        *,
        task_id: str,
        target_paths: list[str],
        archive_name: str,
        history_mode: str,
        history_path: str,
    ) -> None:
        """Build the archive for ``task_id`` and record the outcome.

        ``AppError`` and ``OSError`` are converted into a failed task. Any
        other exception also marks the task failed (code ``internal_error``)
        and is then re-raised, so the caller still sees it.

        Args:
            task_id: Task whose archive is built.
            target_paths: Relative paths to resolve and pack.
            archive_name: Download file name stored with the artifact.
            history_mode: Not used by this method; kept for the call
                signature.
            history_path: Summary reported as ``failed_item`` on failure.
        """
        partial_path, final_path = self._artifact_paths(task_id)
        total_items = len(target_paths)
        # Register before any file is created so sweeps skip our files.
        with self._sweep_lock:
            self._active_task_ids.add(task_id)
        try:
            self._repository.mark_preparing(
                task_id=task_id,
                done_items=0,
                total_items=total_items,
                current_item=target_paths[0] if target_paths else None,
            )
            resolved_targets = [self._path_guard.resolve_existing_path(path) for path in target_paths]
            self._file_ops_service._validate_zip_download_archive_names(resolved_targets)
            self._file_ops_service._run_zip_download_preflight(resolved_targets)
            with zipfile.ZipFile(partial_path, "w", compression=zipfile.ZIP_DEFLATED) as archive:
                for resolved_target in resolved_targets:
                    self._file_ops_service._write_download_target_to_zip(archive, resolved_target)
            # Rename only after the zip is closed, so the final name never
            # points at a half-written archive.
            os.replace(partial_path, final_path)
            self._repository.upsert_artifact(
                task_id=task_id,
                file_path=str(final_path),
                file_name=archive_name,
                expires_at=self._expires_at_iso(),
            )
            self._repository.mark_ready(
                task_id=task_id,
                done_items=total_items,
                total_items=total_items,
            )
            self._update_history_ready(task_id)
        except AppError as exc:
            self._fail_archive_prepare(
                task_id=task_id,
                error_code=exc.code,
                error_message=exc.message,
                failed_item=history_path,
                total_items=total_items,
            )
        except OSError as exc:
            self._fail_archive_prepare(
                task_id=task_id,
                error_code="io_error",
                error_message=str(exc),
                failed_item=history_path,
                total_items=total_items,
            )
        except Exception as exc:
            # Anything else would otherwise leave the task in a non-terminal
            # state with orphaned files; record the failure, then propagate.
            self._fail_archive_prepare(
                task_id=task_id,
                error_code="internal_error",
                error_message=str(exc) or type(exc).__name__,
                failed_item=history_path,
                total_items=total_items,
            )
            raise
        finally:
            with self._sweep_lock:
                self._active_task_ids.discard(task_id)

    def _fail_archive_prepare(
        self,
        *,
        task_id: str,
        error_code: str,
        error_message: str,
        failed_item: str,
        total_items: int,
    ) -> None:
        """Remove leftovers of ``task_id`` and mark task and history failed."""
        for leftover in self._artifact_paths(task_id):
            try:
                self._delete_artifact_record_and_file(task_id, str(leftover))
            except OSError:
                # Best effort: recording the failure matters more than the
                # cleanup, and a later sweep retries unreferenced files.
                continue
        self._repository.mark_failed(
            task_id=task_id,
            error_code=error_code,
            error_message=error_message,
            failed_item=failed_item,
            done_bytes=None,
            total_bytes=None,
            done_items=0,
            total_items=total_items,
        )
        self._update_history_failed(task_id, error_code, error_message)

    def _artifact_paths(self, task_id: str) -> tuple[Path, Path]:
        """Return the ``(partial, final)`` archive paths used for ``task_id``."""
        return (
            self._artifact_root / f"{task_id}.partial.zip",
            self._artifact_root / f"{task_id}.zip",
        )

    def _delete_artifact_record_and_file(self, task_id: str, file_path: str) -> None:
        """Delete the artifact record, then the file; a missing file is fine."""
        self._repository.delete_artifact(task_id)
        Path(file_path).unlink(missing_ok=True)

    def _update_history_ready(self, task_id: str) -> None:
        """Set the history entry to ``ready`` when history is enabled."""
        if self._history_repository:
            self._history_repository.update_entry(entry_id=task_id, status="ready")

    def _update_history_failed(self, task_id: str, error_code: str, error_message: str) -> None:
        """Set the history entry to ``failed`` when history is enabled."""
        if self._history_repository:
            self._history_repository.update_entry(
                entry_id=task_id,
                status="failed",
                error_code=error_code,
                error_message=error_message,
            )

    def _record_history(self, **kwargs) -> None:
        """Create a history entry from ``kwargs`` when history is enabled."""
        if self._history_repository:
            self._history_repository.create_entry(**kwargs)

    def _expires_at_iso(self) -> str:
        """Return now + TTL as a second-precision UTC timestamp ending in ``Z``."""
        expires_at = datetime.now(timezone.utc) + timedelta(seconds=self._artifact_ttl_seconds)
        return expires_at.replace(microsecond=0).isoformat().replace("+00:00", "Z")

    @staticmethod
    def _is_expired(expires_at: str) -> bool:
        """Return True when the ISO timestamp ``expires_at`` is now or past.

        A trailing ``Z`` is accepted. A timestamp without an offset is
        interpreted as UTC (this class only writes UTC values) instead of
        failing on an aware/naive comparison.
        """
        deadline = datetime.fromisoformat(expires_at.replace("Z", "+00:00"))
        if deadline.tzinfo is None:
            deadline = deadline.replace(tzinfo=timezone.utc)
        return datetime.now(timezone.utc) >= deadline

    @staticmethod
    def _content_disposition(file_name: str) -> str:
        """Build an ``attachment`` Content-Disposition value for ``file_name``.

        Non-printable characters (including CR/LF) are dropped, backslashes
        and double quotes are escaped, and names containing non-ASCII
        characters get an additional percent-encoded ``filename*`` parameter.
        Plain ASCII names produce ``attachment; filename="<name>"``.
        """
        printable = "".join(ch for ch in file_name if ch.isprintable())
        ascii_name = printable.encode("ascii", "replace").decode("ascii")
        escaped = ascii_name.replace("\\", "\\\\").replace('"', '\\"')
        header = f'attachment; filename="{escaped}"'
        if ascii_name != printable:
            header += f"; filename*=UTF-8''{quote(printable, safe='')}"
        return header
+22 -14
View File
@@ -411,6 +411,14 @@ class FileOpsService:
history_mode = self._download_mode_from_resolved_targets(resolved_targets)
history_path = self._summarize_download_targets([target.relative for target in resolved_targets])
history_download_name = self._download_name_for_targets(resolved_targets)
if history_mode != "single_file":
raise AppError(
code="invalid_request",
message="Archive downloads must be prepared first",
status_code=400,
)
history_entry_id = self._record_download_status(
status="requested",
mode=history_mode,
@@ -418,10 +426,7 @@ class FileOpsService:
download_name=history_download_name,
)
if len(resolved_targets) == 1 and resolved_targets[0].absolute.is_file():
prepared = self._prepare_single_file_download(resolved_targets[0])
else:
prepared = self._prepare_zip_download(resolved_targets, history_download_name)
prepared = self._prepare_single_file_download(resolved_targets[0])
self._record_download_status(
status="ready",
@@ -757,16 +762,7 @@ class FileOpsService:
}
def _prepare_zip_download(self, resolved_targets: list, download_name: str) -> dict:
archive_names: set[str] = set()
for resolved_target in resolved_targets:
archive_name = resolved_target.absolute.name
if archive_name in archive_names:
raise AppError(
code="invalid_request",
message="Selected items must have distinct top-level names",
status_code=400,
)
archive_names.add(archive_name)
self._validate_zip_download_archive_names(resolved_targets)
self._run_zip_download_preflight(resolved_targets)
buffer = BytesIO()
@@ -786,6 +782,18 @@ class FileOpsService:
"content_type": "application/zip",
}
def _validate_zip_download_archive_names(self, resolved_targets: list) -> None:
archive_names: set[str] = set()
for resolved_target in resolved_targets:
archive_name = resolved_target.absolute.name
if archive_name in archive_names:
raise AppError(
code="invalid_request",
message="Selected items must have distinct top-level names",
status_code=400,
)
archive_names.add(archive_name)
def _download_name_for_targets(self, resolved_targets: list) -> str:
if len(resolved_targets) == 1 and resolved_targets[0].absolute.is_file():
return resolved_targets[0].absolute.name