feat: B2 uit voor veilige archive-downloads
This commit is contained in:
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,266 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import uuid
|
||||
import zipfile
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from pathlib import Path
|
||||
|
||||
from backend.app.api.errors import AppError
|
||||
from backend.app.api.schemas import TaskCreateResponse
|
||||
from backend.app.db.history_repository import HistoryRepository
|
||||
from backend.app.db.task_repository import TaskRepository
|
||||
from backend.app.security.path_guard import PathGuard
|
||||
from backend.app.services.file_ops_service import FileOpsService
|
||||
from backend.app.tasks_runner import TaskRunner
|
||||
|
||||
# Default lifetime of a prepared archive: 30 minutes.
ARCHIVE_DOWNLOAD_TTL_SECONDS = 30 * 60


class ArchiveDownloadTaskService:
    """Prepare multi-item ZIP downloads as background tasks and serve them.

    A request creates a task record, the ZIP is written under
    ``artifact_root`` by a callback handed to the task runner, and the
    finished file is registered as an artifact with an expiry timestamp.
    Expired or unreferenced files are removed by :meth:`sweep_artifacts`.
    """

    def __init__(
        self,
        path_guard: PathGuard,
        repository: TaskRepository,
        runner: TaskRunner,
        history_repository: HistoryRepository | None,
        file_ops_service: FileOpsService,
        artifact_root: Path,
        artifact_ttl_seconds: int = ARCHIVE_DOWNLOAD_TTL_SECONDS,
    ):
        """Store collaborators, ensure the artifact directory exists and sweep it.

        Args:
            path_guard: Resolves requested paths.
            repository: Task and artifact persistence.
            runner: Receives the archive-building callback.
            history_repository: Optional history sink; ``None`` disables history.
            file_ops_service: Provides the shared download/ZIP helpers.
            artifact_root: Directory in which archives are written.
            artifact_ttl_seconds: Lifetime of a prepared archive in seconds.
        """
        self._path_guard = path_guard
        self._repository = repository
        self._runner = runner
        self._history_repository = history_repository
        self._file_ops_service = file_ops_service
        self._artifact_root = artifact_root
        self._artifact_ttl_seconds = artifact_ttl_seconds
        # Ids of tasks whose archive is being written right now. Their files
        # have no artifact record yet, so the sweep must not treat them as
        # orphans. NOTE(review): assumes the runner executes the callback in
        # this process (it is handed a closure) -- confirm.
        self._active_task_ids: set[str] = set()
        self._artifact_root.mkdir(parents=True, exist_ok=True)
        self.sweep_artifacts()

    def create_archive_prepare_task(self, paths: list[str]) -> TaskCreateResponse:
        """Create a "download" task for ``paths`` and enqueue the archive build.

        Raises:
            AppError: ``invalid_request`` (400) when ``paths`` is empty or the
                selection resolves to a single-file download.
        """
        if not paths:
            raise AppError(
                code="invalid_request",
                message="At least one path is required",
                status_code=400,
            )

        self.sweep_artifacts()
        resolved_targets = [self._path_guard.resolve_existing_path(path) for path in paths]
        mode = self._file_ops_service._download_mode_from_resolved_targets(resolved_targets)
        if mode == "single_file":
            raise AppError(
                code="invalid_request",
                message="Single file downloads must use direct download",
                status_code=400,
            )

        # Computed once; used for the summary and handed to the background job.
        target_paths = [target.relative for target in resolved_targets]
        summary = self._file_ops_service._summarize_download_targets(target_paths)
        archive_name = self._file_ops_service._download_name_for_targets(resolved_targets)
        task_id = str(uuid.uuid4())
        task = self._repository.create_task(
            operation="download",
            source=summary,
            destination=archive_name,
            task_id=task_id,
            status="requested",
        )
        self._record_history(
            entry_id=task_id,
            operation="download",
            status="requested",
            source=mode,
            destination=archive_name,
            path=summary,
        )
        self._runner.enqueue_archive_prepare(
            lambda: self._run_archive_prepare_task(
                task_id=task_id,
                target_paths=target_paths,
                archive_name=archive_name,
                history_mode=mode,
                history_path=summary,
            )
        )
        return TaskCreateResponse(task_id=task["id"], status=task["status"])

    def prepare_ready_archive_download(self, task_id: str) -> dict:
        """Return the streaming payload for a finished archive task.

        Returns:
            A dict with ``content`` (file stream), ``headers``
            (``Content-Disposition`` and ``Content-Length``) and
            ``content_type`` (``application/zip``).

        Raises:
            AppError: ``task_not_found`` (404), ``invalid_request`` (400) for a
                non-download task, ``download_not_ready`` (409),
                ``archive_not_found`` (404) or ``archive_expired`` (410).
        """
        self.sweep_artifacts()
        task = self._repository.get_task(task_id)
        if not task:
            raise AppError(
                code="task_not_found",
                message="Task was not found",
                status_code=404,
                details={"task_id": task_id},
            )
        if task["operation"] != "download":
            raise AppError(
                code="invalid_request",
                message="Task is not an archive download",
                status_code=400,
                details={"task_id": task_id},
            )
        if task["status"] != "ready":
            raise AppError(
                code="download_not_ready",
                message="Archive download is not ready",
                status_code=409,
                details={"task_id": task_id, "status": task["status"]},
            )

        artifact = self._repository.get_artifact(task_id)
        if not artifact:
            raise AppError(
                code="archive_not_found",
                message="Prepared archive was not found",
                status_code=404,
                details={"task_id": task_id},
            )
        if self._is_expired(artifact["expires_at"]):
            self._delete_artifact_record_and_file(task_id, artifact["file_path"])
            raise AppError(
                code="archive_expired",
                message="Prepared archive expired",
                status_code=410,
                details={"task_id": task_id},
            )

        artifact_path = Path(artifact["file_path"])
        if not artifact_path.exists():
            # The record outlived its file; drop the stale record.
            self._repository.delete_artifact(task_id)
            raise AppError(
                code="archive_not_found",
                message="Prepared archive was not found",
                status_code=404,
                details={"task_id": task_id},
            )

        return {
            "content": self._file_ops_service._filesystem.stream_file(artifact_path),
            "headers": {
                "Content-Disposition": self._content_disposition(artifact["file_name"]),
                "Content-Length": str(int(artifact_path.stat().st_size)),
            },
            "content_type": "application/zip",
        }

    def sweep_artifacts(self) -> None:
        """Delete expired or missing artifacts and unreferenced files.

        Files that belong to a task that is currently building its archive are
        left alone even though no artifact record points at them yet.
        """
        self._artifact_root.mkdir(parents=True, exist_ok=True)

        # Collect orphan candidates BEFORE reading the artifact records. A file
        # seen here either belongs to a task that is still active (skipped), or
        # to one that already finished -- in which case its record was written
        # before the listing below and protects the file.
        orphan_candidates = [
            candidate
            for candidate in self._artifact_root.iterdir()
            if candidate.is_file()
            and candidate.name.partition(".")[0] not in self._active_task_ids
        ]

        referenced_paths: set[Path] = set()
        for artifact in self._repository.list_artifacts():
            artifact_path = Path(artifact["file_path"])
            referenced_paths.add(artifact_path)
            if self._is_expired(artifact["expires_at"]) or not artifact_path.exists():
                self._delete_artifact_record_and_file(artifact["task_id"], artifact["file_path"])

        for candidate in orphan_candidates:
            if candidate in referenced_paths:
                continue
            try:
                candidate.unlink()
            except FileNotFoundError:
                # Already removed (e.g. a partial file renamed in the meantime).
                pass

    def _run_archive_prepare_task(
        self,
        *,
        task_id: str,
        target_paths: list[str],
        archive_name: str,
        history_mode: str,
        history_path: str,
    ) -> None:
        """Build the ZIP for ``task_id`` and publish it as a ready artifact.

        The archive is written to ``<task_id>.partial.zip`` and renamed to
        ``<task_id>.zip`` once complete. ``AppError`` and ``OSError`` mark the
        task failed; any other exception also marks it failed (so it does not
        stay in "preparing") and is then re-raised to the runner.
        """
        partial_path = self._artifact_root / f"{task_id}.partial.zip"
        final_path = self._artifact_root / f"{task_id}.zip"
        total_items = len(target_paths)

        # Registered before any file is created so a concurrent sweep never
        # sees an unprotected in-progress file.
        self._active_task_ids.add(task_id)
        try:
            self._repository.mark_preparing(
                task_id=task_id,
                done_items=0,
                total_items=total_items,
                current_item=target_paths[0] if target_paths else None,
            )
            resolved_targets = [self._path_guard.resolve_existing_path(path) for path in target_paths]
            self._file_ops_service._validate_zip_download_archive_names(resolved_targets)
            self._file_ops_service._run_zip_download_preflight(resolved_targets)

            with zipfile.ZipFile(partial_path, "w", compression=zipfile.ZIP_DEFLATED) as archive:
                for resolved_target in resolved_targets:
                    self._file_ops_service._write_download_target_to_zip(archive, resolved_target)

            os.replace(partial_path, final_path)
            self._repository.upsert_artifact(
                task_id=task_id,
                file_path=str(final_path),
                file_name=archive_name,
                expires_at=self._expires_at_iso(),
            )
            self._repository.mark_ready(
                task_id=task_id,
                done_items=total_items,
                total_items=total_items,
            )
            self._update_history_ready(task_id)
        except AppError as exc:
            self._fail_archive_prepare_task(
                task_id=task_id,
                error_code=exc.code,
                error_message=exc.message,
                failed_item=history_path,
                total_items=total_items,
                partial_path=partial_path,
                final_path=final_path,
            )
        except OSError as exc:
            self._fail_archive_prepare_task(
                task_id=task_id,
                error_code="io_error",
                error_message=str(exc),
                failed_item=history_path,
                total_items=total_items,
                partial_path=partial_path,
                final_path=final_path,
            )
        except Exception as exc:
            # e.g. ValueError/RuntimeError raised while writing the ZIP. Record
            # the failure, then let the runner see the original exception.
            self._fail_archive_prepare_task(
                task_id=task_id,
                error_code="internal_error",
                error_message=str(exc) or type(exc).__name__,
                failed_item=history_path,
                total_items=total_items,
                partial_path=partial_path,
                final_path=final_path,
            )
            raise
        finally:
            self._active_task_ids.discard(task_id)

    def _fail_archive_prepare_task(
        self,
        *,
        task_id: str,
        error_code: str,
        error_message: str,
        failed_item: str,
        total_items: int,
        partial_path: Path,
        final_path: Path,
    ) -> None:
        """Remove leftover archive files and mark the task and history failed."""
        self._delete_artifact_record_and_file(task_id, str(partial_path))
        self._delete_artifact_record_and_file(task_id, str(final_path))
        self._repository.mark_failed(
            task_id=task_id,
            error_code=error_code,
            error_message=error_message,
            failed_item=failed_item,
            done_bytes=None,
            total_bytes=None,
            done_items=0,
            total_items=total_items,
        )
        self._update_history_failed(task_id, error_code, error_message)

    def _delete_artifact_record_and_file(self, task_id: str, file_path: str) -> None:
        """Delete the artifact record for ``task_id`` and the file, if present."""
        self._repository.delete_artifact(task_id)
        path = Path(file_path)
        try:
            path.unlink()
        except FileNotFoundError:
            pass

    def _update_history_ready(self, task_id: str) -> None:
        """Set the history entry to "ready" when a history repository is configured."""
        if self._history_repository:
            self._history_repository.update_entry(entry_id=task_id, status="ready")

    def _update_history_failed(self, task_id: str, error_code: str, error_message: str) -> None:
        """Set the history entry to "failed" when a history repository is configured."""
        if self._history_repository:
            self._history_repository.update_entry(
                entry_id=task_id,
                status="failed",
                error_code=error_code,
                error_message=error_message,
            )

    def _record_history(self, **kwargs) -> None:
        """Create a history entry when a history repository is configured."""
        if self._history_repository:
            self._history_repository.create_entry(**kwargs)

    def _expires_at_iso(self) -> str:
        """Return now + TTL as a second-precision UTC ISO-8601 string ending in "Z"."""
        expires_at = datetime.now(timezone.utc) + timedelta(seconds=self._artifact_ttl_seconds)
        return expires_at.replace(microsecond=0).isoformat().replace("+00:00", "Z")

    @staticmethod
    def _content_disposition(file_name: str) -> str:
        """Build an ``attachment`` Content-Disposition value for ``file_name``.

        Printable-ASCII names without quotes or backslashes produce exactly
        ``attachment; filename="<name>"``. Any other name gets a sanitized
        ASCII fallback plus a percent-encoded ``filename*`` parameter, so the
        header can neither be broken out of nor carry control characters.
        """
        from urllib.parse import quote

        def _is_safe(char: str) -> bool:
            return " " <= char <= "~" and char not in '"\\'

        if all(_is_safe(char) for char in file_name):
            return f'attachment; filename="{file_name}"'

        fallback = "".join(char if _is_safe(char) else "_" for char in file_name)
        encoded = quote(file_name, safe="")
        return f"attachment; filename=\"{fallback}\"; filename*=UTF-8''{encoded}"

    @staticmethod
    def _is_expired(expires_at: str) -> bool:
        """Return True when the ISO-8601 timestamp ``expires_at`` is not in the future.

        A trailing "Z" is accepted; a timestamp without offset is read as UTC.
        """
        deadline = datetime.fromisoformat(expires_at.replace("Z", "+00:00"))
        if deadline.tzinfo is None:
            # Comparing naive with aware datetimes raises TypeError.
            deadline = deadline.replace(tzinfo=timezone.utc)
        return datetime.now(timezone.utc) >= deadline
|
||||
@@ -411,6 +411,14 @@ class FileOpsService:
|
||||
history_mode = self._download_mode_from_resolved_targets(resolved_targets)
|
||||
history_path = self._summarize_download_targets([target.relative for target in resolved_targets])
|
||||
history_download_name = self._download_name_for_targets(resolved_targets)
|
||||
|
||||
if history_mode != "single_file":
|
||||
raise AppError(
|
||||
code="invalid_request",
|
||||
message="Archive downloads must be prepared first",
|
||||
status_code=400,
|
||||
)
|
||||
|
||||
history_entry_id = self._record_download_status(
|
||||
status="requested",
|
||||
mode=history_mode,
|
||||
@@ -418,10 +426,7 @@ class FileOpsService:
|
||||
download_name=history_download_name,
|
||||
)
|
||||
|
||||
if len(resolved_targets) == 1 and resolved_targets[0].absolute.is_file():
|
||||
prepared = self._prepare_single_file_download(resolved_targets[0])
|
||||
else:
|
||||
prepared = self._prepare_zip_download(resolved_targets, history_download_name)
|
||||
prepared = self._prepare_single_file_download(resolved_targets[0])
|
||||
|
||||
self._record_download_status(
|
||||
status="ready",
|
||||
@@ -757,16 +762,7 @@ class FileOpsService:
|
||||
}
|
||||
|
||||
def _prepare_zip_download(self, resolved_targets: list, download_name: str) -> dict:
|
||||
archive_names: set[str] = set()
|
||||
for resolved_target in resolved_targets:
|
||||
archive_name = resolved_target.absolute.name
|
||||
if archive_name in archive_names:
|
||||
raise AppError(
|
||||
code="invalid_request",
|
||||
message="Selected items must have distinct top-level names",
|
||||
status_code=400,
|
||||
)
|
||||
archive_names.add(archive_name)
|
||||
self._validate_zip_download_archive_names(resolved_targets)
|
||||
self._run_zip_download_preflight(resolved_targets)
|
||||
|
||||
buffer = BytesIO()
|
||||
@@ -786,6 +782,18 @@ class FileOpsService:
|
||||
"content_type": "application/zip",
|
||||
}
|
||||
|
||||
def _validate_zip_download_archive_names(self, resolved_targets: list) -> None:
|
||||
archive_names: set[str] = set()
|
||||
for resolved_target in resolved_targets:
|
||||
archive_name = resolved_target.absolute.name
|
||||
if archive_name in archive_names:
|
||||
raise AppError(
|
||||
code="invalid_request",
|
||||
message="Selected items must have distinct top-level names",
|
||||
status_code=400,
|
||||
)
|
||||
archive_names.add(archive_name)
|
||||
|
||||
def _download_name_for_targets(self, resolved_targets: list) -> str:
|
||||
if len(resolved_targets) == 1 and resolved_targets[0].absolute.is_file():
|
||||
return resolved_targets[0].absolute.name
|
||||
|
||||
Reference in New Issue
Block a user