from __future__ import annotations

import os
import threading
import uuid
import zipfile
from datetime import datetime, timedelta, timezone
from pathlib import Path

from backend.app.api.errors import AppError
from backend.app.api.schemas import TaskCreateResponse
from backend.app.db.history_repository import HistoryRepository
from backend.app.db.task_repository import TaskRepository
from backend.app.security.path_guard import PathGuard
from backend.app.services.file_ops_service import FileOpsService
from backend.app.tasks_runner import TaskRunner

# Default lifetime of a prepared archive on disk: 30 minutes.
ARCHIVE_DOWNLOAD_TTL_SECONDS = 30 * 60


class ArchivePrepareCancelled(Exception):
    """Raised inside a prepare job when its task row reads ``cancelled``."""


class ArchiveDownloadTaskService:
    """Prepare ZIP archives as tracked tasks and serve the finished files.

    A task is created with status ``requested``, marked preparing by the
    job that builds the archive, and ends as ``ready``, ``failed`` or
    ``cancelled``. Finished archives live under ``artifact_root`` as
    ``<task_id>.zip`` (``<task_id>.partial.zip`` while being written) and
    are tracked by an artifact row carrying an expiry timestamp.
    """

    def __init__(
        self,
        path_guard: PathGuard,
        repository: TaskRepository,
        runner: TaskRunner,
        history_repository: HistoryRepository | None,
        file_ops_service: FileOpsService,
        artifact_root: Path,
        artifact_ttl_seconds: int = ARCHIVE_DOWNLOAD_TTL_SECONDS,
    ):
        """Store collaborators, ensure the artifact directory exists, sweep it.

        Args:
            path_guard: Resolves requested paths to guarded targets.
            repository: Task and artifact persistence.
            runner: Executes the enqueued prepare job.
            history_repository: Optional history sink; ``None`` disables
                every history write.
            file_ops_service: Provides the ZIP naming/validation/writing
                helpers and the file streamer.
            artifact_root: Directory holding partial and finished archives.
            artifact_ttl_seconds: Lifetime of a finished archive.
        """
        self._path_guard = path_guard
        self._repository = repository
        self._runner = runner
        self._history_repository = history_repository
        self._file_ops_service = file_ops_service
        self._artifact_root = artifact_root
        self._artifact_ttl_seconds = artifact_ttl_seconds
        # Ids of prepare jobs currently executing in this process. The sweep
        # consults this so it never removes a file a live job is still writing.
        self._active_lock = threading.Lock()
        self._active_task_ids: set[str] = set()
        self._artifact_root.mkdir(parents=True, exist_ok=True)
        self.sweep_artifacts()

    def create_archive_prepare_task(self, paths: list[str]) -> TaskCreateResponse:
        """Register a download task and enqueue the job that builds its ZIP.

        Args:
            paths: Paths to include in the archive; must be non-empty.

        Returns:
            The id and status of the created task.

        Raises:
            AppError: ``invalid_request`` (400) when ``paths`` is empty or
                the selection resolves to download mode ``single_file``.
        """
        if not paths:
            raise AppError(
                code="invalid_request",
                message="At least one path is required",
                status_code=400,
            )
        self.sweep_artifacts()

        resolved_targets = [self._path_guard.resolve_existing_path(path) for path in paths]
        mode = self._file_ops_service._download_mode_from_resolved_targets(resolved_targets)
        if mode == "single_file":
            raise AppError(
                code="invalid_request",
                message="Single file downloads must use direct download",
                status_code=400,
            )

        target_paths = [target.relative for target in resolved_targets]
        summary = self._file_ops_service._summarize_download_targets(target_paths)
        archive_name = self._file_ops_service._download_name_for_targets(resolved_targets)
        task_id = str(uuid.uuid4())

        task = self._repository.create_task(
            operation="download",
            source=summary,
            destination=archive_name,
            task_id=task_id,
            status="requested",
        )
        self._record_history(
            entry_id=task_id,
            operation="download",
            status="requested",
            source=mode,
            destination=archive_name,
            path=summary,
        )
        self._runner.enqueue_archive_prepare(
            lambda: self._run_archive_prepare_task(
                task_id=task_id,
                target_paths=target_paths,
                archive_name=archive_name,
                history_mode=mode,
                history_path=summary,
            )
        )
        return TaskCreateResponse(task_id=task["id"], status=task["status"])

    def prepare_ready_archive_download(self, task_id: str) -> dict:
        """Build the response payload for downloading a finished archive.

        Returns:
            A dict with ``content`` (the streamed file), ``headers``
            (``Content-Disposition`` and ``Content-Length``) and
            ``content_type`` (``application/zip``).

        Raises:
            AppError: ``task_not_found`` (404), ``invalid_request`` (400),
                ``download_cancelled`` (409), ``download_not_ready`` (409),
                ``archive_not_found`` (404) or ``archive_expired`` (410).
        """
        self.sweep_artifacts()
        task = self._get_download_task(task_id)
        status = task["status"]
        if status == "cancelled":
            raise AppError(
                code="download_cancelled",
                message="Archive download was cancelled",
                status_code=409,
                details={"task_id": task_id},
            )
        if status != "ready":
            raise AppError(
                code="download_not_ready",
                message="Archive download is not ready",
                status_code=409,
                details={"task_id": task_id, "status": status},
            )

        artifact = self._repository.get_artifact(task_id)
        if not artifact:
            raise self._archive_not_found(task_id)
        if self._is_expired(artifact["expires_at"]):
            self._delete_artifact_record_and_file(task_id, artifact["file_path"])
            raise AppError(
                code="archive_expired",
                message="Prepared archive expired",
                status_code=410,
                details={"task_id": task_id},
            )

        artifact_path = Path(artifact["file_path"])
        # One stat() call both proves the file is present and yields its
        # size, so the file cannot vanish between a check and the read.
        try:
            size = artifact_path.stat().st_size
        except FileNotFoundError:
            self._repository.delete_artifact(task_id)
            raise self._archive_not_found(task_id) from None

        safe_name = self._quote_header_filename(artifact["file_name"])
        return {
            "content": self._file_ops_service._filesystem.stream_file(artifact_path),
            "headers": {
                "Content-Disposition": f'attachment; filename="{safe_name}"',
                "Content-Length": str(int(size)),
            },
            "content_type": "application/zip",
        }

    def cancel_archive_prepare_task(self, task_id: str) -> dict:
        """Cancel a download task that has not reached a terminal state.

        Returns:
            The task row as re-read after cancellation.

        Raises:
            AppError: ``task_not_found`` (404), ``invalid_request`` (400),
                or ``download_not_cancellable`` (409) when the task is
                already ``ready``/``failed``/``cancelled`` or the
                repository declines the transition.
        """
        self.sweep_artifacts()
        task = self._get_download_task(task_id)
        status = task["status"]
        if status == "ready":
            raise AppError(
                code="download_not_cancellable",
                message="Archive download is already ready",
                status_code=409,
                details={"task_id": task_id, "status": status},
            )
        if status in {"failed", "cancelled"}:
            raise AppError(
                code="download_not_cancellable",
                message="Archive download cannot be cancelled",
                status_code=409,
                details={"task_id": task_id, "status": status},
            )

        if not self._repository.mark_cancelled(task_id):
            # The transition was declined; report whatever status the task
            # has now, falling back to the one read above.
            current = self._repository.get_task(task_id)
            current_status = current["status"] if current else status
            raise AppError(
                code="download_not_cancellable",
                message="Archive download cannot be cancelled",
                status_code=409,
                details={"task_id": task_id, "status": current_status},
            )

        self._cleanup_task_artifacts(task_id)
        self._update_history_cancelled(task_id)
        cancelled_task = self._repository.get_task(task_id)
        if not cancelled_task:
            raise self._task_not_found(task_id)
        return cancelled_task

    def sweep_artifacts(self) -> None:
        """Drop expired or missing artifacts and delete orphaned files.

        Files belonging to a prepare job that is still executing in this
        process are left alone: until the job records its artifact row the
        partial/final ZIP is unreferenced, and deleting it would make the
        job fail.
        """
        self._artifact_root.mkdir(parents=True, exist_ok=True)

        # Order matters. Listing the directory first, then snapshotting the
        # in-flight ids, then reading artifact rows guarantees that any file
        # seen here either belongs to a job still in the snapshot or, if the
        # job finished in between, already has its artifact row.
        candidates = list(self._artifact_root.iterdir())
        with self._active_lock:
            active_ids = set(self._active_task_ids)

        referenced_paths: set[Path] = set()
        for artifact in self._repository.list_artifacts():
            artifact_path = Path(artifact["file_path"])
            referenced_paths.add(artifact_path)
            if self._is_expired(artifact["expires_at"]) or not artifact_path.exists():
                self._delete_artifact_record_and_file(artifact["task_id"], artifact["file_path"])

        for candidate in candidates:
            if candidate in referenced_paths or not candidate.is_file():
                continue
            # Archive files are named "<task_id>.zip" / "<task_id>.partial.zip".
            if candidate.name.split(".", 1)[0] in active_ids:
                continue
            self._unlink_quietly(candidate)

    def _run_archive_prepare_task(
        self,
        *,
        task_id: str,
        target_paths: list[str],
        archive_name: str,
        history_mode: str,
        history_path: str,
    ) -> None:
        """Build the archive for ``task_id`` and publish it as an artifact.

        The ZIP is written to ``<task_id>.partial.zip`` and renamed to
        ``<task_id>.zip`` once complete. Cancellation is polled between
        steps; on cancellation the files are removed and the task row is
        left as it is. ``AppError`` and ``OSError`` mark the task failed.
        Any other exception also cleans up and marks the task failed, then
        propagates to the runner.

        Args:
            task_id: Id of the task row to drive.
            target_paths: Relative paths to archive.
            archive_name: File name stored on the artifact row.
            history_mode: Accepted for the caller's benefit; not read here.
            history_path: Recorded as ``failed_item`` on failure.
        """
        partial_path = self._artifact_root / f"{task_id}.partial.zip"
        final_path = self._artifact_root / f"{task_id}.zip"
        total_items = len(target_paths)

        # Register before any file is created so a concurrent sweep never
        # treats this job's files as orphans.
        with self._active_lock:
            self._active_task_ids.add(task_id)
        try:
            self._raise_if_cancelled(task_id)
            if not self._repository.mark_preparing(
                task_id=task_id,
                done_items=0,
                total_items=total_items,
                current_item=target_paths[0] if target_paths else None,
            ):
                self._raise_if_cancelled(task_id)
                return

            resolved_targets = [self._path_guard.resolve_existing_path(path) for path in target_paths]
            self._raise_if_cancelled(task_id)
            self._file_ops_service._validate_zip_download_archive_names(resolved_targets)
            self._file_ops_service._run_zip_download_preflight(resolved_targets)
            self._raise_if_cancelled(task_id)

            with zipfile.ZipFile(partial_path, "w", compression=zipfile.ZIP_DEFLATED) as archive:
                for index, resolved_target in enumerate(resolved_targets):
                    self._raise_if_cancelled(task_id)
                    self._repository.update_progress(
                        task_id=task_id,
                        done_items=index,
                        total_items=total_items,
                        current_item=resolved_target.relative,
                    )
                    self._file_ops_service._write_download_target_to_zip(
                        archive,
                        resolved_target,
                        on_each_item=lambda: self._raise_if_cancelled(task_id),
                    )

            self._raise_if_cancelled(task_id)
            os.replace(partial_path, final_path)
            self._raise_if_cancelled(task_id)
            self._repository.upsert_artifact(
                task_id=task_id,
                file_path=str(final_path),
                file_name=archive_name,
                expires_at=self._expires_at_iso(),
            )
            if not self._repository.mark_ready(
                task_id=task_id,
                done_items=total_items,
                total_items=total_items,
            ):
                self._cleanup_task_artifacts(task_id)
                self._raise_if_cancelled(task_id)
                return
            self._update_history_ready(task_id)
        except ArchivePrepareCancelled:
            self._cleanup_task_artifacts(task_id)
        except AppError as exc:
            self._fail_task(task_id, exc.code, exc.message, history_path, total_items)
        except OSError as exc:
            self._fail_task(task_id, "io_error", str(exc), history_path, total_items)
        except Exception as exc:
            # Job boundary: without this, an unexpected error (for example a
            # ValueError raised by zipfile) would leave the task non-terminal
            # and its partial file on disk. Re-raised so the runner still
            # sees the failure.
            # NOTE(review): "internal_error" is a new code; confirm it suits
            # API consumers.
            self._fail_task(
                task_id,
                "internal_error",
                str(exc) or type(exc).__name__,
                history_path,
                total_items,
            )
            raise
        finally:
            with self._active_lock:
                self._active_task_ids.discard(task_id)

    def _fail_task(
        self,
        task_id: str,
        error_code: str,
        error_message: str,
        failed_item: str,
        total_items: int,
    ) -> None:
        """Remove the task's files and mark it failed unless it was cancelled."""
        self._cleanup_task_artifacts(task_id)
        if self._repository.mark_failed_if_not_cancelled(
            task_id=task_id,
            error_code=error_code,
            error_message=error_message,
            failed_item=failed_item,
            done_bytes=None,
            total_bytes=None,
            done_items=0,
            total_items=total_items,
        ):
            self._update_history_failed(task_id, error_code, error_message)

    def _get_download_task(self, task_id: str) -> dict:
        """Return the task row, raising if it is missing or not a download."""
        task = self._repository.get_task(task_id)
        if not task:
            raise self._task_not_found(task_id)
        if task["operation"] != "download":
            raise AppError(
                code="invalid_request",
                message="Task is not an archive download",
                status_code=400,
                details={"task_id": task_id},
            )
        return task

    @staticmethod
    def _task_not_found(task_id: str) -> AppError:
        """Build the 404 error for an unknown task id."""
        return AppError(
            code="task_not_found",
            message="Task was not found",
            status_code=404,
            details={"task_id": task_id},
        )

    @staticmethod
    def _archive_not_found(task_id: str) -> AppError:
        """Build the 404 error for a missing artifact row or file."""
        return AppError(
            code="archive_not_found",
            message="Prepared archive was not found",
            status_code=404,
            details={"task_id": task_id},
        )

    @staticmethod
    def _quote_header_filename(file_name: str) -> str:
        """Make ``file_name`` safe inside a quoted ``filename="..."`` value.

        Control characters (including CR/LF) are removed, and backslashes
        and double quotes are backslash-escaped. Names containing none of
        these are returned unchanged.
        """
        cleaned = "".join(ch for ch in file_name if ord(ch) >= 0x20 and ord(ch) != 0x7F)
        return cleaned.replace("\\", "\\\\").replace('"', '\\"')

    @staticmethod
    def _unlink_quietly(path: Path) -> None:
        """Delete ``path``, ignoring the case where it is already gone."""
        try:
            path.unlink()
        except FileNotFoundError:
            pass

    def _cleanup_task_artifacts(self, task_id: str) -> None:
        """Delete the task's artifact row plus its partial and final ZIP files."""
        self._repository.delete_artifact(task_id)
        for suffix in (".partial.zip", ".zip"):
            self._unlink_quietly(self._artifact_root / f"{task_id}{suffix}")

    def _delete_artifact_record_and_file(self, task_id: str, file_path: str) -> None:
        """Delete the artifact row for ``task_id`` and the file at ``file_path``."""
        self._repository.delete_artifact(task_id)
        self._unlink_quietly(Path(file_path))

    def _update_history_ready(self, task_id: str) -> None:
        """Set the history entry to ``ready`` when history is enabled."""
        if self._history_repository:
            self._history_repository.update_entry(entry_id=task_id, status="ready")

    def _update_history_failed(self, task_id: str, error_code: str, error_message: str) -> None:
        """Set the history entry to ``failed`` with the error details."""
        if self._history_repository:
            self._history_repository.update_entry(
                entry_id=task_id,
                status="failed",
                error_code=error_code,
                error_message=error_message,
            )

    def _update_history_cancelled(self, task_id: str) -> None:
        """Set the history entry to ``cancelled`` when history is enabled."""
        if self._history_repository:
            self._history_repository.update_entry(entry_id=task_id, status="cancelled")

    def _record_history(self, **kwargs) -> None:
        """Create a history entry from ``kwargs`` when history is enabled."""
        if self._history_repository:
            self._history_repository.create_entry(**kwargs)

    def _expires_at_iso(self) -> str:
        """Return now + TTL as a second-precision UTC ISO string ending in ``Z``."""
        expires_at = datetime.now(timezone.utc) + timedelta(seconds=self._artifact_ttl_seconds)
        return expires_at.replace(microsecond=0).isoformat().replace("+00:00", "Z")

    @staticmethod
    def _is_expired(expires_at: str) -> bool:
        """Return True when ``expires_at`` is now or in the past.

        Accepts ISO-8601 strings with a ``Z`` suffix or an explicit offset.
        A timestamp without an offset is interpreted as UTC rather than
        raising on an aware-vs-naive comparison.
        """
        deadline = datetime.fromisoformat(expires_at.replace("Z", "+00:00"))
        if deadline.tzinfo is None:
            deadline = deadline.replace(tzinfo=timezone.utc)
        return datetime.now(timezone.utc) >= deadline

    def _raise_if_cancelled(self, task_id: str) -> None:
        """Raise ``ArchivePrepareCancelled`` if the task row reads ``cancelled``."""
        task = self._repository.get_task(task_id)
        if task and task["status"] == "cancelled":
            raise ArchivePrepareCancelled()