364 lines
14 KiB
Python
364 lines
14 KiB
Python
from __future__ import annotations
|
|
|
|
import os
|
|
import uuid
|
|
import zipfile
|
|
from datetime import datetime, timedelta, timezone
|
|
from pathlib import Path
|
|
|
|
from backend.app.api.errors import AppError
|
|
from backend.app.api.schemas import TaskCreateResponse
|
|
from backend.app.db.history_repository import HistoryRepository
|
|
from backend.app.db.task_repository import TaskRepository
|
|
from backend.app.security.path_guard import PathGuard
|
|
from backend.app.services.file_ops_service import FileOpsService
|
|
from backend.app.tasks_runner import TaskRunner
|
|
|
|
# Default lifetime of a prepared archive, in seconds (30 minutes).
ARCHIVE_DOWNLOAD_TTL_SECONDS = 30 * 60


class ArchivePrepareCancelled(Exception):
    """Raised by the prepare worker when its task is found in status "cancelled"."""


class ArchiveDownloadTaskService:
    """Prepare ZIP archive downloads as background tasks and serve the result.

    For a task id ``T`` the archive is written to ``<artifact_root>/T.partial.zip``
    and renamed to ``<artifact_root>/T.zip`` once complete.  The finished file is
    registered in the task repository as an artifact with an expiry timestamp.
    ``sweep_artifacts`` removes expired or missing artifacts and unreferenced
    files; it runs at construction time and at the start of every public
    operation.
    """

    def __init__(
        self,
        path_guard: PathGuard,
        repository: TaskRepository,
        runner: TaskRunner,
        history_repository: HistoryRepository | None,
        file_ops_service: FileOpsService,
        artifact_root: Path,
        artifact_ttl_seconds: int = ARCHIVE_DOWNLOAD_TTL_SECONDS,
    ):
        """Store collaborators, create the artifact directory and sweep it once.

        Args:
            path_guard: Resolves requested paths to existing targets.
            repository: Task and artifact persistence.
            runner: Executes the enqueued prepare job.
            history_repository: Optional history sink; ``None`` disables history.
            file_ops_service: Provides download naming, preflight and ZIP writing.
            artifact_root: Directory that holds partial and finished archives.
            artifact_ttl_seconds: Lifetime of a finished archive, in seconds.
        """
        self._path_guard = path_guard
        self._repository = repository
        self._runner = runner
        self._history_repository = history_repository
        self._file_ops_service = file_ops_service
        self._artifact_root = artifact_root
        self._artifact_ttl_seconds = artifact_ttl_seconds
        # Ids of tasks whose archive is currently being written by this
        # instance.  Their files have no artifact record yet, so the sweep must
        # not treat them as orphans.  Empty at start-up, which lets the initial
        # sweep below remove leftovers from a previous run.
        self._active_task_ids: set[str] = set()
        self._artifact_root.mkdir(parents=True, exist_ok=True)
        self.sweep_artifacts()

    def create_archive_prepare_task(self, paths: list[str]) -> TaskCreateResponse:
        """Create a "download" task for ``paths`` and enqueue its preparation.

        Raises:
            AppError: ``invalid_request`` (400) when ``paths`` is empty or when
                the resolved targets yield the "single_file" download mode.
        """
        if not paths:
            raise AppError(
                code="invalid_request",
                message="At least one path is required",
                status_code=400,
            )

        self.sweep_artifacts()
        resolved_targets = [self._path_guard.resolve_existing_path(path) for path in paths]
        mode = self._file_ops_service._download_mode_from_resolved_targets(resolved_targets)
        if mode == "single_file":
            raise AppError(
                code="invalid_request",
                message="Single file downloads must use direct download",
                status_code=400,
            )

        summary = self._file_ops_service._summarize_download_targets(
            [target.relative for target in resolved_targets]
        )
        archive_name = self._file_ops_service._download_name_for_targets(resolved_targets)
        task_id = str(uuid.uuid4())
        task = self._repository.create_task(
            operation="download",
            source=summary,
            destination=archive_name,
            task_id=task_id,
            status="requested",
        )
        self._record_history(
            entry_id=task_id,
            operation="download",
            status="requested",
            source=mode,
            destination=archive_name,
            path=summary,
        )
        target_paths = [target.relative for target in resolved_targets]
        self._runner.enqueue_archive_prepare(
            lambda: self._run_archive_prepare_task(
                task_id=task_id,
                target_paths=target_paths,
                archive_name=archive_name,
                history_mode=mode,
                history_path=summary,
            )
        )
        return TaskCreateResponse(task_id=task["id"], status=task["status"])

    def prepare_ready_archive_download(self, task_id: str) -> dict:
        """Return the streaming payload for a task whose archive is ready.

        Returns:
            A dict with ``content`` (file stream), ``headers``
            (Content-Disposition and Content-Length) and ``content_type``.

        Raises:
            AppError: ``task_not_found`` (404), ``invalid_request`` (400),
                ``download_cancelled`` (409), ``download_not_ready`` (409),
                ``archive_not_found`` (404) or ``archive_expired`` (410).
        """
        self.sweep_artifacts()
        task = self._require_download_task(task_id)
        if task["status"] == "cancelled":
            raise AppError(
                code="download_cancelled",
                message="Archive download was cancelled",
                status_code=409,
                details={"task_id": task_id},
            )
        if task["status"] != "ready":
            raise AppError(
                code="download_not_ready",
                message="Archive download is not ready",
                status_code=409,
                details={"task_id": task_id, "status": task["status"]},
            )

        artifact = self._repository.get_artifact(task_id)
        if not artifact:
            raise AppError(
                code="archive_not_found",
                message="Prepared archive was not found",
                status_code=404,
                details={"task_id": task_id},
            )
        if self._is_expired(artifact["expires_at"]):
            self._delete_artifact_record_and_file(task_id, artifact["file_path"])
            raise AppError(
                code="archive_expired",
                message="Prepared archive expired",
                status_code=410,
                details={"task_id": task_id},
            )

        artifact_path = Path(artifact["file_path"])
        if not artifact_path.exists():
            # The record points at a file that is gone; drop the stale record.
            self._repository.delete_artifact(task_id)
            raise AppError(
                code="archive_not_found",
                message="Prepared archive was not found",
                status_code=404,
                details={"task_id": task_id},
            )

        return {
            "content": self._file_ops_service._filesystem.stream_file(artifact_path),
            "headers": {
                # NOTE(review): the file name is interpolated unescaped; confirm
                # that the name producer never emits quotes or control chars.
                "Content-Disposition": f'attachment; filename="{artifact["file_name"]}"',
                "Content-Length": str(int(artifact_path.stat().st_size)),
            },
            "content_type": "application/zip",
        }

    def cancel_archive_prepare_task(self, task_id: str) -> dict:
        """Cancel a download task that is neither ready, failed nor cancelled.

        Returns:
            The task record as re-read from the repository after cancellation.

        Raises:
            AppError: ``task_not_found`` (404), ``invalid_request`` (400) or
                ``download_not_cancellable`` (409).
        """
        self.sweep_artifacts()
        task = self._require_download_task(task_id)
        if task["status"] == "ready":
            raise AppError(
                code="download_not_cancellable",
                message="Archive download is already ready",
                status_code=409,
                details={"task_id": task_id, "status": task["status"]},
            )
        if task["status"] in {"failed", "cancelled"}:
            raise AppError(
                code="download_not_cancellable",
                message="Archive download cannot be cancelled",
                status_code=409,
                details={"task_id": task_id, "status": task["status"]},
            )
        if not self._repository.mark_cancelled(task_id):
            # The repository refused the transition; report the freshest
            # status available.
            current = self._repository.get_task(task_id)
            current_status = current["status"] if current else task["status"]
            raise AppError(
                code="download_not_cancellable",
                message="Archive download cannot be cancelled",
                status_code=409,
                details={"task_id": task_id, "status": current_status},
            )

        self._cleanup_task_artifacts(task_id)
        self._update_history_cancelled(task_id)
        cancelled_task = self._repository.get_task(task_id)
        if not cancelled_task:
            raise AppError(
                code="task_not_found",
                message="Task was not found",
                status_code=404,
                details={"task_id": task_id},
            )
        return cancelled_task

    def sweep_artifacts(self) -> None:
        """Remove expired or missing artifacts and unreferenced files.

        Files whose name starts with the id of a task currently being prepared
        by this instance are kept.  They have no artifact record until the
        archive is finished, and deleting them would break the running task.
        """
        self._artifact_root.mkdir(parents=True, exist_ok=True)
        referenced_paths: set[Path] = set()
        for artifact in self._repository.list_artifacts():
            artifact_path = Path(artifact["file_path"])
            referenced_paths.add(artifact_path)
            if self._is_expired(artifact["expires_at"]) or not artifact_path.exists():
                self._delete_artifact_record_and_file(artifact["task_id"], artifact["file_path"])

        for candidate in self._artifact_root.iterdir():
            if not candidate.is_file() or candidate in referenced_paths:
                continue
            # Archive files are named "<task_id>.partial.zip" / "<task_id>.zip".
            owner_id = candidate.name.split(".", 1)[0]
            if owner_id in self._active_task_ids:
                continue
            try:
                candidate.unlink()
            except FileNotFoundError:
                pass

    def _require_download_task(self, task_id: str) -> dict:
        """Return the task for ``task_id`` or raise if absent or not a download."""
        task = self._repository.get_task(task_id)
        if not task:
            raise AppError(
                code="task_not_found",
                message="Task was not found",
                status_code=404,
                details={"task_id": task_id},
            )
        if task["operation"] != "download":
            raise AppError(
                code="invalid_request",
                message="Task is not an archive download",
                status_code=400,
                details={"task_id": task_id},
            )
        return task

    def _run_archive_prepare_task(
        self,
        *,
        task_id: str,
        target_paths: list[str],
        archive_name: str,
        history_mode: str,
        history_path: str,
    ) -> None:
        """Build the ZIP for ``task_id`` and publish it as a ready artifact.

        Cancellation is polled between steps.  On cancellation the task's files
        are removed; on ``AppError`` or ``OSError`` they are removed and the
        task is marked failed unless it was cancelled.  ``history_mode`` is
        accepted for the caller's benefit and not used here.
        """
        partial_path = self._artifact_root / f"{task_id}.partial.zip"
        final_path = self._artifact_root / f"{task_id}.zip"
        total_items = len(target_paths)

        def ensure_not_cancelled() -> None:
            self._raise_if_cancelled(task_id)

        # Shield this task's files from concurrent sweeps until we are done.
        self._active_task_ids.add(task_id)
        try:
            ensure_not_cancelled()
            if not self._repository.mark_preparing(
                task_id=task_id,
                done_items=0,
                total_items=total_items,
                current_item=target_paths[0] if target_paths else None,
            ):
                ensure_not_cancelled()
                return
            resolved_targets = [self._path_guard.resolve_existing_path(path) for path in target_paths]
            ensure_not_cancelled()
            self._file_ops_service._validate_zip_download_archive_names(resolved_targets)
            self._file_ops_service._run_zip_download_preflight(resolved_targets)
            ensure_not_cancelled()

            with zipfile.ZipFile(partial_path, "w", compression=zipfile.ZIP_DEFLATED) as archive:
                for index, resolved_target in enumerate(resolved_targets):
                    ensure_not_cancelled()
                    self._repository.update_progress(
                        task_id=task_id,
                        done_items=index,
                        total_items=total_items,
                        current_item=resolved_target.relative,
                    )
                    self._file_ops_service._write_download_target_to_zip(
                        archive,
                        resolved_target,
                        on_each_item=ensure_not_cancelled,
                    )

            ensure_not_cancelled()
            os.replace(partial_path, final_path)
            ensure_not_cancelled()
            self._repository.upsert_artifact(
                task_id=task_id,
                file_path=str(final_path),
                file_name=archive_name,
                expires_at=self._expires_at_iso(),
            )
            if not self._repository.mark_ready(
                task_id=task_id,
                done_items=total_items,
                total_items=total_items,
            ):
                # The task could not be moved to "ready"; discard the archive.
                self._cleanup_task_artifacts(task_id)
                ensure_not_cancelled()
                return
            self._update_history_ready(task_id)
        except ArchivePrepareCancelled:
            self._cleanup_task_artifacts(task_id)
        except AppError as exc:
            self._fail_prepare(task_id, exc.code, exc.message, history_path, total_items)
        except OSError as exc:
            self._fail_prepare(task_id, "io_error", str(exc), history_path, total_items)
        finally:
            self._active_task_ids.discard(task_id)

    def _fail_prepare(
        self,
        task_id: str,
        error_code: str,
        error_message: str,
        failed_item: str,
        total_items: int,
    ) -> None:
        """Remove the task's files and mark it failed unless it was cancelled."""
        self._cleanup_task_artifacts(task_id)
        if self._repository.mark_failed_if_not_cancelled(
            task_id=task_id,
            error_code=error_code,
            error_message=error_message,
            failed_item=failed_item,
            done_bytes=None,
            total_bytes=None,
            done_items=0,
            total_items=total_items,
        ):
            self._update_history_failed(task_id, error_code, error_message)

    def _cleanup_task_artifacts(self, task_id: str) -> None:
        """Delete the artifact record plus the partial and final archive files."""
        self._delete_artifact_record_and_file(task_id, str(self._artifact_root / f"{task_id}.partial.zip"))
        self._delete_artifact_record_and_file(task_id, str(self._artifact_root / f"{task_id}.zip"))

    def _delete_artifact_record_and_file(self, task_id: str, file_path: str) -> None:
        """Delete the artifact record for ``task_id`` and unlink ``file_path``.

        A file that is already missing is ignored.
        """
        self._repository.delete_artifact(task_id)
        path = Path(file_path)
        try:
            path.unlink()
        except FileNotFoundError:
            pass

    def _update_history_ready(self, task_id: str) -> None:
        """Set the history entry to "ready" when a history repository is set."""
        if self._history_repository:
            self._history_repository.update_entry(entry_id=task_id, status="ready")

    def _update_history_failed(self, task_id: str, error_code: str, error_message: str) -> None:
        """Set the history entry to "failed" with the given error details."""
        if self._history_repository:
            self._history_repository.update_entry(
                entry_id=task_id,
                status="failed",
                error_code=error_code,
                error_message=error_message,
            )

    def _update_history_cancelled(self, task_id: str) -> None:
        """Set the history entry to "cancelled" when a history repository is set."""
        if self._history_repository:
            self._history_repository.update_entry(entry_id=task_id, status="cancelled")

    def _record_history(self, **kwargs) -> None:
        """Create a history entry from ``kwargs`` when a history repository is set."""
        if self._history_repository:
            self._history_repository.create_entry(**kwargs)

    def _expires_at_iso(self) -> str:
        """Return now + TTL as a UTC ISO-8601 string with second precision and "Z"."""
        expires_at = datetime.now(timezone.utc) + timedelta(seconds=self._artifact_ttl_seconds)
        return expires_at.replace(microsecond=0).isoformat().replace("+00:00", "Z")

    @staticmethod
    def _is_expired(expires_at: str) -> bool:
        """Return True when the ISO-8601 timestamp ``expires_at`` is now or past."""
        return datetime.now(timezone.utc) >= datetime.fromisoformat(expires_at.replace("Z", "+00:00"))

    def _raise_if_cancelled(self, task_id: str) -> None:
        """Raise ``ArchivePrepareCancelled`` if the task's status is "cancelled"."""
        task = self._repository.get_task(task_id)
        if task and task["status"] == "cancelled":
            raise ArchivePrepareCancelled()
|