# webmanager-mvp/webui/backend/app/services/archive_download_task_service.py
from __future__ import annotations
import os
import uuid
import zipfile
from datetime import datetime, timedelta, timezone
from pathlib import Path
from backend.app.api.errors import AppError
from backend.app.api.schemas import TaskCreateResponse
from backend.app.db.history_repository import HistoryRepository
from backend.app.db.task_repository import TaskRepository
from backend.app.security.path_guard import PathGuard
from backend.app.services.file_ops_service import FileOpsService
from backend.app.tasks_runner import TaskRunner
# Default lifetime of a prepared ZIP artifact before it is swept: 30 minutes.
ARCHIVE_DOWNLOAD_TTL_SECONDS = 30 * 60
class ArchiveDownloadTaskService:
    """Manage "prepare a ZIP archive, then download it" background tasks.

    Flow: :meth:`create_archive_prepare_task` records a task and enqueues
    :meth:`_run_archive_prepare_task` on the injected ``TaskRunner``; that job
    writes a ZIP under ``artifact_root`` and marks the task ``ready``.
    :meth:`prepare_ready_archive_download` then streams the finished artifact
    back to the caller.  Artifacts expire after ``artifact_ttl_seconds`` and
    are garbage-collected by :meth:`sweep_artifacts`.
    """

    def __init__(
        self,
        path_guard: PathGuard,
        repository: TaskRepository,
        runner: TaskRunner,
        history_repository: HistoryRepository | None,
        file_ops_service: FileOpsService,
        artifact_root: Path,
        artifact_ttl_seconds: int = ARCHIVE_DOWNLOAD_TTL_SECONDS,
    ):
        self._path_guard = path_guard
        self._repository = repository
        self._runner = runner
        # Optional; when None, all history recording becomes a no-op.
        self._history_repository = history_repository
        self._file_ops_service = file_ops_service
        self._artifact_root = artifact_root
        self._artifact_ttl_seconds = artifact_ttl_seconds
        # Ensure the artifact directory exists, then drop any stale artifacts
        # left over from a previous process run.
        self._artifact_root.mkdir(parents=True, exist_ok=True)
        self.sweep_artifacts()

    def create_archive_prepare_task(self, paths: list[str]) -> TaskCreateResponse:
        """Validate *paths*, record a ``download`` task, and enqueue ZIP preparation.

        Raises ``AppError`` (400) for an empty path list or when the selection
        resolves to a single file (which must use direct download instead).
        Returns the new task's id and initial status.
        """
        if not paths:
            raise AppError(
                code="invalid_request",
                message="At least one path is required",
                status_code=400,
            )
        # Opportunistically clean up expired artifacts before adding work.
        self.sweep_artifacts()
        resolved_targets = [self._path_guard.resolve_existing_path(path) for path in paths]
        # NOTE(review): the three _download_* / _summarize_* calls below reach
        # into FileOpsService private helpers — consider promoting them to a
        # public API on FileOpsService.
        mode = self._file_ops_service._download_mode_from_resolved_targets(resolved_targets)
        if mode == "single_file":
            raise AppError(
                code="invalid_request",
                message="Single file downloads must use direct download",
                status_code=400,
            )
        summary = self._file_ops_service._summarize_download_targets([target.relative for target in resolved_targets])
        archive_name = self._file_ops_service._download_name_for_targets(resolved_targets)
        task_id = str(uuid.uuid4())
        task = self._repository.create_task(
            operation="download",
            source=summary,
            destination=archive_name,
            task_id=task_id,
            status="requested",
        )
        self._record_history(
            entry_id=task_id,
            operation="download",
            status="requested",
            source=mode,
            destination=archive_name,
            path=summary,
        )
        # Capture only the relative path strings; the worker re-resolves them
        # at execution time so a path removed in the meantime fails cleanly.
        target_paths = [target.relative for target in resolved_targets]
        self._runner.enqueue_archive_prepare(
            lambda: self._run_archive_prepare_task(
                task_id=task_id,
                target_paths=target_paths,
                archive_name=archive_name,
                history_mode=mode,
                history_path=summary,
            )
        )
        return TaskCreateResponse(task_id=task["id"], status=task["status"])

    def prepare_ready_archive_download(self, task_id: str) -> dict:
        """Return streaming payload info for a finished archive task.

        Returns a dict with ``content`` (a file stream), ``headers``
        (Content-Disposition / Content-Length), and ``content_type``
        (``application/zip``).

        Raises ``AppError`` with:
          * 404 — unknown task, or artifact record/file missing;
          * 400 — task is not a download task;
          * 409 — task exists but is not ``ready`` yet;
          * 410 — artifact existed but its TTL has expired (it is deleted here).
        """
        self.sweep_artifacts()
        task = self._repository.get_task(task_id)
        if not task:
            raise AppError(
                code="task_not_found",
                message="Task was not found",
                status_code=404,
                details={"task_id": task_id},
            )
        if task["operation"] != "download":
            raise AppError(
                code="invalid_request",
                message="Task is not an archive download",
                status_code=400,
                details={"task_id": task_id},
            )
        if task["status"] != "ready":
            raise AppError(
                code="download_not_ready",
                message="Archive download is not ready",
                status_code=409,
                details={"task_id": task_id, "status": task["status"]},
            )
        artifact = self._repository.get_artifact(task_id)
        if not artifact:
            raise AppError(
                code="archive_not_found",
                message="Prepared archive was not found",
                status_code=404,
                details={"task_id": task_id},
            )
        if self._is_expired(artifact["expires_at"]):
            # Expired: remove both the DB record and the file, then report 410.
            self._delete_artifact_record_and_file(task_id, artifact["file_path"])
            raise AppError(
                code="archive_expired",
                message="Prepared archive expired",
                status_code=410,
                details={"task_id": task_id},
            )
        artifact_path = Path(artifact["file_path"])
        if not artifact_path.exists():
            # Record exists but the file vanished (e.g. manual deletion):
            # drop the dangling record and report not-found.
            self._repository.delete_artifact(task_id)
            raise AppError(
                code="archive_not_found",
                message="Prepared archive was not found",
                status_code=404,
                details={"task_id": task_id},
            )
        # NOTE(review): file_name is interpolated unescaped into the quoted
        # Content-Disposition value — confirm upstream naming cannot produce
        # quotes or control characters.
        return {
            "content": self._file_ops_service._filesystem.stream_file(artifact_path),
            "headers": {
                "Content-Disposition": f'attachment; filename="{artifact["file_name"]}"',
                "Content-Length": str(int(artifact_path.stat().st_size)),
            },
            "content_type": "application/zip",
        }

    def sweep_artifacts(self) -> None:
        """Garbage-collect expired/dangling artifact records and orphaned files."""
        self._artifact_root.mkdir(parents=True, exist_ok=True)
        referenced_paths: set[Path] = set()
        for artifact in self._repository.list_artifacts():
            artifact_path = Path(artifact["file_path"])
            referenced_paths.add(artifact_path)
            # Remove records whose TTL elapsed or whose backing file is gone.
            if self._is_expired(artifact["expires_at"]) or not artifact_path.exists():
                self._delete_artifact_record_and_file(artifact["task_id"], artifact["file_path"])
        # Remove files in the artifact root that no record references.
        # NOTE(review): an in-flight task's "<id>.partial.zip" is never
        # recorded as an artifact, so this orphan cleanup could unlink a ZIP
        # that is currently being written — confirm sweeps never run
        # concurrently with _run_archive_prepare_task.
        for candidate in self._artifact_root.iterdir():
            if candidate.is_file() and candidate not in referenced_paths:
                try:
                    candidate.unlink()
                except FileNotFoundError:
                    pass

    def _run_archive_prepare_task(
        self,
        *,
        task_id: str,
        target_paths: list[str],
        archive_name: str,
        history_mode: str,
        history_path: str,
    ) -> None:
        """Worker body: build the ZIP artifact and mark the task ready/failed.

        Writes to ``<task_id>.partial.zip`` first and renames to
        ``<task_id>.zip`` only after a complete, successful write, so a
        half-written archive is never served.  ``history_mode`` is currently
        unused here but kept for signature symmetry with the enqueue site.
        """
        partial_path = self._artifact_root / f"{task_id}.partial.zip"
        final_path = self._artifact_root / f"{task_id}.zip"
        total_items = len(target_paths)
        try:
            self._repository.mark_preparing(
                task_id=task_id,
                done_items=0,
                total_items=total_items,
                current_item=target_paths[0] if target_paths else None,
            )
            # Re-resolve paths at execution time; the filesystem may have
            # changed since the task was created.
            resolved_targets = [self._path_guard.resolve_existing_path(path) for path in target_paths]
            self._file_ops_service._validate_zip_download_archive_names(resolved_targets)
            self._file_ops_service._run_zip_download_preflight(resolved_targets)
            with zipfile.ZipFile(partial_path, "w", compression=zipfile.ZIP_DEFLATED) as archive:
                for resolved_target in resolved_targets:
                    self._file_ops_service._write_download_target_to_zip(archive, resolved_target)
            # Publish the finished archive under its final name.
            os.replace(partial_path, final_path)
            self._repository.upsert_artifact(
                task_id=task_id,
                file_path=str(final_path),
                file_name=archive_name,
                expires_at=self._expires_at_iso(),
            )
            self._repository.mark_ready(
                task_id=task_id,
                done_items=total_items,
                total_items=total_items,
            )
            self._update_history_ready(task_id)
        # NOTE(review): only AppError/OSError are handled; any other exception
        # (e.g. from zipfile) propagates and leaves the task in "preparing" —
        # confirm the TaskRunner has a catch-all, or widen these handlers.
        except AppError as exc:
            # Clean up both the partial and any previously published artifact.
            self._delete_artifact_record_and_file(task_id, str(partial_path))
            self._delete_artifact_record_and_file(task_id, str(final_path))
            self._repository.mark_failed(
                task_id=task_id,
                error_code=exc.code,
                error_message=exc.message,
                failed_item=history_path,
                done_bytes=None,
                total_bytes=None,
                done_items=0,
                total_items=total_items,
            )
            self._update_history_failed(task_id, exc.code, exc.message)
        except OSError as exc:
            self._delete_artifact_record_and_file(task_id, str(partial_path))
            self._delete_artifact_record_and_file(task_id, str(final_path))
            self._repository.mark_failed(
                task_id=task_id,
                error_code="io_error",
                error_message=str(exc),
                failed_item=history_path,
                done_bytes=None,
                total_bytes=None,
                done_items=0,
                total_items=total_items,
            )
            self._update_history_failed(task_id, "io_error", str(exc))

    def _delete_artifact_record_and_file(self, task_id: str, file_path: str) -> None:
        """Delete the artifact DB record and its file; missing file is fine."""
        self._repository.delete_artifact(task_id)
        path = Path(file_path)
        try:
            path.unlink()
        except FileNotFoundError:
            pass

    def _update_history_ready(self, task_id: str) -> None:
        # History is optional; skip silently when no repository was injected.
        if self._history_repository:
            self._history_repository.update_entry(entry_id=task_id, status="ready")

    def _update_history_failed(self, task_id: str, error_code: str, error_message: str) -> None:
        # History is optional; skip silently when no repository was injected.
        if self._history_repository:
            self._history_repository.update_entry(
                entry_id=task_id,
                status="failed",
                error_code=error_code,
                error_message=error_message,
            )

    def _record_history(self, **kwargs) -> None:
        # History is optional; skip silently when no repository was injected.
        if self._history_repository:
            self._history_repository.create_entry(**kwargs)

    def _expires_at_iso(self) -> str:
        """Return the artifact expiry as a second-precision UTC ISO string with a 'Z' suffix."""
        return (datetime.now(timezone.utc) + timedelta(seconds=self._artifact_ttl_seconds)).replace(microsecond=0).isoformat().replace("+00:00", "Z")

    @staticmethod
    def _is_expired(expires_at: str) -> bool:
        """True if *expires_at* (ISO-8601, 'Z' suffix accepted) is now or in the past."""
        return datetime.now(timezone.utc) >= datetime.fromisoformat(expires_at.replace("Z", "+00:00"))