"""Create studies."""
from __future__ import annotations
import datetime
from itertools import groupby
import json
import logging
from pathlib import Path
import shutil
from tempfile import TemporaryDirectory
from typing import Any
from typing import ClassVar
from pydantic import AliasChoices
from pydantic import BaseModel
from pydantic import ConfigDict
from pydantic import Field
from pydantic import FieldSerializationInfo
from pydantic import SerializerFunctionWrapHandler
from pydantic import field_serializer
from shortuuid import uuid
from autojob import SETTINGS
from autojob import legacy
from autojob.task import Task
from autojob.utils.files import find_job_dirs
from autojob.utils.schemas import space_capitalize
logger = logging.getLogger(__name__)


class Study(BaseModel):
    """A collection of tasks.

    Attributes:
        tasks: The tasks belonging to the study. Accepted under either the
            ``"Tasks"`` or ``"Calculations"`` key on validation and always
            serialized under ``"Tasks"``.
        date_created: Timezone-aware (UTC) creation timestamp. Defaults to
            the time at which the instance is created.
        study_id: Identifier of the study. Defaults to ``"s"`` followed by
            the first nine characters of a freshly generated short UUID.
        study_group_id: Identifier of the study group. Defaults to ``"g"``
            followed by the first nine characters of a freshly generated
            short UUID.
        name: The name of the study. Defaults to an empty string.
        notes: Notes on the study. Defaults to an empty string.
        study_type: The legacy study type, if any.
    """

    tasks: list[Task] = Field(
        default=[],
        validation_alias=AliasChoices("Tasks", "Calculations"),
        serialization_alias="Tasks",
    )
    date_created: datetime.datetime = Field(
        default_factory=lambda: datetime.datetime.now(tz=datetime.UTC)
    )
    study_id: str = Field(
        default_factory=lambda: "s" + uuid()[:9], alias="Study ID"
    )
    study_group_id: str = Field(
        default_factory=lambda: "g" + uuid()[:9], alias="Study Group ID"
    )
    name: str = ""
    notes: str = ""
    study_type: legacy.StudyType | None = None

    # Fields without an explicit alias get one from ``space_capitalize``;
    # ``populate_by_name`` keeps the plain field names usable as well.
    model_config: ClassVar = ConfigDict(
        populate_by_name=True, alias_generator=space_capitalize
    )

    @field_serializer("tasks", mode="wrap", return_type=list[Task] | list[str])
    def serialize_tasks(
        self,
        v: Any,
        _: SerializerFunctionWrapHandler,
        info: FieldSerializationInfo,
    ) -> list[Task] | list[str]:
        """Serialize the tasks in the study.

        Args:
            v: The value of the ``tasks`` field.
            _: The wrapped (default) serializer; intentionally unused.
            info: Serialization context; only ``info.mode`` is consulted.

        Returns:
            In JSON mode, the task IDs of the study's tasks as strings.
            In any other mode, ``v`` unchanged.
        """
        if info.mode == "json":
            # Tasks are stored in their own directories, so the study file
            # only records which tasks belong to the study.
            return [str(t.task_metadata.task_id) for t in self.tasks]
        return v

    @field_serializer("date_created", when_used="json")
    def serialize_date_created(self, v: datetime.datetime) -> str:
        """Serialize the study creation date as an ISO 8601 string."""
        return v.isoformat()

    @classmethod
    def from_directory(
        cls,
        dir_name: Path,
        *,
        strict_mode: bool = SETTINGS.STRICT_MODE,
        legacy_mode: bool = False,
    ) -> Study:
        """Recreate a study from a directory.

        Args:
            dir_name: The directory of the study. It must contain the study
                metadata file (``SETTINGS.STUDY_FILE``).
            strict_mode: Whether or not to require all outputs. If True,
                errors will be thrown on missing outputs. Defaults to
                ``SETTINGS.STRICT_MODE``.
            legacy_mode: Whether or not use the legacy mode directory
                structure. Defaults to False.

        Returns:
            The :class:`Study` contained in ``dir_name``.

        Raises:
            OSError: The study metadata file cannot be opened.
            KeyError: The metadata lacks the ``"Tasks"`` key (or the
                ``"Calculations"`` key in legacy mode).
        """
        metadata_file = dir_name.joinpath(SETTINGS.STUDY_FILE)
        with metadata_file.open(mode="r", encoding="utf-8") as file:
            metadata: dict[str, Any] = json.load(file)

        key = "Calculations" if legacy_mode else "Tasks"
        task_names: list[str] = metadata[key]

        sources: list[Path] = []
        if legacy_mode:
            # Legacy entries name calculation directories; the job
            # directories to load are discovered beneath each of them.
            for task_name in task_names:
                sources.extend(find_job_dirs(dir_name.joinpath(task_name)))
        else:
            sources.extend(dir_name.joinpath(name) for name in task_names)

        # Swap the stored names for the loaded tasks before validation.
        metadata[key] = [
            Task.from_directory(
                source,
                strict_mode=strict_mode,
                magic_mode=True,
            )
            for source in sources
        ]
        return cls(**metadata)

    def to_directory(
        self, dir_name: Path, *, legacy_mode: bool = False
    ) -> None:
        """Dump a study and its tasks to a directory.

        The study is first assembled in a temporary directory and then
        copied into ``dir_name`` (merging with existing contents).

        Args:
            dir_name: The directory in which to dump the :class:`Study`.
            legacy_mode: Whether or not use the legacy mode directory
                structure. Defaults to False.
        """
        with TemporaryDirectory() as tmpdir:
            metadata = self.model_dump(
                mode="json",
                # ``exclude`` is matched against field names, not aliases,
                # so the field name must be used even with ``by_alias``.
                exclude={"study_type"} if legacy_mode else None,
                by_alias=True,
            )
            if legacy_mode:
                # Legacy study files list calculations instead of tasks.
                metadata["Calculations"] = []
                del metadata["Tasks"]

            study_path = Path(tmpdir).joinpath(self.study_id)
            study_path.mkdir()

            # ``groupby`` only merges adjacent items, so sort first.
            tasks = sorted(
                self.tasks, key=lambda t: str(t.task_metadata.calculation_id)
            )
            for calc_id, calc_tasks in groupby(
                tasks, key=lambda t: t.task_metadata.calculation_id
            ):
                jobs = []
                dest = None
                last_task = None
                for task in calc_tasks:
                    dest = task.create_new_task_tree(
                        root=study_path, create_legacy_dir=legacy_mode
                    )
                    task.to_directory(
                        dst=dest,
                        legacy_mode=legacy_mode,
                    )
                    jobs.append(str(task.task_metadata.task_id))
                    last_task = task

                if not legacy_mode or dest is None or last_task is None:
                    continue

                # NOTE(review): calc_id is written as-is; this assumes it
                # is JSON serializable -- confirm against the Task schema.
                metadata["Calculations"].append(calc_id)
                legacy_metadata_file = dest.parent.joinpath(
                    SETTINGS.CALCULATION_FILE
                )
                # The calculation file is derived from the metadata of the
                # last task written for this calculation.
                legacy_metadata = last_task.task_metadata.model_dump_legacy()
                legacy_metadata["Jobs"] = jobs
                with legacy_metadata_file.open(
                    mode="w", encoding="utf-8"
                ) as file:
                    json.dump(legacy_metadata, file, indent=4)

            with study_path.joinpath(SETTINGS.STUDY_FILE).open(
                mode="w", encoding="utf-8"
            ) as file:
                json.dump(metadata, file, indent=4)

            # Copy while the temporary directory still exists.
            shutil.copytree(study_path, dir_name, dirs_exist_ok=True)