"""Store the results of a minima hopping simulation."""
from copy import deepcopy
import logging
from pathlib import Path
import re
import sys
from typing import Any
from typing import ClassVar
from typing import Self
import warnings
import numpy as np
if sys.version_info < (3, 12):
from typing_extensions import TypedDict
else:
from typing import TypedDict
from ase import Atoms
import ase.io
from pydantic import BaseModel
from pydantic import ConfigDict
from pydantic import Field
from pydantic import FieldSerializationInfo
from pydantic import SerializerFunctionWrapHandler
from pydantic import TypeAdapter
from pydantic import field_serializer
from pydantic import model_validator
from autojob import SETTINGS
from autojob.tasks.calculation import Calculation
from autojob.utils.schemas import PydanticAtoms
logger = logging.getLogger(__name__)
_TRAJECTORY_KEY = "_trajectory_file"
[docs]
class MHInitParams(TypedDict, total=False):
"""Initialization parameters for a minima hopping calculation.
Keys:
T0: The initial temperature for molecular dynamics steps (in Kelvin).
beta1: The first temperature adjustment parameter. The temperature for
the molecular dynamics step will be multiplied by this number if
the previous optimum is found.
beta2: The second temperature adjustment parameter. The temperature for
the molecular dynamics step will be multiplied by this number if
a previous (but not the immediately previous) optimum is found.
beta3: The third temperature adjustment parameter. The temperature for
the molecular dynamics step will be multiplied by this number if
a new minimum is found with energy below `Ediff`.
Ediff0: The initial energy acceptance threshold.
alpha1: The first energy threshold adjustment parameter. `Ediff` will
be multiplied by this number whenever a unique minima is found with
energy lower than `Ediff`.
alpha2: The first energy threshold adjustment parameter. `Ediff` will
be multiplied by this number whenever a unique minima is found with
energy higher than `Ediff`.
mdmin: The number of minima that must be passed to stop MD simulations.
logfile: A text log file for recording the progress of the calculation.
minima_threshold: A distance threshold (in Angstromgs) for identifying
identical structures.
timestep: The timestep for MD simulations.
optimizer: The fully qualified class name for an ASE optimizer to use.
minima_traj: The storage file for minima. Currently, this key **must**
be set for things to work correctly.
fmax: Maximum force criteria for structure optimizations.
rng: An integer to be used as a random seed.
"""
T0: float
beta1: float
beta2: float
beta3: float
Ediff0: float
alpha1: float
alpha2: float
mdmin: int
logfile: str
minima_threshold: float
timestep: float
optimizer: str
minima_traj: str
fmax: float
rng: int
[docs]
class MHRunParams(TypedDict, total=False):
"""Run parameters for a minima hopping calculation.
Keys:
totalsteps: The maximum number of steps that the calculation will run.
maxtemp: The maximum temperature (in Kelvin) for the calculation.
"""
totalsteps: int
maxtemp: float
def _harvest_minima_results(
src: Path,
minima_traj: str,
strict_mode: bool | None = None,
) -> tuple[list[Atoms], list[dict[str, Any]]]:
minima: list[PydanticAtoms] = []
minima_results = []
try:
minima = ase.io.read(Path(src, minima_traj), ":")
for i, image in enumerate(minima):
try:
results = image.calc.results.copy()
minima_results.append(results)
except AttributeError:
if strict_mode:
raise
logger.info(
"Unable to load results from image %s of trajectory: %s",
i,
image,
)
minima_results.append({})
image.calc = None
except FileNotFoundError:
if strict_mode:
raise
logger.exception(
"Unable to load minima from directory: %s.",
src,
)
return minima, minima_results
def _harvest_traj_and_results(
src: Path,
traj_pattern: str,
strict_mode: bool | None = None,
) -> tuple[list[list[Atoms]], list[list[dict[str, Any]]]]:
traj_regex = re.compile(traj_pattern)
traj_files: list[tuple[int, Path]] = []
for file in src.iterdir():
if match := traj_regex.match(file.name):
index = int(match.group("index"))
traj_files.append((index, file))
logger.debug(
"%d trajectories found matching regex %s.",
len(traj_files),
traj_pattern,
)
traj_files = sorted(traj_files, key=lambda x: x[0])
trajs: list[list[Atoms]] = []
traj_results: list[list[dict[str, Any]]] = []
for i, traj in traj_files:
images: list[Atoms] = ase.io.read(traj, index=":") # type: ignore[assignment]
trajs.append(images)
traj_result: list[dict[str, Any]] = []
for j, image in enumerate(images):
try:
traj_result.append(image.calc.results.copy())
except AttributeError:
if strict_mode:
raise
logger.info(
"Unable to load results from image %s of trajectory %s: %s",
i,
j,
traj,
)
traj_result.append({})
image.calc = None
traj_results.append(traj_result)
logger.debug("%d trajectories loaded.", len(trajs))
return trajs, traj_results
[docs]
class MHOutputs(BaseModel):
"""The outputs of a minima hopping calculation."""
minima: list[PydanticAtoms] | None = Field(
default=None,
description="A list of all minima found during the minima hopping calculation.",
)
minima_results: list[dict[str, Any]] | None = Field(
default=None,
description="A list of of dictionaries, where each dictionary "
"corresponds to the calculated results for a minimum.",
)
md_trajectories: list[list[PydanticAtoms]] | None = Field(
default=None,
description="A list in which each element is a list of Atoms objects "
"for a molecular dynamics step in a minima hopping task",
)
md_trajectory_results: list[list[dict[str, Any]]] | None = Field(
default=None,
description="A list of lists of dictionaries, where each dictionary "
"corresponds to the calculated results from an image in the MD "
"trajectory",
)
opt_trajectories: list[list[PydanticAtoms]] | None = Field(
default=None,
description="A list in which each element is a list of Atoms objects "
"for a optimization step in a minima hopping task",
)
opt_trajectory_results: list[list[dict[str, Any]]] | None = Field(
default=None,
description="A list of lists of dictionaries, where each dictionary "
"corresponds to the calculated results from an image in the "
"optimization trajectory",
)
[docs]
@field_serializer("minima_results", mode="wrap")
def serialize_minima_results(
self,
v: list[dict[str, Any]] | None,
_: SerializerFunctionWrapHandler,
info: FieldSerializationInfo,
) -> list[dict[str, Any]] | None:
"""Serialize trajectory results."""
if v is None:
return None
if info.mode == "json":
listified: list[dict[str, Any]] = []
for minima_result in v:
result: dict[str, Any] = {}
for k, value in minima_result.items():
result[k] = (
value.tolist()
if isinstance(value, np.ndarray)
# for mutable results
else deepcopy(value)
)
listified.append(result)
return listified
return v
[docs]
@field_serializer(
"md_trajectory_results", "opt_trajectory_results", mode="wrap"
)
def serialize_trajectory_results(
self,
v: list[list[dict[str, Any]]] | None,
_: SerializerFunctionWrapHandler,
info: FieldSerializationInfo,
) -> list[list[dict[str, Any]]] | None:
"""Serialize trajectory results."""
if v is None:
return None
if info.mode == "json":
listified: list[list[dict[str, Any]]] = []
for traj_results in v:
listified_result: list[dict[str, Any]] = []
for traj_result in traj_results:
result: dict[str, Any] = {}
for k, value in traj_result.items():
result[k] = (
value.tolist()
if isinstance(value, np.ndarray)
# for mutable results
else deepcopy(value)
)
listified_result.append(result)
listified.append(listified_result)
return listified
return v
[docs]
@classmethod
def from_directory(
cls,
src: Path,
*,
minima_traj: str,
md_traj_pattern: str,
opt_traj_pattern: str,
strict_mode: bool | None = None,
) -> Self:
"""Load the outputs of a minima hopping run from a directory.
Args:
src: The directory from which to load the results.
minima_traj: The name of the minima trajectory file.
md_traj_pattern: A regex pattern for identifying trajectory files
of molecular dynamics steps.
opt_traj_pattern: str: A regex pattern for identifying trajectory files
of optimization steps.
strict_mode: Whether or not to require all outputs. If True,
errors will be thrown on missing outputs. Defaults to
``SETTINGS.STRICT_MODE``.
"""
logger.info(
"Loading molecular dynamics outputs from directory: %s", src
)
logger.debug("Strict mode: %sabled", "en" if strict_mode else "dis")
minima, minima_results = _harvest_minima_results(
src, minima_traj, strict_mode
)
md_trajs, md_traj_results = _harvest_traj_and_results(
src, md_traj_pattern, strict_mode
)
opt_trajs, opt_traj_results = _harvest_traj_and_results(
src, opt_traj_pattern, strict_mode
)
logger.info(
"Successfully loaded molecular dynamics outputs from "
"directory: %s",
src,
)
return cls(
minima=minima,
minima_results=minima_results,
md_trajectories=md_trajs,
md_trajectory_results=md_traj_results,
opt_trajectories=opt_trajs,
opt_trajectory_results=opt_traj_results,
)
[docs]
class MinimaHopping(Calculation):
"""A minima hopping calculation."""
mh_inputs: MHInputs = Field(
default_factory=MHInputs,
description="The inputs of a minima hopping calculation",
)
mh_outputs: MHOutputs | None = Field(
default=None,
description="The outputs of a minima hopping calculation",
)
[docs]
@classmethod
def from_directory(cls, src, **kwargs):
"""Generate a ``MinimaHopping`` document from a task directory.
Args:
src: The directory of a molecular dynamics simulation.
kwargs: Additional keyword arguments:
- strict_mode: Whether or not to fail on any error. Defaults to
`SETTINGS.STRICT_MODE`.
- magic_mode: Whether or not to instantiate subclasses. If
True, the task returned must be an instance determined by
metadata in the directory. Defaults to False.
Returns:
A :class:`MinimaHopping` or a subclass of a
:class:`MinimaHopping`.
.. seealso::
:meth:`.md.Calculation.from_directory`
"""
strict_mode = kwargs.get("strict_mode", SETTINGS.STRICT_MODE)
magic_mode = kwargs.get("magic_mode", False)
logger.info(
"Loading minima hopping calculation from directory: %s", src
)
logger.debug("Magic mode: %sabled", "en" if magic_mode else "dis")
logger.debug("Strict mode: %sabled", "en" if strict_mode else "dis")
if magic_mode:
return cls.load_magic(src, strict_mode=strict_mode)
calculation = Calculation.from_directory(
src=src, strict_mode=strict_mode, magic_mode=False
)
data = calculation.task_inputs.model_extra.pop("mh_inputs", {})
mh_inputs = MHInputs(**data)
mh_outputs = MHOutputs.from_directory(
src,
minima_traj=mh_inputs.mh_init_params["minima_traj"],
md_traj_pattern=mh_inputs.md_traj_pattern,
opt_traj_pattern=mh_inputs.opt_traj_pattern,
strict_mode=strict_mode,
)
if mh_outputs.opt_trajectories and mh_outputs.opt_trajectories[-1]:
output_atoms = mh_outputs.opt_trajectories[-1][-1]
elif mh_outputs.md_trajectories and mh_outputs.md_trajectories[-1]:
output_atoms = mh_outputs.md_trajectories[-1][-1]
elif mh_outputs.minima:
output_atoms = mh_outputs.minima[-1]
else:
output_atoms = None
if isinstance(calculation.task_inputs.atoms, list):
input_atoms = (
calculation.task_inputs.atoms[0]
if calculation.task_inputs.atoms
else None
)
else:
input_atoms = calculation.task_inputs.atoms
cls.patch_task(
task_outputs=calculation.task_outputs,
input_atoms=input_atoms,
output_atoms=output_atoms,
state=calculation.scheduler_outputs.state,
converged=calculation.calculation_outputs.converged,
)
logger.info(
"Successfully loaded minima hopping calculation from "
"directory: %s",
src,
)
return cls(
**calculation.model_dump(),
mh_inputs=mh_inputs,
mh_outputs=mh_outputs,
)