Source code for autojob.tasks.mh

"""Store the results of a minima hopping simulation."""

from copy import deepcopy
import logging
from pathlib import Path
import re
import sys
from typing import Any
from typing import ClassVar
from typing import Self
import warnings

import numpy as np

if sys.version_info < (3, 12):
    from typing_extensions import TypedDict
else:
    from typing import TypedDict


from ase import Atoms
import ase.io
from pydantic import BaseModel
from pydantic import ConfigDict
from pydantic import Field
from pydantic import FieldSerializationInfo
from pydantic import SerializerFunctionWrapHandler
from pydantic import TypeAdapter
from pydantic import field_serializer
from pydantic import model_validator

from autojob import SETTINGS
from autojob.tasks.calculation import Calculation
from autojob.utils.schemas import PydanticAtoms

logger = logging.getLogger(__name__)


_TRAJECTORY_KEY = "_trajectory_file"


[docs] class MHInitParams(TypedDict, total=False): """Initialization parameters for a minima hopping calculation. Keys: T0: The initial temperature for molecular dynamics steps (in Kelvin). beta1: The first temperature adjustment parameter. The temperature for the molecular dynamics step will be multiplied by this number if the previous optimum is found. beta2: The second temperature adjustment parameter. The temperature for the molecular dynamics step will be multiplied by this number if a previous (but not the immediately previous) optimum is found. beta3: The third temperature adjustment parameter. The temperature for the molecular dynamics step will be multiplied by this number if a new minimum is found with energy below `Ediff`. Ediff0: The initial energy acceptance threshold. alpha1: The first energy threshold adjustment parameter. `Ediff` will be multiplied by this number whenever a unique minima is found with energy lower than `Ediff`. alpha2: The first energy threshold adjustment parameter. `Ediff` will be multiplied by this number whenever a unique minima is found with energy higher than `Ediff`. mdmin: The number of minima that must be passed to stop MD simulations. logfile: A text log file for recording the progress of the calculation. minima_threshold: A distance threshold (in Angstromgs) for identifying identical structures. timestep: The timestep for MD simulations. optimizer: The fully qualified class name for an ASE optimizer to use. minima_traj: The storage file for minima. Currently, this key **must** be set for things to work correctly. fmax: Maximum force criteria for structure optimizations. rng: An integer to be used as a random seed. """ T0: float beta1: float beta2: float beta3: float Ediff0: float alpha1: float alpha2: float mdmin: int logfile: str minima_threshold: float timestep: float optimizer: str minima_traj: str fmax: float rng: int
[docs] class MHRunParams(TypedDict, total=False): """Run parameters for a minima hopping calculation. Keys: totalsteps: The maximum number of steps that the calculation will run. maxtemp: The maximum temperature (in Kelvin) for the calculation. """ totalsteps: int maxtemp: float
[docs] class MHInputs(BaseModel): """The inputs of a minima hopping calculation.""" mh_init_params: MHInitParams = Field( default_factory=dict, description="The initialization parameters for the minima hopping object.", ) mh_run_params: MHRunParams = Field( default_factory=dict, description="The run parameters for the minima hopping object.", ) md_traj_pattern: str = Field( default=r"md(?P<index>\d{5}).traj", description="A regex pattern for identifying trajectory files of " "molecular dynamics steps. To ensure appropriate loading of the outputs, " "the regex should contain a named group with the label 'index'.", ) opt_traj_pattern: str = Field( default=r"qn(?P<index>\d{5}).traj", description="A regex pattern for identifying trajectory files of " "optimization steps. To ensure appropriate loading of the outputs, " "the regex should contain a named group with the label 'index'.", ) model_config: ClassVar[ConfigDict] = ConfigDict(extra="allow") # This validator exists for compatibility with v0.0.2 and can be removed # once old use-cases have been ported to the new data model
[docs] @model_validator(mode="after") def md_params_to_inputs(self) -> Self: """Set MHInputs from old md_params data model.""" if mh_params := self.model_extra.pop("md_params", None): msg = ( "Setting MHInputs from MHInputs.md_params. " "This fallback behaviour will be removed in a future version " "of autojob. You should update the source of this MHInputs to " "follow the current data model for MHInputs." ) warnings.warn(msg, category=FutureWarning, stacklevel=1) init_params = mh_params.get("init", {}) run_params = mh_params.get("run", {}) # Only set minima_traj if it is defined because it MUST be a string # and cannot be None if _TRAJECTORY_KEY in mh_params: init_params["minima_traj"] = mh_params[_TRAJECTORY_KEY] self.mh_init_params = TypeAdapter(MHInitParams).validate_python( init_params ) self.mh_run_params = TypeAdapter(MHRunParams).validate_python( run_params ) # trajectory_file used to be a direct attribute of MDInputs. If it was # set directly, the value set therein should take priority over that # defined in md_params["init"] if trajectory := self.model_extra.pop("trajectory_file", None): self.mh_init_params["minima_traj"] = trajectory return self
def _harvest_minima_results( src: Path, minima_traj: str, strict_mode: bool | None = None, ) -> tuple[list[Atoms], list[dict[str, Any]]]: minima: list[PydanticAtoms] = [] minima_results = [] try: minima = ase.io.read(Path(src, minima_traj), ":") for i, image in enumerate(minima): try: results = image.calc.results.copy() minima_results.append(results) except AttributeError: if strict_mode: raise logger.info( "Unable to load results from image %s of trajectory: %s", i, image, ) minima_results.append({}) image.calc = None except FileNotFoundError: if strict_mode: raise logger.exception( "Unable to load minima from directory: %s.", src, ) return minima, minima_results def _harvest_traj_and_results( src: Path, traj_pattern: str, strict_mode: bool | None = None, ) -> tuple[list[list[Atoms]], list[list[dict[str, Any]]]]: traj_regex = re.compile(traj_pattern) traj_files: list[tuple[int, Path]] = [] for file in src.iterdir(): if match := traj_regex.match(file.name): index = int(match.group("index")) traj_files.append((index, file)) logger.debug( "%d trajectories found matching regex %s.", len(traj_files), traj_pattern, ) traj_files = sorted(traj_files, key=lambda x: x[0]) trajs: list[list[Atoms]] = [] traj_results: list[list[dict[str, Any]]] = [] for i, traj in traj_files: images: list[Atoms] = ase.io.read(traj, index=":") # type: ignore[assignment] trajs.append(images) traj_result: list[dict[str, Any]] = [] for j, image in enumerate(images): try: traj_result.append(image.calc.results.copy()) except AttributeError: if strict_mode: raise logger.info( "Unable to load results from image %s of trajectory %s: %s", i, j, traj, ) traj_result.append({}) image.calc = None traj_results.append(traj_result) logger.debug("%d trajectories loaded.", len(trajs)) return trajs, traj_results
[docs] class MHOutputs(BaseModel): """The outputs of a minima hopping calculation.""" minima: list[PydanticAtoms] | None = Field( default=None, description="A list of all minima found during the minima hopping calculation.", ) minima_results: list[dict[str, Any]] | None = Field( default=None, description="A list of of dictionaries, where each dictionary " "corresponds to the calculated results for a minimum.", ) md_trajectories: list[list[PydanticAtoms]] | None = Field( default=None, description="A list in which each element is a list of Atoms objects " "for a molecular dynamics step in a minima hopping task", ) md_trajectory_results: list[list[dict[str, Any]]] | None = Field( default=None, description="A list of lists of dictionaries, where each dictionary " "corresponds to the calculated results from an image in the MD " "trajectory", ) opt_trajectories: list[list[PydanticAtoms]] | None = Field( default=None, description="A list in which each element is a list of Atoms objects " "for a optimization step in a minima hopping task", ) opt_trajectory_results: list[list[dict[str, Any]]] | None = Field( default=None, description="A list of lists of dictionaries, where each dictionary " "corresponds to the calculated results from an image in the " "optimization trajectory", )
[docs] @field_serializer("minima_results", mode="wrap") def serialize_minima_results( self, v: list[dict[str, Any]] | None, _: SerializerFunctionWrapHandler, info: FieldSerializationInfo, ) -> list[dict[str, Any]] | None: """Serialize trajectory results.""" if v is None: return None if info.mode == "json": listified: list[dict[str, Any]] = [] for minima_result in v: result: dict[str, Any] = {} for k, value in minima_result.items(): result[k] = ( value.tolist() if isinstance(value, np.ndarray) # for mutable results else deepcopy(value) ) listified.append(result) return listified return v
[docs] @field_serializer( "md_trajectory_results", "opt_trajectory_results", mode="wrap" ) def serialize_trajectory_results( self, v: list[list[dict[str, Any]]] | None, _: SerializerFunctionWrapHandler, info: FieldSerializationInfo, ) -> list[list[dict[str, Any]]] | None: """Serialize trajectory results.""" if v is None: return None if info.mode == "json": listified: list[list[dict[str, Any]]] = [] for traj_results in v: listified_result: list[dict[str, Any]] = [] for traj_result in traj_results: result: dict[str, Any] = {} for k, value in traj_result.items(): result[k] = ( value.tolist() if isinstance(value, np.ndarray) # for mutable results else deepcopy(value) ) listified_result.append(result) listified.append(listified_result) return listified return v
[docs] @classmethod def from_directory( cls, src: Path, *, minima_traj: str, md_traj_pattern: str, opt_traj_pattern: str, strict_mode: bool | None = None, ) -> Self: """Load the outputs of a minima hopping run from a directory. Args: src: The directory from which to load the results. minima_traj: The name of the minima trajectory file. md_traj_pattern: A regex pattern for identifying trajectory files of molecular dynamics steps. opt_traj_pattern: str: A regex pattern for identifying trajectory files of optimization steps. strict_mode: Whether or not to require all outputs. If True, errors will be thrown on missing outputs. Defaults to ``SETTINGS.STRICT_MODE``. """ logger.info( "Loading molecular dynamics outputs from directory: %s", src ) logger.debug("Strict mode: %sabled", "en" if strict_mode else "dis") minima, minima_results = _harvest_minima_results( src, minima_traj, strict_mode ) md_trajs, md_traj_results = _harvest_traj_and_results( src, md_traj_pattern, strict_mode ) opt_trajs, opt_traj_results = _harvest_traj_and_results( src, opt_traj_pattern, strict_mode ) logger.info( "Successfully loaded molecular dynamics outputs from " "directory: %s", src, ) return cls( minima=minima, minima_results=minima_results, md_trajectories=md_trajs, md_trajectory_results=md_traj_results, opt_trajectories=opt_trajs, opt_trajectory_results=opt_traj_results, )
[docs] class MinimaHopping(Calculation): """A minima hopping calculation.""" mh_inputs: MHInputs = Field( default_factory=MHInputs, description="The inputs of a minima hopping calculation", ) mh_outputs: MHOutputs | None = Field( default=None, description="The outputs of a minima hopping calculation", )
[docs] @classmethod def from_directory(cls, src, **kwargs): """Generate a ``MinimaHopping`` document from a task directory. Args: src: The directory of a molecular dynamics simulation. kwargs: Additional keyword arguments: - strict_mode: Whether or not to fail on any error. Defaults to `SETTINGS.STRICT_MODE`. - magic_mode: Whether or not to instantiate subclasses. If True, the task returned must be an instance determined by metadata in the directory. Defaults to False. Returns: A :class:`MinimaHopping` or a subclass of a :class:`MinimaHopping`. .. seealso:: :meth:`.md.Calculation.from_directory` """ strict_mode = kwargs.get("strict_mode", SETTINGS.STRICT_MODE) magic_mode = kwargs.get("magic_mode", False) logger.info( "Loading minima hopping calculation from directory: %s", src ) logger.debug("Magic mode: %sabled", "en" if magic_mode else "dis") logger.debug("Strict mode: %sabled", "en" if strict_mode else "dis") if magic_mode: return cls.load_magic(src, strict_mode=strict_mode) calculation = Calculation.from_directory( src=src, strict_mode=strict_mode, magic_mode=False ) data = calculation.task_inputs.model_extra.pop("mh_inputs", {}) mh_inputs = MHInputs(**data) mh_outputs = MHOutputs.from_directory( src, minima_traj=mh_inputs.mh_init_params["minima_traj"], md_traj_pattern=mh_inputs.md_traj_pattern, opt_traj_pattern=mh_inputs.opt_traj_pattern, strict_mode=strict_mode, ) if mh_outputs.opt_trajectories and mh_outputs.opt_trajectories[-1]: output_atoms = mh_outputs.opt_trajectories[-1][-1] elif mh_outputs.md_trajectories and mh_outputs.md_trajectories[-1]: output_atoms = mh_outputs.md_trajectories[-1][-1] elif mh_outputs.minima: output_atoms = mh_outputs.minima[-1] else: output_atoms = None if isinstance(calculation.task_inputs.atoms, list): input_atoms = ( calculation.task_inputs.atoms[0] if calculation.task_inputs.atoms else None ) else: input_atoms = calculation.task_inputs.atoms cls.patch_task( task_outputs=calculation.task_outputs, input_atoms=input_atoms, output_atoms=output_atoms, state=calculation.scheduler_outputs.state, converged=calculation.calculation_outputs.converged, ) logger.info( "Successfully loaded minima hopping calculation from " "directory: %s", src, ) return cls( **calculation.model_dump(), mh_inputs=mh_inputs, mh_outputs=mh_outputs, )
[docs] def write_inputs_json( self, dest: str | Path, *, additional_data: dict[str, Any] | None = None, **kwargs, # noqa: ARG002 ) -> Path: """Write the inputs JSON to a file. Args: dest: The directory in which to write the inputs JSON. additional_data: A dictionary mapping strings to JSON-serializable values to be merged with the task inputs that will be written to the inputs JSON. Defaults to an empty dictionary. kwargs: Additional keyword arguments. Returns: The filename in which the inputs JSON written. """ additional_data = additional_data or {} additional_data = { "mh_inputs": self.mh_inputs.model_dump(mode="json"), **additional_data, } return super().write_inputs_json(dest, additional_data=additional_data)