Source code for autojob.tasks.md

"""Store the results of a molecular dynamics simulation."""

from copy import deepcopy
import logging
from pathlib import Path
import sys
from typing import Any
from typing import ClassVar
from typing import Self

if sys.version_info < (3, 12):
    from typing_extensions import TypedDict
else:
    from typing import TypedDict

import warnings

import ase.io
import numpy as np
from pydantic import BaseModel
from pydantic import ConfigDict
from pydantic import Field
from pydantic import FieldSerializationInfo
from pydantic import SerializerFunctionWrapHandler
from pydantic import TypeAdapter
from pydantic import field_serializer
from pydantic import model_validator

from autojob import SETTINGS
from autojob.tasks.calculation import Calculation
from autojob.utils.schemas import PydanticAtoms

logger = logging.getLogger(__name__)


# Key under which a legacy ``md_params`` mapping stores the trajectory file
# name; read by ``MDInputs.md_params_to_inputs`` when porting old inputs.
_TRAJECTORY_KEY = "_trajectory_file"


[docs] class MDInitParams(TypedDict, total=False): """The initialization parameters for an ASE molecular dynamics object. Keys: timestep: The time step in ASE time units. Note that although this key is not required, :class:`ase.md.md.MolecularDynamics` objects require this parameter for initialization. trajectory: The name of a file in which to save the molecular dynamics trajectory. logfile: The name of a file to be used to log the results. Use '-' for stdout. loginterval: The frequency with which results are logged. append_trajectory: Whether or not to append to the trajectory with each step in the molecular dynamics run. """ timestep: float trajectory: str | None logfile: str | None loginterval: int append_trajectory: bool
class MDRunParams(TypedDict, total=False):
    """The run parameters for an ASE molecular dynamics object.

    These are the arguments used when running the simulation, as opposed to
    the arguments used to construct the dynamics object (see
    ``MDInitParams``). The mapping is declared with ``total=False``, so every
    key may be omitted.

    Keys:
        steps: The number of molecular dynamics steps for which to run the
            calculation.
    """

    steps: int
class MDInputs(BaseModel):
    """The inputs of a molecular dynamics calculation.

    Extra fields are allowed (``extra="allow"``) so that inputs written with
    the legacy ``md_params`` / ``trajectory_file`` layout can still be loaded;
    :meth:`md_params_to_inputs` ports them to the current fields.
    """

    md_init_params: MDInitParams = Field(
        default_factory=MDInitParams,
        description="The parameters used to initialize the molecular dynamics "
        "calculation.",
    )
    md_run_params: MDRunParams = Field(
        default_factory=MDRunParams,
        description="The parameters used to run the molecular dynamics "
        "calculation.",
    )

    model_config: ClassVar[ConfigDict] = ConfigDict(extra="allow")

    # This validator exists for compatibility with v0.0.2 and can be removed
    # once old use-cases have been ported to the new data model
    @model_validator(mode="after")
    def md_params_to_inputs(self) -> Self:
        """Set MDInputs from old md_params data model.

        If an extra field named ``md_params`` is present (and truthy), it is
        removed from the extras, a :class:`FutureWarning` is emitted, and
        ``md_init_params`` / ``md_run_params`` are rebuilt from its ``"init"``
        and ``"run"`` entries. If an extra field named ``trajectory_file`` is
        present (and truthy), it is removed from the extras and overrides
        ``md_init_params["trajectory"]``.

        Returns:
            The validated model itself.
        """
        if md_params := self.model_extra.pop("md_params", None):
            msg = (
                "Setting MDInputs from MDInputs.md_params. "
                "This fallback behaviour will be removed in a future version "
                "of autojob. You should update the source of this MDInputs to "
                "follow the current data model for MDInputs."
            )
            warnings.warn(msg, category=FutureWarning, stacklevel=1)
            # Build a fresh mapping instead of writing into
            # md_params["init"]: that dictionary belongs to whoever supplied
            # the legacy data and must not be modified as a side effect of
            # validation. The resulting values are the same as before: the
            # legacy trajectory entry (or None) always wins over "init".
            init_params = {
                **md_params.get("init", {}),
                "trajectory": md_params.get(_TRAJECTORY_KEY),
            }
            run_params = md_params.get("run", {})
            self.md_init_params = TypeAdapter(MDInitParams).validate_python(
                init_params
            )
            self.md_run_params = TypeAdapter(MDRunParams).validate_python(
                run_params
            )

        # trajectory_file used to be a direct attribute of MDInputs. If it was
        # set directly, the value set therein should take priority over that
        # defined in md_params["init"]
        if trajectory := self.model_extra.pop("trajectory_file", None):
            self.md_init_params["trajectory"] = trajectory

        return self
class MDOutputs(BaseModel):
    """The outputs of a molecular dynamics calculation."""

    md_trajectory: list[PydanticAtoms] | None = Field(
        default=None,
        description="A list of atoms representing the trajectory of the "
        "system throughout a molecular dynamics simulation.",
    )
    md_trajectory_results: list[dict[str, Any]] | None = Field(
        default=None,
        description="A list of dictionaries where each dictionary contains "
        "the calculated results associated with each image in the MD "
        "trajectory.",
    )

    @field_serializer("md_trajectory_results", mode="wrap")
    def serialize_md_trajectory_results(
        self,
        v: list[dict[str, Any]] | None,
        _: SerializerFunctionWrapHandler,
        info: FieldSerializationInfo,
    ) -> list[dict[str, Any]] | None:
        """Serialize the molecular dynamics results.

        Args:
            v: The per-image results, or None.
            _: The wrapped serializer handler (unused).
            info: Serialization context; only ``info.mode`` is consulted.

        Returns:
            None if ``v`` is None. In JSON mode, a new list of new
            dictionaries in which numpy arrays and numpy scalars have been
            converted with ``tolist()`` and all other values deep-copied.
            In any other mode, ``v`` itself.
        """
        if v is None:
            return None

        if info.mode != "json":
            return v

        return [
            {
                k: (
                    # numpy scalars (np.generic) are converted as well as
                    # arrays so that the result only holds plain Python
                    # numbers and lists
                    value.tolist()
                    if isinstance(value, (np.ndarray, np.generic))
                    # for mutable results
                    else deepcopy(value)
                )
                for k, value in md_result.items()
            }
            for md_result in v
        ]

    @classmethod
    def from_directory(
        cls,
        src: Path,
        *,
        trajectory: str | None = None,
        strict_mode: bool | None = None,
    ) -> Self:
        """Load the outputs of a molecular dynamics run from a directory.

        Args:
            src: The directory from which to load the results.
            trajectory: The name of the trajectory file. Defaults to None in
                which case no trajectory will be loaded.
            strict_mode: Whether or not to require all outputs. If True,
                errors will be thrown on missing outputs. Defaults to
                ``SETTINGS.STRICT_MODE``.

        Returns:
            An instance populated with the trajectory images and their
            results, or an instance with default (None) fields if the
            trajectory could not be loaded and strict mode is disabled.

        Raises:
            FileNotFoundError: In strict mode, if the trajectory could not be
                found.
            TypeError: In strict mode, if the trajectory path could not be
                built (e.g., ``trajectory`` is None).
            AttributeError: In strict mode, if the results of an image could
                not be read from its calculator.
        """
        # Honour the documented default; previously None was silently
        # treated as "strict mode disabled".
        if strict_mode is None:
            strict_mode = SETTINGS.STRICT_MODE

        logger.info(
            "Loading molecular dynamics outputs from directory: %s", src
        )
        logger.debug("Strict mode: %sabled", "en" if strict_mode else "dis")
        md_outputs: dict[str, Any] = {}

        try:
            # Path(src, None) raises TypeError, which is how a missing
            # trajectory name ends up in the handler below.
            images = ase.io.read(Path(src, trajectory), ":")
            md_outputs["md_trajectory"] = images
            md_outputs["md_trajectory_results"] = []

            for i, image in enumerate(images):
                try:
                    results = image.calc.results.copy()
                    md_outputs["md_trajectory_results"].append(results)
                except AttributeError:
                    if strict_mode:
                        raise

                    logger.info(
                        "Unable to load results from image %s of trajectory: %s",
                        i,
                        trajectory,
                    )
                    # Keep one results entry per image
                    md_outputs["md_trajectory_results"].append({})

                # The results have been extracted (or are unavailable), so
                # the calculator is detached from the stored image.
                image.calc = None
        except (FileNotFoundError, TypeError):
            logger.exception(
                "Unable to load molecular dynamics trajectory %s from "
                "directory: %s.",
                trajectory,
                src,
            )

            if strict_mode:
                raise
        else:
            # Only report success when the trajectory was actually loaded
            logger.info(
                "Successfully loaded molecular dynamics outputs from "
                "directory: %s",
                src,
            )

        return cls(**md_outputs)
class MolecularDynamics(Calculation):
    """A molecular dynamics calculation."""

    md_inputs: MDInputs = Field(
        default_factory=MDInputs,
        description="The inputs of a molecular dynamics simulation",
    )
    md_outputs: MDOutputs | None = Field(
        default=None,
        description="The outputs of a molecular dynamics simulation",
    )

    @classmethod
    def from_directory(cls, src, **kwargs):
        """Generate a ``MolecularDynamics`` document from a task directory.

        Args:
            src: The directory of a molecular dynamics simulation.
            kwargs: Additional keyword arguments:

                - strict_mode: Whether or not to fail on any error. Defaults to
                  `SETTINGS.STRICT_MODE`.
                - magic_mode: Whether or not to instantiate subclasses. If
                  True, the task returned must be an instance determined by
                  metadata in the directory. Defaults to False.

        Returns:
            A :class:`MolecularDynamics` or a subclass of a
            :class:`MolecularDynamics`.

        .. seealso::

            :meth:`.calculation.Calculation.from_directory`
        """
        strict_mode = kwargs.get("strict_mode", SETTINGS.STRICT_MODE)
        magic_mode = kwargs.get("magic_mode", False)
        logger.info(
            "Loading molecular dynamics calculation from directory: %s", src
        )
        logger.debug("Magic mode: %sabled", "en" if magic_mode else "dis")
        logger.debug("Strict mode: %sabled", "en" if strict_mode else "dis")

        if magic_mode:
            return cls.load_magic(src, strict_mode=strict_mode)

        calculation = Calculation.from_directory(
            src=src, strict_mode=strict_mode, magic_mode=False
        )
        # The MD inputs are stored as an extra field of the task inputs
        data = calculation.task_inputs.model_extra.pop("md_inputs", {})
        md_inputs = MDInputs(**data)
        md_outputs = MDOutputs.from_directory(
            src,
            # "trajectory" is an optional key of MDInitParams; indexing it
            # directly raised KeyError whenever no trajectory was configured.
            # MDOutputs.from_directory accepts None for this argument.
            trajectory=md_inputs.md_init_params.get("trajectory"),
            strict_mode=strict_mode,
        )

        # The final image of the trajectory (if any) is the output structure
        if md_outputs.md_trajectory:
            output_atoms = md_outputs.md_trajectory[-1]
        else:
            output_atoms = None

        # The task inputs may hold a list of structures, in which case the
        # first one (if any) is used as the input structure
        if isinstance(calculation.task_inputs.atoms, list):
            input_atoms = (
                calculation.task_inputs.atoms[0]
                if calculation.task_inputs.atoms
                else None
            )
        else:
            input_atoms = calculation.task_inputs.atoms

        cls.patch_task(
            task_outputs=calculation.task_outputs,
            input_atoms=input_atoms,
            output_atoms=output_atoms,
            state=calculation.scheduler_outputs.state,
            converged=calculation.calculation_outputs.converged,
        )
        logger.info(
            "Successfully loaded molecular dynamics calculation from "
            "directory: %s",
            src,
        )
        return cls(
            **calculation.model_dump(),
            md_inputs=md_inputs,
            md_outputs=md_outputs,
        )

    def write_inputs_json(
        self,
        dest: str | Path,
        *,
        additional_data: dict[str, Any] | None = None,
        **kwargs,  # noqa: ARG002
    ) -> Path:
        """Write the inputs JSON to a file.

        The JSON-mode dump of ``md_inputs`` is added under the ``"md_inputs"``
        key; an ``"md_inputs"`` entry in ``additional_data`` takes precedence
        over it.

        Args:
            dest: The directory in which to write the inputs JSON.
            additional_data: A dictionary mapping strings to JSON-serializable
                values to be merged with the task inputs that will be written
                to the inputs JSON. Defaults to an empty dictionary.
            kwargs: Additional keyword arguments. These are accepted but not
                used.

        Returns:
            The filename in which the inputs JSON was written.
        """
        merged_data = {
            "md_inputs": self.md_inputs.model_dump(mode="json"),
            # Caller-supplied entries override the default above
            **(additional_data or {}),
        }
        return super().write_inputs_json(dest, additional_data=merged_data)