Source code for autojob.harvest.harvesters.vasp

"""VASP calculation harvesting utilities.

This module provides the :func:`harvest_vasp_results`
and :func:`get_output_atoms` functions for retrieving
calculation outputs and output atoms from the directory
of a VASP calculation.

Example:
    from pathlib import Path
    from autojob.harvest.harvesters.vasp import get_output_atoms
    from autojob.harvest.harvesters.vasp import harvest_vasp_results

    outputs = harvest_vasp_results(Path.cwd())
    atoms = get_output_atoms(Path.cwd())
"""

import logging
from pathlib import Path
from typing import Any
from xml.etree import ElementTree

from ase import Atoms
import ase.io
from emmet.core.tasks import TaskDoc  # type: ignore[import-untyped]
from emmet.core.tasks import TaskState
from pymatgen.io.ase import AseAtomsAdaptor
from pymatgen.io.vasp.outputs import Vasprun

from autojob import SETTINGS
from autojob.utils.atoms import copy_atom_metadata

logger = logging.getLogger(__name__)

ALTERNATE_OUTPUT_STRUCTURES = ("vasprun.xml", "CONTCAR")
FILES_TO_CARRYOVER = ("CHGCAR", "WAVECAR")


[docs] def harvest_vasp_results(src: str | Path) -> dict[str, Any]: """Harvest VASP calculation results from a directory. Args: src: The directory from which to load VASP outputs. Returns: A dictionary with, at minimum, the required keys to initialize a :class:`autojob.calculation.calculation.Calculation` but also with same keys as an instance of :class:`emmet.core.tasks.OutputDoc` and additional keys mapping to a dictionary representation of a :class:`pymatgen.io.vasp.outputs.Vasprun` object and a dictionary representation of a :class:`pymatgen.electronic_structure.dos.CompleteDos` object. """ logger.info(f"Loading VASP calculation outputs from {src}") results = {} try: doc = TaskDoc.from_directory(src) structure = doc.output.structure atoms = AseAtomsAdaptor.get_atoms(structure) if structure else None output_doc = doc.output.model_dump() if doc.output else {} dumped_doc = doc.model_dump(exclude={"output"}) results["energy"] = output_doc.pop("energy") results["forces"] = output_doc.pop("forces") results["converged"] = dumped_doc.pop("state") == TaskState.SUCCESS results["calculator_results"] = { **output_doc, **dumped_doc, "atoms": atoms, } vasprun_xml = Path(src, "vasprun.xml") if SETTINGS.VASP_KEEP_DOS and vasprun_xml.exists(): logger.info("Keeping VASP DOS outputs") vasprun = Vasprun(vasprun_xml) dos = vasprun.complete_dos results["calculator_results"]["complete_dos"] = dos.as_dict() results["calculator_results"]["vasprun"] = vasprun.as_dict() else: logger.info("Discarding VASP DOS outputs") except TypeError as err: if "Calculation.from_vasp_files" in err.args[0]: msg = "Unable to find VASP file" raise FileNotFoundError(msg) from err raise logger.debug(f"Successfully loaded VASP calculation outputs from {src}") return results
# TODO: Unit test def _reorder_atoms(output_atoms: Atoms, src: str | Path) -> Atoms: """Creates a new Atoms object reordered according to ase-sort.dat. This function assumes that the Atoms object passed is ordered in accordance to the POSCAR/POTCAR. """ logger.debug("Reordering atoms") sort_file = Path(src).joinpath("ase-sort.dat") with Path(sort_file).open(mode="r", encoding="utf-8") as file: lines = file.readlines() # First column: if the VASP index of an atom is i, then the index of the # corresponding atom in the ASE Atoms object is the integer in row i conversion_table = [int(line.split()[0]) for line in lines] ase_ordering = [conversion_table[atom.index] for atom in output_atoms] atoms = [output_atoms[i] for i in ase_ordering] logger.debug( "Successfully reordered atoms: " f"{[atom.index for atom in output_atoms]!r} -> {ase_ordering!r}" ) return Atoms( # type: ignore[no-untyped-call] atoms, cell=output_atoms.cell, pbc=output_atoms.pbc, celldisp=output_atoms.get_celldisp(), # type: ignore[no-untyped-call] )
[docs] def get_output_atoms( src: str | Path, alt_filename_index: int | None = None, input_atoms: Atoms | None = None, ) -> Atoms: """Retrieve an Atoms object representing the output structure. This function also copies tags and constraints from the input structure in the case that the output structure must be read from a non-ASE file (e.g., vasprun.xml). Args: src: The directory from which to retrieve the output structure. alt_filename_index: An integer pointing to which alternative structure file should be used. This number will be used to index `ALTERNATE_OUTPUT_STRUCTURES`. input_atoms: An Atoms object representing the corresponding input structure. Returns: An Atoms object representing the output structure. """ if alt_filename_index is None: alt_filename_index = 0 filename = SETTINGS.OUTPUT_ATOMS_FILE else: filename = ALTERNATE_OUTPUT_STRUCTURES[alt_filename_index] alt_filename_index += 1 full_filename = Path(src).joinpath(filename) logger.debug(f"Retrieving output atoms from {full_filename}") atoms: Atoms | None = None try: atoms = ase.io.read(full_filename, -1) # type: ignore[assignment] except (FileNotFoundError, AttributeError, ElementTree.ParseError): msg = ( f"Unable to retrieve atoms from: {full_filename}.\nFile not found." ) logger.warning(msg) try: atoms = get_output_atoms( src=src, alt_filename_index=alt_filename_index, input_atoms=input_atoms, ) atoms = _reorder_atoms(output_atoms=atoms, src=src) copy_atom_metadata( input_atoms=input_atoms, output_atoms=atoms, ) except IndexError as err: msg = ( f"No output atoms found in {SETTINGS.OUTPUT_ATOMS_FILE} or " f"{ALTERNATE_OUTPUT_STRUCTURES!r}" ) raise FileNotFoundError(msg) from err except FileNotFoundError: if atoms is None: raise logger.warning("Unable to reorder atoms") if atoms is None: msg = "Unable to reorder atoms" raise RuntimeError(msg) logger.debug(f"Successfully retrieved output atoms from {full_filename}") return atoms