# Source code for autojob.harvest.harvesters.vasp
"""VASP calculation harvesting utilities.

This module provides the :func:`harvest_vasp_results`
and :func:`get_output_atoms` functions for retrieving
calculation outputs and output atoms from the directory
of a VASP calculation.

Example:
    .. code-block:: python

        from pathlib import Path

        from autojob.harvest.harvesters.vasp import get_output_atoms
        from autojob.harvest.harvesters.vasp import harvest_vasp_results

        outputs = harvest_vasp_results(Path.cwd())
        atoms = get_output_atoms(Path.cwd())
"""
import logging
from pathlib import Path
from typing import Any
from xml.etree import ElementTree
from ase import Atoms
import ase.io
from emmet.core.tasks import TaskDoc # type: ignore[import-untyped]
from emmet.core.tasks import TaskState
from pymatgen.io.ase import AseAtomsAdaptor
from pymatgen.io.vasp.outputs import Vasprun
from autojob import SETTINGS
from autojob.utils.atoms import copy_atom_metadata
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Structure files that get_output_atoms can read instead of
# SETTINGS.OUTPUT_ATOMS_FILE; its ``alt_filename_index`` argument indexes
# into this tuple.
ALTERNATE_OUTPUT_STRUCTURES = ("vasprun.xml", "CONTCAR")
# NOTE(review): not referenced by the code visible in this module; presumably
# the VASP files carried over to follow-up calculations - confirm with callers.
FILES_TO_CARRYOVER = ("CHGCAR", "WAVECAR")
[docs]
def harvest_vasp_results(src: str | Path) -> dict[str, Any]:
    """Harvest VASP calculation results from a directory.

    Args:
        src: The directory from which to load VASP outputs.

    Returns:
        A dictionary with, at minimum, the required keys to initialize
        a :class:`autojob.calculation.calculation.Calculation` but
        also with same keys as an instance of
        :class:`emmet.core.tasks.OutputDoc` and additional keys mapping
        to a dictionary representation of a
        :class:`pymatgen.io.vasp.outputs.Vasprun` object and a dictionary
        representation of a
        :class:`pymatgen.electronic_structure.dos.CompleteDos` object.

        Concretely, the top-level keys are ``"energy"``, ``"forces"``,
        ``"converged"`` and ``"calculator_results"``. ``"energy"`` and
        ``"forces"`` are ``None`` when the task document has no output
        section. The ``"complete_dos"`` and ``"vasprun"`` entries of
        ``"calculator_results"`` are only present when
        ``SETTINGS.VASP_KEEP_DOS`` is truthy and ``vasprun.xml`` exists
        in ``src``.

    Raises:
        FileNotFoundError: If building the task document raises a
            ``TypeError`` whose message mentions
            ``Calculation.from_vasp_files``.
        TypeError: Any other ``TypeError`` raised while building the task
            document is re-raised unchanged.
    """
    logger.info(f"Loading VASP calculation outputs from {src}")
    results: dict[str, Any] = {}

    try:
        doc = TaskDoc.from_directory(src)
    except TypeError as err:
        # str(err) is safe even when the exception carries no arguments or
        # a non-string first argument, unlike indexing err.args directly.
        if "Calculation.from_vasp_files" in str(err):
            msg = "Unable to find VASP file"
            raise FileNotFoundError(msg) from err
        raise

    # The output section is optional, so guard every access to it rather
    # than only the model dump.
    output = doc.output
    structure = output.structure if output else None
    atoms = AseAtomsAdaptor.get_atoms(structure) if structure else None
    output_doc = output.model_dump() if output else {}
    dumped_doc = doc.model_dump(exclude={"output"})

    # Without an output section there is nothing to pop; report None
    # instead of failing with a KeyError.
    results["energy"] = output_doc.pop("energy", None)
    results["forces"] = output_doc.pop("forces", None)
    results["converged"] = dumped_doc.pop("state") == TaskState.SUCCESS
    results["calculator_results"] = {
        **output_doc,
        **dumped_doc,
        "atoms": atoms,
    }

    vasprun_xml = Path(src, "vasprun.xml")
    if SETTINGS.VASP_KEEP_DOS and vasprun_xml.exists():
        logger.info("Keeping VASP DOS outputs")
        vasprun = Vasprun(vasprun_xml)
        dos = vasprun.complete_dos
        results["calculator_results"]["complete_dos"] = dos.as_dict()
        results["calculator_results"]["vasprun"] = vasprun.as_dict()
    else:
        logger.info("Discarding VASP DOS outputs")

    logger.debug(f"Successfully loaded VASP calculation outputs from {src}")
    return results
# TODO: Unit test
def _reorder_atoms(output_atoms: Atoms, src: str | Path) -> Atoms:
    """Create a new Atoms object reordered according to ase-sort.dat.

    This function assumes that the Atoms object passed is ordered in
    accordance to the POSCAR/POTCAR (i.e., in VASP order).

    Only the first column of ``ase-sort.dat`` is used. Row ``i`` of that
    column holds the ASE index of the atom whose VASP index is ``i``, so the
    atom found at position ``i`` of ``output_atoms`` is placed at that ASE
    index in the returned object.

    Args:
        output_atoms: The structure to reorder, in VASP order.
        src: The directory containing ``ase-sort.dat``.

    Returns:
        A new Atoms object holding the reordered atoms together with the
        cell, periodic boundary conditions and cell displacement of
        ``output_atoms``.

    Raises:
        FileNotFoundError: If ``src`` contains no ``ase-sort.dat``.
        IndexError: If ``ase-sort.dat`` has fewer rows than there are atoms
            or lists an index beyond the number of atoms.
        ValueError: If the first column of a row is not an integer.
    """
    logger.debug("Reordering atoms")
    sort_file = Path(src).joinpath("ase-sort.dat")
    with sort_file.open(mode="r", encoding="utf-8") as file:
        # First column: if the VASP index of an atom is i, then the index of
        # the corresponding atom in the ASE Atoms object is the integer in
        # row i. Blank lines (e.g., a trailing newline) carry no entry.
        conversion_table = [
            int(line.split()[0]) for line in file if line.strip()
        ]

    # Invert the VASP -> ASE mapping: ase_ordering[j] is the VASP position
    # of the atom that belongs at ASE index j. Indexing output_atoms with
    # the table itself would apply the permutation a second time instead of
    # undoing it.
    n_atoms = len(output_atoms)
    ase_ordering = [0] * n_atoms
    for vasp_index in range(n_atoms):
        ase_ordering[conversion_table[vasp_index]] = vasp_index

    atoms = [output_atoms[i] for i in ase_ordering]
    logger.debug(
        "Successfully reordered atoms: "
        f"{list(range(n_atoms))!r} -> {ase_ordering!r}"
    )
    return Atoms(  # type: ignore[no-untyped-call]
        atoms,
        cell=output_atoms.cell,
        pbc=output_atoms.pbc,
        celldisp=output_atoms.get_celldisp(),  # type: ignore[no-untyped-call]
    )
[docs]
def get_output_atoms(
    src: str | Path,
    alt_filename_index: int | None = None,
    input_atoms: Atoms | None = None,
) -> Atoms:
    """Retrieve an Atoms object representing the output structure.

    Candidate files are tried in order until one can be read. The first
    candidate is returned exactly as read. A structure read from any later
    candidate is reordered once according to ``ase-sort.dat`` and then has
    the metadata of the input structure copied onto it (e.g., when the
    output structure must be read from vasprun.xml).

    Args:
        src: The directory from which to retrieve the output structure.
        alt_filename_index: An integer pointing to which alternative structure
            file should be used. This number will be used to index
            `ALTERNATE_OUTPUT_STRUCTURES`; the entries after it serve as
            fallbacks. If None, ``SETTINGS.OUTPUT_ATOMS_FILE`` is tried
            first and every alternative serves as a fallback.
        input_atoms: An Atoms object representing the corresponding input
            structure.

    Returns:
        An Atoms object representing the output structure.

    Raises:
        FileNotFoundError: If none of the candidate files can be read, or if
            reordering/copying metadata fails with an ``IndexError``.
        IndexError: If ``alt_filename_index`` is out of range for
            `ALTERNATE_OUTPUT_STRUCTURES`.
        RuntimeError: If reading a file succeeds but yields no atoms.
    """
    if alt_filename_index is None:
        candidates = (SETTINGS.OUTPUT_ATOMS_FILE, *ALTERNATE_OUTPUT_STRUCTURES)
    else:
        # Index (rather than slice) the first entry so that an out-of-range
        # value still raises IndexError to the caller.
        candidates = (
            ALTERNATE_OUTPUT_STRUCTURES[alt_filename_index],
            *ALTERNATE_OUTPUT_STRUCTURES[alt_filename_index + 1 :],
        )

    atoms: Atoms | None = None
    used_fallback = False
    last_error: Exception | None = None
    for position, filename in enumerate(candidates):
        full_filename = Path(src).joinpath(filename)
        logger.debug(f"Retrieving output atoms from {full_filename}")
        try:
            atoms = ase.io.read(full_filename, -1)  # type: ignore[assignment]
        except (
            FileNotFoundError,
            AttributeError,
            ElementTree.ParseError,
        ) as err:
            last_error = err
            # Report the actual failure; not every caught error means that
            # the file is missing.
            logger.warning(
                f"Unable to retrieve atoms from: {full_filename}.\n"
                f"{type(err).__name__}: {err}"
            )
            continue
        used_fallback = position > 0
        break
    else:
        msg = (
            f"No output atoms found in {SETTINGS.OUTPUT_ATOMS_FILE} or "
            f"{ALTERNATE_OUTPUT_STRUCTURES!r}"
        )
        raise FileNotFoundError(msg) from last_error

    if atoms is None:
        msg = f"No atoms were read from {full_filename}"
        raise RuntimeError(msg)

    if used_fallback:
        # Reorder exactly once, no matter how many candidates failed before
        # this one; reordering repeatedly would scramble the structure.
        try:
            atoms = _reorder_atoms(output_atoms=atoms, src=src)
            copy_atom_metadata(
                input_atoms=input_atoms,
                output_atoms=atoms,
            )
        except FileNotFoundError:
            # Best effort: without ase-sort.dat the structure is returned in
            # the order in which it was read.
            logger.warning("Unable to reorder atoms")
        except IndexError as err:
            # Kept as FileNotFoundError so callers see the same exception
            # type as before, but with a message naming the real problem.
            msg = (
                "Unable to reorder atoms or copy metadata for the structure "
                f"read from {full_filename}"
            )
            raise FileNotFoundError(msg) from err

    logger.debug(f"Successfully retrieved output atoms from {full_filename}")
    return atoms