Source code for autojob.harvest.harvesters.ddec6

"""DDEC6 charge analysis harvesting utilities.

This module defines the :func:`.get_ddec6_index_map` and
:func:`.harvest_ddec6_results` functions and the :class:`.DDEC6Analysis` class.
:func:`.get_ddec6_index_map` and :func:`.harvest_ddec6_results` can be used
to parse DDEC6 charge data. :class:`.DDEC6Analysis` is the data model for
DDEC6 charge analysis data.

Example:
    from pathlib import Path
    from autojob.harvest.harvesters.ddec6 import harvest_ddec6_results

    outputs = harvest_ddec6_results(Path.cwd())
"""

import json
import logging
import math
from pathlib import Path
from typing import Annotated
from typing import TypeVar
import warnings

import ase
from ase.geometry import geometry
import ase.io
from pydantic import PlainSerializer
from pymatgen.command_line.chargemol_caller import ChargemolAnalysis
from pymatgen.core.periodic_table import Element
from typing_extensions import TypedDict

from autojob import SETTINGS

logger = logging.getLogger(__name__)


DDEC_NET_ATOMIC_CHARGE_FILE = "DDEC6_even_tempered_net_atomic_charges.xyz"
CHGCAR = "CHGCAR"
POSCAR = "POSCAR"
POTCAR = "POTCAR"


logger = logging.getLogger(__name__)


_T = TypeVar("_T")

StringSerialized = Annotated[
    _T, PlainSerializer(lambda x: str(x), return_type=str)
]


[docs] class BondedToDict(TypedDict): """Bond order information between a primary atom and another atom. This dictionary assumes knowledge of the primary atom. Keys: index: The index of the non-primary atom. element: The element of the non-primary atom. bond_order: The order of the bond between the primary atom and the atom specified by the ``"index"`` and ``"element"`` keys. direction: The direction of the bond between the primary atom and the atom specified by the ``"index"`` and ``"element"`` keys. spin_polarization: The spin polarization of the bond between the primary atom and the atom specified by the ``"index"`` and ``"element"`` keys. """ index: int element: StringSerialized[Element] bond_order: float direction: tuple[float, float, float] spin_polarization: float
[docs] class BondOrders(TypedDict): """Bond order information between a "primary" atom and other atoms. Keys: element: The element of the "primary" atom to which the bond order information belongs. bonded_to: A list of dictionaries with bond order information. bond_order_sum: The sum of all bond orders between the primary atom and other atoms. """ element: StringSerialized[Element] bonded_to: list[BondedToDict] bond_order_sum: float
[docs] class DDEC6Analysis(TypedDict): """DDEC6 analysis data. Keys: partial_charges: A list of floats representing the partial charges. spin_moments: A list of floats representing the first spin moments. dipoles: A list of floats representing the dipoles. rsquared_moments: A list of floats representing the second spin moments. rcubed_moments: A list of floats representing the third spin moments. rfourth_moments: A list of floats representing the fourth spin moments. bond_order_dict: A dictionary mapping atomic indices to bond order dictionaries. Each bond order dictionary maps atomic indices to bond order information between other atoms. The keys are the indices of the "primary" atoms mentioned in the class docstrings of :class:`.BondedToDict` and :class:`.BondOrders`. """ partial_charges: list[float] spin_moments: list[float] dipoles: list[list[float]] rsquared_moments: list[float] rcubed_moments: list[float] rfourth_moments: list[float] bond_order_dict: dict[int, BondOrders]
[docs] def get_ddec6_index_map(src: str | Path, *, tol: float = 1e-3) -> list[int]: """Map DDEC6 structure indices to ASE structure indices. The ASE index of the atom at index i in the DDEC6 structure can be found as follows: index_map = get_ddec6_index_map(dir_name) index = index_map[i] Args: src: The directory containing a calculation. tol: The maximum allowed deviation in atomic positions in Angstroms. """ with Path(src, SETTINGS.INPUTS_FILE).open(mode="r", encoding="utf-8") as f: inputs = json.load(f) try: input_atoms_filename = inputs["task_inputs"]["atoms_filename"] except KeyError: input_atoms_filename = SETTINGS.INPUT_ATOMS_FILE ase_atoms = ase.io.read(Path(src, input_atoms_filename)) ase_atoms = ( ase_atoms if isinstance(ase_atoms, ase.Atoms) else ase_atoms[-1] ) ase_atoms.center() # type: ignore[no-untyped-call] xyz_file = Path(src, DDEC_NET_ATOMIC_CHARGE_FILE) ddec_atoms = ase.io.read(xyz_file) ddec_atoms = ( ddec_atoms if isinstance(ddec_atoms, ase.Atoms) else ddec_atoms[-1] ) ddec_atoms.cell = ase_atoms.cell ddec_atoms.center() # type: ignore[no-untyped-call] ddec6_index_map = [] # (i, j): distance between ith atom in ddec_atoms and jth atom in ase_atoms _, distances = geometry.get_distances( # type: ignore[no-untyped-call] ddec_atoms.positions, ase_atoms.positions, cell=ddec_atoms.cell, pbc=True, ) min_distances = [] for atom_distances in distances: min_distance = math.inf for j, distance in enumerate(atom_distances): if distance <= min_distance and j not in ddec6_index_map: closest_ase_atom_index = j min_distance = distance ddec6_index_map.append(closest_ase_atom_index) min_distances.append(min_distance) if len(ddec6_index_map) != len(ddec_atoms): msg = f"DDEC6 map is incomplete for calculation in directory: {src}" raise RuntimeError(msg) if any(d > tol for d in min_distances): max_distance = max(min_distances) msg = f"Atomic displacement exceeds tolerance: {max_distance} > {tol}" warnings.warn(msg, category=UserWarning, stacklevel=1) return ddec6_index_map
[docs] def harvest_ddec6_results(src: str | Path) -> DDEC6Analysis: """Harvest DDEC6 charge analysis results from a directory. Args: src: The directory of the completed calculation. Returns: A :class:`.DDEC6Analysis` dictionary. Warning: The success of this function depends on whether :func:`.get_ddec6_index_map` accurately maps the indices of the structure identified by the ``pymatgen`` ChargemolAnalysis runner. """ logger.debug(f"Loading DDEC6 data for {src!s}") analysis = ChargemolAnalysis(path=src, run_chargemol=False) ddec6_index_map = get_ddec6_index_map(src) charges = [0.0] * len(ddec6_index_map) spin_densities = [0.0] * len(ddec6_index_map) bond_orders: dict[int, BondOrders] = {} dipoles = [[0.0] * 3] * len(ddec6_index_map) rsquared_moments = [0.0] * len(ddec6_index_map) rcubed_moments = [0.0] * len(ddec6_index_map) rfourth_moments = [0.0] * len(ddec6_index_map) for i_ddec, i_ase in enumerate(ddec6_index_map): charges[i_ase] = analysis.ddec_charges[i_ddec] spin_densities[i_ase] = analysis.ddec_spin_moments[i_ddec] bond_orders[i_ase] = analysis.bond_order_dict[i_ddec] dipoles[i_ase] = analysis.dipoles[i_ddec] rsquared_moments[i_ase] = analysis.ddec_rsquared_moments[i_ddec] rcubed_moments[i_ase] = analysis.ddec_rcubed_moments[i_ddec] rfourth_moments[i_ase] = analysis.ddec_rfourth_moments[i_ddec] for bond_order in bond_orders.values(): for bonded_to in bond_order["bonded_to"]: # DDEC6 Data is 1-indexed bonded_to["index"] = ddec6_index_map[bonded_to["index"] - 1] ddec6_data = DDEC6Analysis( partial_charges=charges, spin_moments=spin_densities, dipoles=dipoles, rsquared_moments=rsquared_moments, rcubed_moments=rcubed_moments, rfourth_moments=rfourth_moments, bond_order_dict=bond_orders, ) logger.info(f"Successfully loaded DDEC6 data for {src!s}") return ddec6_data